1 package Molecule; 2 # 3 # $RCSfile: Molecule.pm,v $ 4 # $Date: 2008/04/25 00:00:46 $ 5 # $Revision: 1.31 $ 6 # 7 # Author: Manish Sud <msud@san.rr.com> 8 # 9 # Copyright (C) 2004-2008 Manish Sud. All rights reserved. 10 # 11 # This file is part of MayaChemTools. 12 # 13 # MayaChemTools is free software; you can redistribute it and/or modify it under 14 # the terms of the GNU Lesser General Public License as published by the Free 15 # Software Foundation; either version 3 of the License, or (at your option) any 16 # later version. 17 # 18 # MayaChemTools is distributed in the hope that it will be useful, but without 19 # any warranty; without even the implied warranty of merchantability of fitness 20 # for a particular purpose. See the GNU Lesser General Public License for more 21 # details. 22 # 23 # You should have received a copy of the GNU Lesser General Public License 24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or 25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, 26 # Boston, MA, 02111-1307, USA. 27 # 28 use 5.006; 29 use strict; 30 use Carp; 31 use Exporter; 32 use Scalar::Util (); 33 use Storable (); 34 use ObjectProperty; 35 use Graph; 36 use Atom; 37 use Bond; 38 use MolecularFormula; 39 use MathUtil; 40 41 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); 42 43 $VERSION = '1.00'; 44 @ISA = qw(Graph ObjectProperty Exporter); 45 @EXPORT = qw(IsMolecule); 46 @EXPORT_OK = qw(FormatElementalCompositionInformation); 47 48 %EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]); 49 50 # Setup class variables... 51 my($ClassName, $ObjectID); 52 _InitializeClass(); 53 54 # Overload Perl functions... 55 use overload '""' => 'StringifyMolecule'; 56 57 # Class constructor... 58 sub new { 59 my($Class, %NamesAndValues) = @_; 60 61 # Initialize object... 62 my $This = $Class->SUPER::new(); 63 bless $This, ref($Class) || $Class; 64 $This->_InitializeMolecule(); 65 66 if (keys %NamesAndValues) { $This->_InitializeMoleculeProperties(%NamesAndValues); } 67 68 return $This; 69 } 70 71 # Initialize object data... 72 sub _InitializeMolecule { 73 my($This) = @_; 74 my($ObjectID) = _GetNewObjectID(); 75 76 # All other property names and values along with all Set/Get<PropertyName> methods 77 # are implemented on-demand using ObjectProperty class. 78 $This->{ID} = $ObjectID; 79 $This->{Name} = "Molecule ${ObjectID}"; 80 } 81 82 # Initialize molecule properties... 83 sub _InitializeMoleculeProperties { 84 my($This, %NamesAndValues) = @_; 85 86 my($Name, $Value, $MethodName); 87 while (($Name, $Value) = each %NamesAndValues) { 88 $MethodName = "Set${Name}"; 89 $This->$MethodName($Value); 90 } 91 } 92 93 # Initialize class ... 94 sub _InitializeClass { 95 #Class name... 96 $ClassName = __PACKAGE__; 97 98 # ID to keep track of objects... 99 $ObjectID = 0; 100 } 101 102 # Setup an explicit SetID method to block setting of ID by AUTOLOAD function... 103 sub SetID { 104 my($This, $Value) = @_; 105 106 carp "Warning: ${ClassName}->SetID: Object ID can't be changed: it's used for internal tracking..."; 107 108 return $This; 109 } 110 111 # Add an atom... 112 sub AddAtom { 113 my($This, $Atom) = @_; 114 115 if (!defined $Atom) { 116 carp "Warning: ${ClassName}->AddAtom: No atom added: Atom must be specified..."; 117 return undef; 118 } 119 if ($This->HasAtom($Atom)) { 120 carp "Warning: ${ClassName}->AddAtom: No atom added: Atom already exists..."; 121 return undef; 122 } 123 return $This->_AddAtom($Atom); 124 } 125 126 # Add an atom... 127 sub _AddAtom { 128 my($This, $Atom) = @_; 129 130 # Assign atom to this molecule... 131 $Atom->SetProperty('Molecule', $This); 132 133 # Add it to the graph as a vertex... 134 my($AtomID); 135 $AtomID = $Atom->GetID(); 136 $This->AddVertex($AtomID); 137 $This->SetVertexProperty('Atom', $Atom, $AtomID); 138 139 return $This; 140 } 141 142 # Add atoms... 143 sub AddAtoms { 144 my($This, @Atoms) = @_; 145 146 if (!@Atoms) { 147 carp "Warning: ${ClassName}->AddAtoms: No atoms added: Atoms list must be specified..."; 148 return undef; 149 } 150 my($Atom); 151 for $Atom (@Atoms) { 152 $This->AddAtom($Atom); 153 } 154 return $This; 155 } 156 157 # Create an atom and add it to molecule... 158 sub NewAtom { 159 my($This, %NamesAndValues) = @_; 160 my($Atom); 161 162 $Atom = new Atom(%NamesAndValues); 163 $This->AddAtom($Atom); 164 165 return $Atom; 166 } 167 168 # Delete an atom... 169 sub DeleteAtom { 170 my($This, $Atom) = @_; 171 172 if (!defined $Atom) { 173 carp "Warning: ${ClassName}->DeleteAtom: No atom deleted: Atom must be specified..."; 174 return undef; 175 } 176 # Does the atom exist in molecule? 177 if (!$This->HasAtom($Atom)) { 178 carp "Warning: ${ClassName}->DeleteAtom: No atom deleted: Atom doesn't exist..."; 179 return undef; 180 } 181 return $This->_DeleteAtom($Atom); 182 } 183 184 # Delete atom... 185 sub _DeleteAtom { 186 my($This, $Atom) = @_; 187 188 my($AtomID); 189 $AtomID = $Atom->GetID(); 190 $This->DeleteVertex($AtomID); 191 192 return $This; 193 } 194 195 # Delete atoms... 196 sub DeleteAtoms { 197 my($This, @Atoms) = @_; 198 199 if (!@Atoms) { 200 carp "Warning: ${ClassName}->DeleteAtoms: No atoms added: Atoms list must be specified..."; 201 return undef; 202 } 203 my($Atom); 204 for $Atom (@Atoms) { 205 $This->DeleteAtom($Atom); 206 } 207 208 return $This; 209 } 210 211 # Is this atom present? 212 sub HasAtom { 213 my($This, $Atom) = @_; 214 215 if (!defined $Atom) { 216 return 0; 217 } 218 if (!$Atom->HasProperty('Molecule')) { 219 # It's not in molecule... 220 return 0; 221 } 222 my($AtomID); 223 $AtomID = $Atom->GetID(); 224 if (!$This->HasVertex($AtomID)) { 225 # It's not in molecule... 226 return 0; 227 } 228 my($Molecule); 229 $Molecule = $Atom->GetProperty('Molecule'); 230 231 return ($This->HasVertex($AtomID) && $This->GetID() == $Molecule->GetID()) ? 1 : 0; 232 } 233 234 # Return an array of atoms. In scalar context, it returns number of atoms. Additionally, 235 # atoms array can be filtered by any user specifiable Atom class method... 236 # 237 sub GetAtoms { 238 my($This, $AtomCheckMethodName) = @_; 239 my(@Atoms, @AtomIDs); 240 241 @Atoms = (); @AtomIDs = (); 242 243 @AtomIDs = $This->GetVertices(); 244 if (!@AtomIDs) { 245 return wantarray ? @Atoms : scalar @Atoms; 246 } 247 248 @Atoms = $This->GetVerticesProperty('Atom', @AtomIDs); 249 250 if (!defined $AtomCheckMethodName) { 251 return wantarray ? @Atoms : scalar @Atoms; 252 } 253 254 # Filter out atoms... 255 my($Atom, @FilteredAtoms); 256 @FilteredAtoms = (); 257 for $Atom (@Atoms) { 258 if ($Atom->$AtomCheckMethodName($Atom)) { 259 push @FilteredAtoms, $Atom; 260 } 261 } 262 return wantarray ? @FilteredAtoms : scalar @FilteredAtoms; 263 } 264 265 # Return an array of bonds. In scalar context, it returns number of bonds... 266 sub GetBonds { 267 my($This) = @_; 268 my(@Bonds, @EdgesAtomsIDs); 269 270 @Bonds = (); @EdgesAtomsIDs = (); 271 272 @EdgesAtomsIDs = $This->GetEdges(); 273 if (@EdgesAtomsIDs) { 274 @Bonds = $This->GetEdgesProperty('Bond', @EdgesAtomsIDs); 275 } 276 return wantarray ? @Bonds : scalar @Bonds; 277 } 278 279 # Get number of atoms in molecule... 280 sub GetNumOfAtoms { 281 my($This) = @_; 282 my($NumOfAtoms); 283 284 $NumOfAtoms = $This->GetAtoms(); 285 286 return $NumOfAtoms; 287 } 288 289 # Get number of bonds in molecule... 290 sub GetNumOfBonds { 291 my($This) = @_; 292 my($NumOfBonds); 293 294 $NumOfBonds = $This->GetBonds(); 295 296 return $NumOfBonds; 297 } 298 299 # Get number of heavy atoms in molecule... 300 sub GetNumOfHeavyAtoms { 301 my($This) = @_; 302 303 return $This->GetNumOfNonHydrogenAtoms(); 304 } 305 306 # Get number of non-hydrogen atoms in molecule... 307 sub GetNumOfNonHydrogenAtoms { 308 my($This) = @_; 309 my($NumOfNonHydrogenAtoms, $Atom, @Atoms); 310 311 @Atoms = $This->GetAtoms(); 312 $NumOfNonHydrogenAtoms = 0; 313 for $Atom (@Atoms) { 314 if (!$Atom->IsHydrogen()) { 315 $NumOfNonHydrogenAtoms++; 316 } 317 } 318 return $NumOfNonHydrogenAtoms; 319 } 320 321 # Get number of hydrogen atoms in molecule... 322 sub GetNumOfHydrogenAtoms { 323 my($This) = @_; 324 my($NumOfHydrogenAtoms, $Atom, @Atoms); 325 326 @Atoms = $This->GetAtoms(); 327 $NumOfHydrogenAtoms = 0; 328 for $Atom (@Atoms) { 329 if ($Atom->IsHydrogen()) { 330 $NumOfHydrogenAtoms++; 331 } 332 } 333 return $NumOfHydrogenAtoms; 334 } 335 336 # Get number of missing hydrogen atoms in molecule... 337 sub GetNumOfMissingHydrogenAtoms { 338 my($This) = @_; 339 my($NumOfMissingHydrogenAtoms, $Atom, @Atoms); 340 341 @Atoms = $This->GetAtoms(); 342 $NumOfMissingHydrogenAtoms = 0; 343 for $Atom (@Atoms) { 344 if (!$Atom->IsHydrogen()) { 345 $NumOfMissingHydrogenAtoms += $Atom->GetNumOfMissingHydrogens(); 346 } 347 } 348 return $NumOfMissingHydrogenAtoms; 349 } 350 351 # Add bond... 352 sub AddBond { 353 my($This, $Bond) = @_; 354 my($Atom1, $Atom2); 355 356 ($Atom1, $Atom2) = $Bond->GetAtoms(); 357 if (!(defined($Atom1) && defined($Atom2))) { 358 carp "Warning: ${ClassName}->AddBond: No bond added: Both atoms must be specified..."; 359 return undef; 360 } 361 if (!($This->HasAtom($Atom1) && $This->HasAtom($Atom2))) { 362 carp "Warning: ${ClassName}->AddBond: No bond added: Both atoms must be present..."; 363 return undef; 364 } 365 if ($This->HasBond($Bond)) { 366 carp "Warning: ${ClassName}->AddBond: No bond added: Bond already exists..."; 367 return undef; 368 } 369 return $This->_AddBond($Bond); 370 } 371 372 # Add bond... 373 sub _AddBond { 374 my($This, $Bond) = @_; 375 376 # Assign atom to this molecule... 377 $Bond->SetProperty('Molecule', $This); 378 379 # Add it to the graph as an edge... 380 my($Atom1, $Atom2, $AtomID1, $AtomID2); 381 ($Atom1, $Atom2) = $Bond->GetAtoms(); 382 $AtomID1 = $Atom1->GetID(); $AtomID2 = $Atom2->GetID(); 383 $This->AddEdge($AtomID1, $AtomID2); 384 $This->SetEdgeProperty('Bond', $Bond, $AtomID1, $AtomID2); 385 386 return $This; 387 } 388 389 # Add Bonds... 390 sub AddBonds { 391 my($This, @Bonds) = @_; 392 393 if (!@Bonds) { 394 carp "Warning: ${ClassName}->AddBonds: No bonds added: Bonds list must be specified..."; 395 return undef; 396 } 397 my($Bond); 398 for $Bond (@Bonds) { 399 $This->AddBond($Bond); 400 } 401 return $This; 402 } 403 404 # Create a bond and add it to molecule... 405 sub NewBond { 406 my($This, %NamesAndValues) = @_; 407 my($Bond); 408 409 $Bond = new Bond(%NamesAndValues); 410 $This->AddBond($Bond); 411 412 return $Bond; 413 } 414 415 # Delete a bond... 416 sub DeleteBond { 417 my($This, $Bond) = @_; 418 419 if (!defined $Bond) { 420 carp "Warning: ${ClassName}->DeleteBond: No bond deleted: Bond must be specified..."; 421 return undef; 422 } 423 # Does the bond exist in molecule? 424 if (!$This->HasBond($Bond)) { 425 carp "Warning: ${ClassName}->DeleteBond: No bond deleted: Bond doesn't exist..."; 426 return undef; 427 } 428 return $This->_DeleteBond($Bond); 429 } 430 431 # Delete bond... 432 sub _DeleteBond { 433 my($This, $Bond) = @_; 434 435 my($Atom1, $Atom2, $AtomID1, $AtomID2); 436 ($Atom1, $Atom2) = $Bond->GetAtoms(); 437 $AtomID1 = $Atom1->GetID(); $AtomID2 = $Atom2->GetID(); 438 $This->DeleteEdge($AtomID1, $AtomID2); 439 440 return $This; 441 } 442 443 # Delete bonds... 444 sub DeleteBonds { 445 my($This, @Bonds) = @_; 446 447 if (!@Bonds) { 448 carp "Warning: ${ClassName}->DeleteBonds: No bonds added: Bonds list must be specified..."; 449 return undef; 450 } 451 my($Bond); 452 for $Bond (@Bonds) { 453 $This->DeleteBond($Bond); 454 } 455 456 return $This; 457 } 458 459 # Has bond... 460 sub HasBond { 461 my($This, $Bond) = @_; 462 my($Atom1, $Atom2); 463 464 ($Atom1, $Atom2) = $Bond->GetAtoms(); 465 if (!($This->HasAtom($Atom1) && $This->HasAtom($Atom2))) { 466 return 0; 467 } 468 if (!$Bond->HasProperty('Molecule')) { 469 # It's not in molecule... 470 return 0; 471 } 472 my($AtomID1, $AtomID2, $Molecule); 473 $AtomID1 = $Atom1->GetID(); $AtomID2 = $Atom2->GetID(); 474 $Molecule = $Bond->GetMolecule(); 475 476 return ($This->HasEdge($AtomID1, $AtomID2) && $This->GetID() == $Molecule->GetID()) ? 1 : 0; 477 } 478 479 # Get atom neighbors... 480 sub _GetAtomNeighbors { 481 my($This, $Atom) = @_; 482 483 my($AtomID, @Atoms, @AtomIDs); 484 485 @Atoms = (); @AtomIDs = (); 486 $AtomID = $Atom->GetID(); 487 @AtomIDs = $This->GetNeighbors($AtomID); 488 if (@AtomIDs) { 489 @Atoms = $This->GetVerticesProperty('Atom', @AtomIDs); 490 } 491 return wantarray ? @Atoms : scalar @Atoms; 492 } 493 494 # Get atom bonds... 495 sub _GetAtomBonds { 496 my($This, $Atom) = @_; 497 my($AtomID, @AtomIDs, @Bonds); 498 499 @Bonds = (); @AtomIDs = (); 500 $AtomID = $Atom->GetID(); 501 @AtomIDs = $This->GetEdges($AtomID); 502 if (@AtomIDs) { 503 @Bonds = $This->GetEdgesProperty('Bond', @AtomIDs); 504 } 505 return wantarray ? @Bonds : scalar @Bonds; 506 } 507 508 # Get bond to specified atom... 509 sub _GetBondToAtom { 510 my($This, $Atom1, $Atom2) = @_; 511 my($AtomID1, $AtomID2); 512 513 $AtomID1 = $Atom1->GetID(); 514 $AtomID2 = $Atom2->GetID(); 515 516 return $This->GetEdgeProperty('Bond', $AtomID1, $AtomID2); 517 } 518 519 # Add hydrogens to each atoms in molecule and return total number of hydrogens added... 520 sub AddHydrogens { 521 my($This) = @_; 522 523 return $This->_AddHydrogens(); 524 } 525 526 # Add hydrogens to polar atoms (N, O, P, S) in molecule and return total number of hydrogens added... 527 sub AddPolarHydrogens { 528 my($This) = @_; 529 my($PolarHydrogensOnly) = 1; 530 531 return $This->_AddHydrogens($PolarHydrogensOnly); 532 } 533 534 # Add all the hydrogens or hydrogens for polar atoms only... 535 sub _AddHydrogens { 536 my($This, $PolarHydrogensOnly) = @_; 537 my($Atom, $NumOfHydrogensAdded, $HydrogenPositionsWarning, @Atoms); 538 539 carp "Warning: ${ClassName}->_AddHydrogens: The current release of MayaChemTools doesn't assign any hydrogen positions..."; 540 541 if (! defined $PolarHydrogensOnly) { 542 $PolarHydrogensOnly = 0; 543 } 544 545 $NumOfHydrogensAdded = 0; 546 @Atoms = $This->GetAtoms(); 547 $HydrogenPositionsWarning = 0; 548 549 ATOM: for $Atom (@Atoms) { 550 if ($PolarHydrogensOnly) { 551 if (!$Atom->IsPolarAtom()) { 552 next ATOM; 553 } 554 } 555 $NumOfHydrogensAdded += $Atom->AddHydrogens($HydrogenPositionsWarning); 556 } 557 return $NumOfHydrogensAdded; 558 } 559 560 # Delete all hydrogens atoms in molecule and return total number of hydrogens removed... 561 sub DeleteHydrogens { 562 my($This) = @_; 563 564 return $This->_DeleteHydrogens(); 565 } 566 567 # Delete hydrogens to polar atoms (N, O, P, S) in molecule and return total number of hydrogens removed... 568 sub DeletePolarHydrogens { 569 my($This) = @_; 570 my($PolarHydrogensOnly) = 1; 571 572 return $This->_DeleteHydrogens($PolarHydrogensOnly); 573 } 574 575 # Delete all hydrogens atoms in molecule and return total number of hydrogens removed... 576 sub _DeleteHydrogens { 577 my($This, $PolarHydrogensOnly) = @_; 578 my($Atom, $NumOfHydrogensRemoved, @Atoms); 579 580 if (! defined $PolarHydrogensOnly) { 581 $PolarHydrogensOnly = 0; 582 } 583 584 $NumOfHydrogensRemoved = 0; 585 @Atoms = $This->GetAtoms(); 586 587 ATOM: for $Atom (@Atoms) { 588 if ($PolarHydrogensOnly) { 589 if (!$Atom->IsPolarHydrogen()) { 590 next ATOM; 591 } 592 } 593 elsif (!$Atom->IsHydrogen()) { 594 next ATOM; 595 } 596 $This->_DeleteAtom($Atom); 597 $NumOfHydrogensRemoved++; 598 } 599 return $NumOfHydrogensRemoved; 600 } 601 602 # Get molecular weight by summing up atomic weights of all the atoms... 603 sub GetMolecularWeight { 604 my($This) = @_; 605 my($MolecularWeight, $AtomicWeight, @Atoms, $Atom); 606 607 $MolecularWeight = 0; 608 @Atoms = $This->GetAtoms(); 609 for $Atom (@Atoms) { 610 $AtomicWeight = $Atom->GetAtomicWeight(); 611 if (defined $AtomicWeight) { 612 $MolecularWeight += $AtomicWeight; 613 } 614 } 615 616 return $MolecularWeight; 617 } 618 619 # Get exact mass by summing up exact masses of all the atoms... 620 sub GetExactMass { 621 my($This) = @_; 622 my($ExactMass, $AtomicMass, @Atoms, $Atom); 623 624 $ExactMass = 0; 625 @Atoms = $This->GetAtoms(); 626 for $Atom (@Atoms) { 627 $AtomicMass = $Atom->GetExactMass(); 628 if (defined $AtomicMass) { 629 $ExactMass += $AtomicMass; 630 } 631 } 632 633 return $ExactMass; 634 } 635 636 # Get net formal charge on the molecule using one of the following two methods: 637 # . Using explicitly set FormalCharge property 638 # . Adding up formal charge on each atom in the molecule 639 # 640 # Caveats: 641 # . FormalCharge is different from Charge property of the molecule which corresponds to 642 # sum of partial atomic charges explicitly set for each atom using a specific methodology. 643 # 644 sub GetFormalCharge { 645 my($This) = @_; 646 647 # Is FormalCharge property explicitly set? 648 if ($This->HasProperty('FormalCharge')) { 649 return $This->GetProperty('FormalCharge'); 650 } 651 my($FormalCharge, $AtomicFormalCharge, @Atoms, $Atom); 652 653 $FormalCharge = 0; 654 @Atoms = $This->GetAtoms(); 655 for $Atom (@Atoms) { 656 $AtomicFormalCharge = $Atom->GetFormalCharge(); 657 if (defined $AtomicFormalCharge) { 658 $FormalCharge += $AtomicFormalCharge; 659 } 660 } 661 return $FormalCharge; 662 } 663 664 # Get net charge on the molecule using one of the following two methods: 665 # . Using explicitly set Charge property 666 # . Adding up charge on each atom in the molecule 667 # 668 # Caveats: 669 # . FormalCharge is different from Charge property of the molecule which corresponds to 670 # sum of partial atomic charges explicitly set for each atom using a specific methodology. 671 # 672 sub GetCharge { 673 my($This) = @_; 674 675 # Is Charge property explicitly set? 676 if ($This->HasProperty('Charge')) { 677 return $This->GetProperty('Charge'); 678 } 679 my($Charge, $AtomicCharge, @Atoms, $Atom); 680 681 $Charge = 0; 682 @Atoms = $This->GetAtoms(); 683 for $Atom (@Atoms) { 684 $AtomicCharge = $Atom->GetCharge(); 685 if (defined $AtomicCharge) { 686 $Charge += $AtomicCharge; 687 } 688 } 689 return $Charge; 690 } 691 692 # Get net SpinMultiplicity on the molecule using one of the following two methods: 693 # . Using explicitly set SpinMultiplicity property 694 # . Adding up SpinMultiplicity on each atom in the molecule 695 # 696 # 697 sub GetSpinMultiplicity { 698 my($This) = @_; 699 700 # Is SpinMultiplicity property explicitly set? 701 if ($This->HasProperty('SpinMultiplicity')) { 702 return $This->GetProperty('SpinMultiplicity'); 703 } 704 my($AtomicSpinMultiplicity, $SpinMultiplicity, @Atoms, $Atom); 705 706 $SpinMultiplicity = 0; 707 @Atoms = $This->GetAtoms(); 708 for $Atom (@Atoms) { 709 $AtomicSpinMultiplicity = $Atom->GetSpinMultiplicity(); 710 if (defined $AtomicSpinMultiplicity) { 711 $SpinMultiplicity += $AtomicSpinMultiplicity; 712 } 713 } 714 return $SpinMultiplicity; 715 } 716 717 # Get molecular formula by collecting information about all atoms in the molecule and 718 # composing the formula using Hills ordering system: 719 # . C shows up first and H follows assuming C is present 720 # . All other standard elements are sorted alphanumerically 721 # . All other non-stanard atom symbols are also sorted alphanumerically and follow standard elements 722 # 723 # Caveats: 724 # . By default, missing hydrogens and nonelements are also included 725 # . Elements for disconnected fragments are combined into the same formula 726 # 727 sub GetMolecularFormula { 728 my($This, $IncludeMissingHydrogens, $IncludeNonElements) = @_; 729 my($MolecularFormula, $AtomSymbol, $ElementsCountRef, $NonElementsCountRef); 730 731 $IncludeMissingHydrogens = defined($IncludeMissingHydrogens) ? $IncludeMissingHydrogens : 1; 732 $IncludeNonElements = defined($IncludeNonElements) ? $IncludeNonElements : 1; 733 734 # Get elements count and setup molecular formula... 735 ($ElementsCountRef, $NonElementsCountRef) = $This->GetElementsAndNonElements($IncludeMissingHydrogens); 736 $MolecularFormula = ''; 737 738 # Count C and H first... 739 if (exists $ElementsCountRef->{C} ) { 740 $MolecularFormula .= 'C' . ($ElementsCountRef->{C} > 1 ? $ElementsCountRef->{C} : ''); 741 delete $ElementsCountRef->{C}; 742 743 if (exists $ElementsCountRef->{H} ) { 744 $MolecularFormula .= 'H' . ($ElementsCountRef->{H} > 1 ? $ElementsCountRef->{H} : ''); 745 delete $ElementsCountRef->{H}; 746 } 747 } 748 749 # Sort elements... 750 for $AtomSymbol (sort {$a cmp $b} keys %{$ElementsCountRef}) { 751 $MolecularFormula .= $AtomSymbol . ($ElementsCountRef->{$AtomSymbol} > 1 ? $ElementsCountRef->{$AtomSymbol} : ''); 752 } 753 if (!$IncludeNonElements) { 754 return $MolecularFormula; 755 } 756 757 # Sort non-elements... 758 for $AtomSymbol (sort {$a cmp $b} keys %{$NonElementsCountRef}) { 759 $MolecularFormula .= $AtomSymbol . ($NonElementsCountRef->{$AtomSymbol} > 1 ? $NonElementsCountRef->{$AtomSymbol} : ''); 760 } 761 762 # Check formal charge... 763 my($FormalCharge); 764 $FormalCharge = $This->GetFormalCharge(); 765 if ($FormalCharge) { 766 # Setup formal charge string... 767 my($FormalChargeString); 768 if ($FormalCharge == 1 ) { 769 $FormalChargeString = "+"; 770 } 771 elsif ($FormalCharge == -1 ) { 772 $FormalChargeString = "-"; 773 } 774 else { 775 $FormalChargeString = ($FormalCharge > 0) ? ("+" . abs($FormalCharge)) : ("-" . abs($FormalCharge)); 776 } 777 $MolecularFormula = "${MolecularFormula}${FormalChargeString}"; 778 } 779 780 return $MolecularFormula; 781 } 782 783 # Count elements and non-elements in molecule and return references to hashes 784 # containing count of elements and non-elements respectively. By default, missing 785 # hydrogens are not added to the element hash. 786 # 787 # 788 sub GetElementsAndNonElements { 789 my($This, $IncludeMissingHydrogens) = @_; 790 my($Atom, $AtomicNumber, $AtomSymbol, $NumOfMissingHydrogens, @Atoms, %ElementsCount, %NonElementsCount); 791 792 $IncludeMissingHydrogens = defined($IncludeMissingHydrogens) ? $IncludeMissingHydrogens : 0; 793 794 %ElementsCount = (); %NonElementsCount = (); 795 $NumOfMissingHydrogens = 0; 796 797 # Count elements and non elements... 798 @Atoms = $This->GetAtoms(); 799 for $Atom (@Atoms) { 800 $AtomicNumber = $Atom->GetAtomicNumber(); 801 $AtomSymbol = $Atom->GetAtomSymbol(); 802 if ($AtomicNumber) { 803 if (exists $ElementsCount{$AtomSymbol}) { 804 $ElementsCount{$AtomSymbol} += 1; 805 } 806 else { 807 $ElementsCount{$AtomSymbol} = 1; 808 } 809 if ($IncludeMissingHydrogens) { 810 if (!$Atom->IsHydrogen()) { 811 $NumOfMissingHydrogens += $Atom->GetNumOfMissingHydrogens(); 812 } 813 } 814 } 815 else { 816 if (exists $NonElementsCount{$AtomSymbol}) { 817 $NonElementsCount{$AtomSymbol} += 1; 818 } 819 else { 820 $NonElementsCount{$AtomSymbol} = 1; 821 } 822 } 823 } 824 if ($IncludeMissingHydrogens && $NumOfMissingHydrogens) { 825 $AtomSymbol = 'H'; 826 if (exists $ElementsCount{$AtomSymbol}) { 827 $ElementsCount{$AtomSymbol} += $NumOfMissingHydrogens; 828 } 829 else { 830 $ElementsCount{$AtomSymbol} = $NumOfMissingHydrogens; 831 } 832 } 833 834 return (\%ElementsCount, \%NonElementsCount); 835 } 836 837 # Calculate elemental composition and return reference to arrays 838 # containing elements and their percent composition. 839 # 840 # Caveats: 841 # . By default, missing hydrogens are included 842 # . Non elemnents are ignored 843 # . Mass number are ignored 844 # 845 sub GetElementalComposition { 846 my($This, $IncludeMissingHydrogens) = @_; 847 my($MolecularFormula, $IncludeNonElements, $ElementsCountRef, $NonElementsCountRef, $ElementsRef, $ElementsCompositionRef); 848 849 $IncludeMissingHydrogens = defined($IncludeMissingHydrogens) ? $IncludeMissingHydrogens : 1; 850 851 $IncludeNonElements = 0; 852 ($ElementsCountRef, $NonElementsCountRef) = $This->GetElementsAndNonElements($IncludeMissingHydrogens); 853 if (keys %{$NonElementsCountRef}) { 854 my($NonElementSymbols); 855 $NonElementSymbols = join ",", sort keys %{$NonElementsCountRef}; 856 carp "Warning: ${ClassName}->GetElementalComposition: Ignoring atoms with non-elemental atom symbol(s): $NonElementSymbols ..."; 857 } 858 859 $MolecularFormula = $This->GetMolecularFormula($IncludeMissingHydrogens, $IncludeNonElements); 860 861 ($ElementsRef, $ElementsCompositionRef) = MolecularFormula::CalculateElementalComposition($MolecularFormula); 862 863 return ($ElementsRef, $ElementsCompositionRef); 864 } 865 866 # Using refernece to element and its composition arrays, format composition information 867 # as: Element: Composition;... 868 # 869 sub FormatElementalCompositionInformation { 870 my($FirstParameter, $SecondParameter, $ThirdParameter, $FourthParameter) = @_; 871 my($This, $ElementsRef, $ElementCompositionRef, $Precision); 872 873 if (_IsMolecule($FirstParameter)) { 874 ($This, $ElementsRef, $ElementCompositionRef, $Precision) = ($FirstParameter, $SecondParameter, $ThirdParameter, $FourthParameter); 875 } 876 else { 877 ($ElementsRef, $ElementCompositionRef, $Precision) = ($FirstParameter, $SecondParameter, $ThirdParameter); 878 } 879 my($FormattedInfo) = ''; 880 881 if (!(defined($ElementsRef) && @{$ElementsRef})) { 882 carp "Warning: ${ClassName}->FormatElementalCompositionInformation: Elements list is not defined or empty..."; 883 return undef; 884 } 885 if (!(defined($ElementCompositionRef) && @{$ElementCompositionRef})) { 886 carp "Warning: ${ClassName}->FormatElementalCompositionInformation: Elements composition list is not defined or empty..."; 887 return undef; 888 } 889 890 if (!defined $Precision) { 891 $Precision = 2; 892 } 893 894 $FormattedInfo = MolecularFormula::FormatCompositionInfomation($ElementsRef, $ElementCompositionRef, $Precision); 895 896 return $FormattedInfo; 897 } 898 899 # Copy molecule and its associated data using Storable::dclone and update: 900 # 901 # o Atom references corresponding atoms and bonds objects 902 # o Bond object references 903 # 904 # Object IDs for Atoms and Bonds don't get changed. So there is no need to clear 905 # up any exisiting ring data attached to molecule via graph as vertex IDs. 906 # 907 sub Copy { 908 my($This) = @_; 909 my($NewMolecule, $Atom, $NewAtom, $AtomID, @Atoms, @AtomIDs, %AtomsIDsToNewAtoms); 910 911 $NewMolecule = Storable::dclone($This); 912 913 # Update atom references stored as vertex property... 914 915 @Atoms = (); @AtomIDs = (); 916 %AtomsIDsToNewAtoms = (); 917 918 @AtomIDs = $This->GetVertices(); 919 if (@AtomIDs) { 920 @Atoms = $This->GetVerticesProperty('Atom', @AtomIDs); 921 } 922 923 for $Atom (@Atoms) { 924 $AtomID = $Atom->GetID(); 925 926 # Setup a reference to copied atom object... 927 $NewAtom = $Atom->Copy(); 928 $AtomsIDsToNewAtoms{$AtomID} = $NewAtom; 929 930 # Update atom reference to new atom object... 931 $NewMolecule->UpdateVertexProperty('Atom', $NewAtom, $AtomID); 932 } 933 934 # Update bond object and bond atom references stored as edge property... 935 my($Index, $AtomID1, $AtomID2, $Bond, $NewBond, $NewAtom1, $NewAtom2, @EdgesAtomsIDs); 936 @EdgesAtomsIDs = (); 937 @EdgesAtomsIDs = $This->GetEdges(); 938 for ($Index = 0; $Index < $#EdgesAtomsIDs; $Index += 2) { 939 $AtomID1 = $EdgesAtomsIDs[$Index]; $AtomID2 = $EdgesAtomsIDs[$Index + 1]; 940 941 # Get reference to bond object... 942 $Bond = $This->GetEdgeProperty('Bond', $AtomID1, $AtomID2); 943 944 # Make a new bond object and update its atom references... 945 $NewBond = $Bond->Copy(); 946 $NewAtom1 = $AtomsIDsToNewAtoms{$AtomID1}; 947 $NewAtom2 = $AtomsIDsToNewAtoms{$AtomID2}; 948 $NewBond->SetAtoms($NewAtom1, $NewAtom2); 949 950 # Update bond object reference in the new molecule... 951 $NewMolecule->UpdateEdgeProperty('Bond', $NewBond, $AtomID1, $AtomID2); 952 } 953 954 return $NewMolecule; 955 } 956 957 # Get number of connected components... 958 # 959 sub GetNumOfConnectedComponents { 960 my($This) = @_; 961 my($NumOfComponents); 962 963 $NumOfComponents = $This->GetConnectedComponentsVertices(); 964 965 return $NumOfComponents; 966 } 967 968 # Return a reference to an array containing molecules corresponding 969 # to connected components sorted in decreasing order of component size... 970 # 971 sub GetConnectedComponents { 972 my($This) = @_; 973 my($Index, @ComponentMolecules, @ConnectedComponents); 974 975 @ConnectedComponents = (); 976 @ConnectedComponents = $This->GetConnectedComponentsVertices(); 977 @ComponentMolecules = (); 978 979 for $Index (0 .. $#ConnectedComponents) { 980 push @ComponentMolecules, $This->_GetConnectedComponent(\@ConnectedComponents, $Index); 981 } 982 return @ComponentMolecules; 983 } 984 985 # Return a reference to largest connected component as a molecule object... 986 # 987 sub GetLargestConnectedComponent { 988 my($This) = @_; 989 my($LargestComponentIndex, @ConnectedComponents); 990 991 $LargestComponentIndex = 0; 992 @ConnectedComponents = (); 993 @ConnectedComponents = $This->GetConnectedComponentsVertices(); 994 995 return $This->_GetConnectedComponent(\@ConnectedComponents, $LargestComponentIndex); 996 } 997 998 # Return connected component as a molecule... 999 # 1000 sub _GetConnectedComponent { 1001 my($This, $ConnectedComponentsRef, $ComponentIndex) = @_; 1002 my($ComponentMolecule); 1003 1004 # Copy existing molecule... 1005 $ComponentMolecule = $This->Copy(); 1006 1007 # Delete all atoms besides the ones in specified component... 1008 $ComponentMolecule->_DeleteConnectedComponents($ConnectedComponentsRef, $ComponentIndex); 1009 1010 # Clear any deteced rings... 1011 if ($ComponentMolecule->HasRings()) { 1012 $ComponentMolecule->ClearRings(); 1013 } 1014 return $ComponentMolecule; 1015 } 1016 1017 # Delete atoms corresponding to all connected components except the one specified... 1018 # 1019 sub _DeleteConnectedComponents { 1020 my($This, $ConnectedComponentsRef, $KeepComponentIndex) = @_; 1021 my($Index, $AtomID); 1022 1023 INDEX: for $Index (0 .. $#{$ConnectedComponentsRef}) { 1024 if ($Index == $KeepComponentIndex) { 1025 next INDEX; 1026 } 1027 for $AtomID (@{$ConnectedComponentsRef->[$Index]}) { 1028 $This->DeleteVertex($AtomID); 1029 } 1030 } 1031 return $This; 1032 } 1033 1034 # Return an array containing references to atom arrays corresponding to atoms of 1035 # connected components sorted in order of their decreasing size... 1036 # 1037 sub GetConnectedComponentsAtoms { 1038 my($This) = @_; 1039 my($Index, @ComponentsAtoms, @ConnectedComponents); 1040 1041 @ConnectedComponents = (); 1042 @ConnectedComponents = $This->GetConnectedComponentsVertices(); 1043 1044 @ComponentsAtoms = (); 1045 for $Index (0 .. $#ConnectedComponents) { 1046 my(@ComponentAtoms); 1047 1048 @ComponentAtoms = (); 1049 @ComponentAtoms = $This->_GetConnectedComponentAtoms(\@ConnectedComponents, $Index); 1050 push @ComponentsAtoms, \@ComponentAtoms; 1051 } 1052 return @ComponentsAtoms; 1053 } 1054 1055 # Return an array containing atoms correspondig to largest connected component... 1056 # 1057 sub GetLargestConnectedComponentAtoms { 1058 my($This) = @_; 1059 my($LargestComponentIndex, @ConnectedComponents); 1060 1061 $LargestComponentIndex = 0; 1062 @ConnectedComponents = (); 1063 @ConnectedComponents = $This->GetConnectedComponentsVertices(); 1064 1065 return $This->_GetConnectedComponentAtoms(\@ConnectedComponents, $LargestComponentIndex); 1066 } 1067 1068 # Return an array containing atoms corresponding to specified connected component... 1069 # 1070 sub _GetConnectedComponentAtoms { 1071 my($This, $ConnectedComponentsRef, $ComponentIndex) = @_; 1072 my($AtomID, @AtomIDs, @ComponentAtoms); 1073 1074 @ComponentAtoms = (); 1075 @AtomIDs = (); 1076 1077 for $AtomID (@{$ConnectedComponentsRef->[$ComponentIndex]}) { 1078 push @AtomIDs, $AtomID; 1079 } 1080 @ComponentAtoms = $This->_GetAtomsFromAtomIDs(@AtomIDs); 1081 1082 return @ComponentAtoms; 1083 } 1084 1085 # Except for the largest connected component, delete atoms corresponding to all other 1086 # connected components... 1087 # 1088 sub KeepLargestComponent { 1089 my($This) = @_; 1090 my($LargestComponentIndex, @ConnectedComponents); 1091 1092 @ConnectedComponents = (); 1093 @ConnectedComponents = $This->GetConnectedComponentsVertices(); 1094 if (@ConnectedComponents == 1) { 1095 return $This; 1096 } 1097 $LargestComponentIndex = 0; 1098 $This->_DeleteConnectedComponents(\@ConnectedComponents, $LargestComponentIndex); 1099 1100 # Clear any deteced rings... 1101 if ($This->HasRings()) { 1102 $This->ClearRings(); 1103 } 1104 1105 return $This; 1106 } 1107 1108 # Get an array of topologically sorted atoms starting from a specified atom or 1109 # an arbitrary atom in the molecule... 1110 # 1111 sub GetTopologicallySortedAtoms { 1112 my($This, $StartAtom) = @_; 1113 my(@SortedAtoms); 1114 1115 @SortedAtoms = (); 1116 if (defined($StartAtom) && !$This->HasAtom($StartAtom)) { 1117 carp "Warning: ${ClassName}->_GetTopologicallySortedAtoms: No atoms retrieved: Start atom doesn't exist..."; 1118 return @SortedAtoms; 1119 } 1120 my($StartAtomID, @AtomIDs); 1121 1122 @AtomIDs = (); 1123 $StartAtomID = defined($StartAtom) ? $StartAtom->GetID() : undef; 1124 1125 @AtomIDs = $This->GetTopologicallySortedVertices($StartAtomID); 1126 @SortedAtoms = $This->_GetAtomsFromAtomIDs(@AtomIDs); 1127 1128 return @SortedAtoms; 1129 } 1130 1131 # Detect rings in molecule... 1132 # 1133 sub DetectRings { 1134 my($This) = @_; 1135 1136 # Use graph method to detect all cycles and associate 'em to graph as graph 1137 # and vertex properties... 1138 $This->DetectCycles(); 1139 1140 return $This; 1141 } 1142 1143 # Clear rings in molecule... 1144 # 1145 sub ClearRings { 1146 my($This) = @_; 1147 1148 # Use graph method to clear all cycles... 1149 $This->ClearCycles(); 1150 1151 return $This; 1152 } 1153 1154 1155 # Setup rings type paths to use during all ring related methods. Possible values: 1156 # Independent or All. Default is to use Independent rings. 1157 # 1158 sub SetActiveRings { 1159 my($This, $RingsType) = @_; 1160 1161 if (!defined $This->SetActiveCyclicPaths(