MayaChemTools

   1 package Molecule;
   2 #
   3 # $RCSfile: Molecule.pm,v $
   4 # $Date: 2008/04/25 00:00:46 $
   5 # $Revision: 1.31 $
   6 #
   7 # Author: Manish Sud <msud@san.rr.com>
   8 #
   9 # Copyright (C) 2004-2008 Manish Sud. All rights reserved.
  10 #
  11 # This file is part of MayaChemTools.
  12 #
  13 # MayaChemTools is free software; you can redistribute it and/or modify it under
  14 # the terms of the GNU Lesser General Public License as published by the Free
  15 # Software Foundation; either version 3 of the License, or (at your option) any
  16 # later version.
  17 #
  18 # MayaChemTools is distributed in the hope that it will be useful, but without
  19 # any warranty; without even the implied warranty of merchantability of fitness
  20 # for a particular purpose.  See the GNU Lesser General Public License for more
  21 # details.
  22 #
  23 # You should have received a copy of the GNU Lesser General Public License
  24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  26 # Boston, MA, 02111-1307, USA.
  27 #
  28 use 5.006;
  29 use strict;
  30 use Carp;
  31 use Exporter;
  32 use Scalar::Util ();
  33 use Storable ();
  34 use ObjectProperty;
  35 use Graph;
  36 use Atom;
  37 use Bond;
  38 use MolecularFormula;
  39 use MathUtil;
  40 
  41 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
  42 
  43 $VERSION = '1.00';
  44 @ISA = qw(Graph ObjectProperty Exporter);
  45 @EXPORT = qw(IsMolecule);
  46 @EXPORT_OK = qw(FormatElementalCompositionInformation);
  47 
  48 %EXPORT_TAGS = (all  => [@EXPORT, @EXPORT_OK]);
  49 
  50 # Setup class variables...
  51 my($ClassName, $ObjectID);
  52 _InitializeClass();
  53 
  54 # Overload Perl functions...
  55 use overload '""' => 'StringifyMolecule';
  56 
  57 # Class constructor...
  58 sub new {
  59   my($Class, %NamesAndValues) = @_;
  60 
  61   # Initialize object...
  62   my $This = $Class->SUPER::new();
  63   bless $This, ref($Class) || $Class;
  64   $This->_InitializeMolecule();
  65 
  66   if (keys %NamesAndValues) { $This->_InitializeMoleculeProperties(%NamesAndValues); }
  67 
  68   return $This;
  69 }
  70 
  71 # Initialize object data...
  72 sub _InitializeMolecule {
  73   my($This) = @_;
  74   my($ObjectID) = _GetNewObjectID();
  75 
  76   # All other property names and values along with all Set/Get<PropertyName> methods
  77   # are implemented on-demand using ObjectProperty class.
  78   $This->{ID} = $ObjectID;
  79   $This->{Name} = "Molecule ${ObjectID}";
  80 }
  81 
  82 # Initialize molecule properties...
  83 sub _InitializeMoleculeProperties {
  84   my($This, %NamesAndValues) = @_;
  85 
  86   my($Name, $Value, $MethodName);
  87   while (($Name, $Value) = each  %NamesAndValues) {
  88     $MethodName = "Set${Name}";
  89     $This->$MethodName($Value);
  90   }
  91 }
  92 
  93 # Initialize class ...
  94 sub _InitializeClass {
  95   #Class name...
  96   $ClassName = __PACKAGE__;
  97 
  98   # ID to keep track of objects...
  99   $ObjectID = 0;
 100 }
 101 
 102 # Setup an explicit SetID method to block setting of ID by AUTOLOAD function...
 103 sub SetID {
 104   my($This, $Value) = @_;
 105 
 106   carp "Warning: ${ClassName}->SetID: Object ID can't be changed: it's used for internal tracking...";
 107 
 108   return $This;
 109 }
 110 
 111 # Add an atom...
 112 sub AddAtom {
 113   my($This, $Atom) = @_;
 114 
 115   if (!defined $Atom) {
 116     carp "Warning: ${ClassName}->AddAtom: No atom added: Atom must be specified...";
 117     return undef;
 118   }
 119   if ($This->HasAtom($Atom)) {
 120     carp "Warning: ${ClassName}->AddAtom: No atom added: Atom already exists...";
 121     return undef;
 122   }
 123   return $This->_AddAtom($Atom);
 124 }
 125 
 126 # Add an atom...
 127 sub _AddAtom {
 128   my($This, $Atom) = @_;
 129 
 130   # Assign atom to this molecule...
 131   $Atom->SetProperty('Molecule', $This);
 132 
 133   # Add it to the graph as a vertex...
 134   my($AtomID);
 135   $AtomID = $Atom->GetID();
 136   $This->AddVertex($AtomID);
 137   $This->SetVertexProperty('Atom', $Atom, $AtomID);
 138 
 139   return $This;
 140 }
 141 
 142 # Add atoms...
 143 sub AddAtoms {
 144   my($This, @Atoms) = @_;
 145 
 146   if (!@Atoms) {
 147     carp "Warning: ${ClassName}->AddAtoms: No atoms added: Atoms list must be specified...";
 148     return undef;
 149   }
 150   my($Atom);
 151   for $Atom (@Atoms) {
 152     $This->AddAtom($Atom);
 153   }
 154   return $This;
 155 }
 156 
 157 # Create an atom and add it to molecule...
 158 sub NewAtom {
 159   my($This, %NamesAndValues) = @_;
 160   my($Atom);
 161 
 162   $Atom = new Atom(%NamesAndValues);
 163   $This->AddAtom($Atom);
 164 
 165   return $Atom;
 166 }
 167 
 168 # Delete an atom...
 169 sub DeleteAtom {
 170   my($This, $Atom) = @_;
 171 
 172   if (!defined $Atom) {
 173     carp "Warning: ${ClassName}->DeleteAtom: No atom deleted: Atom must be specified...";
 174     return undef;
 175   }
 176   # Does the atom exist in  molecule?
 177   if (!$This->HasAtom($Atom)) {
 178     carp "Warning: ${ClassName}->DeleteAtom: No atom deleted: Atom doesn't exist...";
 179     return undef;
 180   }
 181   return $This->_DeleteAtom($Atom);
 182 }
 183 
 184 # Delete atom...
 185 sub _DeleteAtom {
 186   my($This, $Atom) = @_;
 187 
 188   my($AtomID);
 189   $AtomID = $Atom->GetID();
 190   $This->DeleteVertex($AtomID);
 191 
 192   return $This;
 193 }
 194 
 195 # Delete atoms...
 196 sub DeleteAtoms {
 197   my($This, @Atoms) = @_;
 198 
 199   if (!@Atoms) {
 200     carp "Warning: ${ClassName}->DeleteAtoms: No atoms added: Atoms list must be specified...";
 201     return undef;
 202   }
 203   my($Atom);
 204   for $Atom (@Atoms) {
 205     $This->DeleteAtom($Atom);
 206   }
 207 
 208   return $This;
 209 }
 210 
 211 # Is this atom present?
 212 sub HasAtom {
 213   my($This, $Atom) = @_;
 214 
 215   if (!defined $Atom) {
 216     return 0;
 217   }
 218   if (!$Atom->HasProperty('Molecule')) {
 219     # It's not in molecule...
 220     return 0;
 221   }
 222   my($AtomID);
 223   $AtomID = $Atom->GetID();
 224   if (!$This->HasVertex($AtomID)) {
 225     # It's not in molecule...
 226     return 0;
 227   }
 228   my($Molecule);
 229   $Molecule = $Atom->GetProperty('Molecule');
 230 
 231   return ($This->HasVertex($AtomID) && $This->GetID() == $Molecule->GetID()) ? 1 : 0;
 232 }
 233 
 234 # Return an array of atoms. In scalar context,  it returns number of atoms. Additionally,
 235 # atoms array can be filtered by any user specifiable Atom class method...
 236 #
 237 sub GetAtoms {
 238   my($This, $AtomCheckMethodName) = @_;
 239   my(@Atoms, @AtomIDs);
 240 
 241   @Atoms = (); @AtomIDs = ();
 242 
 243   @AtomIDs = $This->GetVertices();
 244   if (!@AtomIDs) {
 245     return wantarray ? @Atoms : scalar @Atoms;
 246   }
 247 
 248   @Atoms = $This->GetVerticesProperty('Atom', @AtomIDs);
 249 
 250   if (!defined $AtomCheckMethodName) {
 251     return wantarray ? @Atoms : scalar @Atoms;
 252   }
 253 
 254   # Filter out atoms...
 255   my($Atom, @FilteredAtoms);
 256   @FilteredAtoms = ();
 257   for $Atom (@Atoms) {
 258     if ($Atom->$AtomCheckMethodName($Atom)) {
 259       push @FilteredAtoms, $Atom;
 260     }
 261   }
 262   return wantarray ? @FilteredAtoms : scalar @FilteredAtoms;
 263 }
 264 
 265 # Return an array of bonds. In scalar context, it returns number of bonds...
 266 sub GetBonds {
 267   my($This) = @_;
 268   my(@Bonds, @EdgesAtomsIDs);
 269 
 270   @Bonds = (); @EdgesAtomsIDs = ();
 271 
 272   @EdgesAtomsIDs = $This->GetEdges();
 273   if (@EdgesAtomsIDs) {
 274     @Bonds = $This->GetEdgesProperty('Bond', @EdgesAtomsIDs);
 275   }
 276   return wantarray ? @Bonds : scalar @Bonds;
 277 }
 278 
 279 # Get number of atoms in molecule...
 280 sub GetNumOfAtoms {
 281   my($This) = @_;
 282   my($NumOfAtoms);
 283 
 284   $NumOfAtoms = $This->GetAtoms();
 285 
 286   return $NumOfAtoms;
 287 }
 288 
 289 # Get number of bonds in molecule...
 290 sub GetNumOfBonds {
 291   my($This) = @_;
 292   my($NumOfBonds);
 293 
 294   $NumOfBonds = $This->GetBonds();
 295 
 296   return $NumOfBonds;
 297 }
 298 
 299 # Get number of heavy atoms in molecule...
 300 sub GetNumOfHeavyAtoms {
 301   my($This) = @_;
 302 
 303   return $This->GetNumOfNonHydrogenAtoms();
 304 }
 305 
 306 # Get number of non-hydrogen atoms in molecule...
 307 sub GetNumOfNonHydrogenAtoms {
 308   my($This) = @_;
 309   my($NumOfNonHydrogenAtoms, $Atom, @Atoms);
 310 
 311   @Atoms = $This->GetAtoms();
 312   $NumOfNonHydrogenAtoms = 0;
 313   for $Atom (@Atoms) {
 314     if (!$Atom->IsHydrogen()) {
 315       $NumOfNonHydrogenAtoms++;
 316     }
 317   }
 318   return $NumOfNonHydrogenAtoms;
 319 }
 320 
 321 # Get number of hydrogen atoms in molecule...
 322 sub GetNumOfHydrogenAtoms {
 323   my($This) = @_;
 324   my($NumOfHydrogenAtoms, $Atom, @Atoms);
 325 
 326   @Atoms = $This->GetAtoms();
 327   $NumOfHydrogenAtoms = 0;
 328   for $Atom (@Atoms) {
 329     if ($Atom->IsHydrogen()) {
 330       $NumOfHydrogenAtoms++;
 331     }
 332   }
 333   return $NumOfHydrogenAtoms;
 334 }
 335 
 336 # Get number of missing hydrogen atoms in molecule...
 337 sub GetNumOfMissingHydrogenAtoms {
 338   my($This) = @_;
 339   my($NumOfMissingHydrogenAtoms, $Atom, @Atoms);
 340 
 341   @Atoms = $This->GetAtoms();
 342   $NumOfMissingHydrogenAtoms = 0;
 343   for $Atom (@Atoms) {
 344     if (!$Atom->IsHydrogen()) {
 345       $NumOfMissingHydrogenAtoms += $Atom->GetNumOfMissingHydrogens();
 346     }
 347   }
 348   return $NumOfMissingHydrogenAtoms;
 349 }
 350 
 351 # Add bond...
 352 sub AddBond {
 353   my($This, $Bond) = @_;
 354   my($Atom1, $Atom2);
 355 
 356   ($Atom1, $Atom2) = $Bond->GetAtoms();
 357   if (!(defined($Atom1) && defined($Atom2))) {
 358     carp "Warning: ${ClassName}->AddBond: No bond added: Both atoms must be specified...";
 359     return undef;
 360   }
 361   if (!($This->HasAtom($Atom1) && $This->HasAtom($Atom2))) {
 362     carp "Warning: ${ClassName}->AddBond: No bond added: Both atoms must be present...";
 363     return undef;
 364   }
 365   if ($This->HasBond($Bond)) {
 366     carp "Warning: ${ClassName}->AddBond: No bond added: Bond already exists...";
 367     return undef;
 368   }
 369   return $This->_AddBond($Bond);
 370 }
 371 
 372 # Add bond...
 373 sub _AddBond {
 374   my($This, $Bond) = @_;
 375 
 376   # Assign atom to this molecule...
 377   $Bond->SetProperty('Molecule', $This);
 378 
 379   # Add it to the graph as an edge...
 380   my($Atom1, $Atom2, $AtomID1, $AtomID2);
 381   ($Atom1, $Atom2) = $Bond->GetAtoms();
 382   $AtomID1 = $Atom1->GetID(); $AtomID2 = $Atom2->GetID();
 383   $This->AddEdge($AtomID1, $AtomID2);
 384   $This->SetEdgeProperty('Bond', $Bond, $AtomID1, $AtomID2);
 385 
 386   return $This;
 387 }
 388 
 389 # Add Bonds...
 390 sub AddBonds {
 391   my($This, @Bonds) = @_;
 392 
 393   if (!@Bonds) {
 394     carp "Warning: ${ClassName}->AddBonds: No bonds added: Bonds list must be specified...";
 395     return undef;
 396   }
 397   my($Bond);
 398   for $Bond (@Bonds) {
 399     $This->AddBond($Bond);
 400   }
 401   return $This;
 402 }
 403 
 404 # Create a bond and add it to molecule...
 405 sub NewBond {
 406   my($This, %NamesAndValues) = @_;
 407   my($Bond);
 408 
 409   $Bond = new Bond(%NamesAndValues);
 410   $This->AddBond($Bond);
 411 
 412   return $Bond;
 413 }
 414 
 415 # Delete a bond...
 416 sub DeleteBond {
 417   my($This, $Bond) = @_;
 418 
 419   if (!defined $Bond) {
 420     carp "Warning: ${ClassName}->DeleteBond: No bond deleted: Bond must be specified...";
 421     return undef;
 422   }
 423   # Does the bond exist in molecule?
 424   if (!$This->HasBond($Bond)) {
 425     carp "Warning: ${ClassName}->DeleteBond: No bond deleted: Bond doesn't exist...";
 426     return undef;
 427   }
 428   return $This->_DeleteBond($Bond);
 429 }
 430 
 431 # Delete bond...
 432 sub _DeleteBond {
 433   my($This, $Bond) = @_;
 434 
 435   my($Atom1, $Atom2, $AtomID1, $AtomID2);
 436   ($Atom1, $Atom2) = $Bond->GetAtoms();
 437   $AtomID1 = $Atom1->GetID(); $AtomID2 = $Atom2->GetID();
 438   $This->DeleteEdge($AtomID1, $AtomID2);
 439 
 440   return $This;
 441 }
 442 
 443 # Delete bonds...
 444 sub DeleteBonds {
 445   my($This, @Bonds) = @_;
 446 
 447   if (!@Bonds) {
 448     carp "Warning: ${ClassName}->DeleteBonds: No bonds added: Bonds list must be specified...";
 449     return undef;
 450   }
 451   my($Bond);
 452   for $Bond (@Bonds) {
 453     $This->DeleteBond($Bond);
 454   }
 455 
 456   return $This;
 457 }
 458 
 459 # Has bond...
 460 sub HasBond {
 461   my($This, $Bond) = @_;
 462   my($Atom1, $Atom2);
 463 
 464   ($Atom1, $Atom2) = $Bond->GetAtoms();
 465   if (!($This->HasAtom($Atom1) && $This->HasAtom($Atom2))) {
 466     return 0;
 467   }
 468   if (!$Bond->HasProperty('Molecule')) {
 469     # It's not in molecule...
 470     return 0;
 471   }
 472   my($AtomID1, $AtomID2, $Molecule);
 473   $AtomID1 = $Atom1->GetID(); $AtomID2 = $Atom2->GetID();
 474   $Molecule = $Bond->GetMolecule();
 475 
 476   return ($This->HasEdge($AtomID1, $AtomID2) && $This->GetID() == $Molecule->GetID()) ? 1 : 0;
 477 }
 478 
 479 # Get atom neighbors...
 480 sub _GetAtomNeighbors {
 481   my($This, $Atom) = @_;
 482 
 483   my($AtomID, @Atoms, @AtomIDs);
 484 
 485   @Atoms = (); @AtomIDs = ();
 486   $AtomID = $Atom->GetID();
 487   @AtomIDs = $This->GetNeighbors($AtomID);
 488   if (@AtomIDs) {
 489     @Atoms = $This->GetVerticesProperty('Atom', @AtomIDs);
 490   }
 491   return wantarray ? @Atoms : scalar @Atoms;
 492 }
 493 
 494 # Get atom bonds...
 495 sub _GetAtomBonds {
 496   my($This, $Atom) = @_;
 497   my($AtomID, @AtomIDs, @Bonds);
 498 
 499   @Bonds = (); @AtomIDs = ();
 500   $AtomID = $Atom->GetID();
 501   @AtomIDs = $This->GetEdges($AtomID);
 502   if (@AtomIDs) {
 503     @Bonds = $This->GetEdgesProperty('Bond', @AtomIDs);
 504   }
 505   return wantarray ? @Bonds : scalar @Bonds;
 506 }
 507 
 508 # Get bond to specified atom...
 509 sub _GetBondToAtom {
 510   my($This, $Atom1, $Atom2) = @_;
 511   my($AtomID1, $AtomID2);
 512 
 513   $AtomID1 = $Atom1->GetID();
 514   $AtomID2 = $Atom2->GetID();
 515 
 516   return $This->GetEdgeProperty('Bond', $AtomID1, $AtomID2);
 517 }
 518 
 519 # Add hydrogens to each atoms in molecule and return total number of hydrogens added...
 520 sub AddHydrogens {
 521   my($This) = @_;
 522 
 523   return $This->_AddHydrogens();
 524 }
 525 
 526 # Add hydrogens to polar atoms (N, O, P, S) in molecule and return total number of hydrogens added...
 527 sub AddPolarHydrogens {
 528   my($This) = @_;
 529   my($PolarHydrogensOnly) = 1;
 530 
 531   return $This->_AddHydrogens($PolarHydrogensOnly);
 532 }
 533 
 534 # Add all the hydrogens or hydrogens for polar atoms only...
 535 sub _AddHydrogens {
 536   my($This, $PolarHydrogensOnly) = @_;
 537   my($Atom, $NumOfHydrogensAdded, $HydrogenPositionsWarning, @Atoms);
 538 
 539   carp "Warning: ${ClassName}->_AddHydrogens: The current release of MayaChemTools doesn't assign any hydrogen positions...";
 540 
 541   if (! defined $PolarHydrogensOnly) {
 542     $PolarHydrogensOnly = 0;
 543   }
 544 
 545   $NumOfHydrogensAdded = 0;
 546   @Atoms = $This->GetAtoms();
 547   $HydrogenPositionsWarning = 0;
 548 
 549   ATOM: for $Atom (@Atoms) {
 550     if ($PolarHydrogensOnly) {
 551       if (!$Atom->IsPolarAtom()) {
 552 	next ATOM;
 553       }
 554     }
 555     $NumOfHydrogensAdded += $Atom->AddHydrogens($HydrogenPositionsWarning);
 556   }
 557   return $NumOfHydrogensAdded;
 558 }
 559 
 560 # Delete all hydrogens atoms in molecule and return total number of hydrogens removed...
 561 sub DeleteHydrogens {
 562   my($This) = @_;
 563 
 564   return $This->_DeleteHydrogens();
 565 }
 566 
 567 # Delete hydrogens to polar atoms (N, O, P, S) in molecule and return total number of hydrogens removed...
 568 sub DeletePolarHydrogens {
 569   my($This) = @_;
 570   my($PolarHydrogensOnly) = 1;
 571 
 572   return $This->_DeleteHydrogens($PolarHydrogensOnly);
 573 }
 574 
 575 # Delete all hydrogens atoms in molecule and return total number of hydrogens removed...
 576 sub _DeleteHydrogens {
 577   my($This, $PolarHydrogensOnly) = @_;
 578   my($Atom, $NumOfHydrogensRemoved, @Atoms);
 579 
 580   if (! defined $PolarHydrogensOnly) {
 581     $PolarHydrogensOnly = 0;
 582   }
 583 
 584   $NumOfHydrogensRemoved = 0;
 585   @Atoms = $This->GetAtoms();
 586 
 587   ATOM: for $Atom (@Atoms) {
 588     if ($PolarHydrogensOnly) {
 589       if (!$Atom->IsPolarHydrogen()) {
 590 	next ATOM;
 591       }
 592     }
 593     elsif (!$Atom->IsHydrogen()) {
 594       next ATOM;
 595     }
 596     $This->_DeleteAtom($Atom);
 597     $NumOfHydrogensRemoved++;
 598   }
 599   return $NumOfHydrogensRemoved;
 600 }
 601 
 602 # Get molecular weight by summing up atomic weights of all the atoms...
 603 sub GetMolecularWeight {
 604   my($This) = @_;
 605   my($MolecularWeight, $AtomicWeight, @Atoms, $Atom);
 606 
 607   $MolecularWeight = 0;
 608   @Atoms = $This->GetAtoms();
 609   for $Atom (@Atoms) {
 610     $AtomicWeight = $Atom->GetAtomicWeight();
 611     if (defined $AtomicWeight) {
 612       $MolecularWeight += $AtomicWeight;
 613     }
 614   }
 615 
 616   return $MolecularWeight;
 617 }
 618 
 619 # Get exact mass by summing up exact masses of all the atoms...
 620 sub GetExactMass {
 621   my($This) = @_;
 622   my($ExactMass, $AtomicMass, @Atoms, $Atom);
 623 
 624   $ExactMass = 0;
 625   @Atoms = $This->GetAtoms();
 626   for $Atom (@Atoms) {
 627     $AtomicMass = $Atom->GetExactMass();
 628     if (defined $AtomicMass) {
 629       $ExactMass += $AtomicMass;
 630     }
 631   }
 632 
 633   return $ExactMass;
 634 }
 635 
 636 # Get net formal charge on the molecule using one of the following two methods:
 637 #   . Using explicitly set FormalCharge property
 638 #   . Adding up formal charge on each atom in the molecule
 639 #
 640 # Caveats:
 641 #   . FormalCharge is different from Charge property of the molecule which corresponds to
 642 #     sum of partial atomic charges explicitly set for each atom using a specific methodology.
 643 #
 644 sub GetFormalCharge {
 645   my($This) = @_;
 646 
 647   # Is FormalCharge property explicitly set?
 648   if ($This->HasProperty('FormalCharge')) {
 649     return $This->GetProperty('FormalCharge');
 650   }
 651   my($FormalCharge, $AtomicFormalCharge, @Atoms, $Atom);
 652 
 653   $FormalCharge = 0;
 654   @Atoms = $This->GetAtoms();
 655   for $Atom (@Atoms) {
 656     $AtomicFormalCharge = $Atom->GetFormalCharge();
 657     if (defined $AtomicFormalCharge) {
 658       $FormalCharge += $AtomicFormalCharge;
 659     }
 660   }
 661   return $FormalCharge;
 662 }
 663 
 664 # Get net charge on the molecule using one of the following two methods:
 665 #   . Using explicitly set Charge property
 666 #   . Adding up charge on each atom in the molecule
 667 #
 668 # Caveats:
 669 #   . FormalCharge is different from Charge property of the molecule which corresponds to
 670 #     sum of partial atomic charges explicitly set for each atom using a specific methodology.
 671 #
 672 sub GetCharge {
 673   my($This) = @_;
 674 
 675   # Is Charge property explicitly set?
 676   if ($This->HasProperty('Charge')) {
 677     return $This->GetProperty('Charge');
 678   }
 679   my($Charge, $AtomicCharge, @Atoms, $Atom);
 680 
 681   $Charge = 0;
 682   @Atoms = $This->GetAtoms();
 683   for $Atom (@Atoms) {
 684     $AtomicCharge = $Atom->GetCharge();
 685     if (defined $AtomicCharge) {
 686       $Charge += $AtomicCharge;
 687     }
 688   }
 689   return $Charge;
 690 }
 691 
 692 # Get net SpinMultiplicity on the molecule using one of the following two methods:
 693 #   . Using explicitly set SpinMultiplicity property
 694 #   . Adding up SpinMultiplicity on each atom in the molecule
 695 #
 696 #
 697 sub GetSpinMultiplicity {
 698   my($This) = @_;
 699 
 700   # Is SpinMultiplicity property explicitly set?
 701   if ($This->HasProperty('SpinMultiplicity')) {
 702     return $This->GetProperty('SpinMultiplicity');
 703   }
 704   my($AtomicSpinMultiplicity, $SpinMultiplicity, @Atoms, $Atom);
 705 
 706   $SpinMultiplicity = 0;
 707   @Atoms = $This->GetAtoms();
 708   for $Atom (@Atoms) {
 709     $AtomicSpinMultiplicity = $Atom->GetSpinMultiplicity();
 710     if (defined $AtomicSpinMultiplicity) {
 711       $SpinMultiplicity += $AtomicSpinMultiplicity;
 712     }
 713   }
 714   return $SpinMultiplicity;
 715 }
 716 
 717 # Get molecular formula by collecting information about all atoms in the molecule and
 718 # composing the formula using Hills ordering system:
 719 #   . C shows up first and H follows assuming C is present
 720 #   . All other standard elements are sorted alphanumerically
 721 #   . All other non-stanard atom symbols are also sorted alphanumerically and follow standard elements
 722 #
 723 # Caveats:
 724 #   . By default, missing hydrogens and nonelements are also included
 725 #   . Elements for disconnected fragments are combined into the same formula
 726 #
 727 sub GetMolecularFormula {
 728   my($This, $IncludeMissingHydrogens, $IncludeNonElements) = @_;
 729   my($MolecularFormula, $AtomSymbol, $ElementsCountRef, $NonElementsCountRef);
 730 
 731   $IncludeMissingHydrogens = defined($IncludeMissingHydrogens) ? $IncludeMissingHydrogens : 1;
 732   $IncludeNonElements = defined($IncludeNonElements) ? $IncludeNonElements : 1;
 733 
 734   # Get elements count and setup molecular formula...
 735   ($ElementsCountRef, $NonElementsCountRef) = $This->GetElementsAndNonElements($IncludeMissingHydrogens);
 736   $MolecularFormula = '';
 737 
 738   # Count C and H first...
 739   if (exists $ElementsCountRef->{C} ) {
 740     $MolecularFormula .= 'C' . ($ElementsCountRef->{C} > 1 ? $ElementsCountRef->{C} : '');
 741     delete $ElementsCountRef->{C};
 742 
 743     if (exists $ElementsCountRef->{H} ) {
 744       $MolecularFormula .= 'H' . ($ElementsCountRef->{H} > 1 ? $ElementsCountRef->{H} : '');
 745       delete $ElementsCountRef->{H};
 746     }
 747   }
 748 
 749   # Sort elements...
 750   for $AtomSymbol (sort {$a cmp $b} keys %{$ElementsCountRef}) {
 751     $MolecularFormula .= $AtomSymbol . ($ElementsCountRef->{$AtomSymbol} > 1 ? $ElementsCountRef->{$AtomSymbol} : '');
 752   }
 753   if (!$IncludeNonElements) {
 754     return $MolecularFormula;
 755   }
 756 
 757   # Sort non-elements...
 758   for $AtomSymbol (sort {$a cmp $b} keys %{$NonElementsCountRef}) {
 759     $MolecularFormula .= $AtomSymbol . ($NonElementsCountRef->{$AtomSymbol} > 1 ? $NonElementsCountRef->{$AtomSymbol} : '');
 760   }
 761 
 762   # Check formal charge...
 763   my($FormalCharge);
 764   $FormalCharge = $This->GetFormalCharge();
 765   if ($FormalCharge) {
 766     # Setup formal charge string...
 767     my($FormalChargeString);
 768     if ($FormalCharge == 1 ) {
 769       $FormalChargeString =  "+";
 770     }
 771     elsif ($FormalCharge == -1 ) {
 772       $FormalChargeString =  "-";
 773     }
 774     else {
 775       $FormalChargeString = ($FormalCharge > 0) ? ("+" . abs($FormalCharge)) : ("-" . abs($FormalCharge));
 776     }
 777     $MolecularFormula = "${MolecularFormula}${FormalChargeString}";
 778   }
 779 
 780   return $MolecularFormula;
 781 }
 782 
 783 # Count elements and non-elements in molecule and return references to hashes
 784 # containing count of elements and non-elements respectively. By default, missing
 785 # hydrogens are not added to the element hash.
 786 #
 787 #
 788 sub GetElementsAndNonElements {
 789   my($This, $IncludeMissingHydrogens) = @_;
 790   my($Atom, $AtomicNumber, $AtomSymbol, $NumOfMissingHydrogens, @Atoms, %ElementsCount, %NonElementsCount);
 791 
 792   $IncludeMissingHydrogens = defined($IncludeMissingHydrogens) ? $IncludeMissingHydrogens : 0;
 793 
 794   %ElementsCount = (); %NonElementsCount = ();
 795   $NumOfMissingHydrogens = 0;
 796 
 797   # Count elements and non elements...
 798   @Atoms = $This->GetAtoms();
 799   for $Atom (@Atoms) {
 800     $AtomicNumber = $Atom->GetAtomicNumber();
 801     $AtomSymbol = $Atom->GetAtomSymbol();
 802     if ($AtomicNumber) {
 803       if (exists $ElementsCount{$AtomSymbol}) {
 804 	$ElementsCount{$AtomSymbol} += 1;
 805       }
 806       else {
 807 	$ElementsCount{$AtomSymbol} = 1;
 808       }
 809       if ($IncludeMissingHydrogens) {
 810 	if (!$Atom->IsHydrogen()) {
 811 	  $NumOfMissingHydrogens += $Atom->GetNumOfMissingHydrogens();
 812 	}
 813       }
 814     }
 815     else {
 816       if (exists $NonElementsCount{$AtomSymbol}) {
 817 	$NonElementsCount{$AtomSymbol} += 1;
 818       }
 819       else {
 820 	$NonElementsCount{$AtomSymbol} = 1;
 821       }
 822     }
 823   }
 824   if ($IncludeMissingHydrogens && $NumOfMissingHydrogens) {
 825     $AtomSymbol = 'H';
 826     if (exists $ElementsCount{$AtomSymbol}) {
 827       $ElementsCount{$AtomSymbol} += $NumOfMissingHydrogens;
 828     }
 829     else {
 830       $ElementsCount{$AtomSymbol} = $NumOfMissingHydrogens;
 831     }
 832   }
 833 
 834   return (\%ElementsCount, \%NonElementsCount);
 835 }
 836 
 837 # Calculate elemental composition and return reference to arrays
 838 # containing elements and their percent composition.
 839 #
 840 # Caveats:
 841 #   . By default, missing hydrogens are included
 842 #   . Non elemnents are ignored
 843 #   . Mass number are ignored
 844 #
 845 sub GetElementalComposition {
 846   my($This, $IncludeMissingHydrogens) = @_;
 847   my($MolecularFormula, $IncludeNonElements, $ElementsCountRef, $NonElementsCountRef, $ElementsRef, $ElementsCompositionRef);
 848 
 849   $IncludeMissingHydrogens = defined($IncludeMissingHydrogens) ? $IncludeMissingHydrogens : 1;
 850 
 851   $IncludeNonElements = 0;
 852   ($ElementsCountRef, $NonElementsCountRef) = $This->GetElementsAndNonElements($IncludeMissingHydrogens);
 853   if (keys %{$NonElementsCountRef}) {
 854     my($NonElementSymbols);
 855     $NonElementSymbols = join ",", sort keys %{$NonElementsCountRef};
 856     carp "Warning: ${ClassName}->GetElementalComposition: Ignoring atoms with non-elemental atom symbol(s): $NonElementSymbols ...";
 857   }
 858 
 859   $MolecularFormula = $This->GetMolecularFormula($IncludeMissingHydrogens, $IncludeNonElements);
 860 
 861   ($ElementsRef, $ElementsCompositionRef) = MolecularFormula::CalculateElementalComposition($MolecularFormula);
 862 
 863   return ($ElementsRef, $ElementsCompositionRef);
 864 }
 865 
 866 # Using refernece to element and its composition arrays, format composition information
 867 # as: Element: Composition;...
 868 #
 869 sub FormatElementalCompositionInformation {
 870   my($FirstParameter, $SecondParameter, $ThirdParameter, $FourthParameter) = @_;
 871   my($This, $ElementsRef, $ElementCompositionRef, $Precision);
 872 
 873   if (_IsMolecule($FirstParameter)) {
 874     ($This, $ElementsRef, $ElementCompositionRef, $Precision) = ($FirstParameter, $SecondParameter, $ThirdParameter, $FourthParameter);
 875   }
 876   else {
 877     ($ElementsRef, $ElementCompositionRef, $Precision) = ($FirstParameter, $SecondParameter, $ThirdParameter);
 878   }
 879   my($FormattedInfo) = '';
 880 
 881   if (!(defined($ElementsRef) && @{$ElementsRef})) {
 882     carp "Warning: ${ClassName}->FormatElementalCompositionInformation: Elements list is not defined or empty...";
 883     return undef;
 884   }
 885   if (!(defined($ElementCompositionRef) && @{$ElementCompositionRef})) {
 886     carp "Warning: ${ClassName}->FormatElementalCompositionInformation: Elements composition list is not defined or empty...";
 887     return undef;
 888   }
 889 
 890   if (!defined $Precision) {
 891     $Precision = 2;
 892   }
 893 
 894   $FormattedInfo = MolecularFormula::FormatCompositionInfomation($ElementsRef, $ElementCompositionRef, $Precision);
 895 
 896   return $FormattedInfo;
 897 }
 898 
 899 # Copy molecule and its associated data using Storable::dclone and update:
 900 #
 901 #   o Atom references corresponding atoms and bonds objects
 902 #   o Bond object references
 903 #
 904 # Object IDs for Atoms and Bonds don't get changed. So there is no need to clear
 905 # up any exisiting ring data attached to molecule via graph as vertex IDs.
 906 #
 907 sub Copy {
 908   my($This) = @_;
 909   my($NewMolecule, $Atom, $NewAtom, $AtomID, @Atoms, @AtomIDs, %AtomsIDsToNewAtoms);
 910 
 911   $NewMolecule = Storable::dclone($This);
 912 
 913   # Update atom references stored as vertex property...
 914 
 915   @Atoms = (); @AtomIDs = ();
 916   %AtomsIDsToNewAtoms = ();
 917 
 918   @AtomIDs = $This->GetVertices();
 919   if (@AtomIDs) {
 920     @Atoms = $This->GetVerticesProperty('Atom', @AtomIDs);
 921   }
 922 
 923   for $Atom (@Atoms) {
 924     $AtomID = $Atom->GetID();
 925 
 926     # Setup a reference to copied atom object...
 927     $NewAtom = $Atom->Copy();
 928     $AtomsIDsToNewAtoms{$AtomID} = $NewAtom;
 929 
 930     # Update atom reference to new atom object...
 931     $NewMolecule->UpdateVertexProperty('Atom', $NewAtom, $AtomID);
 932   }
 933 
 934   # Update bond object and bond atom references stored as edge property...
 935   my($Index, $AtomID1, $AtomID2, $Bond, $NewBond, $NewAtom1, $NewAtom2, @EdgesAtomsIDs);
 936   @EdgesAtomsIDs = ();
 937   @EdgesAtomsIDs = $This->GetEdges();
 938   for ($Index = 0; $Index < $#EdgesAtomsIDs; $Index += 2) {
 939     $AtomID1 = $EdgesAtomsIDs[$Index]; $AtomID2 = $EdgesAtomsIDs[$Index + 1];
 940 
 941     # Get reference to bond object...
 942     $Bond = $This->GetEdgeProperty('Bond', $AtomID1, $AtomID2);
 943 
 944     # Make a new bond object and update its atom references...
 945     $NewBond = $Bond->Copy();
 946     $NewAtom1 = $AtomsIDsToNewAtoms{$AtomID1};
 947     $NewAtom2 = $AtomsIDsToNewAtoms{$AtomID2};
 948     $NewBond->SetAtoms($NewAtom1, $NewAtom2);
 949 
 950     # Update bond object reference in the new molecule...
 951     $NewMolecule->UpdateEdgeProperty('Bond', $NewBond, $AtomID1, $AtomID2);
 952   }
 953 
 954   return $NewMolecule;
 955 }
 956 
 957 # Get number of connected components...
 958 #
 959 sub GetNumOfConnectedComponents {
 960   my($This) = @_;
 961   my($NumOfComponents);
 962 
 963   $NumOfComponents = $This->GetConnectedComponentsVertices();
 964 
 965   return $NumOfComponents;
 966 }
 967 
 968 # Return a reference to an array containing molecules corresponding
 969 # to connected components sorted in decreasing order of component size...
 970 #
 971 sub GetConnectedComponents {
 972   my($This) = @_;
 973   my($Index, @ComponentMolecules, @ConnectedComponents);
 974 
 975   @ConnectedComponents = ();
 976   @ConnectedComponents = $This->GetConnectedComponentsVertices();
 977   @ComponentMolecules = ();
 978 
 979   for $Index (0 .. $#ConnectedComponents) {
 980     push @ComponentMolecules, $This->_GetConnectedComponent(\@ConnectedComponents, $Index);
 981   }
 982   return @ComponentMolecules;
 983 }
 984 
 985 # Return a reference to largest connected component as a molecule object...
 986 #
 987 sub GetLargestConnectedComponent {
 988   my($This) = @_;
 989   my($LargestComponentIndex, @ConnectedComponents);
 990 
 991   $LargestComponentIndex = 0;
 992   @ConnectedComponents = ();
 993   @ConnectedComponents = $This->GetConnectedComponentsVertices();
 994 
 995   return $This->_GetConnectedComponent(\@ConnectedComponents, $LargestComponentIndex);
 996 }
 997 
 998 # Return connected component as a molecule...
 999 #
1000 sub _GetConnectedComponent {
1001   my($This, $ConnectedComponentsRef, $ComponentIndex) = @_;
1002   my($ComponentMolecule);
1003 
1004   # Copy existing molecule...
1005   $ComponentMolecule = $This->Copy();
1006 
1007   # Delete all atoms besides the ones in specified component...
1008   $ComponentMolecule->_DeleteConnectedComponents($ConnectedComponentsRef, $ComponentIndex);
1009 
1010   # Clear any deteced rings...
1011   if ($ComponentMolecule->HasRings()) {
1012     $ComponentMolecule->ClearRings();
1013   }
1014   return $ComponentMolecule;
1015 }
1016 
1017 # Delete atoms corresponding to all connected components except the one specified...
1018 #
1019 sub _DeleteConnectedComponents {
1020   my($This, $ConnectedComponentsRef, $KeepComponentIndex) = @_;
1021   my($Index, $AtomID);
1022 
1023   INDEX: for $Index (0 .. $#{$ConnectedComponentsRef}) {
1024     if ($Index == $KeepComponentIndex) {
1025       next INDEX;
1026     }
1027     for $AtomID (@{$ConnectedComponentsRef->[$Index]}) {
1028       $This->DeleteVertex($AtomID);
1029     }
1030   }
1031   return $This;
1032 }
1033 
1034 # Return an array containing references to atom arrays corresponding to atoms of
1035 # connected components sorted in order of their decreasing size...
1036 #
1037 sub GetConnectedComponentsAtoms {
1038   my($This) = @_;
1039   my($Index, @ComponentsAtoms, @ConnectedComponents);
1040 
1041   @ConnectedComponents = ();
1042   @ConnectedComponents = $This->GetConnectedComponentsVertices();
1043 
1044   @ComponentsAtoms = ();
1045   for $Index (0 .. $#ConnectedComponents) {
1046     my(@ComponentAtoms);
1047 
1048     @ComponentAtoms = ();
1049     @ComponentAtoms = $This->_GetConnectedComponentAtoms(\@ConnectedComponents, $Index);
1050     push @ComponentsAtoms, \@ComponentAtoms;
1051   }
1052   return @ComponentsAtoms;
1053 }
1054 
1055 # Return an array containing atoms correspondig to largest connected component...
1056 #
1057 sub GetLargestConnectedComponentAtoms {
1058   my($This) = @_;
1059   my($LargestComponentIndex, @ConnectedComponents);
1060 
1061   $LargestComponentIndex = 0;
1062   @ConnectedComponents = ();
1063   @ConnectedComponents = $This->GetConnectedComponentsVertices();
1064 
1065   return $This->_GetConnectedComponentAtoms(\@ConnectedComponents, $LargestComponentIndex);
1066 }
1067 
1068 # Return an array containing atoms corresponding to specified connected component...
1069 #
1070 sub _GetConnectedComponentAtoms {
1071   my($This, $ConnectedComponentsRef, $ComponentIndex) = @_;
1072   my($AtomID, @AtomIDs, @ComponentAtoms);
1073 
1074   @ComponentAtoms = ();
1075   @AtomIDs = ();
1076 
1077   for $AtomID (@{$ConnectedComponentsRef->[$ComponentIndex]}) {
1078     push @AtomIDs, $AtomID;
1079   }
1080   @ComponentAtoms = $This->_GetAtomsFromAtomIDs(@AtomIDs);
1081 
1082   return @ComponentAtoms;
1083 }
1084 
1085 # Except for the largest connected component, delete atoms corresponding to all other
1086 # connected components...
1087 #
1088 sub KeepLargestComponent {
1089   my($This) = @_;
1090   my($LargestComponentIndex, @ConnectedComponents);
1091 
1092   @ConnectedComponents = ();
1093   @ConnectedComponents = $This->GetConnectedComponentsVertices();
1094   if (@ConnectedComponents == 1) {
1095     return $This;
1096   }
1097   $LargestComponentIndex = 0;
1098   $This->_DeleteConnectedComponents(\@ConnectedComponents, $LargestComponentIndex);
1099 
1100   # Clear any deteced rings...
1101   if ($This->HasRings()) {
1102     $This->ClearRings();
1103   }
1104 
1105   return $This;
1106 }
1107 
1108 # Get an array of topologically sorted atoms starting from a specified atom or
1109 # an arbitrary atom in the molecule...
1110 #
1111 sub GetTopologicallySortedAtoms {
1112   my($This, $StartAtom) = @_;
1113   my(@SortedAtoms);
1114 
1115   @SortedAtoms = ();
1116   if (defined($StartAtom) && !$This->HasAtom($StartAtom)) {
1117     carp "Warning: ${ClassName}->_GetTopologicallySortedAtoms: No atoms retrieved: Start atom doesn't exist...";
1118     return @SortedAtoms;
1119   }
1120   my($StartAtomID, @AtomIDs);
1121 
1122   @AtomIDs = ();
1123   $StartAtomID = defined($StartAtom) ? $StartAtom->GetID() : undef;
1124 
1125   @AtomIDs = $This->GetTopologicallySortedVertices($StartAtomID);
1126   @SortedAtoms = $This->_GetAtomsFromAtomIDs(@AtomIDs);
1127 
1128   return @SortedAtoms;
1129 }
1130 
1131 # Detect rings in molecule...
1132 #
1133 sub DetectRings {
1134   my($This) = @_;
1135 
1136   # Use graph method to detect all cycles and associate 'em to graph as graph
1137   # and vertex properties...
1138   $This->DetectCycles();
1139 
1140   return $This;
1141 }
1142 
1143 # Clear rings in molecule...
1144 #
1145 sub ClearRings {
1146   my($This) = @_;
1147 
1148   # Use graph method to clear all cycles...
1149   $This->ClearCycles();
1150 
1151   return $This;
1152 }
1153 
1154 
1155 # Setup rings type paths to use during all ring related methods. Possible values:
1156 # Independent or All. Default is to use Independent rings.
1157 #
1158 sub SetActiveRings {
1159   my($This, $RingsType) = @_;
1160 
1161   if (!defined $This->SetActiveCyclicPaths(