MayaChemTools

   1 #!/usr/bin/perl -w
   2 #
   3 # $RCSfile: CalculatePhysicochemicalProperties.pl,v $
   4 # $Date: 2011/12/27 20:27:02 $
   5 # $Revision: 1.13 $
   6 #
   7 # Author: Manish Sud <msud@san.rr.com>
   8 #
   9 # Copyright (C) 2004-2012 Manish Sud. All rights reserved.
  10 #
  11 # This file is part of MayaChemTools.
  12 #
  13 # MayaChemTools is free software; you can redistribute it and/or modify it under
  14 # the terms of the GNU Lesser General Public License as published by the Free
  15 # Software Foundation; either version 3 of the License, or (at your option) any
  16 # later version.
  17 #
  18 # MayaChemTools is distributed in the hope that it will be useful, but without
  19 # any warranty; without even the implied warranty of merchantability or fitness
  20 # for a particular purpose.  See the GNU Lesser General Public License for more
  21 # details.
  22 #
  23 # You should have received a copy of the GNU Lesser General Public License
  24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  26 # Boston, MA, 02111-1307, USA.
  27 #
  28 
  29 use strict;
  30 use FindBin; use lib "$FindBin::Bin/../lib";
  31 use Getopt::Long;
  32 use File::Basename;
  33 use Text::ParseWords;
  34 use Benchmark;
  35 use FileUtil;
  36 use TextUtil;
  37 use SDFileUtil;
  38 use MoleculeFileIO;
  39 use Molecule;
  40 use AtomTypes::AtomicInvariantsAtomTypes;
  41 use AtomTypes::FunctionalClassAtomTypes;
  42 use MolecularDescriptors::MolecularDescriptorsGenerator;
  43 
  44 my($ScriptName, %Options, $StartTime, $EndTime, $TotalTime);
  45 
  46 # Autoflush STDOUT
  47 $| = 1;
  48 
  49 # Starting message...
  50 $ScriptName = basename($0);
  51 print "\n$ScriptName: Starting...\n\n";
  52 $StartTime = new Benchmark;
  53 
  54 # Get the options and setup script...
  55 SetupScriptUsage();
  56 if ($Options{help} || @ARGV < 1) {
  57   die GetUsageFromPod("$FindBin::Bin/$ScriptName");
  58 }
  59 
  60 my(@SDFilesList);
  61 @SDFilesList = ExpandFileNames(\@ARGV, "sdf sd");
  62 
  63 # Process options...
  64 print "Processing options...\n";
  65 my(%OptionsInfo);
  66 ProcessOptions();
  67 
  68 # Setup information about input files...
  69 print "Checking input SD file(s)...\n";
  70 my(%SDFilesInfo);
  71 RetrieveSDFilesInfo();
  72 
  73 # Process input files..
  74 my($FileIndex);
  75 if (@SDFilesList > 1) {
  76   print "\nProcessing SD files...\n";
  77 }
  78 for $FileIndex (0 .. $#SDFilesList) {
  79   if ($SDFilesInfo{FileOkay}[$FileIndex]) {
  80     print "\nProcessing file $SDFilesList[$FileIndex]...\n";
  81     CalculatePhysicochemicalProperties($FileIndex);
  82   }
  83 }
  84 print "\n$ScriptName:Done...\n\n";
  85 
  86 $EndTime = new Benchmark;
  87 $TotalTime = timediff ($EndTime, $StartTime);
  88 print "Total time: ", timestr($TotalTime), "\n";
  89 
  90 ###############################################################################
  91 
  92 # Calculate physicochemical properties for a SD file...
  93 #
  94 sub CalculatePhysicochemicalProperties {
  95   my($FileIndex) = @_;
  96   my($CmpdCount, $IgnoredCmpdCount, $RuleOf5ViolationsCount, $RuleOf3ViolationsCount, $SDFile, $MoleculeFileIO, $Molecule, $MolecularDescriptorsGenerator, $PhysicochemicalPropertiesDataRef, $NewSDFileRef, $NewTextFileRef);
  97 
  98   $SDFile = $SDFilesList[$FileIndex];
  99 
 100   # Setup output files...
 101   $NewSDFileRef = ''; $NewTextFileRef = '';
 102   ($NewSDFileRef, $NewTextFileRef) = SetupAndOpenOutputFiles($FileIndex);
 103 
 104   # Setup molecular descriptor generator to calculate property values for specifed
 105   # property names...
 106   $MolecularDescriptorsGenerator = SetupMolecularDescriptorsGenerator();
 107 
 108   ($CmpdCount, $IgnoredCmpdCount, $RuleOf5ViolationsCount, $RuleOf3ViolationsCount) = ('0') x 4;
 109 
 110   $MoleculeFileIO = new MoleculeFileIO('Name' => $SDFile);
 111   $MoleculeFileIO->Open();
 112 
 113   COMPOUND: while ($Molecule = $MoleculeFileIO->ReadMolecule()) {
 114     $CmpdCount++;
 115 
 116     # Filter compound data before calculating physiochemical properties...
 117     if ($OptionsInfo{Filter}) {
 118       if (CheckAndFilterCompound($CmpdCount, $Molecule)) {
 119         $IgnoredCmpdCount++;
 120         next COMPOUND;
 121       }
 122     }
 123 
 124     # Calculate properties...
 125     $PhysicochemicalPropertiesDataRef = CalculateMoleculeProperties($MolecularDescriptorsGenerator, $Molecule);
 126 
 127     if (!defined($PhysicochemicalPropertiesDataRef)) {
 128       $IgnoredCmpdCount++;
 129       ProcessIgnoredCompound('PropertiesCalculationFailed', $CmpdCount, $Molecule);
 130       next COMPOUND;
 131     }
 132 
 133     # Calculate any rule violations...
 134     if ($OptionsInfo{RuleOf5Violations} && $PhysicochemicalPropertiesDataRef->{RuleOf5Violations}) {
 135       $RuleOf5ViolationsCount++;
 136     }
 137 
 138     if ($OptionsInfo{RuleOf3Violations} && $PhysicochemicalPropertiesDataRef->{RuleOf3Violations}) {
 139       $RuleOf3ViolationsCount++;
 140     }
 141 
 142     # Write out calculate properties...
 143     WriteDataToOutputFiles($FileIndex, $CmpdCount, $Molecule, $PhysicochemicalPropertiesDataRef, $NewSDFileRef, $NewTextFileRef);
 144   }
 145   $MoleculeFileIO->Close();
 146 
 147   if ($OptionsInfo{SDOutput} && $NewSDFileRef) {
 148     close $NewSDFileRef;
 149   }
 150   if ($OptionsInfo{TextOutput} && $NewTextFileRef) {
 151     close $NewTextFileRef;
 152   }
 153 
 154   WriteCalculationSummaryStatistics($CmpdCount, $IgnoredCmpdCount, $RuleOf5ViolationsCount, $RuleOf3ViolationsCount);
 155 }
 156 
 157 # Process compound being ignored due to problems in physicochemical properties calculation...
 158 #
 159 sub ProcessIgnoredCompound {
 160   my($Mode, $CmpdCount, $Molecule) = @_;
 161   my($CmpdID, $DataFieldLabelAndValuesRef);
 162 
 163   $DataFieldLabelAndValuesRef = $Molecule->GetDataFieldLabelAndValues();
 164   $CmpdID = SetupCmpdIDForTextFiles($CmpdCount, $Molecule, $DataFieldLabelAndValuesRef);
 165 
 166   MODE: {
 167     if ($Mode =~ /^ContainsNonElementalData$/i) {
 168       warn "\nWarning: Ignoring compound record number $CmpdCount with ID $CmpdID: Compound contains atom data corresponding to non-elemental atom symbol(s)...\n\n";
 169       next MODE;
 170     }
 171 
 172     if ($Mode =~ /^ContainsNoElementalData$/i) {
 173       warn "\nWarning: Ignoring compound record number $CmpdCount with ID $CmpdID: Compound contains no atom data...\n\n";
 174       next MODE;
 175     }
 176 
 177     if ($Mode =~ /^PropertiesCalculationFailed$/i) {
 178       warn "\nWarning: Ignoring compound record number $CmpdCount with ID $CmpdID: Physicochemical properties calculation didn't succeed...\n\n";
 179       next MODE;
 180     }
 181     warn "\nWarning: Ignoring compound record number $CmpdCount with ID $CmpdID: Physicochemical properties calculation didn't succeed...\n\n";
 182   }
 183 }
 184 
 185 # Check and filter compounds....
 186 #
 187 sub CheckAndFilterCompound {
 188   my($CmpdCount, $Molecule) = @_;
 189   my($ElementCount, $NonElementCount);
 190 
 191   ($ElementCount, $NonElementCount) = $Molecule->GetNumOfElementsAndNonElements();
 192 
 193   if ($NonElementCount) {
 194     ProcessIgnoredCompound('ContainsNonElementalData', $CmpdCount, $Molecule);
 195     return 1;
 196   }
 197 
 198   if (!$ElementCount) {
 199     ProcessIgnoredCompound('ContainsNoElementalData', $CmpdCount, $Molecule);
 200     return 1;
 201   }
 202 
 203   return 0;
 204 }
 205 
 206 # Write out compounds physicochemical properties calculation summary statistics...
 207 #
 208 sub WriteCalculationSummaryStatistics {
 209   my($CmpdCount, $IgnoredCmpdCount, $RuleOf5ViolationsCount, $RuleOf3ViolationsCount) = @_;
 210   my($ProcessedCmpdCount);
 211 
 212   $ProcessedCmpdCount = $CmpdCount - $IgnoredCmpdCount;
 213 
 214   print "\nNumber of compounds: $CmpdCount\n";
 215   print "Number of compounds processed successfully during physicochemical properties calculation: $ProcessedCmpdCount\n";
 216   print "Number of compounds ignored during physicochemical properties calculation: $IgnoredCmpdCount\n";
 217 
 218   if ($OptionsInfo{RuleOf5Violations}) {
 219     print "Number of compounds with one or more RuleOf5 violations: $RuleOf5ViolationsCount\n";
 220   }
 221 
 222   if ($OptionsInfo{RuleOf3Violations}) {
 223     print "Number of compounds with one or more RuleOf3 violations: $RuleOf3ViolationsCount\n";
 224   }
 225 
 226 }
 227 
 228 # Open output files...
 229 #
 230 sub SetupAndOpenOutputFiles {
 231   my($FileIndex) = @_;
 232   my($NewSDFile, $NewTextFile, $NewSDFileRef, $NewTextFileRef);
 233 
 234   $NewSDFileRef = '';
 235   $NewTextFileRef = '';
 236 
 237   if ($OptionsInfo{SDOutput}) {
 238     $NewSDFile = $SDFilesInfo{SDOutFileNames}[$FileIndex];
 239     print "Generating SD file $NewSDFile...\n";
 240     open NEWSDFILE, ">$NewSDFile" or die "Error: Couldn't open $NewSDFile: $! \n";
 241     $NewSDFileRef = \*NEWSDFILE;
 242   }
 243   if ($OptionsInfo{TextOutput}) {
 244     $NewTextFile = $SDFilesInfo{TextOutFileNames}[$FileIndex];
 245     print "Generating text file $NewTextFile...\n";
 246     open NEWTEXTFILE, ">$NewTextFile" or die "Error: Couldn't open $NewTextFile: $! \n";
 247     WriteTextFileCoulmnLabels($FileIndex, \*NEWTEXTFILE);
 248     $NewTextFileRef = \*NEWTEXTFILE;
 249   }
 250   return ($NewSDFileRef, $NewTextFileRef);
 251 }
 252 
 253 # Write calculated physicochemical properties and other data to appropriate output files...
 254 #
 255 sub WriteDataToOutputFiles {
 256   my($FileIndex, $CmpdCount, $Molecule, $PhysicochemicalPropertiesDataRef, $NewSDFileRef, $NewTextFileRef) = @_;
 257   my($PropertyName, $PropertyValue);
 258 
 259   if ($OptionsInfo{SDOutput}) {
 260     # Retrieve input compound string used to create molecule and write it out
 261     # without last line containing a delimiter...
 262     my($CmpdString);
 263     $CmpdString = $Molecule->GetInputMoleculeString();
 264     $CmpdString =~ s/\$\$\$\$$//;
 265     print $NewSDFileRef "$CmpdString";
 266 
 267     # Write out calculated physicochemical properties data...
 268     for $PropertyName (@{$OptionsInfo{SpecifiedPropertyNames}}) {
 269       $PropertyValue = $PhysicochemicalPropertiesDataRef->{$PropertyName};
 270       print $NewSDFileRef  ">  <$PropertyName>\n$PropertyValue\n\n";
 271     }
 272 
 273     # Write out RuleOf5 violations for molecule....
 274     if ($OptionsInfo{RuleOf5Violations}) {
 275       $PropertyValue = $PhysicochemicalPropertiesDataRef->{RuleOf5Violations};
 276       print $NewSDFileRef  ">  <RuleOf5Violations>\n$PropertyValue\n\n";
 277     }
 278 
 279     # Write out RuleOf3 violations for molecule....
 280     if ($OptionsInfo{RuleOf3Violations}) {
 281       $PropertyValue = $PhysicochemicalPropertiesDataRef->{RuleOf3Violations};
 282       print $NewSDFileRef  ">  <RuleOf3Violations>\n$PropertyValue\n\n";
 283     }
 284 
 285     # Write out delimiter...
 286     print $NewSDFileRef "\$\$\$\$\n";
 287   }
 288 
 289   if ($OptionsInfo{TextOutput}) {
 290     my($Line, $DataFieldLabelAndValuesRef, $DataFieldLabel, $DataFieldValue, @LineWords,);
 291 
 292     $DataFieldLabelAndValuesRef = $Molecule->GetDataFieldLabelAndValues();
 293     @LineWords = ();
 294     if ($OptionsInfo{DataFieldsMode} =~ /^CompoundID$/i) {
 295       push @LineWords, SetupCmpdIDForTextFiles($CmpdCount, $Molecule, $DataFieldLabelAndValuesRef);
 296     }
 297     elsif ($OptionsInfo{DataFieldsMode} =~ /^All$/i) {
 298       @LineWords = map { exists $DataFieldLabelAndValuesRef->{$_} ? $DataFieldLabelAndValuesRef->{$_} : ''} @{$SDFilesInfo{AllDataFieldsRef}[$FileIndex]};
 299     }
 300     elsif ($OptionsInfo{DataFieldsMode} =~ /^Common$/i) {
 301       @LineWords = map { exists $DataFieldLabelAndValuesRef->{$_} ? $DataFieldLabelAndValuesRef->{$_} : ''} @{$SDFilesInfo{CommonDataFieldsRef}[$FileIndex]};
 302     }
 303     elsif ($OptionsInfo{DataFieldsMode} =~ /^Specify$/i) {
 304       @LineWords = map { exists $DataFieldLabelAndValuesRef->{$_} ? $DataFieldLabelAndValuesRef->{$_} : ''} @{$OptionsInfo{SpecifiedDataFields}};
 305     }
 306 
 307     # Append calculated physicochemical properties data...
 308     for $PropertyName (@{$OptionsInfo{SpecifiedPropertyNames}}) {
 309       $PropertyValue = $PhysicochemicalPropertiesDataRef->{$PropertyName};
 310       push @LineWords, $PropertyValue;
 311     }
 312 
 313     # Write out RuleOf5 violations for molecule....
 314     if ($OptionsInfo{RuleOf5Violations}) {
 315       $PropertyValue = $PhysicochemicalPropertiesDataRef->{RuleOf5Violations};
 316       push @LineWords, $PropertyValue;
 317     }
 318 
 319     # Write out RuleOf3 violations for molecule....
 320     if ($OptionsInfo{RuleOf3Violations}) {
 321       $PropertyValue = $PhysicochemicalPropertiesDataRef->{RuleOf3Violations};
 322       push @LineWords, $PropertyValue;
 323     }
 324 
 325     $Line = JoinWords(\@LineWords, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote});
 326     print $NewTextFileRef "$Line\n";
 327   }
 328 }
 329 
 330 # Write out approriate column labels to text file...
 331 sub WriteTextFileCoulmnLabels {
 332   my($FileIndex, $NewTextFileRef) = @_;
 333   my($Line, @LineWords);
 334 
 335   @LineWords = ();
 336   if ($OptionsInfo{DataFieldsMode} =~ /^All$/i) {
 337     push @LineWords, @{$SDFilesInfo{AllDataFieldsRef}[$FileIndex]};
 338   }
 339   elsif ($OptionsInfo{DataFieldsMode} =~ /^Common$/i) {
 340     push @LineWords, @{$SDFilesInfo{CommonDataFieldsRef}[$FileIndex]};
 341   }
 342   elsif ($OptionsInfo{DataFieldsMode} =~ /^Specify$/i) {
 343     push @LineWords, @{$OptionsInfo{SpecifiedDataFields}};
 344   }
 345   elsif ($OptionsInfo{DataFieldsMode} =~ /^CompoundID$/i) {
 346     push @LineWords, $OptionsInfo{CompoundIDLabel};
 347   }
 348   my($SpecifiedPropertyName);
 349 
 350   # Append physicochemical properties column labels...
 351   push @LineWords,  @{$OptionsInfo{SpecifiedPropertyNames}};
 352 
 353   # Write out RuleOf5 violations label...
 354   if ($OptionsInfo{RuleOf5Violations}) {
 355     push @LineWords, 'RuleOf5Violations';
 356   }
 357 
 358   # Write out RuleOf3 violations label...
 359   if ($OptionsInfo{RuleOf3Violations}) {
 360     push @LineWords, 'RuleOf3Violations';
 361   }
 362 
 363   $Line = JoinWords(\@LineWords, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote});
 364   print $NewTextFileRef "$Line\n";
 365 }
 366 
 367 # Generate compound ID for text files..
 368 #
 369 sub SetupCmpdIDForTextFiles {
 370   my($CmpdCount, $Molecule, $DataFieldLabelAndValuesRef) = @_;
 371   my($CmpdID);
 372 
 373   $CmpdID = '';
 374   if ($OptionsInfo{CompoundIDMode} =~ /^MolNameOrLabelPrefix$/i) {
 375     my($MolName);
 376     $MolName = $Molecule->GetName();
 377     $CmpdID = $MolName ? $MolName : "$OptionsInfo{CompoundID}${CmpdCount}";
 378   }
 379   elsif ($OptionsInfo{CompoundIDMode} =~ /^LabelPrefix$/i) {
 380     $CmpdID = "$OptionsInfo{CompoundID}${CmpdCount}";
 381   }
 382   elsif ($OptionsInfo{CompoundIDMode} =~ /^DataField$/i) {
 383     my($SpecifiedDataField);
 384     $SpecifiedDataField = $OptionsInfo{CompoundID};
 385     $CmpdID = exists $DataFieldLabelAndValuesRef->{$SpecifiedDataField} ? $DataFieldLabelAndValuesRef->{$SpecifiedDataField} : '';
 386   }
 387   elsif ($OptionsInfo{CompoundIDMode} =~ /^MolName$/i) {
 388     $CmpdID = $Molecule->GetName();
 389   }
 390   return $CmpdID;
 391 }
 392 
 393 # Calculate physicochemical properties for molecule...
 394 #
 395 sub CalculateMoleculeProperties {
 396   my($MolecularDescriptorsGenerator, $Molecule) = @_;
 397   my($PropertyName, $PropertyValue, $MolecularDescriptorsObject, %CalculatedPhysicochemicalProperties);
 398 
 399   %CalculatedPhysicochemicalProperties = ();
 400 
 401   if ($OptionsInfo{KeepLargestComponent}) {
 402     $Molecule->KeepLargestComponent();
 403   }
 404 
 405   if (!$Molecule->DetectRings()) {
 406     return undef;
 407   }
 408   $Molecule->DetectAromaticity();
 409 
 410   if ($OptionsInfo{AddHydrogens}) {
 411     $Molecule->AddHydrogens();
 412   }
 413 
 414   # Calculate physicochemical properties...
 415   $MolecularDescriptorsGenerator->SetMolecule($Molecule);
 416   $MolecularDescriptorsGenerator->GenerateDescriptors();
 417 
 418   if (!$MolecularDescriptorsGenerator->IsDescriptorsGenerationSuccessful()) {
 419     return undef;
 420   }
 421 
 422   %CalculatedPhysicochemicalProperties = $MolecularDescriptorsGenerator->GetDescriptorNamesAndValues();
 423 
 424   # Count RuleOf3 violations...
 425   if ($OptionsInfo{RuleOf3Violations}) {
 426     CalculateRuleViolationsCount('RuleOf3Violations', \%CalculatedPhysicochemicalProperties);
 427   }
 428 
 429   # Count RuleOf5 violations...
 430   if ($OptionsInfo{RuleOf5Violations}) {
 431     CalculateRuleViolationsCount('RuleOf5Violations', \%CalculatedPhysicochemicalProperties);
 432   }
 433 
 434   return \%CalculatedPhysicochemicalProperties;
 435 }
 436 
 437 # Setup molecular descriptor generator to calculate property values for specifed
 438 # property names...
 439 #
 440 sub SetupMolecularDescriptorsGenerator {
 441   my($PropertyName, $MolecularDescriptorsGenerator);
 442 
 443   $MolecularDescriptorsGenerator = new MolecularDescriptorsGenerator('Mode' => 'Specify', 'DescriptorNames' => \@{$OptionsInfo{SpecifiedPropertyNames}});
 444 
 445   # Setup molecular desciptor calculation parameters...
 446   if (exists($OptionsInfo{SpecifiedPropertyNamesMap}{lc('MolecularWeight')}) || exists($OptionsInfo{SpecifiedPropertyNamesMap}{lc('ExactMass')}) ) {
 447     $MolecularDescriptorsGenerator->SetDescriptorClassParameters('DescriptorClassName' => 'WeightAndMassDescriptors', %{$OptionsInfo{PrecisionParametersMap}});
 448   }
 449 
 450   if (exists($OptionsInfo{SpecifiedPropertyNamesMap}{lc('RotatableBonds')})) {
 451     $MolecularDescriptorsGenerator->SetDescriptorClassParameters('DescriptorClassName' => 'RotatableBondsDescriptors', %{$OptionsInfo{RotatableBondsParametersMap}});
 452   }
 453 
 454   if (exists($OptionsInfo{SpecifiedPropertyNamesMap}{lc('HydrogenBondDonors')}) || exists($OptionsInfo{SpecifiedPropertyNamesMap}{lc('HydrogenBondAcceptors')}) ) {
 455     $MolecularDescriptorsGenerator->SetDescriptorClassParameters('DescriptorClassName' => 'HydrogenBondsDescriptors', 'HydrogenBondsType' => $OptionsInfo{HydrogenBonds});
 456   }
 457 
 458   if (exists($OptionsInfo{SpecifiedPropertyNamesMap}{lc('TPSA')})) {
 459     $MolecularDescriptorsGenerator->SetDescriptorClassParameters('DescriptorClassName' => 'TPSADescriptors', %{$OptionsInfo{TPSAParametersMap}});
 460   }
 461 
 462   if (exists($OptionsInfo{SpecifiedPropertyNamesMap}{lc('MolecularComplexity')})) {
 463     $MolecularDescriptorsGenerator->SetDescriptorClassParameters('DescriptorClassName' => 'MolecularComplexityDescriptors', %{$OptionsInfo{MolecularComplexityParametersMap}});
 464   }
 465 
 466   return $MolecularDescriptorsGenerator;
 467 }
 468 
 469 # Calculate RuleOf3 or RuleOf5 violations count...
 470 #
 471 sub CalculateRuleViolationsCount {
 472   my($RuleViolationsType, $CalculatedPropertiesMapRef) = @_;
 473   my($RuleViolationsCount, $PropertyName);
 474 
 475   $RuleViolationsCount = 0;
 476 
 477   RULEVIOLATIONSTYPE: {
 478     if ($RuleViolationsType =~ /^RuleOf3Violations$/i) {
 479       for $PropertyName (@{$OptionsInfo{RuleOf3PropertyNames}}) {
 480         if ($CalculatedPropertiesMapRef->{$PropertyName} > $OptionsInfo{RuleOf3MaxPropertyValuesMap}{$PropertyName}) {
 481           $RuleViolationsCount++;
 482         }
 483       }
 484       last RULEVIOLATIONSTYPE;
 485     }
 486 
 487     if ($RuleViolationsType =~ /^RuleOf5Violations$/i) {
 488       for $PropertyName (@{$OptionsInfo{RuleOf5PropertyNames}}) {
 489         if ($CalculatedPropertiesMapRef->{$PropertyName} > $OptionsInfo{RuleOf5MaxPropertyValuesMap}{$PropertyName}) {
 490           $RuleViolationsCount++;
 491         }
 492       }
 493       last RULEVIOLATIONSTYPE;
 494     }
 495 
 496     die "Warning: Unknown rule violation type: $RuleViolationsType...";
 497   }
 498 
 499   # Set rule violation count...
 500   $CalculatedPropertiesMapRef->{$RuleViolationsType} = $RuleViolationsCount;
 501 
 502 }
 503 
 504 # Retrieve information about SD files...
 505 #
 506 sub RetrieveSDFilesInfo {
 507   my($SDFile, $Index, $FileDir, $FileExt, $FileName, $OutFileRoot, $TextOutFileExt, $SDOutFileExt, $NewSDFileName, $NewTextFileName, $CheckDataField, $CollectDataFields, $AllDataFieldsRef, $CommonDataFieldsRef);
 508 
 509   %SDFilesInfo = ();
 510   @{$SDFilesInfo{FileOkay}} = ();
 511   @{$SDFilesInfo{OutFileRoot}} = ();
 512   @{$SDFilesInfo{SDOutFileNames}} = ();
 513   @{$SDFilesInfo{TextOutFileNames}} = ();
 514   @{$SDFilesInfo{AllDataFieldsRef}} = ();
 515   @{$SDFilesInfo{CommonDataFieldsRef}} = ();
 516 
 517   $CheckDataField = ($OptionsInfo{TextOutput} && ($OptionsInfo{DataFieldsMode} =~ /^CompoundID$/i) && ($OptionsInfo{CompoundIDMode} =~ /^DataField$/i)) ? 1 : 0;
 518   $CollectDataFields = ($OptionsInfo{TextOutput} && ($OptionsInfo{DataFieldsMode} =~ /^(All|Common)$/i)) ? 1 : 0;
 519 
 520   FILELIST: for $Index (0 .. $#SDFilesList) {
 521     $SDFile = $SDFilesList[$Index];
 522 
 523     $SDFilesInfo{FileOkay}[$Index] = 0;
 524     $SDFilesInfo{OutFileRoot}[$Index] = '';
 525     $SDFilesInfo{SDOutFileNames}[$Index] = '';
 526     $SDFilesInfo{TextOutFileNames}[$Index] = '';
 527 
 528     $SDFile = $SDFilesList[$Index];
 529     if (!(-e $SDFile)) {
 530       warn "Warning: Ignoring file $SDFile: It doesn't exist\n";
 531       next FILELIST;
 532     }
 533     if (!CheckFileType($SDFile, "sd sdf")) {
 534       warn "Warning: Ignoring file $SDFile: It's not a SD file\n";
 535       next FILELIST;
 536     }
 537 
 538     if ($CheckDataField) {
 539       # Make sure data field exists in SD file..
 540       my($CmpdString, $SpecifiedDataField, @CmpdLines, %DataFieldValues);
 541 
 542       @CmpdLines = ();
 543       open SDFILE, "$SDFile" or die "Error: Couldn't open $SDFile: $! \n";
 544       $CmpdString = ReadCmpdString(\*SDFILE);
 545       close SDFILE;
 546       @CmpdLines = split "\n", $CmpdString;
 547       %DataFieldValues = GetCmpdDataHeaderLabelsAndValues(\@CmpdLines);
 548       $SpecifiedDataField = $OptionsInfo{CompoundID};
 549       if (!exists $DataFieldValues{$SpecifiedDataField}) {
 550         warn "Warning: Ignoring file $SDFile: Data field value, $SpecifiedDataField, using  \"--CompoundID\" option in \"DataField\" \"--CompoundIDMode\" doesn't exist\n";
 551         next FILELIST;
 552       }
 553     }
 554 
 555     $AllDataFieldsRef = '';
 556     $CommonDataFieldsRef = '';
 557     if ($CollectDataFields) {
 558       my($CmpdCount);
 559       open SDFILE, "$SDFile" or die "Error: Couldn't open $SDFile: $! \n";
 560       ($CmpdCount, $AllDataFieldsRef, $CommonDataFieldsRef) = GetAllAndCommonCmpdDataHeaderLabels(\*SDFILE);
 561       close SDFILE;
 562     }
 563 
 564     # Setup output file names...
 565     $FileDir = ""; $FileName = ""; $FileExt = "";
 566     ($FileDir, $FileName, $FileExt) = ParseFileName($SDFile);
 567 
 568     $TextOutFileExt = "csv";
 569     if ($Options{outdelim} =~ /^tab$/i) {
 570       $TextOutFileExt = "tsv";
 571     }
 572     $SDOutFileExt = $FileExt;
 573 
 574     if ($OptionsInfo{OutFileRoot} && (@SDFilesList == 1)) {
 575       my ($RootFileDir, $RootFileName, $RootFileExt) = ParseFileName($OptionsInfo{OutFileRoot});
 576       if ($RootFileName && $RootFileExt) {
 577         $FileName = $RootFileName;
 578       }
 579       else {
 580         $FileName = $OptionsInfo{OutFileRoot};
 581       }
 582       $OutFileRoot = $FileName;
 583     }
 584     else {
 585       $OutFileRoot = "${FileName}PhysicochemicalProperties";
 586     }
 587 
 588     $NewSDFileName = "${OutFileRoot}.${SDOutFileExt}";
 589     $NewTextFileName = "${OutFileRoot}.${TextOutFileExt}";
 590 
 591     if ($OptionsInfo{SDOutput}) {
 592       if ($SDFile =~ /$NewSDFileName/i) {
 593         warn "Warning: Ignoring input file $SDFile: Same output, $NewSDFileName, and input file names.\n";
 594         print "Specify a different name using \"-r --root\" option or use default name.\n";
 595         next FILELIST;
 596       }
 597     }
 598 
 599     if (!$OptionsInfo{OverwriteFiles}) {
 600       # Check SD and text outout files...
 601       if ($OptionsInfo{SDOutput}) {
 602         if (-e $NewSDFileName) {
 603           warn "Warning: Ignoring file $SDFile: The file $NewSDFileName already exists\n";
 604           next FILELIST;
 605         }
 606       }
 607       if ($OptionsInfo{TextOutput}) {
 608         if (-e $NewTextFileName) {
 609           warn "Warning: Ignoring file $SDFile: The file $NewTextFileName already exists\n";
 610           next FILELIST;
 611         }
 612       }
 613     }
 614 
 615     $SDFilesInfo{FileOkay}[$Index] = 1;
 616 
 617     $SDFilesInfo{OutFileRoot}[$Index] = $OutFileRoot;
 618     $SDFilesInfo{SDOutFileNames}[$Index] = $NewSDFileName;
 619     $SDFilesInfo{TextOutFileNames}[$Index] = $NewTextFileName;
 620 
 621     $SDFilesInfo{AllDataFieldsRef}[$Index] = $AllDataFieldsRef;
 622     $SDFilesInfo{CommonDataFieldsRef}[$Index] = $CommonDataFieldsRef;
 623   }
 624 }
 625 
 626 # Process option values...
 627 sub ProcessOptions {
 628   %OptionsInfo = ();
 629 
 630   # Process property name related options...
 631   ProcessPropertyNamesOption();
 632 
 633   # Setup RuleOf3 and RuleOf5 violation calculations...
 634   $OptionsInfo{RuleOf3Violations} = ($Options{ruleof3violations} =~ /^Yes$/i) ? 1 : 0;
 635   $OptionsInfo{RuleOf5Violations} = ($Options{ruleof5violations} =~ /^Yes$/i) ? 1 : 0;
 636 
 637   $OptionsInfo{CompoundIDMode} = $Options{compoundidmode};
 638   $OptionsInfo{CompoundIDLabel} = $Options{compoundidlabel};
 639   $OptionsInfo{DataFieldsMode} = $Options{datafieldsmode};
 640 
 641   my(@SpecifiedDataFields);
 642   @SpecifiedDataFields = ();
 643 
 644   @{$OptionsInfo{SpecifiedDataFields}} = ();
 645   $OptionsInfo{CompoundID} = '';
 646 
 647   if ($Options{datafieldsmode} =~ /^CompoundID$/i) {
 648     if ($Options{compoundidmode} =~ /^DataField$/i) {
 649       if (!$Options{compoundid}) {
 650         die "Error: You must specify a value for \"--CompoundID\" option in \"DataField\" \"--CompoundIDMode\". \n";
 651       }
 652       $OptionsInfo{CompoundID} = $Options{compoundid};
 653     }
 654     elsif ($Options{compoundidmode} =~ /^(LabelPrefix|MolNameOrLabelPrefix)$/i) {
 655       $OptionsInfo{CompoundID} = $Options{compoundid} ? $Options{compoundid} : 'Cmpd';
 656     }
 657   }
 658   elsif ($Options{datafieldsmode} =~ /^Specify$/i) {
 659     if (!$Options{datafields}) {
 660       die "Error: You must specify a value for \"--DataFields\" option in \"Specify\" \"-d, --DataFieldsMode\". \n";
 661     }
 662     @SpecifiedDataFields = split /\,/, $Options{datafields};
 663     push @{$OptionsInfo{SpecifiedDataFields}}, @SpecifiedDataFields;
 664   }
 665 
 666   # Types of hydrogen bonds...
 667   $OptionsInfo{HydrogenBonds} = $Options{hydrogenbonds};
 668 
 669   # Process precision value parameters...
 670   ProcessPrecisionOption();
 671 
 672   # Process rotatable bonds parameters...
 673   ProcessRotatableBondsOption();
 674 
 675   # Process TPSA parameters...
 676   ProcessTPSAOption();
 677 
 678   # Process molecular complexity parameters...
 679   ProcessMolecularComplexityOption();
 680 
 681   $OptionsInfo{Filter} = ($Options{filter} =~ /^Yes$/i) ? 1 : 0;
 682 
 683   $OptionsInfo{KeepLargestComponent} = ($Options{keeplargestcomponent} =~ /^Yes$/i) ? 1 : 0;
 684 
 685   $OptionsInfo{Output} = $Options{output};
 686   $OptionsInfo{SDOutput} = ($Options{output} =~ /^(SD|Both)$/i) ? 1 : 0;
 687   $OptionsInfo{TextOutput} = ($Options{output} =~ /^(Text|Both)$/i) ? 1 : 0;
 688 
 689   $OptionsInfo{OutDelim} = ($Options{outdelim} =~ /tab/i ) ? "\t" : (($Options{outdelim} =~ /semicolon/i) ? "\;" : "\,");
 690   $OptionsInfo{OutQuote} = ($Options{quote} =~ /^Yes$/i) ? 1 : 0;
 691 
 692   $OptionsInfo{OverwriteFiles} = $Options{overwrite} ? 1 : 0;
 693   $OptionsInfo{OutFileRoot} = $Options{root} ? $Options{root} : 0;
 694 }
 695 
 696 # Process property name related options...
 697 #
 698 sub ProcessPropertyNamesOption {
 699 
 700   # Setup supported physicochemical properties...
 701   my($SupportedProperty);
 702 
 703   @{$OptionsInfo{SupportedPropertyNames}} = ();
 704   %{$OptionsInfo{SupportedPropertyNamesMap}} = ();
 705 
 706   @{$OptionsInfo{RuleOf5PropertyNames}} = ();
 707   %{$OptionsInfo{RuleOf5MaxPropertyValuesMap}} = ();
 708 
 709   @{$OptionsInfo{RuleOf3PropertyNames}} = ();
 710   %{$OptionsInfo{RuleOf3MaxPropertyValuesMap}} = ();
 711 
 712   @{$OptionsInfo{DefaultPropertyNames}} = ();
 713 
 714   @{$OptionsInfo{SupportedPropertyNames}} = qw(MolecularWeight ExactMass HeavyAtoms Rings AromaticRings MolecularVolume RotatableBonds HydrogenBondDonors HydrogenBondAcceptors SLogP SMR TPSA Fsp3Carbons Sp3Carbons MolecularComplexity);
 715 
 716   @{$OptionsInfo{RuleOf5PropertyNames}} = qw(MolecularWeight HydrogenBondDonors HydrogenBondAcceptors SLogP);
 717   %{$OptionsInfo{RuleOf5MaxPropertyValuesMap}} = ('MolecularWeight' => 500, 'HydrogenBondDonors' => 5, 'HydrogenBondAcceptors' => 10,  'SLogP' => 5);
 718 
 719   @{$OptionsInfo{RuleOf3PropertyNames}} = qw(MolecularWeight RotatableBonds HydrogenBondDonors HydrogenBondAcceptors SLogP TPSA);
 720   %{$OptionsInfo{RuleOf3MaxPropertyValuesMap}} = ('MolecularWeight' => 300, 'RotatableBonds' => 3, 'HydrogenBondDonors' => 3, 'HydrogenBondAcceptors' => 3, 'SLogP' => 3, 'TPSA' => 60);
 721 
 722   @{$OptionsInfo{DefaultPropertyNames}} = qw(MolecularWeight HeavyAtoms MolecularVolume RotatableBonds HydrogenBondDonors HydrogenBondAcceptors SLogP TPSA);
 723 
 724   for $SupportedProperty (@{$OptionsInfo{SupportedPropertyNames}}) {
 725     $OptionsInfo{SupportedPropertyNamesMap}{lc($SupportedProperty)} = $SupportedProperty;
 726   }
 727 
 728   # Process specified properties....
 729   my($SpecifiedPropertyName, @SpecifiedPropertyNames, %SpecifiedPropertyNamesMap);
 730 
 731   @SpecifiedPropertyNames = ();
 732   %SpecifiedPropertyNamesMap = ();
 733 
 734   @{$OptionsInfo{SpecifiedPropertyNames}} = ();
 735   %{$OptionsInfo{SpecifiedPropertyNamesMap}} = ();
 736 
 737   if ($Options{mode} =~ /^All$/i) {
 738     @SpecifiedPropertyNames = @{$OptionsInfo{SupportedPropertyNames}};
 739   }
 740   elsif ($Options{mode} =~ /^RuleOf5$/i) {
 741     @SpecifiedPropertyNames = @{$OptionsInfo{RuleOf5PropertyNames}};
 742   }
 743   elsif ($Options{mode} =~ /^RuleOf3$/i) {
 744     @SpecifiedPropertyNames = @{$OptionsInfo{RuleOf3PropertyNames}};
 745   }
 746   elsif (IsEmpty($Options{mode})) {
 747     @SpecifiedPropertyNames = @{$OptionsInfo{DefaultPropertyNames}};
 748   }
 749   else {
 750     # Comma delimited lisr of specified property names...
 751     my($Mode, $PropertyName, @PropertyNames, @UnsupportedPropertyNames);
 752 
 753     $Mode = $Options{mode};
 754     $Mode =~ s/ //g;
 755 
 756     @PropertyNames = split ",", $Mode;
 757     @UnsupportedPropertyNames = ();
 758 
 759     for $PropertyName (@PropertyNames) {
 760       if (exists($OptionsInfo{SupportedPropertyNamesMap}{lc($PropertyName)})) {
 761         push @SpecifiedPropertyNames, $PropertyName;
 762       }
 763       else {
 764         push @UnsupportedPropertyNames, $PropertyName;
 765       }
 766     }
 767     if (@UnsupportedPropertyNames) {
 768       if (@UnsupportedPropertyNames > 1) {
 769         warn "Error: The physicochemical property names specified - ", JoinWords(\@UnsupportedPropertyNames, ", ", 0)," - for option \"-m --mode\" are not valid.\n";
 770       }
 771       else {
 772         warn "Error: The physicochemical property name specified, @UnsupportedPropertyNames , for option \"-m --mode\" is not valid.\n";
 773       }
 774       die "Allowed values:", JoinWords(\@{$OptionsInfo{SupportedPropertyNames}}, ", ", 0), "\n";
 775     }
 776     if (!@SpecifiedPropertyNames) {
 777       die "Error: No valid physicochemical property names specified for option \"-m --mode\".\n";
 778     }
 779   }
 780 
 781   # Set up specified property names map...
 782   PROPERTY: for $SpecifiedPropertyName (@SpecifiedPropertyNames) {
 783     if (exists $SpecifiedPropertyNamesMap{lc($SpecifiedPropertyName)}) {
 784       warn "Warning: The physicochemical property name, $SpecifiedPropertyName, is specified multiple times as value of option \"-m --mode\" .\n";
 785       next PROPERTY;
 786     }
 787     # Canonical specified property name...
 788     $SpecifiedPropertyNamesMap{lc($SpecifiedPropertyName)} = $OptionsInfo{SupportedPropertyNamesMap}{lc($SpecifiedPropertyName)};
 789   }
 790 
 791   # Make sure for calculation of  RuleOf3Violations, all appropriate property names are specified...
 792   if ($Options{ruleof3violations} =~ /^Yes$/i && $Options{mode} =~ /^RuleOf5$/i) {
 793     die "Error: The value specified, $Options{ruleof3violations}, for  \"--RuleOf3Violations\" option in \"RuleOf5\" \"-m --Mode\" is not valid. You must specify RuleOf3 value for \"-m --Mode\" to calculate RuleOf3 violations.\n";
 794   }
 795 
 796   if ($Options{ruleof3violations} =~ /^Yes$/i) {
 797     my($RuleOf3PropertyName, @MissingRuleOf3Names);
 798 
 799     @MissingRuleOf3Names = ();
 800     PROPERTY: for $RuleOf3PropertyName (@{$OptionsInfo{RuleOf3PropertyNames}}) {
 801       if (exists $SpecifiedPropertyNamesMap{lc($RuleOf3PropertyName)}) {
 802         next PROPERTY;
 803       }
 804       push @MissingRuleOf3Names, $RuleOf3PropertyName;
 805 
 806       # Add property name to specified properties names list and map...
 807       push @SpecifiedPropertyNames, $RuleOf3PropertyName;
 808       $SpecifiedPropertyNamesMap{lc($RuleOf3PropertyName)} = $OptionsInfo{SupportedPropertyNamesMap}{lc($RuleOf3PropertyName)};
 809     }
 810     if (@MissingRuleOf3Names) {
 811       warn "Warning: The following physicochemical property names not specified in \"-m --Mode\" option are required for calculating RuleOf3Violations and have been added to the list of property names: @MissingRuleOf3Names\n";
 812     }
 813   }
 814 
 815   # Make sure for calculation of  RuleOf5Violations, all appropriate property names are specified...
 816   if ($Options{ruleof5violations} =~ /^Yes$/i && $Options{mode} =~ /^RuleOf3$/i) {
 817     die "Error: The value specified, $Options{ruleof5violations}, for  \"--RuleOf5Violations\" option in \"RuleOf3\" \"-m --Mode\" is not valid. You must specify RuleOf5 value for \"-m --Mode\" to calculate RuleOf5 violations.\n";
 818   }
 819 
 820   if ($Options{ruleof5violations} =~ /^Yes$/i) {
 821     my($RuleOf5PropertyName, @MissingRuleOf5Names);
 822 
 823     @MissingRuleOf5Names = ();
 824     PROPERTY: for $RuleOf5PropertyName (@{$OptionsInfo{RuleOf5PropertyNames}}) {
 825       if (exists $SpecifiedPropertyNamesMap{lc($RuleOf5PropertyName)}) {
 826         next PROPERTY;
 827       }
 828       push @MissingRuleOf5Names, $RuleOf5PropertyName;
 829 
 830       # Add property name to specified properties names list and map...
 831       push @SpecifiedPropertyNames, $RuleOf5PropertyName;
 832       $SpecifiedPropertyNamesMap{lc($RuleOf5PropertyName)} = $OptionsInfo{SupportedPropertyNamesMap}{lc($RuleOf5PropertyName)};
 833     }
 834     if (@MissingRuleOf5Names) {
 835       warn "Warning: The following physicochemical property names not specified in \"-m --Mode\" option are required for calculating RuleOf5Violations and have been added to the list of property names: @MissingRuleOf5Names\n";
 836     }
 837   }
 838   $OptionsInfo{Mode} = $Options{mode};
 839 
 840   # Setup canonical specified property names corresponding to supported names in mixed case...
 841   my(@SpecifiedCanonicalPropertyNames);
 842 
 843   @SpecifiedCanonicalPropertyNames = ();
 844   for $SpecifiedPropertyName (@SpecifiedPropertyNames) {
 845     push @SpecifiedCanonicalPropertyNames, $SpecifiedPropertyNamesMap{lc($SpecifiedPropertyName)};
 846   }
 847   @{$OptionsInfo{SpecifiedPropertyNames}} = @SpecifiedCanonicalPropertyNames;
 848   %{$OptionsInfo{SpecifiedPropertyNamesMap}} = %SpecifiedPropertyNamesMap;
 849 
 850   # Based on specified property names, figure out whether hydrogens need to be added before
 851   # calculation of properties...
 852   #
 853   $OptionsInfo{AddHydrogens} = 0;
 854   if (exists($SpecifiedPropertyNamesMap{lc('MolecularVolume')}) || exists($SpecifiedPropertyNamesMap{lc('SLogP')}) || exists($SpecifiedPropertyNamesMap{lc('SMR')})) {
 855     $OptionsInfo{AddHydrogens} = 1;
 856   }
 857 }
 858 
 859 # Process precision option...
 860 #
 861 sub ProcessPrecisionOption {
 862   my($ParameterName, $ParameterValue, %PrecisionParametersMap, %PrecisionParameterNamesMap);
 863 
 864   %{$OptionsInfo{PrecisionParametersMap}} = ();
 865 
 866   %PrecisionParametersMap = ('WeightPrecision' => 2, 'MassPrecision' => 4);
 867   %PrecisionParameterNamesMap = ('molecularweight' => 'WeightPrecision', 'exactmass' => 'MassPrecision');
 868 
 869   if ($Options{precision}) {
 870     # Process specified values...
 871     my($Index, $SpecifiedPrecision, @SpecifiedPrecisionValuePairs);
 872 
 873     $SpecifiedPrecision = $Options{precision};
 874     $SpecifiedPrecision =~ s/ //g;
 875     @SpecifiedPrecisionValuePairs = split ",", $SpecifiedPrecision;
 876     if (@SpecifiedPrecisionValuePairs % 2) {
 877       die "Error: Invalid number of values specified using \"--Precision\" option: It must contain even number of values.\n";
 878     }
 879     for ($Index = 0; (($Index + 1) < @SpecifiedPrecisionValuePairs); $Index += 2 ) {
 880       $ParameterName = $SpecifiedPrecisionValuePairs[$Index];
 881       $ParameterValue = $SpecifiedPrecisionValuePairs[$Index + 1];
 882       if (!exists $PrecisionParameterNamesMap{lc($ParameterName)}) {
 883         die "Error: The precision parameter name specified, $ParameterName, for option \"--Precision\" is not valid.\n";
 884       }
 885       if (!IsPositiveInteger($ParameterValue)) {
 886         die "Error: The parameter value specified, $ParameterValue, for parameter name, $ParameterName in option \"--Precision\" is not valid. Allowed values: positive integer. \n";
 887       }
 888       $ParameterName = $PrecisionParameterNamesMap{lc($ParameterName)};
 889       $PrecisionParametersMap{$ParameterName} = $ParameterValue;
 890     }
 891   }
 892   $OptionsInfo{Precision} = $Options{precision};
 893   %{$OptionsInfo{PrecisionParametersMap}} = %PrecisionParametersMap;
 894 }
 895 
 896 # Process rotatable bonds option...
 897 sub ProcessRotatableBondsOption {
 898   my($ParameterName, $ParameterValue, %RotatableBondsParametersMap, %RotatableBondsParameterNamesMap);
 899 
 900   %{$OptionsInfo{RotatableBondsParametersMap}} = ();
 901   %RotatableBondsParametersMap = ('IgnoreTerminalBonds' => 1, 'IgnoreBondsToTripleBonds' => 1, 'IgnoreAmideBonds' => 1, 'IgnoreThioamideBonds' => 1, 'IgnoreSulfonamideBonds' => 1);
 902 
 903   for $ParameterName (keys %RotatableBondsParametersMap) {
 904     $RotatableBondsParameterNamesMap{lc($ParameterName)} = $ParameterName;
 905   }
 906 
 907   if ($Options{rotatablebonds}) {
 908     # Process specified values...
 909     my($Index, $SpecifiedRotatableBonds, @SpecifiedRotatableBondsValuePairs);
 910 
 911     $SpecifiedRotatableBonds = $Options{rotatablebonds};
 912     $SpecifiedRotatableBonds =~ s/ //g;
 913     @SpecifiedRotatableBondsValuePairs = split ",", $SpecifiedRotatableBonds;
 914     if (@SpecifiedRotatableBondsValuePairs % 2) {
 915       die "Error: Invalid number of values specified using \"--RotatableBonds\" option: It must contain even number of values.\n";
 916     }
 917     for ($Index = 0; (($Index + 1) < @SpecifiedRotatableBondsValuePairs); $Index += 2 ) {
 918       $ParameterName = $SpecifiedRotatableBondsValuePairs[$Index];
 919       $ParameterValue = $SpecifiedRotatableBondsValuePairs[$Index + 1];
 920       if (!exists $RotatableBondsParameterNamesMap{lc($ParameterName)}) {
 921         die "Error: The rotatable bonds parameter name specified, $ParameterName, for option \"--RotatableBonds\" is not valid.\n";
 922       }
 923       if ($ParameterValue !~ /^(Yes|No)$/i) {
 924         die "Error: The parameter value specified, $ParameterValue, for parameter name, $ParameterName in option \"--RotatableBonds\" is not valid. Allowed values: Yes or No. \n";
 925       }
 926       $ParameterName = $RotatableBondsParameterNamesMap{lc($ParameterName)};
 927       $ParameterValue = ($ParameterValue =~ /^Yes$/i) ? 1 : 0;
 928       $RotatableBondsParametersMap{$ParameterName} = $ParameterValue;
 929     }
 930   }
 931   $OptionsInfo{RotatableBonds} = $Options{rotatablebonds};
 932   %{$OptionsInfo{RotatableBondsParametersMap}} = %RotatableBondsParametersMap;
 933 }
 934 
 935 # Process TPSA option...
 936 #
 937 sub ProcessTPSAOption {
 938   my($ParameterName, $ParameterValue, %TPSAParametersMap, %TPSAParameterNamesMap);
 939 
 940   %{$OptionsInfo{TPSAParametersMap}} = ();
 941 
 942   %TPSAParametersMap = ('IgnorePhosphorus' => 1, 'IgnoreSulfur' => 1);
 943   for $ParameterName (keys %TPSAParametersMap) {
 944     $TPSAParameterNamesMap{lc($ParameterName)} = $ParameterName;
 945   }
 946 
 947   if ($Options{tpsa}) {
 948     # Process specified values...
 949     my($Index, $SpecifiedTPSA, @SpecifiedTPSAValuePairs);
 950 
 951     $SpecifiedTPSA = $Options{tpsa};
 952     $SpecifiedTPSA =~ s/ //g;
 953     @SpecifiedTPSAValuePairs = split ",", $SpecifiedTPSA;
 954     if (@SpecifiedTPSAValuePairs % 2) {
 955       die "Error: Invalid number of values specified using \"--TPSA\" option: It must contain even number of values.\n";
 956     }
 957     for ($Index = 0; (($Index + 1) < @SpecifiedTPSAValuePairs); $Index += 2 ) {
 958       $ParameterName = $SpecifiedTPSAValuePairs[$Index];
 959       $ParameterValue = $SpecifiedTPSAValuePairs[$Index + 1];
 960       if (!exists $TPSAParameterNamesMap{lc($ParameterName)}) {
 961         die "Error: The TPSA parameter name specified, $ParameterName, for option \"--TPSA\" is not valid.\n";
 962       }
 963       if ($ParameterValue !~ /^(Yes|No)$/i) {
 964         die "Error: The parameter value specified, $ParameterValue, for parameter name, $ParameterName in option \"--TPSA\" is not valid. Allowed values: Yes or No. \n";
 965       }
 966       $ParameterName = $TPSAParameterNamesMap{lc($ParameterName)};
 967       $ParameterValue = ($ParameterValue =~ /^Yes$/i) ? 1 : 0;
 968       $TPSAParametersMap{$ParameterName} = $ParameterValue;
 969     }
 970   }
 971   $OptionsInfo{TPSA} = $Options{tpsa};
 972   %{$OptionsInfo{TPSAParametersMap}} = %TPSAParametersMap;
 973 }
 974 
 975 # Process molecular complexity parameters...
 976 #
 977 sub ProcessMolecularComplexityOption {
 978   my($MolecularComplexityType, $ParameterName, $ParameterValue, @ParameterNames, @ParameterValues, @AtomIdentifierTypeParameters, %ComplexityParametersMap, %ComplexityParameterNamesMap);
 979 
 980   %{$OptionsInfo{MolecularComplexityParametersMap}} = ();
 981 
 982   %ComplexityParametersMap = ('MolecularComplexityType' => '', 'AtomIdentifierType' => '',
 983                               'AtomicInvariantsToUse' => '', 'FunctionalClassesToUse' => '',
 984                               'MACCSKeysSize' => '166', 'NeighborhoodRadius' => '2',
 985                               'MinPathLength' => '1', 'MaxPathLength' => '8', 'UseBondSymbols' => '1',
 986                               'MinDistance' => '1', 'MaxDistance' => '10', 'UseTriangleInequality' => '',
 987                               'DistanceBinSize' => '2', 'NormalizationMethodology' => 'None');
 988 
 989   %ComplexityParameterNamesMap = ();
 990   for $ParameterName (keys %ComplexityParametersMap) {
 991     $ComplexityParameterNamesMap{lc($ParameterName)} = $ParameterName;
 992   }
 993 
 994   if ($Options{molecularcomplexity}) {
 995     # Process specified values...
 996     my($Index, $SpecifiedComplexity, @SpecifiedComplexityValuePairs);
 997 
 998     $SpecifiedComplexity = $Options{molecularcomplexity};
 999 
1000     @SpecifiedComplexityValuePairs = split ",", $SpecifiedComplexity;
1001     if (@SpecifiedComplexityValuePairs % 2) {
1002       die "Error: Invalid number of values specified using \"--MolecularComplexity\" option: It must contain even number of values.\n";
1003     }
1004 
1005     for ($Index = 0; (($Index + 1) < @SpecifiedComplexityValuePairs); $Index += 2 ) {
1006       $ParameterName = $SpecifiedComplexityValuePairs[$Index];
1007       $ParameterValue = $SpecifiedComplexityValuePairs[$Index + 1];
1008 
1009       $ParameterName = RemoveLeadingAndTrailingWhiteSpaces($ParameterName);
1010       $ParameterValue = RemoveLeadingAndTrailingWhiteSpaces($ParameterValue);
1011 
1012       if (!exists $ComplexityParameterNamesMap{lc($ParameterName)}) {
1013         die "Error: The molecular complexity parameter name specified, $ParameterName, for option \"--MolecularComplexity\" is not valid.\n";
1014       }
1015       $ParameterName = $ComplexityParameterNamesMap{lc($ParameterName)};
1016 
1017       if ($ParameterName =~ /^AtomicInvariantsToUse$/i) {
1018         my($AtomSymbolFound);
1019 
1020         $AtomSymbolFound = 0;
1021         @ParameterValues = split(' ', $ParameterValue);
1022         for $ParameterValue (@ParameterValues) {
1023           if (!AtomicInvariantsAtomTypes::IsAtomicInvariantAvailable($ParameterValue)) {
1024             die "Error: The atomic invariant specified, $ParameterValue, for  AtomicInvariantsToUse in option \"--MolecularComplexity\" is not valid.\n";
1025           }
1026           if ($ParameterValue =~ /^(AS|AtomSymbol)$/i) {
1027             $AtomSymbolFound = 1;
1028           }
1029         }
1030         if (!$AtomSymbolFound) {
1031           die "Error: The atomic invariants specified using AtomicInvariantsToUse in option \"--MolecularComplexity\" is not valid: AtomicInvariant atom symbol, AS or AtomSymbol, must be specified.\n";
1032         }
1033         $ParameterValue = JoinWords(\@ParameterValues, ",", 0);
1034       }
1035       elsif ($ParameterName =~ /^FunctionalClassesToUse$/i) {
1036         @ParameterValues = split(' ', $ParameterValue);
1037         for $ParameterValue (@ParameterValues) {
1038           if (!FunctionalClassAtomTypes::IsFunctionalClassAvailable($ParameterValue)) {
1039             die "Error: The functional class specified, $ParameterValue, for  FunctionalClassesToUse in option \"--MolecularComplexity\" is not valid.\n";
1040           }
1041         }
1042         $ParameterValue = JoinWords(\@ParameterValues, ",", 0);
1043       }
1044       else {
1045         if ($ParameterValue =~ / /) {
1046           $ParameterValue =~ s/ //g;
1047         }
1048         if ($ParameterValue =~ /^(Yes|No)$/i) {
1049           $ParameterValue = ($ParameterValue =~ /^Yes$/i) ? 1 : 0;
1050         }
1051       }
1052 
1053       if ($ParameterName =~ /^MolecularComplexityType$/i) {
1054         if ($ParameterValue !~ /^(AtomTypesFingerprints|ExtendedConnectivityFingerprints|MACCSKeys|PathLengthFingerprints|TopologicalAtomPairsFingerprints|TopologicalAtomTripletsFingerprints|TopologicalAtomTorsionsFingerprints|TopologicalPharmacophoreAtomPairsFingerprints|TopologicalPharmacophoreAtomTripletsFingerprints)$/i) {
1055           die "Error: The parameter value specified, $ParameterValue, for parameter name, $ParameterName in option \"--MolecularComplexity\" is not valid. Allowed values: AtomTypesFingerprints, ExtendedConnectivityFingerprints, MACCSKeys, PathLengthFingerprints, TopologicalAtomPairsFingerprints, TopologicalAtomTripletsFingerprints, TopologicalAtomTorsionsFingerprints, TopologicalPharmacophoreAtomPairsFingerprints, or TopologicalPharmacophoreAtomTripletsFingerprints..\n";
1056         }
1057       }
1058       elsif ($ParameterName =~ /^AtomIdentifierType$/i) {
1059         if ($ParameterValue !~ /^(AtomicInvariantsAtomTypes|FunctionalClassAtomTypes|DREIDINGAtomTypes|EStateAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
1060           die "Error: The parameter value specified, $ParameterValue, for parameter name, $ParameterName in option \"--MolecularComplexity\" is not valid. Supported atom identifier types in current release of MayaChemTools: AtomicInvariantsAtomTypes, FunctionalClassAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes and UFFAtomTypes.\n";
1061         }
1062       }
1063       elsif ($ParameterName =~ /^(MACCSKeysSize|MinPathLength|MaxPathLength|MinDistance|MaxDistance|DistanceBinSize)$/i) {
1064         if (!IsPositiveInteger($ParameterValue)) {
1065           die "Error: The parameter value specified, $ParameterValue, for parameter name, $ParameterName in option \"--MolecularComplexity\" is not valid. Allowed values: positive integer. \n";
1066         }
1067       }
1068       elsif ($ParameterName =~ /^NeighborhoodRadius$/i) {
1069         if (!(IsInteger($ParameterValue) && $ParameterValue >=0)) {
1070           die "Error: The parameter value specified, $ParameterValue, for parameter name, $ParameterName in option \"--MolecularComplexity\" is not valid. Allowed values: 0 or positive integer. \n";
1071         }
1072       }
1073       elsif ($ParameterName =~ /^NormalizationMethodology$/i) {
1074         if ($ParameterValue !~ /^(None|ByHeavyAtomsCount|ByPossibleKeysCount)$/i) {
1075           die "Error: The parameter value specified, $ParameterValue, for parameter name, $ParameterName in option \"--MolecularComplexity\" is not valid. Allowed values: None, ByHeavyAtomsCount, or ByPossibleKeysCount\n";
1076         }
1077       }
1078       $ComplexityParametersMap{$ParameterName} = $ParameterValue;
1079     }
1080 
1081     if ($ComplexityParametersMap{MACCSKeysSize} !~ /^(166|322)$/i) {
1082       die "Error: The parameter value specified, $ComplexityParametersMap{MACCSKeysSize}, for parameter name, MACCSKeysSize in option \"--MolecularComplexity\" is not valid. Allowed values: 166 or 322\n";
1083     }
1084     if ($ComplexityParametersMap{MinPathLength} > $ComplexityParametersMap{MaxPathLength}) {
1085       die "Error: The parameter value specified for MinPathLength, $ComplexityParametersMap{MinPathLength}, must be <= MaxPathLength, $ComplexityParametersMap{MaxPathLength} ...\n";
1086     }
1087     if ($ComplexityParametersMap{MinDistance} > $ComplexityParametersMap{MaxDistance}) {
1088       die "Error: The parameter value specified for MinDistance, $ComplexityParametersMap{MinDistance}, must be <= MaxDistance, $ComplexityParametersMap{MaxDistance} ...\n";
1089     }
1090   }
1091 
1092   # Set default parameter values...
1093 
1094   if (IsEmpty($ComplexityParametersMap{MolecularComplexityType})) {
1095     $ComplexityParametersMap{MolecularComplexityType} = 'MACCSKeys';
1096   }
1097   $MolecularComplexityType = $ComplexityParametersMap{MolecularComplexityType};
1098 
1099 
1100   if (IsEmpty($ComplexityParametersMap{AtomIdentifierType})) {
1101     $ComplexityParametersMap{AtomIdentifierType} = ($MolecularComplexityType =~ /^(TopologicalPharmacophoreAtomPairsFingerprints|TopologicalPharmacophoreAtomTripletsFingerprints)$/i) ? "FunctionalClassAtomTypes" : "AtomicInvariantsAtomTypes";
1102   }
1103 
1104   if (IsEmpty($ComplexityParametersMap{AtomicInvariantsToUse})) {
1105     my($AtomicInvariantsToUse);
1106 
1107     if ($MolecularComplexityType =~ /^(AtomTypesFingerprints|TopologicalAtomPairsFingerprints|TopologicalAtomTripletsFingerprints|TopologicalAtomTorsionsFingerprints)$/i) {
1108       $AtomicInvariantsToUse = "AS,X,BO,H,FC";
1109     }
1110     elsif ($MolecularComplexityType =~ /^ExtendedConnectivityFingerprints$/i) {
1111       $AtomicInvariantsToUse = "AS,X,BO,H,FC,MN";
1112     }
1113     else {
1114       $AtomicInvariantsToUse = "AS";
1115     }
1116     $ComplexityParametersMap{AtomicInvariantsToUse} = $AtomicInvariantsToUse;
1117   }
1118 
1119   if (IsEmpty($ComplexityParametersMap{FunctionalClassesToUse})) {
1120     my($FunctionalClassesToUse);
1121 
1122     if ($MolecularComplexityType =~ /^TopologicalPharmacophoreAtomPairsFingerprints$/i) {
1123       $FunctionalClassesToUse = "HBD,HBA,PI,NI,H";
1124     }
1125     elsif ($MolecularComplexityType =~ /^TopologicalPharmacophoreAtomTripletsFingerprints$/i) {
1126       $FunctionalClassesToUse = "HBD,HBA,PI,NI,H,Ar";
1127     }
1128     else {
1129       $FunctionalClassesToUse = "HBD,HBA,PI,NI,H,Ar,Hal";
1130     }
1131     $ComplexityParametersMap{FunctionalClassesToUse} = $FunctionalClassesToUse;
1132   }
1133 
1134   my(@AtomicInvariantsToUse);
1135   @AtomicInvariantsToUse = split ',', $ComplexityParametersMap{AtomicInvariantsToUse};
1136   $ComplexityParametersMap{AtomicInvariantsToUse} = \@AtomicInvariantsToUse;
1137 
1138   my(@FunctionalClassesToUse);
1139   @FunctionalClassesToUse = split ',', $ComplexityParametersMap{FunctionalClassesToUse};
1140   $ComplexityParametersMap{FunctionalClassesToUse} = \@FunctionalClassesToUse;
1141 
1142   if (IsEmpty($ComplexityParametersMap{UseTriangleInequality})) {
1143     $ComplexityParametersMap{UseTriangleInequality} = 0;
1144     if ($MolecularComplexityType =~ /^TopologicalPharmacophoreAtomTripletsFingerprints$/i) {
1145       $ComplexityParametersMap{UseTriangleInequality} = 1;
1146     }
1147   }
1148 
1149   if ($MolecularComplexityType =~ /^(TopologicalPharmacophoreAtomPairsFingerprints|TopologicalPharmacophoreAtomTripletsFingerprints)$/i) {
1150     if ($ComplexityParametersMap{AtomIdentifierType} !~ /^FunctionalClassAtomTypes$/i) {
1151       die "Error: The parameter value specified for AtomIdentifierType, $ComplexityParametersMap{AtomIdentifierType}, in option \"--MolecularComplexity\" is not valid for MolecularComplexityType, $MolecularComplexityType: Allowed value: FunctionalClassAtomTypes...\n";
1152     }
1153   }
1154 
1155   # Set up approprate paremeter names for specified molecular complexity...
1156 
1157   @ParameterNames = ();
1158   push @ParameterNames, 'MolecularComplexityType';
1159 
1160   @AtomIdentifierTypeParameters = ();
1161   push @AtomIdentifierTypeParameters, 'AtomIdentifierType';
1162   if ($ComplexityParametersMap{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
1163     push @AtomIdentifierTypeParameters, 'AtomicInvariantsToUse';
1164   }
1165   elsif ($ComplexityParametersMap{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
1166     push @AtomIdentifierTypeParameters, 'FunctionalClassesToUse';
1167   }
1168 
1169   COMPLEXITYTYPE: {
1170     if ($MolecularComplexityType =~ /^AtomTypesFingerprints$/i) {
1171       push @ParameterNames, @AtomIdentifierTypeParameters;
1172       last COMPLEXITYTYPE;
1173     }
1174     if ($MolecularComplexityType =~ /^ExtendedConnectivityFingerprints$/i) {
1175       push @ParameterNames, @AtomIdentifierTypeParameters;
1176       push @ParameterNames, ('NeighborhoodRadius', 'NormalizationMethodology');
1177       last COMPLEXITYTYPE;
1178     }
1179     if ($MolecularComplexityType =~ /^MACCSKeys$/i) {
1180       push @ParameterNames, 'MACCSKeysSize';
1181       last COMPLEXITYTYPE;
1182     }
1183     if ($MolecularComplexityType =~ /^PathLengthFingerprints$/i) {
1184       push @ParameterNames, @AtomIdentifierTypeParameters;
1185       push @ParameterNames, ('MinPathLength', 'MaxPathLength', 'UseBondSymbols');
1186       last COMPLEXITYTYPE;
1187     }
1188     if ($MolecularComplexityType =~ /^TopologicalAtomPairsFingerprints$/i) {
1189       push @ParameterNames, @AtomIdentifierTypeParameters;
1190       push @ParameterNames, ('MinDistance', 'MaxDistance');
1191       last COMPLEXITYTYPE;
1192     }
1193     if ($MolecularComplexityType =~ /^TopologicalAtomTripletsFingerprints$/i) {
1194       push @ParameterNames, @AtomIdentifierTypeParameters;
1195       push @ParameterNames, ('MinDistance', 'MaxDistance', 'UseTriangleInequality');
1196       last COMPLEXITYTYPE;
1197     }
1198     if ($MolecularComplexityType =~ /^TopologicalAtomTorsionsFingerprints$/i) {
1199       push @ParameterNames, @AtomIdentifierTypeParameters;
1200       last COMPLEXITYTYPE;
1201     }
1202     if ($MolecularComplexityType =~ /^TopologicalPharmacophoreAtomPairsFingerprints$/i) {
1203       push @ParameterNames, ('AtomIdentifierType', 'FunctionalClassesToUse', 'MinDistance', 'MaxDistance', 'NormalizationMethodology');
1204       last COMPLEXITYTYPE;
1205     }
1206     if ($MolecularComplexityType =~ /^TopologicalPharmacophoreAtomTripletsFingerprints$/i) {
1207       push @ParameterNames, ('AtomIdentifierType', 'FunctionalClassesToUse', 'MinDistance', 'MaxDistance', 'UseTriangleInequality', 'NormalizationMethodology', 'DistanceBinSize');
1208       last COMPLEXITYTYPE;
1209     }
1210     die "Error: The parameter value specified, $ParameterValue, for parameter name MolecularComplexityType using \"--MolecularComplexity\" is not valid.\n";
1211   }
1212 
1213   $OptionsInfo{MolecularComplexity} = $Options{molecularcomplexity};
1214 
1215   %{$OptionsInfo{MolecularComplexityParametersMap}} = ();
1216   for $ParameterName (@ParameterNames) {
1217     $ParameterValue = $ComplexityParametersMap{$ParameterName};
1218     $OptionsInfo{MolecularComplexityParametersMap}{$ParameterName} = $ParameterValue;
1219   }
1220 }
1221 
# Setup script usage and retrieve command line arguments specified using various options...
#
# Seeds %Options with default values, lets GetOptions() overwrite them from the
# command line, changes into the working directory when one is specified, and
# then checks each option which has a fixed set of allowed values. Dies with an
# error message on a command line that cannot be parsed, a working directory
# that is not a directory or cannot be entered, or the first invalid value.
#
sub SetupScriptUsage {
  my($Check, $OptionName, $AllowedPattern, $OptionLabel, $AllowedValues, @OptionSpecs, @ValueChecks);

  # Retrieve all the options. Options initialized to an empty string get their
  # default values later...
  %Options = (
    'compoundidmode' => 'LabelPrefix',
    'compoundidlabel' => 'CompoundID',
    'datafieldsmode' => 'CompoundID',
    'filter' => 'Yes',
    'hydrogenbonds' => 'HBondsType2',
    'keeplargestcomponent' => 'Yes',
    'mode' => '',
    'molecularcomplexity' => '',
    'precision' => '',
    'output' => 'text',
    'outdelim' => 'comma',
    'quote' => 'yes',
    'rotatablebonds' => '',
    'ruleof3violations' => 'No',
    'ruleof5violations' => 'No',
    'tpsa' => '',
  );

  @OptionSpecs = qw(compoundid=s compoundidlabel=s compoundidmode=s datafields=s datafieldsmode|d=s filter|f=s help|h hydrogenbonds=s keeplargestcomponent|k=s mode|m=s molecularcomplexity=s outdelim=s output=s overwrite|o precision=s rotatablebonds=s ruleof3violations=s ruleof5violations=s quote|q=s root|r=s tpsa=s workingdir|w=s);

  if (!GetOptions(\%Options, @OptionSpecs)) {
    die "\nTo get a list of valid options and their values, use \"$ScriptName -h\" or\n\"perl -S $ScriptName -h\" command and try again...\n";
  }

  if ($Options{workingdir}) {
    -d $Options{workingdir}
      or die "Error: The value specified, $Options{workingdir}, for option \"-w --workingdir\" is not a directory name.\n";
    chdir $Options{workingdir} or die "Error: Couldn't chdir $Options{workingdir}: $! \n";
  }

  # Option name, pattern its value must match, option label and list of allowed
  # values as shown in the error message; checked in the order listed...
  @ValueChecks = (
    ['compoundidmode', qr/^(DataField|MolName|LabelPrefix|MolNameOrLabelPrefix)$/i, '--CompoundIDMode', 'DataField, MolName, LabelPrefix or MolNameOrLabelPrefix'],
    ['datafieldsmode', qr/^(All|Common|Specify|CompoundID)$/i, '-d, --DataFieldsMode', 'All, Common, Specify or CompoundID'],
    ['filter', qr/^(Yes|No)$/i, '-f, --Filter', 'Yes or No'],
    ['hydrogenbonds', qr/^(HBondsType1|HydrogenBondsType1|HBondsType2|HydrogenBondsType2)$/i, '--HydrogenBonds', 'HBondsType1, HydrogenBondsType1, HBondsType2, HydrogenBondsType2'],
    ['keeplargestcomponent', qr/^(Yes|No)$/i, '-k, --KeepLargestComponent', 'Yes or No'],
    ['output', qr/^(SD|text|both)$/i, '--output', 'SD, text, or both'],
    ['outdelim', qr/^(comma|semicolon|tab)$/i, '--outdelim', 'comma, tab, or semicolon'],
    ['quote', qr/^(Yes|No)$/i, '-q --quote', 'Yes or No'],
    ['ruleof3violations', qr/^(Yes|No)$/i, '--RuleOf3Violations', 'Yes or No'],
    ['ruleof5violations', qr/^(Yes|No)$/i, '--RuleOf5Violations', 'Yes or No'],
  );

  for $Check (@ValueChecks) {
    ($OptionName, $AllowedPattern, $OptionLabel, $AllowedValues) = @{$Check};
    if ($Options{$OptionName} !~ $AllowedPattern) {
      die "Error: The value specified, $Options{$OptionName}, for option \"$OptionLabel\" is not valid. Allowed values: $AllowedValues\n";
    }
  }
}
1300