MayaChemTools

   1 #!/usr/bin/perl -w
   2 #
   3 # File: PathLengthFingerprints.pl
   4 # Author: Manish Sud <msud@san.rr.com>
   5 #
   6 # Copyright (C) 2024 Manish Sud. All rights reserved.
   7 #
   8 # This file is part of MayaChemTools.
   9 #
  10 # MayaChemTools is free software; you can redistribute it and/or modify it under
  11 # the terms of the GNU Lesser General Public License as published by the Free
  12 # Software Foundation; either version 3 of the License, or (at your option) any
  13 # later version.
  14 #
  15 # MayaChemTools is distributed in the hope that it will be useful, but without
  16 # any warranty; without even the implied warranty of merchantability of fitness
  17 # for a particular purpose.  See the GNU Lesser General Public License for more
  18 # details.
  19 #
  20 # You should have received a copy of the GNU Lesser General Public License
  21 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  22 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  23 # Boston, MA, 02111-1307, USA.
  24 #
  25 
  26 use strict;
  27 use FindBin; use lib "$FindBin::Bin/../lib";
  28 use Getopt::Long;
  29 use File::Basename;
  30 use Text::ParseWords;
  31 use Benchmark;
  32 use FileUtil;
  33 use TextUtil;
  34 use SDFileUtil;
  35 use MoleculeFileIO;
  36 use FileIO::FingerprintsSDFileIO;
  37 use FileIO::FingerprintsTextFileIO;
  38 use FileIO::FingerprintsFPFileIO;
  39 use AtomTypes::AtomicInvariantsAtomTypes;
  40 use AtomTypes::FunctionalClassAtomTypes;
  41 use Fingerprints::PathLengthFingerprints;
  42 
  43 my($ScriptName, %Options, $StartTime, $EndTime, $TotalTime);
  44 
  45 # Autoflush STDOUT
  46 $| = 1;
  47 
  48 # Starting message...
  49 $ScriptName = basename($0);
  50 print "\n$ScriptName: Starting...\n\n";
  51 $StartTime = new Benchmark;
  52 
  53 # Get the options and setup script...
  54 SetupScriptUsage();
  55 if ($Options{help} || @ARGV < 1) {
  56   die GetUsageFromPod("$FindBin::Bin/$ScriptName");
  57 }
  58 
  59 my(@SDFilesList);
  60 @SDFilesList = ExpandFileNames(\@ARGV, "sdf sd");
  61 
  62 # Process options...
  63 print "Processing options...\n";
  64 my(%OptionsInfo);
  65 ProcessOptions();
  66 
  67 # Setup information about input files...
  68 print "Checking input SD file(s)...\n";
  69 my(%SDFilesInfo);
  70 RetrieveSDFilesInfo();
  71 
  72 # Process input files..
  73 my($FileIndex);
  74 if (@SDFilesList > 1) {
  75   print "\nProcessing SD files...\n";
  76 }
  77 for $FileIndex (0 .. $#SDFilesList) {
  78   if ($SDFilesInfo{FileOkay}[$FileIndex]) {
  79     print "\nProcessing file $SDFilesList[$FileIndex]...\n";
  80     GeneratePathLengthFingerprints($FileIndex);
  81   }
  82 }
  83 print "\n$ScriptName:Done...\n\n";
  84 
  85 $EndTime = new Benchmark;
  86 $TotalTime = timediff ($EndTime, $StartTime);
  87 print "Total time: ", timestr($TotalTime), "\n";
  88 
  89 ###############################################################################
  90 
  91 # Generate fingerprints for a SD file...
  92 #
  93 sub GeneratePathLengthFingerprints {
  94   my($FileIndex) = @_;
  95   my($CmpdCount, $IgnoredCmpdCount, $SDFile, $MoleculeFileIO, $Molecule, $PathLengthFingerprints, $NewFPSDFileIO, $NewFPTextFileIO, $NewFPFileIO);
  96 
  97   $SDFile = $SDFilesList[$FileIndex];
  98 
  99   # Setup output files...
 100   #
 101   ($NewFPSDFileIO, $NewFPTextFileIO, $NewFPFileIO) = SetupAndOpenOutputFiles($FileIndex);
 102 
 103   $MoleculeFileIO = new MoleculeFileIO('Name' => $SDFile);
 104   $MoleculeFileIO->Open();
 105 
 106   $CmpdCount = 0;
 107   $IgnoredCmpdCount = 0;
 108 
 109   COMPOUND: while ($Molecule = $MoleculeFileIO->ReadMolecule()) {
 110     $CmpdCount++;
 111 
 112     # Filter compound data before calculating fingerprints...
 113     if ($OptionsInfo{Filter}) {
 114       if (CheckAndFilterCompound($CmpdCount, $Molecule)) {
 115         $IgnoredCmpdCount++;
 116         next COMPOUND;
 117       }
 118     }
 119 
 120     $PathLengthFingerprints = GenerateMoleculeFingerprints($Molecule);
 121     if (!$PathLengthFingerprints) {
 122       $IgnoredCmpdCount++;
 123       ProcessIgnoredCompound('FingerprintsGenerationFailed', $CmpdCount, $Molecule);
 124       next COMPOUND;
 125     }
 126 
 127     WriteDataToOutputFiles($FileIndex, $CmpdCount, $Molecule, $PathLengthFingerprints, $NewFPSDFileIO, $NewFPTextFileIO, $NewFPFileIO);
 128   }
 129   $MoleculeFileIO->Close();
 130 
 131   if ($NewFPSDFileIO) {
 132     $NewFPSDFileIO->Close();
 133   }
 134   if ($NewFPTextFileIO) {
 135     $NewFPTextFileIO->Close();
 136   }
 137   if ($NewFPFileIO) {
 138     $NewFPFileIO->Close();
 139   }
 140 
 141   WriteFingerprintsGenerationSummaryStatistics($CmpdCount, $IgnoredCmpdCount);
 142 }
 143 
 144 # Process compound being ignored due to problems in fingerprints geneation...
 145 #
 146 sub ProcessIgnoredCompound {
 147   my($Mode, $CmpdCount, $Molecule) = @_;
 148   my($CmpdID, $DataFieldLabelAndValuesRef);
 149 
 150   $DataFieldLabelAndValuesRef = $Molecule->GetDataFieldLabelAndValues();
 151   $CmpdID = SetupCmpdIDForOutputFiles($CmpdCount, $Molecule, $DataFieldLabelAndValuesRef);
 152 
 153   MODE: {
 154     if ($Mode =~ /^ContainsNonElementalData$/i) {
 155       warn "\nWarning: Ignoring compound record number $CmpdCount with ID $CmpdID: Compound contains atom data corresponding to non-elemental atom symbol(s)...\n\n";
 156       next MODE;
 157     }
 158 
 159     if ($Mode =~ /^ContainsNoElementalData$/i) {
 160       warn "\nWarning: Ignoring compound record number $CmpdCount with ID $CmpdID: Compound contains no atom data...\n\n";
 161       next MODE;
 162     }
 163 
 164     if ($Mode =~ /^FingerprintsGenerationFailed$/i) {
 165       warn "\nWarning: Ignoring compound record number $CmpdCount with ID $CmpdID: Fingerprints generation didn't succeed...\n\n";
 166       next MODE;
 167     }
 168     warn "\nWarning: Ignoring compound record number $CmpdCount with ID $CmpdID: Fingerprints generation didn't succeed...\n\n";
 169   }
 170 }
 171 
 172 # Check and filter compounds....
 173 #
 174 sub CheckAndFilterCompound {
 175   my($CmpdCount, $Molecule) = @_;
 176   my($ElementCount, $NonElementCount);
 177 
 178   ($ElementCount, $NonElementCount) = $Molecule->GetNumOfElementsAndNonElements();
 179 
 180   if ($NonElementCount) {
 181     ProcessIgnoredCompound('ContainsNonElementalData', $CmpdCount, $Molecule);
 182     return 1;
 183   }
 184 
 185   if (!$ElementCount) {
 186     ProcessIgnoredCompound('ContainsNoElementalData', $CmpdCount, $Molecule);
 187     return 1;
 188   }
 189 
 190   return 0;
 191 }
 192 
 193 # Write out compounds fingerprints generation summary statistics...
 194 #
 195 sub WriteFingerprintsGenerationSummaryStatistics {
 196   my($CmpdCount, $IgnoredCmpdCount) = @_;
 197   my($ProcessedCmpdCount);
 198 
 199   $ProcessedCmpdCount = $CmpdCount - $IgnoredCmpdCount;
 200 
 201   print "\nNumber of compounds: $CmpdCount\n";
 202   print "Number of compounds processed successfully during fingerprints generation: $ProcessedCmpdCount\n";
 203   print "Number of compounds ignored during fingerprints generation: $IgnoredCmpdCount\n";
 204 }
 205 
 206 # Open output files...
 207 #
 208 sub SetupAndOpenOutputFiles {
 209   my($FileIndex) = @_;
 210   my($NewFPSDFile, $NewFPFile, $NewFPTextFile, $NewFPSDFileIO, $NewFPTextFileIO, $NewFPFileIO, %FingerprintsFileIOParams);
 211 
 212   ($NewFPSDFileIO, $NewFPTextFileIO, $NewFPFileIO) = (undef) x 3;
 213 
 214   # Setup common parameters for fingerprints file IO objects...
 215   #
 216   %FingerprintsFileIOParams = ();
 217   if ($OptionsInfo{Mode} =~ /^PathLengthBits$/i) {
 218     %FingerprintsFileIOParams = ('Mode' => 'Write', 'Overwrite' => $OptionsInfo{OverwriteFiles}, 'FingerprintsStringMode' => 'FingerprintsBitVectorString', 'BitStringFormat' => $OptionsInfo{BitStringFormat}, 'BitsOrder' => $OptionsInfo{BitsOrder});
 219   }
 220   elsif ($OptionsInfo{Mode} =~ /^PathLengthCount$/i) {
 221     %FingerprintsFileIOParams = ('Mode' => 'Write', 'Overwrite' => $OptionsInfo{OverwriteFiles}, 'FingerprintsStringMode' => 'FingerprintsVectorString', 'VectorStringFormat' => $OptionsInfo{VectorStringFormat});
 222   }
 223 
 224   if ($OptionsInfo{SDOutput}) {
 225     $NewFPSDFile = $SDFilesInfo{SDOutFileNames}[$FileIndex];
 226     print "Generating SD file $NewFPSDFile...\n";
 227     $NewFPSDFileIO = new FileIO::FingerprintsSDFileIO('Name' => $NewFPSDFile, %FingerprintsFileIOParams, 'FingerprintsFieldLabel' => $OptionsInfo{FingerprintsLabel});
 228     $NewFPSDFileIO->Open();
 229   }
 230 
 231   if ($OptionsInfo{FPOutput}) {
 232     $NewFPFile = $SDFilesInfo{FPOutFileNames}[$FileIndex];
 233     print "Generating FP file $NewFPFile...\n";
 234     $NewFPFileIO = new FileIO::FingerprintsFPFileIO('Name' => $NewFPFile, %FingerprintsFileIOParams);
 235     $NewFPFileIO->Open();
 236   }
 237 
 238   if ($OptionsInfo{TextOutput}) {
 239     my($ColLabelsRef);
 240 
 241     $NewFPTextFile = $SDFilesInfo{TextOutFileNames}[$FileIndex];
 242     $ColLabelsRef = SetupFPTextFileCoulmnLabels($FileIndex);
 243 
 244     print "Generating text file $NewFPTextFile...\n";
 245     $NewFPTextFileIO = new FileIO::FingerprintsTextFileIO('Name' => $NewFPTextFile, %FingerprintsFileIOParams, 'DataColLabels' => $ColLabelsRef, 'OutDelim' => $OptionsInfo{OutDelim}, 'OutQuote' => $OptionsInfo{OutQuote});
 246     $NewFPTextFileIO->Open();
 247   }
 248 
 249   return ($NewFPSDFileIO, $NewFPTextFileIO, $NewFPFileIO);
 250 }
 251 
 252 # Write fingerpritns and other data to appropriate output files...
 253 #
 254 sub WriteDataToOutputFiles {
 255   my($FileIndex, $CmpdCount, $Molecule, $PathLengthFingerprints, $NewFPSDFileIO, $NewFPTextFileIO, $NewFPFileIO) = @_;
 256   my($DataFieldLabelAndValuesRef);
 257 
 258   $DataFieldLabelAndValuesRef = undef;
 259   if ($NewFPTextFileIO || $NewFPFileIO) {
 260     $DataFieldLabelAndValuesRef = $Molecule->GetDataFieldLabelAndValues();
 261   }
 262 
 263   if ($NewFPSDFileIO) {
 264     my($CmpdString);
 265 
 266     $CmpdString = $Molecule->GetInputMoleculeString();
 267     $NewFPSDFileIO->WriteFingerprints($PathLengthFingerprints, $CmpdString);
 268   }
 269 
 270   if ($NewFPTextFileIO) {
 271     my($ColValuesRef);
 272 
 273     $ColValuesRef = SetupFPTextFileCoulmnValues($FileIndex, $CmpdCount, $Molecule, $DataFieldLabelAndValuesRef);
 274     $NewFPTextFileIO->WriteFingerprints($PathLengthFingerprints, $ColValuesRef);
 275   }
 276 
 277   if ($NewFPFileIO) {
 278     my($CompoundID);
 279 
 280     $CompoundID = SetupCmpdIDForOutputFiles($CmpdCount, $Molecule, $DataFieldLabelAndValuesRef);
 281     $NewFPFileIO->WriteFingerprints($PathLengthFingerprints, $CompoundID);
 282   }
 283 }
 284 
 285 # Generate approriate column labels for FPText output file...
 286 #
 287 sub SetupFPTextFileCoulmnLabels {
 288   my($FileIndex) = @_;
 289   my($Line, @ColLabels);
 290 
 291   @ColLabels = ();
 292   if ($OptionsInfo{DataFieldsMode} =~ /^All$/i) {
 293     push @ColLabels, @{$SDFilesInfo{AllDataFieldsRef}[$FileIndex]};
 294   }
 295   elsif ($OptionsInfo{DataFieldsMode} =~ /^Common$/i) {
 296     push @ColLabels, @{$SDFilesInfo{CommonDataFieldsRef}[$FileIndex]};
 297   }
 298   elsif ($OptionsInfo{DataFieldsMode} =~ /^Specify$/i) {
 299     push @ColLabels, @{$OptionsInfo{SpecifiedDataFields}};
 300   }
 301   elsif ($OptionsInfo{DataFieldsMode} =~ /^CompoundID$/i) {
 302     push @ColLabels, $OptionsInfo{CompoundIDLabel};
 303   }
 304   # Add fingerprints label...
 305   push @ColLabels, $OptionsInfo{FingerprintsLabel};
 306 
 307   return \@ColLabels;
 308 }
 309 
 310 # Generate column values FPText output file..
 311 #
 312 sub SetupFPTextFileCoulmnValues {
 313   my($FileIndex, $CmpdCount, $Molecule, $DataFieldLabelAndValuesRef) = @_;
 314   my(@ColValues);
 315 
 316   @ColValues = ();
 317   if ($OptionsInfo{DataFieldsMode} =~ /^CompoundID$/i) {
 318     push @ColValues, SetupCmpdIDForOutputFiles($CmpdCount, $Molecule, $DataFieldLabelAndValuesRef);
 319   }
 320   elsif ($OptionsInfo{DataFieldsMode} =~ /^All$/i) {
 321     @ColValues = map { exists $DataFieldLabelAndValuesRef->{$_} ? $DataFieldLabelAndValuesRef->{$_} : ''} @{$SDFilesInfo{AllDataFieldsRef}[$FileIndex]};
 322   }
 323   elsif ($OptionsInfo{DataFieldsMode} =~ /^Common$/i) {
 324     @ColValues = map { exists $DataFieldLabelAndValuesRef->{$_} ? $DataFieldLabelAndValuesRef->{$_} : ''} @{$SDFilesInfo{CommonDataFieldsRef}[$FileIndex]};
 325   }
 326   elsif ($OptionsInfo{DataFieldsMode} =~ /^Specify$/i) {
 327     @ColValues = map { exists $DataFieldLabelAndValuesRef->{$_} ? $DataFieldLabelAndValuesRef->{$_} : ''} @{$OptionsInfo{SpecifiedDataFields}};
 328   }
 329 
 330   return \@ColValues;
 331 }
 332 
 333 # Generate compound ID for FP and FPText output files..
 334 #
 335 sub SetupCmpdIDForOutputFiles {
 336   my($CmpdCount, $Molecule, $DataFieldLabelAndValuesRef) = @_;
 337   my($CmpdID);
 338 
 339   $CmpdID = '';
 340   if ($OptionsInfo{CompoundIDMode} =~ /^MolNameOrLabelPrefix$/i) {
 341     my($MolName);
 342     $MolName = $Molecule->GetName();
 343     $CmpdID = $MolName ? $MolName : "$OptionsInfo{CompoundID}${CmpdCount}";
 344   }
 345   elsif ($OptionsInfo{CompoundIDMode} =~ /^LabelPrefix$/i) {
 346     $CmpdID = "$OptionsInfo{CompoundID}${CmpdCount}";
 347   }
 348   elsif ($OptionsInfo{CompoundIDMode} =~ /^DataField$/i) {
 349     my($SpecifiedDataField);
 350     $SpecifiedDataField = $OptionsInfo{CompoundID};
 351     $CmpdID = exists $DataFieldLabelAndValuesRef->{$SpecifiedDataField} ? $DataFieldLabelAndValuesRef->{$SpecifiedDataField} : '';
 352   }
 353   elsif ($OptionsInfo{CompoundIDMode} =~ /^MolName$/i) {
 354     $CmpdID = $Molecule->GetName();
 355   }
 356   return $CmpdID;
 357 }
 358 
 359 # Generate fingerprints for molecule...
 360 #
 361 sub GenerateMoleculeFingerprints {
 362   my($Molecule) = @_;
 363   my($PathLengthFingerprints);
 364 
 365   if ($OptionsInfo{KeepLargestComponent}) {
 366     $Molecule->KeepLargestComponent();
 367   }
 368   if ($OptionsInfo{IgnoreHydrogens}) {
 369     $Molecule->DeleteHydrogens();
 370   }
 371 
 372   if ($OptionsInfo{DetectAromaticity}) {
 373     if (!$Molecule->DetectRings()) {
 374       return undef;
 375     }
 376     $Molecule->SetAromaticityModel($OptionsInfo{AromaticityModel});
 377     $Molecule->DetectAromaticity();
 378   }
 379 
 380   $PathLengthFingerprints = undef;
 381   if ($OptionsInfo{Mode} =~ /^PathLengthBits$/i) {
 382     $PathLengthFingerprints = GeneratePathLengthBitsFingerprints($Molecule);
 383   }
 384   elsif ($OptionsInfo{Mode} =~ /^PathLengthCount$/i) {
 385     $PathLengthFingerprints = GeneratePathLengthCountFingerprints($Molecule);
 386   }
 387   else {
 388     die "Error: The value specified, $Options{mode}, for option \"-m, --mode\" is not valid. Allowed values: PathLengthBits or PathLengthCount\n";
 389   }
 390 
 391   return $PathLengthFingerprints;
 392 }
 393 
 394 # Generate pathlength bits finerprints for molecule...
 395 #
 396 sub GeneratePathLengthBitsFingerprints {
 397   my($Molecule) = @_;
 398   my($PathLengthFingerprints);
 399 
 400   $PathLengthFingerprints = new Fingerprints::PathLengthFingerprints('Molecule' => $Molecule, 'Type' => 'PathLengthBits', 'AtomIdentifierType' => $OptionsInfo{AtomIdentifierType}, 'NumOfBitsToSetPerPath' => $OptionsInfo{NumOfBitsToSetPerPath}, 'Size' => $OptionsInfo{Size}, 'MinLength' => $OptionsInfo{MinPathLength}, 'MaxLength' => $OptionsInfo{MaxPathLength}, 'AllowRings' => $OptionsInfo{AllowRings}, 'AllowSharedBonds' => $OptionsInfo{AllowSharedBonds}, 'UseBondSymbols' => $OptionsInfo{UseBondSymbols}, 'UseUniquePaths' => $OptionsInfo{UseUniquePaths}, 'UsePerlCoreRandom' => $OptionsInfo{UsePerlCoreRandom});
 401 
 402   # Set atom identifier type...
 403   SetAtomIdentifierTypeValuesToUse($PathLengthFingerprints);
 404 
 405   # Generate fingerprints...
 406   $PathLengthFingerprints->GenerateFingerprints();
 407 
 408   # Make sure fingerprints generation is successful...
 409   if (!$PathLengthFingerprints->IsFingerprintsGenerationSuccessful()) {
 410     return undef;
 411   }
 412 
 413   if ($OptionsInfo{Fold}) {
 414     my($CheckSizeValue) = 0;
 415     $PathLengthFingerprints->FoldFingerprintsBySize($OptionsInfo{FoldedSize}, $CheckSizeValue);
 416   }
 417 
 418   return $PathLengthFingerprints;
 419 }
 420 
 421 # Generate pathlength count finerprints for molecule...
 422 #
 423 sub GeneratePathLengthCountFingerprints {
 424   my($Molecule) = @_;
 425   my($PathLengthFingerprints);
 426 
 427   $PathLengthFingerprints = new Fingerprints::PathLengthFingerprints('Molecule' => $Molecule, 'Type' => 'PathLengthCount', 'AtomIdentifierType' => $OptionsInfo{AtomIdentifierType}, 'MinLength' => $OptionsInfo{MinPathLength}, 'MaxLength' => $OptionsInfo{MaxPathLength}, 'AllowRings' => $OptionsInfo{AllowRings}, 'AllowSharedBonds' => $OptionsInfo{AllowSharedBonds}, 'UseBondSymbols' => $OptionsInfo{UseBondSymbols}, 'UseUniquePaths' => $OptionsInfo{UseUniquePaths});
 428 
 429   # Set atom identifier type...
 430   SetAtomIdentifierTypeValuesToUse($PathLengthFingerprints);
 431 
 432   # Generate fingerprints...
 433   $PathLengthFingerprints->GenerateFingerprints();
 434 
 435   # Make sure fingerprints generation is successful...
 436   if (!$PathLengthFingerprints->IsFingerprintsGenerationSuccessful()) {
 437     return undef;
 438   }
 439   return $PathLengthFingerprints;
 440 }
 441 
 442 # Set atom identifier type to use for generating path strings...
 443 #
 444 sub SetAtomIdentifierTypeValuesToUse {
 445   my($PathLengthFingerprints) = @_;
 446 
 447   if ($OptionsInfo{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
 448     $PathLengthFingerprints->SetAtomicInvariantsToUse(\@{$OptionsInfo{AtomicInvariantsToUse}});
 449   }
 450   elsif ($OptionsInfo{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
 451     $PathLengthFingerprints->SetFunctionalClassesToUse(\@{$OptionsInfo{FunctionalClassesToUse}});
 452   }
 453   elsif ($OptionsInfo{AtomIdentifierType} =~ /^(DREIDINGAtomTypes|EStateAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
 454     # Nothing to do for now...
 455   }
 456   else {
 457     die "Error: The value specified, $Options{atomidentifiertype}, for option \"-a, --AtomIdentifierType\" is not valid. Supported atom identifier types in current release of MayaChemTools: AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes\n";
 458   }
 459 }
 460 
 461 # Retrieve information about SD files...
 462 #
 463 sub RetrieveSDFilesInfo {
 464   my($SDFile, $Index, $FileDir, $FileExt, $FileName, $OutFileRoot, $TextOutFileExt, $SDOutFileExt, $FPOutFileExt, $NewSDFileName, $NewFPFileName, $NewTextFileName, $CheckDataField, $CollectDataFields, $AllDataFieldsRef, $CommonDataFieldsRef);
 465 
 466   %SDFilesInfo = ();
 467   @{$SDFilesInfo{FileOkay}} = ();
 468   @{$SDFilesInfo{OutFileRoot}} = ();
 469   @{$SDFilesInfo{SDOutFileNames}} = ();
 470   @{$SDFilesInfo{FPOutFileNames}} = ();
 471   @{$SDFilesInfo{TextOutFileNames}} = ();
 472   @{$SDFilesInfo{AllDataFieldsRef}} = ();
 473   @{$SDFilesInfo{CommonDataFieldsRef}} = ();
 474 
 475   $CheckDataField = ($OptionsInfo{TextOutput} && ($OptionsInfo{DataFieldsMode} =~ /^CompoundID$/i) && ($OptionsInfo{CompoundIDMode} =~ /^DataField$/i)) ? 1 : 0;
 476   $CollectDataFields = ($OptionsInfo{TextOutput} && ($OptionsInfo{DataFieldsMode} =~ /^(All|Common)$/i)) ? 1 : 0;
 477 
 478   FILELIST: for $Index (0 .. $#SDFilesList) {
 479     $SDFile = $SDFilesList[$Index];
 480 
 481     $SDFilesInfo{FileOkay}[$Index] = 0;
 482     $SDFilesInfo{OutFileRoot}[$Index] = '';
 483     $SDFilesInfo{SDOutFileNames}[$Index] = '';
 484     $SDFilesInfo{FPOutFileNames}[$Index] = '';
 485     $SDFilesInfo{TextOutFileNames}[$Index] = '';
 486 
 487     $SDFile = $SDFilesList[$Index];
 488     if (!(-e $SDFile)) {
 489       warn "Warning: Ignoring file $SDFile: It doesn't exist\n";
 490       next FILELIST;
 491     }
 492     if (!CheckFileType($SDFile, "sd sdf")) {
 493       warn "Warning: Ignoring file $SDFile: It's not a SD file\n";
 494       next FILELIST;
 495     }
 496 
 497     if ($CheckDataField) {
 498       # Make sure data field exists in SD file..
 499       my($CmpdString, $SpecifiedDataField, @CmpdLines, %DataFieldValues);
 500 
 501       @CmpdLines = ();
 502       open SDFILE, "$SDFile" or die "Error: Couldn't open $SDFile: $! \n";
 503       $CmpdString = ReadCmpdString(\*SDFILE);
 504       close SDFILE;
 505       @CmpdLines = split "\n", $CmpdString;
 506       %DataFieldValues = GetCmpdDataHeaderLabelsAndValues(\@CmpdLines);
 507       $SpecifiedDataField = $OptionsInfo{CompoundID};
 508       if (!exists $DataFieldValues{$SpecifiedDataField}) {
 509         warn "Warning: Ignoring file $SDFile: Data field value, $SpecifiedDataField, using  \"--CompoundID\" option in \"DataField\" \"--CompoundIDMode\" doesn't exist\n";
 510         next FILELIST;
 511       }
 512     }
 513 
 514     $AllDataFieldsRef = '';
 515     $CommonDataFieldsRef = '';
 516     if ($CollectDataFields) {
 517       my($CmpdCount);
 518       open SDFILE, "$SDFile" or die "Error: Couldn't open $SDFile: $! \n";
 519       ($CmpdCount, $AllDataFieldsRef, $CommonDataFieldsRef) = GetAllAndCommonCmpdDataHeaderLabels(\*SDFILE);
 520       close SDFILE;
 521     }
 522 
 523     # Setup output file names...
 524     $FileDir = ""; $FileName = ""; $FileExt = "";
 525     ($FileDir, $FileName, $FileExt) = ParseFileName($SDFile);
 526 
 527     $TextOutFileExt = "csv";
 528     if ($Options{outdelim} =~ /^tab$/i) {
 529       $TextOutFileExt = "tsv";
 530     }
 531     $SDOutFileExt = $FileExt;
 532     $FPOutFileExt = "fpf";
 533 
 534     if ($OptionsInfo{OutFileRoot} && (@SDFilesList == 1)) {
 535       my ($RootFileDir, $RootFileName, $RootFileExt) = ParseFileName($OptionsInfo{OutFileRoot});
 536       if ($RootFileName && $RootFileExt) {
 537         $FileName = $RootFileName;
 538       }
 539       else {
 540         $FileName = $OptionsInfo{OutFileRoot};
 541       }
 542       $OutFileRoot = $FileName;
 543     }
 544     else {
 545       $OutFileRoot = "${FileName}PathLengthFP";
 546     }
 547 
 548     $NewSDFileName = "${OutFileRoot}.${SDOutFileExt}";
 549     $NewFPFileName = "${OutFileRoot}.${FPOutFileExt}";
 550     $NewTextFileName = "${OutFileRoot}.${TextOutFileExt}";
 551 
 552     if ($OptionsInfo{SDOutput}) {
 553       if ($SDFile =~ /$NewSDFileName/i) {
 554         warn "Warning: Ignoring input file $SDFile: Same output, $NewSDFileName, and input file names.\n";
 555         print "Specify a different name using \"-r --root\" option or use default name.\n";
 556         next FILELIST;
 557       }
 558     }
 559 
 560     if (!$OptionsInfo{OverwriteFiles}) {
 561       # Check SD, FP and text outout files...
 562       if ($OptionsInfo{SDOutput}) {
 563         if (-e $NewSDFileName) {
 564           warn "Warning: Ignoring file $SDFile: The file $NewSDFileName already exists\n";
 565           next FILELIST;
 566         }
 567       }
 568       if ($OptionsInfo{FPOutput}) {
 569         if (-e $NewFPFileName) {
 570           warn "Warning: Ignoring file $SDFile: The file $NewFPFileName already exists\n";
 571           next FILELIST;
 572         }
 573       }
 574       if ($OptionsInfo{TextOutput}) {
 575         if (-e $NewTextFileName) {
 576           warn "Warning: Ignoring file $SDFile: The file $NewTextFileName already exists\n";
 577           next FILELIST;
 578         }
 579       }
 580     }
 581 
 582     $SDFilesInfo{FileOkay}[$Index] = 1;
 583 
 584     $SDFilesInfo{OutFileRoot}[$Index] = $OutFileRoot;
 585     $SDFilesInfo{SDOutFileNames}[$Index] = $NewSDFileName;
 586     $SDFilesInfo{FPOutFileNames}[$Index] = $NewFPFileName;
 587     $SDFilesInfo{TextOutFileNames}[$Index] = $NewTextFileName;
 588 
 589     $SDFilesInfo{AllDataFieldsRef}[$Index] = $AllDataFieldsRef;
 590     $SDFilesInfo{CommonDataFieldsRef}[$Index] = $CommonDataFieldsRef;
 591   }
 592 }
 593 
 594 # Process option values...
 595 sub ProcessOptions {
 596   %OptionsInfo = ();
 597 
 598   $OptionsInfo{Mode} = $Options{mode};
 599   $OptionsInfo{AromaticityModel} = $Options{aromaticitymodel};
 600   $OptionsInfo{PathMode} = $Options{pathmode};
 601 
 602   ProcessAtomIdentifierTypeOptions();
 603 
 604   $OptionsInfo{BitsOrder} = $Options{bitsorder};
 605   $OptionsInfo{BitStringFormat} = $Options{bitstringformat};
 606 
 607   $OptionsInfo{CompoundIDMode} = $Options{compoundidmode};
 608   $OptionsInfo{CompoundIDLabel} = $Options{compoundidlabel};
 609   $OptionsInfo{DataFieldsMode} = $Options{datafieldsmode};
 610 
 611   my(@SpecifiedDataFields);
 612   @SpecifiedDataFields = ();
 613 
 614   @{$OptionsInfo{SpecifiedDataFields}} = ();
 615   $OptionsInfo{CompoundID} = '';
 616 
 617   if ($Options{datafieldsmode} =~ /^CompoundID$/i) {
 618     if ($Options{compoundidmode} =~ /^DataField$/i) {
 619       if (!$Options{compoundid}) {
 620         die "Error: You must specify a value for \"--CompoundID\" option in \"DataField\" \"--CompoundIDMode\". \n";
 621       }
 622       $OptionsInfo{CompoundID} = $Options{compoundid};
 623     }
 624     elsif ($Options{compoundidmode} =~ /^(LabelPrefix|MolNameOrLabelPrefix)$/i) {
 625       $OptionsInfo{CompoundID} = $Options{compoundid} ? $Options{compoundid} : 'Cmpd';
 626     }
 627   }
 628   elsif ($Options{datafieldsmode} =~ /^Specify$/i) {
 629     if (!$Options{datafields}) {
 630       die "Error: You must specify a value for \"--DataFields\" option in \"Specify\" \"-d, --DataFieldsMode\". \n";
 631     }
 632     @SpecifiedDataFields = split /\,/, $Options{datafields};
 633     push @{$OptionsInfo{SpecifiedDataFields}}, @SpecifiedDataFields;
 634   }
 635 
 636   if ($Options{atomidentifiertype} !~ /^AtomicInvariantsAtomTypes$/i) {
 637     if ($Options{detectaromaticity} =~ /^No$/i) {
 638       die "Error: The value specified, $Options{detectaromaticity}, for option \"--DetectAromaticity\" is not valid. No value is only allowed during AtomicInvariantsAtomTypes value for \"-a, --AtomIdentifierType\" \n";
 639     }
 640   }
 641   $OptionsInfo{DetectAromaticity} = ($Options{detectaromaticity} =~ /^Yes$/i) ? 1 : 0;
 642 
 643   $OptionsInfo{Filter} = ($Options{filter} =~ /^Yes$/i) ? 1 : 0;
 644 
 645   $OptionsInfo{FingerprintsLabel} = $Options{fingerprintslabel} ? $Options{fingerprintslabel} : 'PathLengthFingerprints';
 646 
 647   my($Size, $MinSize, $MaxSize);
 648   $MinSize = 32;
 649   $MaxSize = 2**32;
 650   $Size = $Options{size};
 651   if (!(IsPositiveInteger($Size) && $Size >= $MinSize && $Size <= $MaxSize && IsNumberPowerOfNumber($Size, 2))) {
 652     die "Error: Invalid size value, $Size, for \"-s, --size\" option. Allowed values: power of 2, >= minimum size of $MinSize, and <= maximum size of $MaxSize.\n";
 653   }
 654   $OptionsInfo{Size} = $Size;
 655 
 656   $OptionsInfo{Fold} = ($Options{fold} =~ /^Yes$/i) ? 1 : 0;
 657   my($FoldedSize);
 658   $FoldedSize = $Options{foldedsize};
 659   if ($Options{fold} =~ /^Yes$/i) {
 660     if (!(IsPositiveInteger($FoldedSize) && $FoldedSize < $Size && IsNumberPowerOfNumber($FoldedSize, 2))) {
 661       die "Error: Invalid folded size value, $FoldedSize, for \"--FoldedSize\" option. Allowed values: power of 2, >= minimum size of $MinSize, and < size value of $Size.\n";
 662     }
 663   }
 664   $OptionsInfo{FoldedSize} = $FoldedSize;
 665 
 666   $OptionsInfo{IgnoreHydrogens} = ($Options{ignorehydrogens} =~ /^Yes$/i) ? 1 : 0;
 667   $OptionsInfo{KeepLargestComponent} = ($Options{keeplargestcomponent} =~ /^Yes$/i) ? 1 : 0;
 668 
 669   my($MinPathLength, $MaxPathLength);
 670   $MinPathLength = $Options{minpathlength};
 671   $MaxPathLength = $Options{maxpathlength};
 672   if (!IsPositiveInteger($MinPathLength)) {
 673     die "Error: Invalid path length value, $MinPathLength, for \"--MinPathLength\" option. Allowed values: > 0\n";
 674   }
 675   if (!IsPositiveInteger($MaxPathLength)) {
 676     die "Error: Invalid path length value, $MaxPathLength, for \"--MinPathLength\" option. Allowed values: > 0\n";
 677   }
 678   if ($MinPathLength >= $MaxPathLength) {
 679     die "Error: Invalid minimum and maximum path length values, $MinPathLength and $MaxPathLength, for \"--MinPathLength\"  and \"--MaxPathLength\"options. Allowed values: minimum path length value must be smaller than maximum path length value.\n";
 680   }
 681   $OptionsInfo{MinPathLength} = $MinPathLength;
 682   $OptionsInfo{MaxPathLength} = $MaxPathLength;
 683 
 684   my($NumOfBitsToSetPerPath);
 685   $NumOfBitsToSetPerPath = $Options{numofbitstosetperpath};
 686   if (!IsPositiveInteger($MaxPathLength)) {
 687     die "Error: Invalid  value, $NumOfBitsToSetPerPath, for \"-n, --NumOfBitsToSetPerPath\" option. Allowed values: > 0\n";
 688   }
 689   if ($NumOfBitsToSetPerPath >= $Size) {
 690     die "Error: Invalid  value, $NumOfBitsToSetPerPath, for \"-n, --NumOfBitsToSetPerPath\" option. Allowed values: It must be less than the size, $Size, of the fingerprint bit-string.\n";
 691   }
 692   $OptionsInfo{NumOfBitsToSetPerPath} = $NumOfBitsToSetPerPath;
 693 
 694   $OptionsInfo{Output} = $Options{output};
 695   $OptionsInfo{SDOutput} = ($Options{output} =~ /^(SD|All)$/i) ? 1 : 0;
 696   $OptionsInfo{FPOutput} = ($Options{output} =~ /^(FP|All)$/i) ? 1 : 0;
 697   $OptionsInfo{TextOutput} = ($Options{output} =~ /^(Text|All)$/i) ? 1 : 0;
 698 
 699   $OptionsInfo{OutDelim} = $Options{outdelim};
 700   $OptionsInfo{OutQuote} = ($Options{quote} =~ /^Yes$/i) ? 1 : 0;
 701 
 702   $OptionsInfo{OverwriteFiles} = $Options{overwrite} ? 1 : 0;
 703   $OptionsInfo{OutFileRoot} = $Options{root} ? $Options{root} : 0;
 704 
 705   $OptionsInfo{UseBondSymbols} = ($Options{usebondsymbols} =~ /^Yes$/i) ? 1 : 0;
 706 
 707   $OptionsInfo{UsePerlCoreRandom} = ($Options{useperlcorerandom} =~ /^Yes$/i) ? 1 : 0;
 708 
 709   $OptionsInfo{UseUniquePaths} = ($Options{useuniquepaths} =~ /^Yes$/i) ? 1 : 0;
 710 
 711   $OptionsInfo{VectorStringFormat} = $Options{vectorstringformat};
 712 
 713   # Setup parameters used during generation of fingerprints by PathLengthFingerprints class...
 714   my($AllowRings, $AllowSharedBonds);
 715   $AllowRings = 1;
 716   $AllowSharedBonds = 1;
 717   MODE: {
 718     if ($Options{pathmode} =~ /^AtomPathsWithoutRings$/i) { $AllowSharedBonds = 0; $AllowRings = 0; last MODE;}
 719     if ($Options{pathmode} =~ /^AtomPathsWithRings$/i) { $AllowSharedBonds = 0; $AllowRings = 1; last MODE;}
 720     if ($Options{pathmode} =~ /^AllAtomPathsWithoutRings$/i) { $AllowSharedBonds = 1; $AllowRings = 0; last MODE;}
 721     if ($Options{pathmode} =~ /^AllAtomPathsWithRings$/i) { $AllowSharedBonds = 1; $AllowRings = 1; last MODE;}
 722     die "Error: ProcessOptions: mode value, $Options{pathmode}, is not supported.\n";
 723   }
 724   $OptionsInfo{AllowRings} = $AllowRings;
 725   $OptionsInfo{AllowSharedBonds} = $AllowSharedBonds;
 726 }
 727 
 # Record the atom identifier type requested on the command line and process
 # the options that depend on it...
 #
 # The value of $Options{atomidentifiertype} is copied into
 # $OptionsInfo{AtomIdentifierType}. AtomicInvariantsAtomTypes and
 # FunctionalClassAtomTypes hand off to their own option processing routines;
 # the other supported types need no further processing. Any other value
 # terminates the script with an error message.
 #
 sub ProcessAtomIdentifierTypeOptions {
   my($AtomIdentifierType);

   $AtomIdentifierType = $Options{atomidentifiertype};
   $OptionsInfo{AtomIdentifierType} = $AtomIdentifierType;

   TYPE: {
     # Types with additional command line options to process...
     if ($AtomIdentifierType =~ /^AtomicInvariantsAtomTypes$/i) { ProcessAtomicInvariantsToUseOption(); last TYPE; }
     if ($AtomIdentifierType =~ /^FunctionalClassAtomTypes$/i) { ProcessFunctionalClassesToUse(); last TYPE; }

     # Supported types without any additional options: nothing to do for now...
     if ($AtomIdentifierType =~ /^(DREIDINGAtomTypes|EStateAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) { last TYPE; }

     die "Error: The value specified, $Options{atomidentifiertype}, for option \"-a, --AtomIdentifierType\" is not valid. Supported atom identifier types in current release of MayaChemTools: AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes\n";
   }
 }
 747 
 # Validate the comma delimited list of atomic invariants specified using
 # "--AtomicInvariantsToUse" option and save it for later use...
 #
 # The names are stored, in the order specified, in the array referenced by
 # $OptionsInfo{AtomicInvariantsToUse}. The script terminates with an error
 # message when the option value is empty, when a name is rejected by
 # AtomTypes::AtomicInvariantsAtomTypes::IsAtomicInvariantAvailable, or when
 # neither AS nor AtomSymbol is present in the list.
 #
 sub ProcessAtomicInvariantsToUseOption {
   my($AtomicInvariant, $InvariantsToUseRef);

   # Start out with an empty list...
   @{$OptionsInfo{AtomicInvariantsToUse}} = ();
   $InvariantsToUseRef = $OptionsInfo{AtomicInvariantsToUse};

   die "Error: Atomic invariants value specified using \"--AtomicInvariantsToUse\" option is empty\n" if IsEmpty($Options{atomicinvariantstouse});

   for $AtomicInvariant (split /\,/, $Options{atomicinvariantstouse}) {
     AtomTypes::AtomicInvariantsAtomTypes::IsAtomicInvariantAvailable($AtomicInvariant)
       or die "Error: Atomic invariant specified, $AtomicInvariant, using \"--AtomicInvariantsToUse\" option is not valid...\n ";
     push @{$InvariantsToUseRef}, $AtomicInvariant;
   }

   # Atom symbol, as AS or AtomSymbol, is a required member of the list...
   unless (grep { /^(AS|AtomSymbol)$/i } @{$InvariantsToUseRef}) {
     die "Error: Atomic invariant, AS or AtomSymbol, must be specified as using \"--AtomicInvariantsToUse\" option...\n ";
   }
 }
 772 
 # Validate the comma delimited list of functional classes specified using
 # "--FunctionalClassesToUse" option and save it for later use...
 #
 # The names are stored, in the order specified, in the array referenced by
 # $OptionsInfo{FunctionalClassesToUse}. The script terminates with an error
 # message when the option value is empty or when a name is rejected by
 # AtomTypes::FunctionalClassAtomTypes::IsFunctionalClassAvailable.
 #
 sub ProcessFunctionalClassesToUse {
   my($FunctionalClass, $ClassesSpecified);

   # Start out with an empty list...
   @{$OptionsInfo{FunctionalClassesToUse}} = ();

   $ClassesSpecified = $Options{functionalclassestouse};
   die "Error: Functional classes value specified using \"--FunctionalClassesToUse\" option is empty\n" if IsEmpty($ClassesSpecified);

   for $FunctionalClass (split /\,/, $ClassesSpecified) {
     unless (AtomTypes::FunctionalClassAtomTypes::IsFunctionalClassAvailable($FunctionalClass)) {
       die "Error: Functional class specified, $FunctionalClass, using \"--FunctionalClassesToUse\" option is not valid...\n ";
     }
     push @{$OptionsInfo{FunctionalClassesToUse}}, $FunctionalClass;
   }
 }
 790 
 # Setup script usage and retrieve command line arguments specified using various options...
 #
 # Resets the file level %Options hash, assigns default values, lets GetOptions
 # overwrite them with whatever is specified on the command line and validates
 # the resulting values. Keys in %Options are the all lowercase option names used
 # in the GetOptions specification. No defaults are assigned for compoundid,
 # datafields, fingerprintslabel, help, overwrite, root and workingdir.
 #
 # The script terminates with an error message on the first option value that
 # fails validation. When "-w, --workingdir" is specified, the current directory
 # is changed to it.
 #
 sub SetupScriptUsage {

   # Retrieve all the options...
   %Options = ();

   # Default values: these stay in effect unless overridden on the command line...
   $Options{aromaticitymodel} = 'MayaChemToolsAromaticityModel';

   $Options{atomidentifiertype} = 'AtomicInvariantsAtomTypes';
   $Options{atomicinvariantstouse} = 'AS';

   $Options{functionalclassestouse} = 'HBD,HBA,PI,NI,Ar,Hal';

   $Options{bitsorder} = 'Ascending';
   $Options{bitstringformat} = 'HexadecimalString';

   $Options{compoundidmode} = 'LabelPrefix';
   $Options{compoundidlabel} = 'CompoundID';
   $Options{datafieldsmode} = 'CompoundID';
   $Options{detectaromaticity} = 'Yes';

   $Options{filter} = 'Yes';

   $Options{fold} = 'No';
   $Options{foldedsize} = 256;

   $Options{ignorehydrogens} = 'Yes';
   $Options{keeplargestcomponent} = 'Yes';

   $Options{mode} = 'PathLengthBits';
   $Options{pathmode} = 'AllAtomPathsWithRings';

   $Options{minpathlength} = 1;
   $Options{maxpathlength} = 8;

   $Options{numofbitstosetperpath} = 1;

   $Options{output} = 'text';
   $Options{outdelim} = 'comma';
   $Options{quote} = 'yes';

   $Options{size} = 1024;

   $Options{usebondsymbols} = 'yes';
   $Options{useperlcorerandom} = 'yes';
   $Options{useuniquepaths} = 'yes';

   $Options{vectorstringformat} = 'IDsAndValuesString';

   # Command line values are stored into %Options under the lowercase option names...
   if (!GetOptions(\%Options, "aromaticitymodel=s", "atomidentifiertype|a=s", "atomicinvariantstouse=s", "functionalclassestouse=s", "bitsorder=s", "bitstringformat|b=s", "compoundid=s", "compoundidlabel=s", "compoundidmode=s", "datafields=s", "datafieldsmode|d=s", "detectaromaticity=s", "filter|f=s", "fingerprintslabel=s", "fold=s", "foldedsize=i", "help|h", "ignorehydrogens|i=s", "keeplargestcomponent|k=s", "mode|m=s", "minpathlength=i", "maxpathlength=i", "numofbitstosetperpath|n=i", "outdelim=s", "output=s", "overwrite|o", "pathmode|p=s", "quote|q=s", "root|r=s", "size|s=i", "usebondsymbols|u=s", "useperlcorerandom=s", "useuniquepaths=s", "vectorstringformat|v=s", "workingdir|w=s")) {
     die "\nTo get a list of valid options and their values, use \"$ScriptName -h\" or\n\"perl -S $ScriptName -h\" command and try again...\n";
   }

   # Switch to the working directory before validating the rest of the options...
   if ($Options{workingdir}) {
     if (! -d $Options{workingdir}) {
       die "Error: The value specified, $Options{workingdir}, for option \"-w --workingdir\" is not a directory name.\n";
     }
     chdir $Options{workingdir} or die "Error: Couldn't chdir $Options{workingdir}: $! \n";
   }

   # Validate option values...
   if (!Molecule::IsSupportedAromaticityModel($Options{aromaticitymodel})) {
     my(@SupportedModels) = Molecule::GetSupportedAromaticityModels();
     die "Error: The value specified, $Options{aromaticitymodel}, for option \"--AromaticityModel\" is not valid. Supported aromaticity models in current release of MayaChemTools: @SupportedModels\n";
   }
   if ($Options{atomidentifiertype} !~ /^(AtomicInvariantsAtomTypes|DREIDINGAtomTypes|EStateAtomTypes|FunctionalClassAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
     die "Error: The value specified, $Options{atomidentifiertype}, for option \"-a, --AtomIdentifierType\" is not valid. Supported atom identifier types in current release of MayaChemTools: AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes\n";
   }
   if ($Options{bitsorder} !~ /^(Ascending|Descending)$/i) {
     die "Error: The value specified, $Options{bitsorder}, for option \"--BitsOrder\" is not valid. Allowed values: Ascending or Descending\n";
   }
   if ($Options{bitstringformat} !~ /^(BinaryString|HexadecimalString)$/i) {
     die "Error: The value specified, $Options{bitstringformat}, for option \"-b, --bitstringformat\" is not valid. Allowed values: BinaryString or HexadecimalString\n";
   }
   if ($Options{compoundidmode} !~ /^(DataField|MolName|LabelPrefix|MolNameOrLabelPrefix)$/i) {
     die "Error: The value specified, $Options{compoundidmode}, for option \"--CompoundIDMode\" is not valid. Allowed values: DataField, MolName, LabelPrefix or MolNameOrLabelPrefix\n";
   }
   if ($Options{datafieldsmode} !~ /^(All|Common|Specify|CompoundID)$/i) {
     die "Error: The value specified, $Options{datafieldsmode}, for option \"-d, --DataFieldsMode\" is not valid. Allowed values: All, Common, Specify or CompoundID\n";
   }
   if ($Options{detectaromaticity} !~ /^(Yes|No)$/i) {
     die "Error: The value specified, $Options{detectaromaticity}, for option \"--DetectAromaticity\" is not valid. Allowed values: Yes or No\n";
   }
   if ($Options{filter} !~ /^(Yes|No)$/i) {
     die "Error: The value specified, $Options{filter}, for option \"-f, --Filter\" is not valid. Allowed values: Yes or No\n";
   }
   if ($Options{fold} !~ /^(Yes|No)$/i) {
     die "Error: The value specified, $Options{fold}, for option \"--fold\" is not valid. Allowed values: Yes or No\n";
   }
   if (!IsPositiveInteger($Options{foldedsize})) {
     die "Error: The value specified, $Options{foldedsize}, for option \"--FoldedSize\" is not valid. Allowed values: > 0 \n";
   }
   if ($Options{ignorehydrogens} !~ /^(Yes|No)$/i) {
     die "Error: The value specified, $Options{ignorehydrogens}, for option \"-i, --IgnoreHydrogens\" is not valid. Allowed values: Yes or No\n";
   }
   if ($Options{keeplargestcomponent} !~ /^(Yes|No)$/i) {
     die "Error: The value specified, $Options{keeplargestcomponent}, for option \"-k, --KeepLargestComponent\" is not valid. Allowed values: Yes or No\n";
   }
   if ($Options{mode} !~ /^(PathLengthBits|PathLengthCount)$/i) {
     die "Error: The value specified, $Options{mode}, for option \"-m, --mode\" is not valid. Allowed values: PathLengthBits or PathLengthCount\n";
   }
   if (!IsPositiveInteger($Options{minpathlength})) {
     die "Error: The value specified, $Options{minpathlength}, for option \"--MinPathLength\" is not valid. Allowed values: > 0 \n";
   }
   if (!IsPositiveInteger($Options{numofbitstosetperpath})) {
     # The hash key is all lowercase; a mixed case key would interpolate as an
     # undefined value and hide the offending value from the user...
     die "Error: The value specified, $Options{numofbitstosetperpath}, for option \"--NumOfBitsToSetPerPath\" is not valid. Allowed values: > 0 \n";
   }
   if (!IsPositiveInteger($Options{maxpathlength})) {
     die "Error: The value specified, $Options{maxpathlength}, for option \"--MaxPathLength\" is not valid. Allowed values: > 0 \n";
   }
   if ($Options{output} !~ /^(SD|FP|text|all)$/i) {
     die "Error: The value specified, $Options{output}, for option \"--output\" is not valid. Allowed values: SD, FP, text, or all\n";
   }
   if ($Options{outdelim} !~ /^(comma|semicolon|tab)$/i) {
     die "Error: The value specified, $Options{outdelim}, for option \"--outdelim\" is not valid. Allowed values: comma, tab, or semicolon\n";
   }
   if ($Options{pathmode} !~ /^(AtomPathsWithoutRings|AtomPathsWithRings|AllAtomPathsWithoutRings|AllAtomPathsWithRings)$/i) {
     # Short form of --PathMode is -p; -m belongs to --mode...
     die "Error: The value specified, $Options{pathmode}, for option \"-p, --PathMode\" is not valid. Allowed values: AtomPathsWithoutRings, AtomPathsWithRings, AllAtomPathsWithoutRings or AllAtomPathsWithRings\n";
   }
   if ($Options{quote} !~ /^(Yes|No)$/i) {
     die "Error: The value specified, $Options{quote}, for option \"-q --quote\" is not valid. Allowed values: Yes or No\n";
   }
   # Semicolon delimited output must be quoted...
   if ($Options{outdelim} =~ /semicolon/i && $Options{quote} =~ /^No$/i) {
     die "Error: The value specified, $Options{quote}, for option \"-q --quote\" is not allowed with, semicolon value of \"--outdelim\" option: Fingerprints string use semicolon as delimiter for various data fields and must be quoted.\n";
   }

   if (!IsPositiveInteger($Options{size})) {
     die "Error: The value specified, $Options{size}, for option \"-s, --size\" is not valid. Allowed values: > 0 \n";
   }
   if ($Options{usebondsymbols} !~ /^(Yes|No)$/i) {
     die "Error: The value specified, $Options{usebondsymbols}, for option \"-u, --UseBondSymbols\" is not valid. Allowed values: Yes or No\n";
   }
   if ($Options{useperlcorerandom} !~ /^(Yes|No)$/i) {
     die "Error: The value specified, $Options{useperlcorerandom}, for option \"--UsePerlCoreRandom\" is not valid. Allowed values: Yes or No\n";
   }
   if ($Options{useuniquepaths} !~ /^(Yes|No)$/i) {
     die "Error: The value specified, $Options{useuniquepaths}, for option \"--UseUniquePaths\" is not valid. Allowed values: Yes or No\n";
   }
   if ($Options{vectorstringformat} !~ /^(IDsAndValuesString|IDsAndValuesPairsString|ValuesAndIDsString|ValuesAndIDsPairsString)$/i) {
     die "Error: The value specified, $Options{vectorstringformat}, for option \"-v, --VectorStringFormat\" is not valid. Allowed values: IDsAndValuesString, IDsAndValuesPairsString, ValuesAndIDsString or ValuesAndIDsPairsString\n";
   }
 }
 930