#!/usr/bin/perl -w
#
# $RCSfile: PathLengthFingerprints.pl,v $
# $Date: 2011/12/27 20:27:03 $
# $Revision: 1.43 $
#
# Author: Manish Sud <msud@san.rr.com>
#
# Copyright (C) 2004-2012 Manish Sud. All rights reserved.
#
# This file is part of MayaChemTools.
#
# MayaChemTools is free software; you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation; either version 3 of the License, or (at your option) any
# later version.
#
# MayaChemTools is distributed in the hope that it will be useful, but without
# any warranty; without even the implied warranty of merchantability of fitness
# for a particular purpose. See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
# write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
# Boston, MA, 02111-1307, USA.
#

use strict;
use FindBin; use lib "$FindBin::Bin/../lib";
use Getopt::Long;
use File::Basename;
use Text::ParseWords;
use Benchmark;
use FileUtil;
use TextUtil;
use SDFileUtil;
use MoleculeFileIO;
use FileIO::FingerprintsSDFileIO;
use FileIO::FingerprintsTextFileIO;
use FileIO::FingerprintsFPFileIO;
use AtomTypes::AtomicInvariantsAtomTypes;
use AtomTypes::FunctionalClassAtomTypes;
use Fingerprints::PathLengthFingerprints;

my($ScriptName, %Options, $StartTime, $EndTime, $TotalTime);

# Autoflush STDOUT
$| = 1;

# Starting message...
$ScriptName = basename($0);
print "\n$ScriptName: Starting...\n\n";
$StartTime = Benchmark->new;

# Get the options and setup script...
SetupScriptUsage();
if ($Options{help} || @ARGV < 1) {
  die GetUsageFromPod("$FindBin::Bin/$ScriptName");
}

my(@SDFilesList);
@SDFilesList = ExpandFileNames(\@ARGV, "sdf sd");

# Process options...
print "Processing options...\n";
my(%OptionsInfo);
ProcessOptions();

# Setup information about input files...
print "Checking input SD file(s)...\n";
my(%SDFilesInfo);
RetrieveSDFilesInfo();

# Process input files; files flagged as not okay by RetrieveSDFilesInfo are skipped...
my($FileIndex);
if (@SDFilesList > 1) {
  print "\nProcessing SD files...\n";
}
for $FileIndex (0 .. $#SDFilesList) {
  if ($SDFilesInfo{FileOkay}[$FileIndex]) {
    print "\nProcessing file $SDFilesList[$FileIndex]...\n";
    GeneratePathLengthFingerprints($FileIndex);
  }
}
print "\n$ScriptName:Done...\n\n";

$EndTime = Benchmark->new;
$TotalTime = timediff ($EndTime, $StartTime);
print "Total time: ", timestr($TotalTime), "\n";

###############################################################################

# Generate fingerprints for a SD file...
#
# Parameters:
#   $FileIndex - index of the input file in @SDFilesList and %SDFilesInfo
#
# Reads every molecule from the SD file, optionally filters it, generates its
# fingerprints and writes them to the output files opened for this input file.
# Prints summary statistics at the end.
#
sub GeneratePathLengthFingerprints {
  my($FileIndex) = @_;
  my($CmpdCount, $IgnoredCmpdCount, $SDFile, $MoleculeFileIO, $Molecule, $PathLengthFingerprints, $NewFPSDFileIO, $NewFPTextFileIO, $NewFPFileIO);

  $SDFile = $SDFilesList[$FileIndex];

  # Setup output files...
  #
  ($NewFPSDFileIO, $NewFPTextFileIO, $NewFPFileIO) = SetupAndOpenOutputFiles($FileIndex);

  $MoleculeFileIO = MoleculeFileIO->new('Name' => $SDFile);
  $MoleculeFileIO->Open();

  $CmpdCount = 0;
  $IgnoredCmpdCount = 0;

  COMPOUND: while ($Molecule = $MoleculeFileIO->ReadMolecule()) {
    $CmpdCount++;

    # Filter compound data before calculating fingerprints...
    if ($OptionsInfo{Filter}) {
      if (CheckAndFilterCompound($CmpdCount, $Molecule)) {
        $IgnoredCmpdCount++;
        next COMPOUND;
      }
    }

    $PathLengthFingerprints = GenerateMoleculeFingerprints($Molecule);
    if (!$PathLengthFingerprints) {
      $IgnoredCmpdCount++;
      ProcessIgnoredCompound('FingerprintsGenerationFailed', $CmpdCount, $Molecule);
      next COMPOUND;
    }

    WriteDataToOutputFiles($FileIndex, $CmpdCount, $Molecule, $PathLengthFingerprints, $NewFPSDFileIO, $NewFPTextFileIO, $NewFPFileIO);
  }
  $MoleculeFileIO->Close();

  # Only the output types requested by the user have a file IO object...
  if ($NewFPSDFileIO) {
    $NewFPSDFileIO->Close();
  }
  if ($NewFPTextFileIO) {
    $NewFPTextFileIO->Close();
  }
  if ($NewFPFileIO) {
    $NewFPFileIO->Close();
  }

  WriteFingerprintsGenerationSummaryStatistics($CmpdCount, $IgnoredCmpdCount);
}

# Process compound being ignored due to problems in fingerprints generation...
#
# Parameters:
#   $Mode      - reason: ContainsNonElementalData, ContainsNoElementalData or
#                FingerprintsGenerationFailed (matched case-insensitively)
#   $CmpdCount - record number of the compound in the input file
#   $Molecule  - molecule object being ignored
#
# Emits a warning naming the compound; nothing is returned to the caller.
#
sub ProcessIgnoredCompound {
  my($Mode, $CmpdCount, $Molecule) = @_;
  my($CmpdID, $DataFieldLabelAndValuesRef, $Reason);

  $DataFieldLabelAndValuesRef = $Molecule->GetDataFieldLabelAndValues();
  $CmpdID = SetupCmpdIDForOutputFiles($CmpdCount, $Molecule, $DataFieldLabelAndValuesRef);

  if ($Mode =~ /^ContainsNonElementalData$/i) {
    $Reason = "Compound contains atom data corresponding to non-elemental atom symbol(s)";
  }
  elsif ($Mode =~ /^ContainsNoElementalData$/i) {
    $Reason = "Compound contains no atom data";
  }
  else {
    # FingerprintsGenerationFailed and any unrecognized mode share the same message...
    $Reason = "Fingerprints generation didn't succeed";
  }

  warn "\nWarning: Ignoring compound record number $CmpdCount with ID $CmpdID: ${Reason}...\n\n";
}

# Check and filter compounds....
#
# Returns 1 when the compound must be ignored (it contains non-elemental atom
# data or no elemental atom data at all), otherwise 0. A warning is issued for
# each ignored compound.
#
sub CheckAndFilterCompound {
  my($CmpdCount, $Molecule) = @_;
  my($ElementCount, $NonElementCount);

  ($ElementCount, $NonElementCount) = $Molecule->GetNumOfElementsAndNonElements();

  if ($NonElementCount) {
    ProcessIgnoredCompound('ContainsNonElementalData', $CmpdCount, $Molecule);
    return 1;
  }

  if (!$ElementCount) {
    ProcessIgnoredCompound('ContainsNoElementalData', $CmpdCount, $Molecule);
    return 1;
  }

  return 0;
}

# Write out compounds fingerprints generation summary statistics...
#
# Parameters:
#   $CmpdCount        - total number of compound records read
#   $IgnoredCmpdCount - number of compound records skipped
#
sub WriteFingerprintsGenerationSummaryStatistics {
  my($CmpdCount, $IgnoredCmpdCount) = @_;
  my($ProcessedCmpdCount);

  $ProcessedCmpdCount = $CmpdCount - $IgnoredCmpdCount;

  print "\nNumber of compounds: $CmpdCount\n";
  print "Number of compounds processed successfully during fingerprints generation: $ProcessedCmpdCount\n";
  print "Number of compounds ignored during fingerprints generation: $IgnoredCmpdCount\n";
}

# Open output files...
#
# Parameters:
#   $FileIndex - index of the input file in %SDFilesInfo
#
# Returns the list ($NewFPSDFileIO, $NewFPTextFileIO, $NewFPFileIO); an entry is
# undef when the corresponding output type was not requested.
#
sub SetupAndOpenOutputFiles {
  my($FileIndex) = @_;
  my($NewFPSDFile, $NewFPFile, $NewFPTextFile, $NewFPSDFileIO, $NewFPTextFileIO, $NewFPFileIO, %FingerprintsFileIOParams);

  ($NewFPSDFileIO, $NewFPTextFileIO, $NewFPFileIO) = (undef) x 3;

  # Setup common parameters for fingerprints file IO objects...
  #
  %FingerprintsFileIOParams = ();
  if ($OptionsInfo{Mode} =~ /^PathLengthBits$/i) {
    %FingerprintsFileIOParams = ('Mode' => 'Write', 'Overwrite' => $OptionsInfo{OverwriteFiles}, 'FingerprintsStringMode' => 'FingerprintsBitVectorString', 'BitStringFormat' => $OptionsInfo{BitStringFormat}, 'BitsOrder' => $OptionsInfo{BitsOrder});
  }
  elsif ($OptionsInfo{Mode} =~ /^PathLengthCount$/i) {
    %FingerprintsFileIOParams = ('Mode' => 'Write', 'Overwrite' => $OptionsInfo{OverwriteFiles}, 'FingerprintsStringMode' => 'FingerprintsVectorString', 'VectorStringFormat' => $OptionsInfo{VectorStringFormat});
  }

  if ($OptionsInfo{SDOutput}) {
    $NewFPSDFile = $SDFilesInfo{SDOutFileNames}[$FileIndex];
    print "Generating SD file $NewFPSDFile...\n";
    $NewFPSDFileIO = FingerprintsSDFileIO->new('Name' => $NewFPSDFile, %FingerprintsFileIOParams, 'FingerprintsFieldLabel' => $OptionsInfo{FingerprintsLabel});
    $NewFPSDFileIO->Open();
  }

  if ($OptionsInfo{FPOutput}) {
    $NewFPFile = $SDFilesInfo{FPOutFileNames}[$FileIndex];
    print "Generating FP file $NewFPFile...\n";
    $NewFPFileIO = FingerprintsFPFileIO->new('Name' => $NewFPFile, %FingerprintsFileIOParams);
    $NewFPFileIO->Open();
  }

  if ($OptionsInfo{TextOutput}) {
    my($ColLabelsRef);

    $NewFPTextFile = $SDFilesInfo{TextOutFileNames}[$FileIndex];
    $ColLabelsRef = SetupFPTextFileCoulmnLabels($FileIndex);

    print "Generating text file $NewFPTextFile...\n";
    $NewFPTextFileIO = FingerprintsTextFileIO->new('Name' => $NewFPTextFile, %FingerprintsFileIOParams, 'DataColLabels' => $ColLabelsRef, 'OutDelim' => $OptionsInfo{OutDelim}, 'OutQuote' => $OptionsInfo{OutQuote});
    $NewFPTextFileIO->Open();
  }

  return ($NewFPSDFileIO, $NewFPTextFileIO, $NewFPFileIO);
}

# Write fingerprints and other data to appropriate output files...
#
# Parameters:
#   $FileIndex, $CmpdCount     - input file index and compound record number
#   $Molecule                  - molecule whose fingerprints are being written
#   $PathLengthFingerprints    - generated fingerprints object
#   $NewFPSDFileIO, $NewFPTextFileIO, $NewFPFileIO
#                              - output file IO objects; false for output types not in use
#
sub WriteDataToOutputFiles {
  my($FileIndex, $CmpdCount, $Molecule, $PathLengthFingerprints, $NewFPSDFileIO, $NewFPTextFileIO, $NewFPFileIO) = @_;

  # Data field values are retrieved only for text and FP output...
  my $DataFieldsRef = undef;
  $DataFieldsRef = $Molecule->GetDataFieldLabelAndValues() if ($NewFPTextFileIO || $NewFPFileIO);

  # SD output: fingerprints are written along with the input molecule string...
  if ($NewFPSDFileIO) {
    my $MoleculeString = $Molecule->GetInputMoleculeString();
    $NewFPSDFileIO->WriteFingerprints($PathLengthFingerprints, $MoleculeString);
  }

  # Text output: fingerprints are written along with the selected column values...
  if ($NewFPTextFileIO) {
    my $ValuesRef = SetupFPTextFileCoulmnValues($FileIndex, $CmpdCount, $Molecule, $DataFieldsRef);
    $NewFPTextFileIO->WriteFingerprints($PathLengthFingerprints, $ValuesRef);
  }

  # FP output: fingerprints are written along with the compound ID...
  if ($NewFPFileIO) {
    my $ID = SetupCmpdIDForOutputFiles($CmpdCount, $Molecule, $DataFieldsRef);
    $NewFPFileIO->WriteFingerprints($PathLengthFingerprints, $ID);
  }
}

# Generate appropriate column labels for FPText output file...
#
# Returns a reference to a list holding the data column labels selected by the
# data fields mode, followed by the fingerprints label.
#
sub SetupFPTextFileCoulmnLabels {
  my($FileIndex) = @_;
  my $FieldsMode = $OptionsInfo{DataFieldsMode};

  my @Labels =
      ($FieldsMode =~ /^All$/i)        ? @{$SDFilesInfo{AllDataFieldsRef}[$FileIndex]}
    : ($FieldsMode =~ /^Common$/i)     ? @{$SDFilesInfo{CommonDataFieldsRef}[$FileIndex]}
    : ($FieldsMode =~ /^Specify$/i)    ? @{$OptionsInfo{SpecifiedDataFields}}
    : ($FieldsMode =~ /^CompoundID$/i) ? ($OptionsInfo{CompoundIDLabel})
    :                                    ();

  # Fingerprints label always goes last...
  return [@Labels, $OptionsInfo{FingerprintsLabel}];
}

# Generate column values for FPText output file..
#
# Returns a reference to a list of column values for one compound: the compound
# ID in CompoundID mode, otherwise the value of each selected data field, with
# an empty string for any field the compound doesn't have.
#
sub SetupFPTextFileCoulmnValues {
  my($FileIndex, $CmpdCount, $Molecule, $DataFieldLabelAndValuesRef) = @_;
  my $FieldsMode = $OptionsInfo{DataFieldsMode};

  if ($FieldsMode =~ /^CompoundID$/i) {
    return [SetupCmpdIDForOutputFiles($CmpdCount, $Molecule, $DataFieldLabelAndValuesRef)];
  }

  # Pick the list of data field labels driving the columns...
  my $LabelsRef;
  if ($FieldsMode =~ /^All$/i) {
    $LabelsRef = $SDFilesInfo{AllDataFieldsRef}[$FileIndex];
  }
  elsif ($FieldsMode =~ /^Common$/i) {
    $LabelsRef = $SDFilesInfo{CommonDataFieldsRef}[$FileIndex];
  }
  elsif ($FieldsMode =~ /^Specify$/i) {
    $LabelsRef = $OptionsInfo{SpecifiedDataFields};
  }
  else {
    return [];
  }

  my @Values = map { exists $DataFieldLabelAndValuesRef->{$_} ? $DataFieldLabelAndValuesRef->{$_} : '' } @{$LabelsRef};

  return \@Values;
}

# Generate compound ID for FP and FPText output files..
#
# The ID comes from the molecule name, a label prefix followed by the compound
# record number, or a data field value, as selected by the compound ID mode.
# An empty string is returned for an unrecognized mode or a missing data field.
#
sub SetupCmpdIDForOutputFiles {
  my($CmpdCount, $Molecule, $DataFieldLabelAndValuesRef) = @_;
  my $IDMode = $OptionsInfo{CompoundIDMode};
  my $ID = '';

  if ($IDMode =~ /^MolNameOrLabelPrefix$/i) {
    # Fall back to prefix and record number when the molecule has no usable name...
    $ID = $Molecule->GetName() || "$OptionsInfo{CompoundID}${CmpdCount}";
  }
  elsif ($IDMode =~ /^LabelPrefix$/i) {
    $ID = "$OptionsInfo{CompoundID}${CmpdCount}";
  }
  elsif ($IDMode =~ /^DataField$/i) {
    my $FieldLabel = $OptionsInfo{CompoundID};
    if (exists $DataFieldLabelAndValuesRef->{$FieldLabel}) {
      $ID = $DataFieldLabelAndValuesRef->{$FieldLabel};
    }
    else {
      $ID = '';
    }
  }
  elsif ($IDMode =~ /^MolName$/i) {
    $ID = $Molecule->GetName();
  }

  return $ID;
}

# Generate fingerprints for molecule...
#
# Applies the requested molecule clean up (largest component, hydrogens removal,
# rings and aromaticity detection) and dispatches to the generator for the
# specified mode. Returns the fingerprints object, or undef on failure.
#
sub GenerateMoleculeFingerprints {
  my($Molecule) = @_;

  $Molecule->KeepLargestComponent() if $OptionsInfo{KeepLargestComponent};
  $Molecule->DeleteHydrogens() if $OptionsInfo{IgnoreHydrogens};

  if ($OptionsInfo{DetectAromaticity}) {
    # Aromaticity detection is attempted only after rings detection succeeds...
    $Molecule->DetectRings() or return undef;
    $Molecule->DetectAromaticity();
  }

  my $Fingerprints = undef;
  my $Mode = $OptionsInfo{Mode};

  if ($Mode =~ /^PathLengthBits$/i) {
    $Fingerprints = GeneratePathLengthBitsFingerprints($Molecule);
  }
  elsif ($Mode =~ /^PathLengthCount$/i) {
    $Fingerprints = GeneratePathLengthCountFingerprints($Molecule);
  }
  else {
    die "Error: The value specified, $Options{mode}, for option \"-m, --mode\" is not valid. Allowed values: PathLengthBits or PathLengthCount\n";
  }

  return $Fingerprints;
}

# Generate pathlength bits fingerprints for molecule...
#
# Parameters:
#   $Molecule - molecule to generate fingerprints for
#
# Returns the PathLengthFingerprints object, folded to the requested size when
# folding is turned on, or undef when fingerprints generation didn't succeed.
#
sub GeneratePathLengthBitsFingerprints {
  my($Molecule) = @_;
  my($PathLengthFingerprints);

  $PathLengthFingerprints = PathLengthFingerprints->new(
    'Molecule' => $Molecule,
    'Type' => 'PathLengthBits',
    'AtomIdentifierType' => $OptionsInfo{AtomIdentifierType},
    'NumOfBitsToSetPerPath' => $OptionsInfo{NumOfBitsToSetPerPath},
    'Size' => $OptionsInfo{Size},
    'MinLength' => $OptionsInfo{MinPathLength},
    'MaxLength' => $OptionsInfo{MaxPathLength},
    'AllowRings' => $OptionsInfo{AllowRings},
    'AllowSharedBonds' => $OptionsInfo{AllowSharedBonds},
    'UseBondSymbols' => $OptionsInfo{UseBondSymbols},
    'UseUniquePaths' => $OptionsInfo{UseUniquePaths},
    'UsePerlCoreRandom' => $OptionsInfo{UsePerlCoreRandom}
  );

  # Set atom identifier type...
  SetAtomIdentifierTypeValuesToUse($PathLengthFingerprints);

  # Generate fingerprints...
  $PathLengthFingerprints->GenerateFingerprints();

  # Make sure fingerprints generation is successful...
  if (!$PathLengthFingerprints->IsFingerprintsGenerationSuccessful()) {
    return undef;
  }

  if ($OptionsInfo{Fold}) {
    # The second argument is passed through as 0, as in the original call...
    my($CheckSizeValue) = 0;
    $PathLengthFingerprints->FoldFingerprintsBySize($OptionsInfo{FoldedSize}, $CheckSizeValue);
  }

  return $PathLengthFingerprints;
}

# Generate pathlength count fingerprints for molecule...
#
# Parameters:
#   $Molecule - molecule to generate fingerprints for
#
# Returns the PathLengthFingerprints object, or undef when fingerprints
# generation didn't succeed.
#
sub GeneratePathLengthCountFingerprints {
  my($Molecule) = @_;
  my($PathLengthFingerprints);

  $PathLengthFingerprints = PathLengthFingerprints->new(
    'Molecule' => $Molecule,
    'Type' => 'PathLengthCount',
    'AtomIdentifierType' => $OptionsInfo{AtomIdentifierType},
    'MinLength' => $OptionsInfo{MinPathLength},
    'MaxLength' => $OptionsInfo{MaxPathLength},
    'AllowRings' => $OptionsInfo{AllowRings},
    'AllowSharedBonds' => $OptionsInfo{AllowSharedBonds},
    'UseBondSymbols' => $OptionsInfo{UseBondSymbols},
    'UseUniquePaths' => $OptionsInfo{UseUniquePaths}
  );

  # Set atom identifier type...
  SetAtomIdentifierTypeValuesToUse($PathLengthFingerprints);

  # Generate fingerprints...
  $PathLengthFingerprints->GenerateFingerprints();

  # Make sure fingerprints generation is successful...
  if (!$PathLengthFingerprints->IsFingerprintsGenerationSuccessful()) {
    return undef;
  }
  return $PathLengthFingerprints;
}

# Set atom identifier type to use for generating path strings...
#
# Parameters:
#   $PathLengthFingerprints - fingerprints object to configure
#
# Only atomic invariants and functional class atom types carry an extra list
# of values to pass on; other supported types need no additional setup here.
# Dies on an unsupported atom identifier type.
#
sub SetAtomIdentifierTypeValuesToUse {
  my($PathLengthFingerprints) = @_;

  if ($OptionsInfo{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
    $PathLengthFingerprints->SetAtomicInvariantsToUse(\@{$OptionsInfo{AtomicInvariantsToUse}});
  }
  elsif ($OptionsInfo{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
    $PathLengthFingerprints->SetFunctionalClassesToUse(\@{$OptionsInfo{FunctionalClassesToUse}});
  }
  elsif ($OptionsInfo{AtomIdentifierType} =~ /^(DREIDINGAtomTypes|EStateAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
    # Nothing to do for now...
  }
  else {
    die "Error: The value specified, $Options{atomidentifiertype}, for option \"-a, --AtomIdentifierType\" is not valid. Supported atom identifier types in current release of MayaChemTools: AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes\n";
  }
}

# Retrieve information about SD files...
#
# Checks each input file and sets up its output file names in %SDFilesInfo.
# A file is marked okay (FileOkay is 1) only when it exists, is a SD file,
# contains the compound ID data field when one is required, doesn't clash with
# its own SD output file name and, unless overwriting is allowed, none of the
# requested output files already exist. Files failing a check are skipped with
# a warning.
#
sub RetrieveSDFilesInfo {
  my($SDFile, $Index, $FileDir, $FileExt, $FileName, $OutFileRoot, $TextOutFileExt, $SDOutFileExt, $FPOutFileExt, $NewSDFileName, $NewFPFileName, $NewTextFileName, $CheckDataField, $CollectDataFields, $AllDataFieldsRef, $CommonDataFieldsRef);

  %SDFilesInfo = ();
  @{$SDFilesInfo{FileOkay}} = ();
  @{$SDFilesInfo{OutFileRoot}} = ();
  @{$SDFilesInfo{SDOutFileNames}} = ();
  @{$SDFilesInfo{FPOutFileNames}} = ();
  @{$SDFilesInfo{TextOutFileNames}} = ();
  @{$SDFilesInfo{AllDataFieldsRef}} = ();
  @{$SDFilesInfo{CommonDataFieldsRef}} = ();

  $CheckDataField = ($OptionsInfo{TextOutput} && ($OptionsInfo{DataFieldsMode} =~ /^CompoundID$/i) && ($OptionsInfo{CompoundIDMode} =~ /^DataField$/i)) ? 1 : 0;
  $CollectDataFields = ($OptionsInfo{TextOutput} && ($OptionsInfo{DataFieldsMode} =~ /^(All|Common)$/i)) ? 1 : 0;

  FILELIST: for $Index (0 .. $#SDFilesList) {
    $SDFile = $SDFilesList[$Index];

    $SDFilesInfo{FileOkay}[$Index] = 0;
    $SDFilesInfo{OutFileRoot}[$Index] = '';
    $SDFilesInfo{SDOutFileNames}[$Index] = '';
    $SDFilesInfo{FPOutFileNames}[$Index] = '';
    $SDFilesInfo{TextOutFileNames}[$Index] = '';

    if (!(-e $SDFile)) {
      warn "Warning: Ignoring file $SDFile: It doesn't exist\n";
      next FILELIST;
    }
    if (!CheckFileType($SDFile, "sd sdf")) {
      warn "Warning: Ignoring file $SDFile: It's not a SD file\n";
      next FILELIST;
    }

    if ($CheckDataField) {
      # Make sure data field exists in SD file; only the first compound record is checked...
      my($CmpdString, $SpecifiedDataField, @CmpdLines, %DataFieldValues);

      @CmpdLines = ();
      # Three-argument open keeps characters in the file name from being treated as an open mode...
      open SDFILE, '<', $SDFile or die "Error: Couldn't open $SDFile: $! \n";
      $CmpdString = ReadCmpdString(\*SDFILE);
      close SDFILE;
      @CmpdLines = split "\n", $CmpdString;
      %DataFieldValues = GetCmpdDataHeaderLabelsAndValues(\@CmpdLines);
      $SpecifiedDataField = $OptionsInfo{CompoundID};
      if (!exists $DataFieldValues{$SpecifiedDataField}) {
        warn "Warning: Ignoring file $SDFile: Data field value, $SpecifiedDataField, using \"--CompoundID\" option in \"DataField\" \"--CompoundIDMode\" doesn't exist\n";
        next FILELIST;
      }
    }

    $AllDataFieldsRef = '';
    $CommonDataFieldsRef = '';
    if ($CollectDataFields) {
      my($CmpdCount);
      open SDFILE, '<', $SDFile or die "Error: Couldn't open $SDFile: $! \n";
      ($CmpdCount, $AllDataFieldsRef, $CommonDataFieldsRef) = GetAllAndCommonCmpdDataHeaderLabels(\*SDFILE);
      close SDFILE;
    }

    # Setup output file names...
    $FileDir = ""; $FileName = ""; $FileExt = "";
    ($FileDir, $FileName, $FileExt) = ParseFileName($SDFile);

    $TextOutFileExt = "csv";
    if ($Options{outdelim} =~ /^tab$/i) {
      $TextOutFileExt = "tsv";
    }
    $SDOutFileExt = $FileExt;
    $FPOutFileExt = "fpf";

    # A user specified root is honored only for a single input file...
    if ($OptionsInfo{OutFileRoot} && (@SDFilesList == 1)) {
      my ($RootFileDir, $RootFileName, $RootFileExt) = ParseFileName($OptionsInfo{OutFileRoot});
      if ($RootFileName && $RootFileExt) {
        $FileName = $RootFileName;
      }
      else {
        $FileName = $OptionsInfo{OutFileRoot};
      }
      $OutFileRoot = $FileName;
    }
    else {
      $OutFileRoot = "${FileName}PathLengthFP";
    }

    $NewSDFileName = "${OutFileRoot}.${SDOutFileExt}";
    $NewFPFileName = "${OutFileRoot}.${FPOutFileExt}";
    $NewTextFileName = "${OutFileRoot}.${TextOutFileExt}";

    if ($OptionsInfo{SDOutput}) {
      # \Q...\E: match the file name literally instead of interpreting it as a
      # pattern ("." and other metacharacters are common in file names)...
      if ($SDFile =~ /\Q$NewSDFileName\E/i) {
        warn "Warning: Ignoring input file $SDFile: Same output, $NewSDFileName, and input file names.\n";
        print "Specify a different name using \"-r --root\" option or use default name.\n";
        next FILELIST;
      }
    }

    if (!$OptionsInfo{OverwriteFiles}) {
      # Check SD, FP and text output files...
      if ($OptionsInfo{SDOutput}) {
        if (-e $NewSDFileName) {
          warn "Warning: Ignoring file $SDFile: The file $NewSDFileName already exists\n";
          next FILELIST;
        }
      }
      if ($OptionsInfo{FPOutput}) {
        if (-e $NewFPFileName) {
          warn "Warning: Ignoring file $SDFile: The file $NewFPFileName already exists\n";
          next FILELIST;
        }
      }
      if ($OptionsInfo{TextOutput}) {
        if (-e $NewTextFileName) {
          warn "Warning: Ignoring file $SDFile: The file $NewTextFileName already exists\n";
          next FILELIST;
        }
      }
    }

    $SDFilesInfo{FileOkay}[$Index] = 1;

    $SDFilesInfo{OutFileRoot}[$Index] = $OutFileRoot;
    $SDFilesInfo{SDOutFileNames}[$Index] = $NewSDFileName;
    $SDFilesInfo{FPOutFileNames}[$Index] = $NewFPFileName;
    $SDFilesInfo{TextOutFileNames}[$Index] = $NewTextFileName;

    $SDFilesInfo{AllDataFieldsRef}[$Index] = $AllDataFieldsRef;
    $SDFilesInfo{CommonDataFieldsRef}[$Index] = $CommonDataFieldsRef;
  }
}

# Process option values...
#
# Validates the command line option values held in %Options and stores them in
# %OptionsInfo in the form used by the rest of the script. Dies with an error
# message on the first invalid value.
#
sub ProcessOptions {
  %OptionsInfo = ();

  $OptionsInfo{Mode} = $Options{mode};
  $OptionsInfo{PathMode} = $Options{pathmode};

  ProcessAtomIdentifierTypeOptions();

  $OptionsInfo{BitsOrder} = $Options{bitsorder};
  $OptionsInfo{BitStringFormat} = $Options{bitstringformat};

  $OptionsInfo{CompoundIDMode} = $Options{compoundidmode};
  $OptionsInfo{CompoundIDLabel} = $Options{compoundidlabel};
  $OptionsInfo{DataFieldsMode} = $Options{datafieldsmode};

  my(@SpecifiedDataFields);
  @SpecifiedDataFields = ();

  @{$OptionsInfo{SpecifiedDataFields}} = ();
  $OptionsInfo{CompoundID} = '';

  if ($Options{datafieldsmode} =~ /^CompoundID$/i) {
    if ($Options{compoundidmode} =~ /^DataField$/i) {
      if (!$Options{compoundid}) {
        die "Error: You must specify a value for \"--CompoundID\" option in \"DataField\" \"--CompoundIDMode\". \n";
      }
      $OptionsInfo{CompoundID} = $Options{compoundid};
    }
    elsif ($Options{compoundidmode} =~ /^(LabelPrefix|MolNameOrLabelPrefix)$/i) {
      $OptionsInfo{CompoundID} = $Options{compoundid} ? $Options{compoundid} : 'Cmpd';
    }
  }
  elsif ($Options{datafieldsmode} =~ /^Specify$/i) {
    if (!$Options{datafields}) {
      die "Error: You must specify a value for \"--DataFields\" option in \"Specify\" \"-d, --DataFieldsMode\". \n";
    }
    @SpecifiedDataFields = split /\,/, $Options{datafields};
    push @{$OptionsInfo{SpecifiedDataFields}}, @SpecifiedDataFields;
  }

  if ($Options{atomidentifiertype} !~ /^AtomicInvariantsAtomTypes$/i) {
    if ($Options{detectaromaticity} =~ /^No$/i) {
      die "Error: The value specified, $Options{detectaromaticity}, for option \"--DetectAromaticity\" is not valid. No value is only allowed during AtomicInvariantsAtomTypes value for \"-a, --AtomIdentifierType\" \n";
    }
  }
  $OptionsInfo{DetectAromaticity} = ($Options{detectaromaticity} =~ /^Yes$/i) ? 1 : 0;

  $OptionsInfo{Filter} = ($Options{filter} =~ /^Yes$/i) ? 1 : 0;

  $OptionsInfo{FingerprintsLabel} = $Options{fingerprintslabel} ? $Options{fingerprintslabel} : 'PathLengthFingerprints';

  my($Size, $MinSize, $MaxSize);
  $MinSize = 32;
  $MaxSize = 2**32;
  $Size = $Options{size};
  if (!(IsPositiveInteger($Size) && $Size >= $MinSize && $Size <= $MaxSize && IsNumberPowerOfNumber($Size, 2))) {
    die "Error: Invalid size value, $Size, for \"-s, --size\" option. Allowed values: power of 2, >= minimum size of $MinSize, and <= maximum size of $MaxSize.\n";
  }
  $OptionsInfo{Size} = $Size;

  $OptionsInfo{Fold} = ($Options{fold} =~ /^Yes$/i) ? 1 : 0;
  my($FoldedSize);
  $FoldedSize = $Options{foldedsize};
  if ($Options{fold} =~ /^Yes$/i) {
    if (!(IsPositiveInteger($FoldedSize) && $FoldedSize < $Size && IsNumberPowerOfNumber($FoldedSize, 2))) {
      die "Error: Invalid folded size value, $FoldedSize, for \"--FoldedSize\" option. Allowed values: power of 2, >= minimum size of $MinSize, and < size value of $Size.\n";
    }
  }
  $OptionsInfo{FoldedSize} = $FoldedSize;

  $OptionsInfo{IgnoreHydrogens} = ($Options{ignorehydrogens} =~ /^Yes$/i) ? 1 : 0;
  $OptionsInfo{KeepLargestComponent} = ($Options{keeplargestcomponent} =~ /^Yes$/i) ? 1 : 0;

  my($MinPathLength, $MaxPathLength);
  $MinPathLength = $Options{minpathlength};
  $MaxPathLength = $Options{maxpathlength};
  if (!IsPositiveInteger($MinPathLength)) {
    die "Error: Invalid path length value, $MinPathLength, for \"--MinPathLength\" option. Allowed values: > 0\n";
  }
  if (!IsPositiveInteger($MaxPathLength)) {
    die "Error: Invalid path length value, $MaxPathLength, for \"--MaxPathLength\" option. Allowed values: > 0\n";
  }
  if ($MinPathLength >= $MaxPathLength) {
    die "Error: Invalid minimum and maximum path length values, $MinPathLength and $MaxPathLength, for \"--MinPathLength\" and \"--MaxPathLength\"options. Allowed values: minimum path length value must be smaller than maximum path length value.\n";
  }
  $OptionsInfo{MinPathLength} = $MinPathLength;
  $OptionsInfo{MaxPathLength} = $MaxPathLength;

  my($NumOfBitsToSetPerPath);
  $NumOfBitsToSetPerPath = $Options{numofbitstosetperpath};
  # Validate the value being reported in the error message...
  if (!IsPositiveInteger($NumOfBitsToSetPerPath)) {
    die "Error: Invalid value, $NumOfBitsToSetPerPath, for \"-n, --NumOfBitsToSetPerPath\" option. Allowed values: > 0\n";
  }
  if ($NumOfBitsToSetPerPath >= $Size) {
    die "Error: Invalid value, $NumOfBitsToSetPerPath, for \"-n, --NumOfBitsToSetPerPath\" option. Allowed values: It must be less than the size, $Size, of the fingerprint bit-string.\n";
  }
  $OptionsInfo{NumOfBitsToSetPerPath} = $NumOfBitsToSetPerPath;

  $OptionsInfo{Output} = $Options{output};
  $OptionsInfo{SDOutput} = ($Options{output} =~ /^(SD|All)$/i) ? 1 : 0;
  $OptionsInfo{FPOutput} = ($Options{output} =~ /^(FP|All)$/i) ? 1 : 0;
  $OptionsInfo{TextOutput} = ($Options{output} =~ /^(Text|All)$/i) ? 1 : 0;

  $OptionsInfo{OutDelim} = $Options{outdelim};
  $OptionsInfo{OutQuote} = ($Options{quote} =~ /^Yes$/i) ? 1 : 0;

  $OptionsInfo{OverwriteFiles} = $Options{overwrite} ? 1 : 0;
  $OptionsInfo{OutFileRoot} = $Options{root} ? $Options{root} : 0;

  $OptionsInfo{UseBondSymbols} = ($Options{usebondsymbols} =~ /^Yes$/i) ? 1 : 0;

  $OptionsInfo{UsePerlCoreRandom} = ($Options{useperlcorerandom} =~ /^Yes$/i) ? 1 : 0;

  $OptionsInfo{UseUniquePaths} = ($Options{useuniquepaths} =~ /^Yes$/i) ? 1 : 0;

  $OptionsInfo{VectorStringFormat} = $Options{vectorstringformat};

  # Setup parameters used during generation of fingerprints by PathLengthFingerprints class...
  my($AllowRings, $AllowSharedBonds);
  $AllowRings = 1;
  $AllowSharedBonds = 1;
  MODE: {
    if ($Options{pathmode} =~ /^AtomPathsWithoutRings$/i) { $AllowSharedBonds = 0; $AllowRings = 0; last MODE;}
    if ($Options{pathmode} =~ /^AtomPathsWithRings$/i) { $AllowSharedBonds = 0; $AllowRings = 1; last MODE;}
    if ($Options{pathmode} =~ /^AllAtomPathsWithoutRings$/i) { $AllowSharedBonds = 1; $AllowRings = 0; last MODE;}
    if ($Options{pathmode} =~ /^AllAtomPathsWithRings$/i) { $AllowSharedBonds = 1; $AllowRings = 1; last MODE;}
    die "Error: ProcessOptions: mode value, $Options{pathmode}, is not supported.\n";
  }
  $OptionsInfo{AllowRings} = $AllowRings;
  $OptionsInfo{AllowSharedBonds} = $AllowSharedBonds;
}

# Process atom identifier type and related options...
#
# Records the atom identifier type in %OptionsInfo and processes the option
# holding the list of values for the two types which have one. Dies on an
# unsupported type.
#
sub ProcessAtomIdentifierTypeOptions {
  my $Type = $Options{atomidentifiertype};

  $OptionsInfo{AtomIdentifierType} = $Type;

  if ($Type =~ /^AtomicInvariantsAtomTypes$/i) {
    ProcessAtomicInvariantsToUseOption();
    return;
  }

  if ($Type =~ /^FunctionalClassAtomTypes$/i) {
    ProcessFunctionalClassesToUse();
    return;
  }

  # Nothing more to do for the remaining supported types...
  return if ($Type =~ /^(DREIDINGAtomTypes|EStateAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i);

  die "Error: The value specified, $Options{atomidentifiertype}, for option \"-a, --AtomIdentifierType\" is not valid. Supported atom identifier types in current release of MayaChemTools: AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes\n";
}

# Process specified atomic invariants to use...
#
# Splits the comma delimited "--AtomicInvariantsToUse" value, validates each
# atomic invariant and stores the list in $OptionsInfo{AtomicInvariantsToUse}.
# Dies when the value is empty, contains an unavailable invariant, or doesn't
# include AS or AtomSymbol.
#
sub ProcessAtomicInvariantsToUseOption {
  my($AtomicInvariant, $AtomSymbolSpecified, @AtomicInvariantsWords);

  @{$OptionsInfo{AtomicInvariantsToUse}} = ();
  if (IsEmpty($Options{atomicinvariantstouse})) {
    die "Error: Atomic invariants value specified using \"--AtomicInvariantsToUse\" option is empty\n";
  }
  $AtomSymbolSpecified = 0;
  @AtomicInvariantsWords = split /\,/, $Options{atomicinvariantstouse};
  for $AtomicInvariant (@AtomicInvariantsWords) {
    if (!AtomicInvariantsAtomTypes::IsAtomicInvariantAvailable($AtomicInvariant)) {
      # Message ends in a newline so that die doesn't append script name and line number...
      die "Error: Atomic invariant specified, $AtomicInvariant, using \"--AtomicInvariantsToUse\" option is not valid...\n";
    }
    if ($AtomicInvariant =~ /^(AS|AtomSymbol)$/i) {
      $AtomSymbolSpecified = 1;
    }
    push @{$OptionsInfo{AtomicInvariantsToUse}}, $AtomicInvariant;
  }
  if (!$AtomSymbolSpecified) {
    die "Error: Atomic invariant, AS or AtomSymbol, must be specified as using \"--AtomicInvariantsToUse\" option...\n";
  }
}

# Process specified functional classes invariants to use...
#
# Splits the comma delimited "--FunctionalClassesToUse" value, validates each
# functional class and stores the list in $OptionsInfo{FunctionalClassesToUse}.
# Dies when the value is empty or contains an unavailable functional class.
#
sub ProcessFunctionalClassesToUse {
  my($FunctionalClass, @FunctionalClassesToUseWords);

  @{$OptionsInfo{FunctionalClassesToUse}} = ();
  if (IsEmpty($Options{functionalclassestouse})) {
    die "Error: Functional classes value specified using \"--FunctionalClassesToUse\" option is empty\n";
  }
  @FunctionalClassesToUseWords = split /\,/, $Options{functionalclassestouse};
  for $FunctionalClass (@FunctionalClassesToUseWords) {
    if (!FunctionalClassAtomTypes::IsFunctionalClassAvailable($FunctionalClass)) {
      # Message ends in a newline so that die doesn't append script name and line number...
      die "Error: Functional class specified, $FunctionalClass, using \"--FunctionalClassesToUse\" option is not valid...\n";
    }
    push @{$OptionsInfo{FunctionalClassesToUse}}, $FunctionalClass;
  }
}

# Setup script usage and retrieve command line arguments specified using various options...
#
# Initializes the file level %Options hash with default values, overlays the
# values specified on the command line using GetOptions, changes to the working
# directory when "-w, --workingdir" is specified, and validates option values.
#
# Dies with an error message on the first invalid option value encountered.
#
sub SetupScriptUsage {

  # Retrieve all the options...
  %Options = ();

  # Default values; any of these may be overridden by GetOptions below...
  $Options{atomidentifiertype} = 'AtomicInvariantsAtomTypes';
  $Options{atomicinvariantstouse} = 'AS';

  $Options{functionalclassestouse} = 'HBD,HBA,PI,NI,Ar,Hal';

  $Options{bitsorder} = 'Ascending';
  $Options{bitstringformat} = 'HexadecimalString';

  $Options{compoundidmode} = 'LabelPrefix';
  $Options{compoundidlabel} = 'CompoundID';
  $Options{datafieldsmode} = 'CompoundID';
  $Options{detectaromaticity} = 'Yes';

  $Options{filter} = 'Yes';

  $Options{fold} = 'No';
  $Options{foldedsize} = 256;

  $Options{ignorehydrogens} = 'Yes';
  $Options{keeplargestcomponent} = 'Yes';

  $Options{mode} = 'PathLengthBits';
  $Options{pathmode} = 'AllAtomPathsWithRings';

  $Options{minpathlength} = 1;
  $Options{maxpathlength} = 8;

  $Options{numofbitstosetperpath} = 1;

  $Options{output} = 'text';
  $Options{outdelim} = 'comma';
  $Options{quote} = 'yes';

  $Options{size} = 1024;

  $Options{usebondsymbols} = 'yes';
  $Options{useperlcorerandom} = 'yes';
  $Options{useuniquepaths} = 'yes';

  $Options{vectorstringformat} = 'IDsAndValuesString';

  if (!GetOptions(\%Options,
      "atomidentifiertype|a=s", "atomicinvariantstouse=s", "functionalclassestouse=s",
      "bitsorder=s", "bitstringformat|b=s",
      "compoundid=s", "compoundidlabel=s", "compoundidmode=s",
      "datafields=s", "datafieldsmode|d=s", "detectaromaticity=s",
      "filter|f=s", "fingerprintslabel=s", "fold=s", "foldedsize=i",
      "help|h", "ignorehydrogens|i=s", "keeplargestcomponent|k=s",
      "mode|m=s", "minpathlength=i", "maxpathlength=i", "numofbitstosetperpath|n=i",
      "outdelim=s", "output=s", "overwrite|o", "pathmode|p=s", "quote|q=s",
      "root|r=s", "size|s=i",
      "usebondsymbols|u=s", "useperlcorerandom=s", "useuniquepaths=s",
      "vectorstringformat|v=s", "workingdir|w=s")) {
    die "\nTo get a list of valid options and their values, use \"$ScriptName -h\" or\n\"perl -S $ScriptName -h\" command and try again...\n";
  }

  # Switch to the working directory before any further processing...
  if ($Options{workingdir}) {
    if (! -d $Options{workingdir}) {
      die "Error: The value specified, $Options{workingdir}, for option \"-w --workingdir\" is not a directory name.\n";
    }
    chdir $Options{workingdir} or die "Error: Couldn't chdir $Options{workingdir}: $! \n";
  }

  # Validate option values...
  if ($Options{atomidentifiertype} !~ /^(AtomicInvariantsAtomTypes|DREIDINGAtomTypes|EStateAtomTypes|FunctionalClassAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
    die "Error: The value specified, $Options{atomidentifiertype}, for option \"-a, --AtomIdentifierType\" is not valid. Supported atom identifier types in current release of MayaChemTools: AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes\n";
  }
  if ($Options{bitsorder} !~ /^(Ascending|Descending)$/i) {
    die "Error: The value specified, $Options{bitsorder}, for option \"--BitsOrder\" is not valid. Allowed values: Ascending or Descending\n";
  }
  if ($Options{bitstringformat} !~ /^(BinaryString|HexadecimalString)$/i) {
    die "Error: The value specified, $Options{bitstringformat}, for option \"-b, --bitstringformat\" is not valid. Allowed values: BinaryString or HexadecimalString\n";
  }
  if ($Options{compoundidmode} !~ /^(DataField|MolName|LabelPrefix|MolNameOrLabelPrefix)$/i) {
    die "Error: The value specified, $Options{compoundidmode}, for option \"--CompoundIDMode\" is not valid. Allowed values: DataField, MolName, LabelPrefix or MolNameOrLabelPrefix\n";
  }
  if ($Options{datafieldsmode} !~ /^(All|Common|Specify|CompoundID)$/i) {
    die "Error: The value specified, $Options{datafieldsmode}, for option \"-d, --DataFieldsMode\" is not valid. Allowed values: All, Common, Specify or CompoundID\n";
  }
  if ($Options{detectaromaticity} !~ /^(Yes|No)$/i) {
    die "Error: The value specified, $Options{detectaromaticity}, for option \"--DetectAromaticity\" is not valid. Allowed values: Yes or No\n";
  }
  if ($Options{filter} !~ /^(Yes|No)$/i) {
    die "Error: The value specified, $Options{filter}, for option \"-f, --Filter\" is not valid. Allowed values: Yes or No\n";
  }
  if ($Options{fold} !~ /^(Yes|No)$/i) {
    die "Error: The value specified, $Options{fold}, for option \"--fold\" is not valid. Allowed values: Yes or No\n";
  }
  if (!IsPositiveInteger($Options{foldedsize})) {
    die "Error: The value specified, $Options{foldedsize}, for option \"--FoldedSize\" is not valid. Allowed values: > 0 \n";
  }
  if ($Options{ignorehydrogens} !~ /^(Yes|No)$/i) {
    die "Error: The value specified, $Options{ignorehydrogens}, for option \"-i, --IgnoreHydrogens\" is not valid. Allowed values: Yes or No\n";
  }
  if ($Options{keeplargestcomponent} !~ /^(Yes|No)$/i) {
    die "Error: The value specified, $Options{keeplargestcomponent}, for option \"-k, --KeepLargestComponent\" is not valid. Allowed values: Yes or No\n";
  }
  if ($Options{mode} !~ /^(PathLengthBits|PathLengthCount)$/i) {
    die "Error: The value specified, $Options{mode}, for option \"-m, --mode\" is not valid. Allowed values: PathLengthBits or PathLengthCount\n";
  }
  if (!IsPositiveInteger($Options{minpathlength})) {
    die "Error: The value specified, $Options{minpathlength}, for option \"--MinPathLength\" is not valid. Allowed values: > 0 \n";
  }
  if (!IsPositiveInteger($Options{numofbitstosetperpath})) {
    # GetOptions stores values under the lowercase option name; report that key...
    die "Error: The value specified, $Options{numofbitstosetperpath}, for option \"--NumOfBitsToSetPerPath\" is not valid. Allowed values: > 0 \n";
  }
  if (!IsPositiveInteger($Options{maxpathlength})) {
    die "Error: The value specified, $Options{maxpathlength}, for option \"--MaxPathLength\" is not valid. Allowed values: > 0 \n";
  }
  if ($Options{output} !~ /^(SD|FP|text|all)$/i) {
    die "Error: The value specified, $Options{output}, for option \"--output\" is not valid. Allowed values: SD, FP, text, or all\n";
  }
  if ($Options{outdelim} !~ /^(comma|semicolon|tab)$/i) {
    die "Error: The value specified, $Options{outdelim}, for option \"--outdelim\" is not valid. Allowed values: comma, tab, or semicolon\n";
  }
  if ($Options{pathmode} !~ /^(AtomPathsWithoutRings|AtomPathsWithRings|AllAtomPathsWithoutRings|AllAtomPathsWithRings)$/i) {
    # Short form of --PathMode is -p; -m belongs to --mode...
    die "Error: The value specified, $Options{pathmode}, for option \"-p, --PathMode\" is not valid. Allowed values: AtomPathsWithoutRings, AtomPathsWithRings, AllAtomPathsWithoutRings or AllAtomPathsWithRings\n";
  }
  if ($Options{quote} !~ /^(Yes|No)$/i) {
    die "Error: The value specified, $Options{quote}, for option \"-q --quote\" is not valid. Allowed values: Yes or No\n";
  }
  if ($Options{outdelim} =~ /semicolon/i && $Options{quote} =~ /^No$/i) {
    die "Error: The value specified, $Options{quote}, for option \"-q --quote\" is not allowed with, semicolon value of \"--outdelim\" option: Fingerprints string use semicolon as delimiter for various data fields and must be quoted.\n";
  }

  if (!IsPositiveInteger($Options{size})) {
    die "Error: The value specified, $Options{size}, for option \"-s, --size\" is not valid. Allowed values: > 0 \n";
  }
  if ($Options{usebondsymbols} !~ /^(Yes|No)$/i) {
    die "Error: The value specified, $Options{usebondsymbols}, for option \"-u, --UseBondSymbols\" is not valid. Allowed values: Yes or No\n";
  }
  if ($Options{useperlcorerandom} !~ /^(Yes|No)$/i) {
    die "Error: The value specified, $Options{useperlcorerandom}, for option \"--UsePerlCoreRandom\" is not valid. Allowed values: Yes or No\n";
  }
  if ($Options{useuniquepaths} !~ /^(Yes|No)$/i) {
    die "Error: The value specified, $Options{useuniquepaths}, for option \"--UseUniquePaths\" is not valid. Allowed values: Yes or No\n";
  }
  if ($Options{vectorstringformat} !~ /^(IDsAndValuesString|IDsAndValuesPairsString|ValuesAndIDsString|ValuesAndIDsPairsString)$/i) {
    die "Error: The value specified, $Options{vectorstringformat}, for option \"-v, --VectorStringFormat\" is not valid. Allowed values: IDsAndValuesString, IDsAndValuesPairsString, ValuesAndIDsString or ValuesAndIDsPairsString\n";
  }
}
