1 #!/bin/env python 2 # 3 # File: RDKitEnumerateCompoundLibrary.py 4 # Author: Manish Sud <msud@san.rr.com> 5 # 6 # Copyright (C) 2024 Manish Sud. All rights reserved. 7 # 8 # The functionality available in this script is implemented using RDKit, an 9 # open source toolkit for cheminformatics developed by Greg Landrum. 10 # 11 # This file is part of MayaChemTools. 12 # 13 # MayaChemTools is free software; you can redistribute it and/or modify it under 14 # the terms of the GNU Lesser General Public License as published by the Free 15 # Software Foundation; either version 3 of the License, or (at your option) any 16 # later version. 17 # 18 # MayaChemTools is distributed in the hope that it will be useful, but without 19 # any warranty; without even the implied warranty of merchantability of fitness 20 # for a particular purpose. See the GNU Lesser General Public License for more 21 # details. 22 # 23 # You should have received a copy of the GNU Lesser General Public License 24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or 25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, 26 # Boston, MA, 02111-1307, USA. 27 # 28 29 from __future__ import print_function 30 31 # Add local python path to the global path and import standard library modules... 32 import os 33 import sys; sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python")) 34 import time 35 import re 36 37 # RDKit imports... 38 try: 39 from rdkit import rdBase 40 from rdkit import Chem 41 from rdkit.Chem import AllChem 42 from rdkit.Chem import FunctionalGroups 43 except ImportError as ErrMsg: 44 sys.stderr.write("\nFailed to import RDKit module/package: %s\n" % ErrMsg) 45 sys.stderr.write("Check/update your RDKit environment and try again.\n\n") 46 sys.exit(1) 47 48 # MayaChemTools imports... 49 try: 50 from docopt import docopt 51 import MiscUtil 52 import RDKitUtil 53 except ImportError as ErrMsg: 54 sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg) 55 sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n") 56 sys.exit(1) 57 58 ScriptName = os.path.basename(sys.argv[0]) 59 Options = {} 60 OptionsInfo = {} 61 62 RxnNamesMap = {} 63 64 def main(): 65 """Start execution of the script.""" 66 67 MiscUtil.PrintInfo("\n%s (RDKit v%s; MayaChemTools v%s; %s): Starting...\n" % (ScriptName, rdBase.rdkitVersion, MiscUtil.GetMayaChemToolsVersion(), time.asctime())) 68 69 (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime() 70 71 # Retrieve command line arguments and options... 72 RetrieveOptions() 73 74 # Process and validate command line arguments and options... 75 ProcessOptions() 76 77 # Perform actions required by the script... 78 PerformChemicalLibraryEnumeration() 79 80 MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName) 81 MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime)) 82 83 def PerformChemicalLibraryEnumeration(): 84 """Retrieve functional groups information and perform search.""" 85 86 ProcessReactionNamesInfo() 87 PerformEnumeration() 88 89 def PerformEnumeration(): 90 """Enumerate virutal compound library.""" 91 92 ReactantFilesList = OptionsInfo["ReactantFilesList"] 93 Outfile = OptionsInfo["Outfile"] 94 95 RxnByNameMode = OptionsInfo["RxnByNameMode"] 96 if RxnByNameMode: 97 RxnSMIRKSPattern = OptionsInfo["RxnNameSMIRKS"] 98 else: 99 RxnSMIRKSPattern = OptionsInfo["SpecifiedSMIRKS"] 100 101 # Set up a reaction and match number of reactants in rxn SMIRKS against number of 102 # reactant files... 103 Rxn = AllChem.ReactionFromSmarts(RxnSMIRKSPattern) 104 RxnReactantsCount = Rxn.GetNumReactantTemplates() 105 106 ReactantFilesList = OptionsInfo["ReactantFilesList"] 107 ReactantFilesCount = len(ReactantFilesList) 108 if ReactantFilesCount != RxnReactantsCount: 109 MiscUtil.PrintError("The number of specified reactant files, %d, must match number of reactants, %d, in reaction SMIRKS" % (ReactantFilesCount, RxnReactantsCount)) 110 111 # Retrieve reactant molecules... 112 ReactantsMolsList = RetrieveReactantsMolecules() 113 114 # Set up a molecule writer... 115 Writer = None 116 Writer = RDKitUtil.MoleculesWriter(Outfile, **OptionsInfo["OutfileParams"]) 117 if Writer is None: 118 MiscUtil.PrintError("Failed to setup a writer for output fie %s " % Outfile) 119 120 MiscUtil.PrintInfo("\nGenerating file %s..." % Outfile) 121 122 # Set up reaction... 123 ReturnReactants = False 124 if OptionsInfo["UseReactantNames"]: 125 ReturnReactants = True 126 RxnProducts = AllChem.EnumerateLibraryFromReaction(Rxn, ReactantsMolsList, ReturnReactants) 127 128 # Generate product molecules and write them out... 129 130 Compute2DCoords = OptionsInfo["Compute2DCoords"] 131 Sanitize = OptionsInfo["Sanitize"] 132 133 ProdMolCount = 0 134 ValidProdMolCount = 0 135 136 if ReturnReactants: 137 for Products, Reactants in list(RxnProducts): 138 for ProdMol in Products: 139 ProdMolCount += 1 140 141 # Set product name... 142 ReactantMolNames = [ReactantMol.GetProp("_Name") for ReactantMol in Reactants] 143 Delimiter = "_" 144 ProdMolName = Delimiter.join(ReactantMolNames) + "_Prod%d" % ProdMolCount 145 ProdMol.SetProp("_Name", ProdMolName) 146 147 Status = WriteProductMolecule(Writer, ProdMol, Sanitize, Compute2DCoords) 148 if Status: 149 ValidProdMolCount += 1 150 else: 151 for Products in list(RxnProducts): 152 for ProdMol in Products: 153 ProdMolCount += 1 154 155 # Set product name... 156 ProdMolName = "Prod%d" % ProdMolCount 157 ProdMol.SetProp("_Name", ProdMolName) 158 159 Status = WriteProductMolecule(Writer, ProdMol, Sanitize, Compute2DCoords) 160 if Status: 161 ValidProdMolCount += 1 162 163 if Writer is not None: 164 Writer.close() 165 166 if ValidProdMolCount: 167 MiscUtil.PrintInfo("\nTotal number of product molecules: %d" % ProdMolCount) 168 MiscUtil.PrintInfo("Number of valid product molecules: %d" % ValidProdMolCount) 169 MiscUtil.PrintInfo("Number of ignored product molecules: %d" % (ProdMolCount - ValidProdMolCount)) 170 else: 171 MiscUtil.PrintInfo("\nThe compound library enumeration failed to generate any product molecules.\nCheck to make sure the reactants specified in input files match their corresponding specifications in reaction SMIRKS and try again.") 172 173 def WriteProductMolecule(Writer, ProdMol, Sanitize, Compute2DCoords): 174 """Prepare and write out product molecule.""" 175 176 try: 177 if Sanitize: 178 Chem.SanitizeMol(ProdMol) 179 except (RuntimeError, ValueError): 180 MiscUtil.PrintWarning("Ignoring product molecule: Failed to sanitize...\n") 181 return False 182 183 try: 184 if Compute2DCoords: 185 AllChem.Compute2DCoords(ProdMol) 186 except (RuntimeError, ValueError): 187 MiscUtil.PrintWarning("Ignoring product molecule: Failed to compute 2D coordinates...\n") 188 return False 189 190 Writer.write(ProdMol) 191 192 return True 193 194 def RetrieveReactantsMolecules(): 195 """Retrieve reactant molecules from each reactant file and return a list containing lists of molecules 196 for each reactant file.""" 197 198 MiscUtil.PrintInfo("\nProcessing reactant file(s)...") 199 200 ReactantsMolsList = [] 201 ReactantFilesList = OptionsInfo["ReactantFilesList"] 202 UseReactantNames = OptionsInfo["UseReactantNames"] 203 ReactantCount = 0 204 205 for FileIndex in range(0, len(ReactantFilesList)): 206 ReactantCount += 1 207 ReactantFile = ReactantFilesList[FileIndex] 208 209 MiscUtil.PrintInfo("\nProcessing reactant file: %s..." % ReactantFile) 210 211 Mols = RDKitUtil.ReadMolecules(ReactantFile, **OptionsInfo["InfileParams"]) 212 213 ValidMols = [] 214 MolCount = 0 215 ValidMolCount = 0 216 217 for Mol in Mols: 218 MolCount += 1 219 if Mol is None: 220 continue 221 222 if RDKitUtil.IsMolEmpty(Mol): 223 MolName = RDKitUtil.GetMolName(Mol, MolCount) 224 MiscUtil.PrintWarning("Ignoring empty molecule: %s" % MolName) 225 continue 226 227 ValidMolCount += 1 228 229 # Check and set mol name... 230 if UseReactantNames: 231 MolName = RDKitUtil.GetMolName(Mol) 232 if not len(MolName): 233 MolName = "React%dMol%d" % (ReactantCount, MolCount) 234 Mol.SetProp("_Name", MolName) 235 236 ValidMols.append(Mol) 237 238 ReactantsMolsList.append(ValidMols) 239 240 MiscUtil.PrintInfo("Total number of molecules: %d" % MolCount) 241 MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount) 242 MiscUtil.PrintInfo("Number of ignored molecules: %d" % (MolCount - ValidMolCount)) 243 244 return ReactantsMolsList 245 246 def ProcessReactionNamesInfo(): 247 """Process reaction names information.""" 248 249 if not OptionsInfo["RxnByNameMode"]: 250 return 251 252 RetrieveReactionNamesInfo() 253 ProcessSpecifiedReactionName() 254 255 def ProcessSpecifiedReactionName(): 256 """Process and validate specified reaction name.""" 257 258 OptionsInfo["RxnNameSMIRKS"] = None 259 260 # Set up a map of valid group rxn names for checking specified rxn names... 261 CanonicalRxnNameMap = {} 262 for Name in RxnNamesMap['Names']: 263 CanonicalRxnNameMap[Name.lower()] = Name 264 265 CanonicalRxnName = OptionsInfo["RxnName"].lower() 266 if CanonicalRxnName in CanonicalRxnNameMap: 267 Name = CanonicalRxnNameMap[CanonicalRxnName] 268 OptionsInfo["RxnNameSMIRKS"] = RxnNamesMap['SMIRKSPattern'][Name] 269 else: 270 MiscUtil.PrintError("The rxn name name, %s, specified using \"-r, --rxnName\" option is not a valid name." % (OptionsInfo["RxnName"])) 271 272 def ProcessListReactionNamesOption(): 273 """Process list reaction names information.""" 274 275 # Validate and process dataFile option for listing reaction names information... 276 OptionsInfo["RxnNamesFile"] = None 277 if not re.match("^auto$", Options["--rxnNamesFile"], re.I): 278 MiscUtil.ValidateOptionFilePath("--rxnNamesFile", Options["--rxnNamesFile"]) 279 OptionsInfo["RxnNamesFile"] = Options["--rxnNamesFile"] 280 281 RetrieveReactionNamesInfo() 282 ListReactionNamesInfo() 283 284 def RetrieveReactionNamesInfo(): 285 """Retrieve reaction names information.""" 286 287 RxnNamesFilePath = OptionsInfo["RxnNamesFile"] 288 if RxnNamesFilePath is None: 289 MayaChemToolsDataDir = MiscUtil.GetMayaChemToolsLibDataPath() 290 RxnNamesFilePath = os.path.join(MayaChemToolsDataDir, "ReactionNamesAndSMIRKS.csv") 291 292 MiscUtil.PrintInfo("\nRetrieving reaction names and SMIRKS patterns from file %s" % (RxnNamesFilePath)) 293 294 if not os.path.exists(RxnNamesFilePath): 295 MiscUtil.PrintError("The reaction names file, %s, doesn't exist.\n" % (RxnNamesFilePath)) 296 297 Delimiter = ',' 298 QuoteChar = '"' 299 IgnoreHeaderLine = True 300 RxnLinesWords = MiscUtil.GetTextLinesWords(RxnNamesFilePath, Delimiter, QuoteChar, IgnoreHeaderLine) 301 302 RxnNamesMap['Names'] = [] 303 RxnNamesMap['SMIRKSPattern'] = {} 304 305 for LineWords in RxnLinesWords: 306 Name = LineWords[0] 307 SMIRKSPattern = LineWords[1] 308 309 if Name in RxnNamesMap['SMIRKSPattern']: 310 MiscUtil.PrintWarning("Ignoring duplicate reaction name: %s..." % Name) 311 else: 312 RxnNamesMap['Names'].append(Name) 313 RxnNamesMap['SMIRKSPattern'][Name] = SMIRKSPattern 314 315 if not len(RxnNamesMap['Names']): 316 MiscUtil.PrintError("Failed to retrieve any reaction names and SMIRKS patterns...") 317 318 MiscUtil.PrintInfo("Total number of reactions present in reaction names and SMIRKS file: %d" % (len(RxnNamesMap['Names']))) 319 320 def ListReactionNamesInfo(): 321 """List reaction names information.""" 322 323 MiscUtil.PrintInfo("\nListing available freaction names and SMIRKS patterns...") 324 MiscUtil.PrintInfo("\nReactionName\tSMIRKSPattern") 325 326 for Name in sorted(RxnNamesMap['Names']): 327 SMIRKSPattern = RxnNamesMap['SMIRKSPattern'][Name] 328 MiscUtil.PrintInfo("%s\t%s" % (Name, SMIRKSPattern)) 329 330 MiscUtil.PrintInfo("") 331 332 def ProcessOptions(): 333 """Process and validate command line arguments and options.""" 334 335 MiscUtil.PrintInfo("Processing options...") 336 337 # Validate options... 338 ValidateOptions() 339 340 Compute2DCoords = True 341 if not re.match("^yes$", Options["--compute2DCoords"], re.I): 342 Compute2DCoords = False 343 OptionsInfo["Compute2DCoords"] = Compute2DCoords 344 345 OptionsInfo["Mode"] = Options["--mode"] 346 RxnByNameMode = True 347 if not re.match("^RxnByName$", Options["--mode"], re.I): 348 RxnByNameMode = False 349 OptionsInfo["RxnByNameMode"] = RxnByNameMode 350 351 OptionsInfo["ProdMolNamesMode"] = Options["--prodMolNames"] 352 UseReactantNames = False 353 if re.match("^UseReactants$", Options["--prodMolNames"], re.I): 354 UseReactantNames = True 355 OptionsInfo["UseReactantNames"] = UseReactantNames 356 357 OptionsInfo["RxnName"] = Options["--rxnName"] 358 OptionsInfo["RxnNameSMIRKS"] = None 359 if OptionsInfo["RxnByNameMode"]: 360 if not Options["--rxnName"]: 361 MiscUtil.PrintError("No rxn name specified using \"-r, --rxnName\" option during \"RxnByName\" value of \"-m, --mode\" option") 362 363 OptionsInfo["RxnNamesFile"] = None 364 if not re.match("^auto$", Options["--rxnNamesFile"], re.I): 365 OptionsInfo["RxnNamesFile"] = Options["--rxnNamesFile"] 366 367 ReactantFiles = re.sub(" ", "", Options["--infiles"]) 368 ReactantFilesList = [] 369 ReactantFilesList = ReactantFiles.split(",") 370 OptionsInfo["ReactantFiles"] = ReactantFiles 371 OptionsInfo["ReactantFilesList"] = ReactantFilesList 372 373 OptionsInfo["SpecifiedSMIRKS"] = Options["--smirksRxn"] 374 if not OptionsInfo["RxnByNameMode"]: 375 if not Options["--smirksRxn"]: 376 MiscUtil.PrintError("No rxn SMIRKS pattern specified using \"-r, --rxnName\" option during \"RxnByName\" value of \"-m, --mode\" option") 377 378 OptionsInfo["Outfile"] = Options["--outfile"] 379 OptionsInfo["Overwrite"] = Options["--overwrite"] 380 381 # Use first reactant file as input file as all input files have the same format... 382 OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters("--infileParams", Options["--infileParams"], ReactantFilesList[0]) 383 384 # No need to pass any input or output file name due to absence of any auto parameter... 385 OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters("--outfileParams", Options["--outfileParams"]) 386 387 Sanitize = True 388 if not re.match("^yes$", Options["--sanitize"], re.I): 389 Sanitize = False 390 OptionsInfo["Sanitize"] = Sanitize 391 392 def RetrieveOptions(): 393 """Retrieve command line arguments and options.""" 394 395 # Get options... 396 global Options 397 Options = docopt(_docoptUsage_) 398 399 # Set current working directory to the specified directory... 400 WorkingDir = Options["--workingdir"] 401 if WorkingDir: 402 os.chdir(WorkingDir) 403 404 # Handle examples option... 405 if "--examples" in Options and Options["--examples"]: 406 MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_)) 407 sys.exit(0) 408 409 # Handle listing of functional group information... 410 if Options and Options["--list"]: 411 ProcessListReactionNamesOption() 412 sys.exit(0) 413 414 def ValidateOptions(): 415 """Validate option values.""" 416 417 MiscUtil.ValidateOptionTextValue("--compute2DCoords", Options["--compute2DCoords"], "yes no") 418 419 MiscUtil.ValidateOptionTextValue("-m, --mode", Options["--mode"], "RxnByName RxnBySMIRKS") 420 MiscUtil.ValidateOptionTextValue("-p, --prodMolNames", Options["--prodMolNames"], "UseReactants Sequential") 421 422 if not re.match("^auto$", Options["--rxnNamesFile"], re.I): 423 MiscUtil.ValidateOptionFilePath("--rxnNamesFile", Options["--rxnNamesFile"]) 424 425 MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "sdf sd smi") 426 MiscUtil.ValidateOptionsOutputFileOverwrite("-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"]) 427 428 ReactantFiles = re.sub(" ", "", Options["--infiles"]) 429 if not ReactantFiles: 430 MiscUtil.PrintError("No reactant files specified for \"-i, --infiles\" option") 431 432 # Validate file extensions... 433 for ReactantFile in ReactantFiles.split(","): 434 MiscUtil.ValidateOptionFilePath("-i, --infiles", ReactantFile) 435 MiscUtil.ValidateOptionFileExt("-i, --infiles", ReactantFile, "sdf sd smi csv tsv txt") 436 MiscUtil.ValidateOptionsDistinctFileNames("-i, --infiles", ReactantFile, "-o, --outfile", Options["--outfile"]) 437 438 # Match file formats... 439 FirstFile = True 440 FirstFileFormat = "" 441 for ReactantFile in ReactantFiles.split(","): 442 FileFormat = "" 443 if MiscUtil.CheckFileExt(ReactantFile, "sdf sd"): 444 FileFormat = "SD" 445 elif MiscUtil.CheckFileExt(ReactantFile, "smi csv tsv txt"): 446 FileFormat = "SMILES" 447 else: 448 MiscUtil.PrintError("The file name specified , %s, for option \"-i, --infiles\" is not valid. Supported file formats: sdf sd smi csv tsv txt\n" % ReactantFile) 449 450 if FirstFile: 451 FirstFile = False 452 FirstFileFormat = FileFormat 453 continue 454 455 if not re.match("^%s$" % FirstFileFormat, FileFormat, re.IGNORECASE): 456 MiscUtil.PrintError("All reactant file names - %s - specified using option \"-i, --infiles\" must have the same file format.\n" % ReactantFiles) 457 458 459 MiscUtil.ValidateOptionTextValue("--sanitize", Options["--sanitize"], "yes no") 460 461 # Setup a usage string for docopt... 462 _docoptUsage_ = """ 463 RDKitEnumerateCompoundLibrary.py - Enumerate a virtual compound library 464 465 Usage: 466 RDKitEnumerateCompoundLibrary.py [--compute2DCoords <yes or no>] [--infileParams <Name,Value,...>] 467 [--mode <RxnByName or RxnBySMIRKS>] [--outfileParams <Name,Value,...>] [--overwrite] 468 [--prodMolNames <UseReactants or Sequential>] [--rxnName <text>] 469 [--rxnNamesFile <FileName or auto>] [--smirksRxn <text>] [--sanitize <yes or no>] 470 [-w <dir>] -i <ReactantFile1,...> -o <outfile> 471 RDKitEnumerateCompoundLibrary.py [--rxnNamesFile <FileName or auto>] -l | --list 472 RDKitEnumerateCompoundLibrary.py -h | --help | -e | --examples 473 474 Description: 475 Perform a combinatorial enumeration of a virtual library of molecules for a reaction specified 476 using a reaction name or SMIRKS pattern and reactant input files. 477 478 The SMIRKS patterns for supported reactions names [ Ref 134 ] are retrieved from file, 479 ReactionNamesAndSMIRKS.csv, available in MayaChemTools data directory. The current 480 list of supported reaction names is shown below: 481 482 '1,2,4_triazole_acetohydrazide', '1,2,4_triazole_carboxylic_acid_ester', 3_nitrile_pyridine, 483 Benzimidazole_derivatives_aldehyde, Benzimidazole_derivatives_carboxylic_acid_ester, 484 Benzofuran, Benzothiazole, Benzothiophene, Benzoxazole_aromatic_aldehyde, 485 Benzoxazole_carboxylic_acid, Buchwald_Hartwig, Decarboxylative_coupling, Fischer_indole, 486 Friedlaender_chinoline, Grignard_alcohol, Grignard_carbonyl, Heck_non_terminal_vinyl, 487 Heck_terminal_vinyl, Heteroaromatic_nuc_sub, Huisgen_Cu_catalyzed_1,4_subst, 488 Huisgen_disubst_alkyne, Huisgen_Ru_catalyzed_1,5_subst, Imidazole, Indole, Mitsunobu_imide, 489 Mitsunobu_phenole, Mitsunobu_sulfonamide, Mitsunobu_tetrazole_1, Mitsunobu_tetrazole_2, 490 Mitsunobu_tetrazole_3, Mitsunobu_tetrazole_4, N_arylation_heterocycles, Negishi, 491 Niementowski_quinazoline, Nucl_sub_aromatic_ortho_nitro, Nucl_sub_aromatic_para_nitro, 492 Oxadiazole, Paal_Knorr_pyrrole, Phthalazinone, Pictet_Spengler, Piperidine_indole, 493 Pyrazole, Reductive_amination, Schotten_Baumann_amide, Sonogashira, Spiro_chromanone, 494 Stille, Sulfon_amide, Suzuki, Tetrazole_connect_regioisomer_1, Tetrazole_connect_regioisomer_2, 495 Tetrazole_terminal, Thiazole, Thiourea, Triaryl_imidazole, Urea, Williamson_ether, Wittig 496 497 The supported input file formats are: SD (.sdf, .sd), SMILES (.smi, .csv, .tsv, .txt) 498 499 The supported output file formats are: SD (.sdf, .sd), SMILES (.smi) 500 501 Options: 502 -c, --compute2DCoords <yes or no> [default: yes] 503 Compute 2D coordinates of product molecules before writing them out. 504 -i, --infiles <ReactantFile1, ReactantFile2...> 505 Comma delimited list of reactant file names for enumerating a compound library 506 using reaction SMIRKS. The number of reactant files must match number of 507 reaction components in reaction SMIRKS. All reactant input files must have 508 the same format. 509 --infileParams <Name,Value,...> [default: auto] 510 A comma delimited list of parameter name and value pairs for reading 511 molecules from files. The supported parameter names for different file 512 formats, along with their default values, are shown below: 513 514 SD, MOL: removeHydrogens,yes,sanitize,yes,strictParsing,yes 515 SMILES: smilesColumn,1,smilesNameColumn,2,smilesDelimiter,space, 516 smilesTitleLine,auto,sanitize,yes 517 518 Possible values for smilesDelimiter: space, comma or tab. These parameters apply 519 to all reactant input files, which must have the same file format. 520 -e, --examples 521 Print examples. 522 -h, --help 523 Print this help message. 524 -l, --list 525 List available reaction names along with corresponding SMIRKS patterns without 526 performing any enumeration. 527 -m, --mode <RxnByName or RxnBySMIRKS> [default: RxnByName] 528 Indicate whether a reaction is specified by a reaction name or a SMIRKS pattern. 529 Possible values: RxnByName or RxnBySMIRKS. 530 -o, --outfile <outfile> 531 Output file name. 532 --outfileParams <Name,Value,...> [default: auto] 533 A comma delimited list of parameter name and value pairs for writing 534 molecules to files. The supported parameter names for different file 535 formats, along with their default values, are shown below: 536 537 SD: kekulize,yes,forceV3000,no 538 SMILES: smilesKekulize,no,smilesDelimiter,space, smilesIsomeric,yes, 539 smilesTitleLine,yes 540 541 -p, --prodMolNames <UseReactants or Sequential> [default: UseReactants] 542 Generate names of product molecules using reactant names or assign names in 543 a sequential order. Possible values: UseReactants or Sequential. Format of 544 molecule names: UseReactants - <ReactName1>_<ReactName2>..._Prod<Num>; 545 Sequential - Prod<Num> 546 --overwrite 547 Overwrite existing files. 548 -r, --rxnName <text> 549 Name of a reaction to use for enumerating a compound library. This option 550 is only used during 'RxnByName' value of '-m, --mode' option. 551 --rxnNamesFile <FileName or auto> [default: auto] 552 Specify a file name containing data for names of reactions and SMIRKS patterns or 553 use default file, ReactionNamesAndSMIRKS.csv, available in MayaChemTools data 554 directory. 555 556 Reactions SMIRKS file format: RxnName,RxnSMIRKS. 557 558 The format of data in local reaction names file must match format of the reaction 559 SMIRKS file available in MayaChemTools data directory. 560 -s, --smirksRxn <text> 561 SMIRKS pattern of a reaction to use for enumerating a compound library. This 562 option is only used during 'RxnBySMIRKS' value of '-m, --mode' option. 563 --sanitize <yes or no> [default: yes] 564 Sanitize product molecules before writing them out. 565 -w, --workingdir <dir> 566 Location of working directory which defaults to the current directory. 567 568 Examples: 569 To list all available reaction names along with their SMIRKS pattern, type: 570 571 % RDKitEnumerateCompoundLibrary.py -l 572 573 To perform a combinatorial enumeration of a virtual compound library corresponding 574 to named amide reaction, Schotten_Baumann_amide and write out a SMILES file 575 type: 576 577 % RDKitEnumerateCompoundLibrary.py -r Schotten_Baumann_amide 578 -i 'SampleAcids.smi,SampleAmines.smi' -o SampleOutCmpdLibrary.smi 579 580 To perform a combinatorial enumeration of a virtual compound library corresponding 581 to an amide reaction specified using a SMIRKS pattern and write out a SD file containing 582 sanitized molecules, computed 2D coordinates, and generation of molecule names from 583 reactant names, type: 584 585 % RDKitEnumerateCompoundLibrary.py -m RxnBySMIRKS 586 -s '[O:2]=[C:1][OH].[N:3]>>[O:2]=[C:1][N:3]' 587 -i 'SampleAcids.smi,SampleAmines.smi' -o SampleOutCmpdLibrary.sdf 588 589 To perform a combinatorial enumeration of a virtual compound library corresponding 590 to an amide reaction specified using a SMIRKS pattern and write out a SD file containing 591 unsanitized molecules, without generating 2D coordinates, and a sequential generation 592 of molecule names, type: 593 594 % RDKitEnumerateCompoundLibrary.py -m RxnBySMIRKS -c no -s no 595 -p Sequential -s '[O:2]=[C:1][OH].[N:3]>>[O:2]=[C:1][N:3]' 596 -i 'SampleAcids.smi,SampleAmines.smi' -o SampleOutCmpdLibrary.sdf 597 598 Author: 599 Manish Sud(msud@san.rr.com) 600 601 See also: 602 RDKitConvertFileFormat.py, RDKitFilterPAINS.py, RDKitSearchFunctionalGroups.py, 603 RDKitSearchSMARTS.py 604 605 Copyright: 606 Copyright (C) 2024 Manish Sud. All rights reserved. 607 608 The functionality available in this script is implemented using RDKit, an 609 open source toolkit for cheminformatics developed by Greg Landrum. 610 611 This file is part of MayaChemTools. 612 613 MayaChemTools is free software; you can redistribute it and/or modify it under 614 the terms of the GNU Lesser General Public License as published by the Free 615 Software Foundation; either version 3 of the License, or (at your option) any 616 later version. 617 618 """ 619 620 if __name__ == "__main__": 621 main()