1 #!/bin/env python 2 # 3 # File: RDKitFilterTorsionStrainEnergyAlerts.py 4 # Author: Manish Sud <msud@san.rr.com> 5 # 6 # Collaborator: Pat Walters 7 # 8 # Copyright (C) 2023 Manish Sud. All rights reserved. 9 # 10 # This script uses the torsion strain energy library developed by Gu, S.; 11 # Smith, M. S.; Yang, Y.; Irwin, J. J.; Shoichet, B. K. [ Ref 153 ]. 12 # 13 # The torsion strain enegy library is based on the Torsion Library jointly 14 # developed by the University of Hamburg, Center for Bioinformatics, 15 # Hamburg, Germany and F. Hoffmann-La-Roche Ltd., Basel, Switzerland. 16 # 17 # The functionality available in this script is implemented using RDKit, an 18 # open source toolkit for cheminformatics developed by Greg Landrum. 19 # 20 # This file is part of MayaChemTools. 21 # 22 # MayaChemTools is free software; you can redistribute it and/or modify it under 23 # the terms of the GNU Lesser General Public License as published by the Free 24 # Software Foundation; either version 3 of the License, or (at your option) any 25 # later version. 26 # 27 # MayaChemTools is distributed in the hope that it will be useful, but without 28 # any warranty; without even the implied warranty of merchantability of fitness 29 # for a particular purpose. See the GNU Lesser General Public License for more 30 # details. 31 # 32 # You should have received a copy of the GNU Lesser General Public License 33 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or 34 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, 35 # Boston, MA, 02111-1307, USA. 36 # 37 38 from __future__ import print_function 39 40 # Add local python path to the global path and import standard library modules... 41 import os 42 import sys; sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python")) 43 import time 44 import re 45 import glob 46 import multiprocessing as mp 47 import math 48 49 # RDKit imports... 50 try: 51 from rdkit import rdBase 52 from rdkit import Chem 53 from rdkit.Chem import rdMolTransforms 54 except ImportError as ErrMsg: 55 sys.stderr.write("\nFailed to import RDKit module/package: %s\n" % ErrMsg) 56 sys.stderr.write("Check/update your RDKit environment and try again.\n\n") 57 sys.exit(1) 58 59 # MayaChemTools imports... 60 try: 61 from docopt import docopt 62 import MiscUtil 63 import RDKitUtil 64 import TorsionLibraryUtil 65 except ImportError as ErrMsg: 66 sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg) 67 sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n") 68 sys.exit(1) 69 70 ScriptName = os.path.basename(sys.argv[0]) 71 Options = {} 72 OptionsInfo = {} 73 74 TorsionLibraryInfo = {} 75 76 def main(): 77 """Start execution of the script.""" 78 79 MiscUtil.PrintInfo("\n%s (RDKit v%s; MayaChemTools v%s; %s): Starting...\n" % (ScriptName, rdBase.rdkitVersion, MiscUtil.GetMayaChemToolsVersion(), time.asctime())) 80 81 (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime() 82 83 # Retrieve command line arguments and options... 84 RetrieveOptions() 85 86 if Options["--list"]: 87 # Handle listing of torsion library information... 88 ProcessListTorsionLibraryOption() 89 else: 90 # Process and validate command line arguments and options... 91 ProcessOptions() 92 93 # Perform actions required by the script... 94 PerformFiltering() 95 96 MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName) 97 MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime)) 98 99 def PerformFiltering(): 100 """Filter molecules using SMARTS torsion rules in the torsion strain energy 101 library file.""" 102 103 # Process torsion library info... 104 ProcessTorsionLibraryInfo() 105 106 # Set up a pattern molecule for rotatable bonds... 107 RotBondsPatternMol = Chem.MolFromSmarts(OptionsInfo["RotBondsSMARTSPattern"]) 108 109 # Setup a molecule reader... 110 MiscUtil.PrintInfo("\nProcessing file %s..." % OptionsInfo["Infile"]) 111 Mols = RDKitUtil.ReadMolecules(OptionsInfo["Infile"], **OptionsInfo["InfileParams"]) 112 113 MolCount, ValidMolCount, RemainingMolCount, WriteFailedCount = ProcessMolecules(Mols, RotBondsPatternMol) 114 115 MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount) 116 MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount) 117 MiscUtil.PrintInfo("Number of molecules failed during writing: %d" % WriteFailedCount) 118 MiscUtil.PrintInfo("Number of ignored molecules: %d" % (MolCount - ValidMolCount + WriteFailedCount)) 119 120 MiscUtil.PrintInfo("\nNumber of remaining molecules: %d" % RemainingMolCount) 121 MiscUtil.PrintInfo("Number of filtered molecules: %d" % (ValidMolCount - RemainingMolCount)) 122 123 def ProcessMolecules(Mols, RotBondsPatternMol): 124 """Process and filter molecules.""" 125 126 if OptionsInfo["MPMode"]: 127 return ProcessMoleculesUsingMultipleProcesses(Mols, RotBondsPatternMol) 128 else: 129 return ProcessMoleculesUsingSingleProcess(Mols, RotBondsPatternMol) 130 131 def ProcessMoleculesUsingSingleProcess(Mols, RotBondsPatternMol): 132 """Process and filter molecules using a single process.""" 133 134 SetupTorsionLibraryInfo() 135 136 MiscUtil.PrintInfo("\nFiltering molecules...") 137 138 OutfileFilteredMode = OptionsInfo["OutfileFilteredMode"] 139 140 # Set up writers... 141 OutfilesWriters = SetupOutfilesWriters() 142 143 WriterRemaining = OutfilesWriters["WriterRemaining"] 144 WriterFiltered = OutfilesWriters["WriterFiltered"] 145 WriterAlertSummary = OutfilesWriters["WriterAlertSummary"] 146 147 # Initialize alerts summary info... 148 TorsionAlertsSummaryInfo = InitializeTorsionAlertsSummaryInfo() 149 150 (MolCount, ValidMolCount, RemainingMolCount, WriteFailedCount, FilteredMolWriteCount) = [0] * 5 151 for Mol in Mols: 152 MolCount += 1 153 154 if Mol is None: 155 continue 156 157 if RDKitUtil.IsMolEmpty(Mol): 158 MiscUtil.PrintWarning("Ignoring empty molecule: %s" % RDKitUtil.GetMolName(Mol, MolCount)) 159 continue 160 161 # Check for 3D flag... 162 if not Mol.GetConformer().Is3D(): 163 MiscUtil.PrintWarning("3D tag is not set. Ignoring molecule: %s\n" % RDKitUtil.GetMolName(Mol, MolCount)) 164 continue 165 166 ValidMolCount += 1 167 168 # Identify torsion library alerts for rotatable bonds.. 169 RotBondsAlertsStatus, RotBondsAlertsInfo = IdentifyTorsionLibraryAlertsForRotatableBonds(Mol, RotBondsPatternMol) 170 171 TrackTorsionAlertsSummaryInfo(TorsionAlertsSummaryInfo, RotBondsAlertsInfo) 172 173 # Write out filtered and remaining molecules... 174 WriteStatus = True 175 if RotBondsAlertsStatus: 176 if OutfileFilteredMode: 177 WriteStatus = WriteMolecule(WriterFiltered, Mol, RotBondsAlertsInfo) 178 if WriteStatus: 179 FilteredMolWriteCount += 1 180 else: 181 RemainingMolCount += 1 182 WriteStatus = WriteMolecule(WriterRemaining, Mol, RotBondsAlertsInfo) 183 184 if not WriteStatus: 185 WriteFailedCount += 1 186 187 WriteTorsionAlertsSummaryInfo(WriterAlertSummary, TorsionAlertsSummaryInfo) 188 CloseOutfilesWriters(OutfilesWriters) 189 190 if FilteredMolWriteCount: 191 WriteTorsionAlertsFilteredByRulesInfo(TorsionAlertsSummaryInfo) 192 193 return (MolCount, ValidMolCount, RemainingMolCount, WriteFailedCount) 194 195 def ProcessMoleculesUsingMultipleProcesses(Mols, RotBondsPatternMol): 196 """Process and filter molecules using multiprocessing.""" 197 198 MiscUtil.PrintInfo("\nFiltering molecules using multiprocessing...") 199 200 MPParams = OptionsInfo["MPParams"] 201 OutfileFilteredMode = OptionsInfo["OutfileFilteredMode"] 202 203 # Set up writers... 204 OutfilesWriters = SetupOutfilesWriters() 205 206 WriterRemaining = OutfilesWriters["WriterRemaining"] 207 WriterFiltered = OutfilesWriters["WriterFiltered"] 208 WriterAlertSummary = OutfilesWriters["WriterAlertSummary"] 209 210 # Initialize alerts summary info... 211 TorsionAlertsSummaryInfo = InitializeTorsionAlertsSummaryInfo() 212 213 # Setup data for initializing a worker process... 214 MiscUtil.PrintInfo("\nEncoding options info and rotatable bond pattern molecule...") 215 OptionsInfo["EncodedRotBondsPatternMol"] = RDKitUtil.MolToBase64EncodedMolString(RotBondsPatternMol) 216 InitializeWorkerProcessArgs = (MiscUtil.ObjectToBase64EncodedString(Options), MiscUtil.ObjectToBase64EncodedString(OptionsInfo)) 217 218 # Setup a encoded mols data iterable for a worker process... 219 WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStrings(Mols) 220 221 # Setup process pool along with data initialization for each process... 222 MiscUtil.PrintInfo("\nConfiguring multiprocessing using %s method..." % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()")) 223 MiscUtil.PrintInfo("NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n" % (MPParams["NumProcesses"], MPParams["InputDataMode"], ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"]))) 224 225 ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeWorkerProcess, InitializeWorkerProcessArgs) 226 227 # Start processing... 228 if re.match("^Lazy$", MPParams["InputDataMode"], re.I): 229 Results = ProcessPool.imap(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"]) 230 elif re.match("^InMemory$", MPParams["InputDataMode"], re.I): 231 Results = ProcessPool.map(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"]) 232 else: 233 MiscUtil.PrintError("The value, %s, specified for \"--inputDataMode\" is not supported." % (MPParams["InputDataMode"])) 234 235 (MolCount, ValidMolCount, RemainingMolCount, WriteFailedCount, FilteredMolWriteCount) = [0] * 5 236 for Result in Results: 237 MolCount += 1 238 MolIndex, EncodedMol, RotBondsAlertsStatus, RotBondsAlertsInfo = Result 239 240 if EncodedMol is None: 241 continue 242 ValidMolCount += 1 243 244 Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol) 245 246 TrackTorsionAlertsSummaryInfo(TorsionAlertsSummaryInfo, RotBondsAlertsInfo) 247 248 # Write out filtered and remaining molecules... 249 WriteStatus = True 250 if RotBondsAlertsStatus: 251 if OutfileFilteredMode: 252 WriteStatus = WriteMolecule(WriterFiltered, Mol, RotBondsAlertsInfo) 253 if WriteStatus: 254 FilteredMolWriteCount += 1 255 else: 256 RemainingMolCount += 1 257 WriteStatus = WriteMolecule(WriterRemaining, Mol, RotBondsAlertsInfo) 258 259 if not WriteStatus: 260 WriteFailedCount += 1 261 262 WriteTorsionAlertsSummaryInfo(WriterAlertSummary, TorsionAlertsSummaryInfo) 263 CloseOutfilesWriters(OutfilesWriters) 264 265 if FilteredMolWriteCount: 266 WriteTorsionAlertsFilteredByRulesInfo(TorsionAlertsSummaryInfo) 267 268 return (MolCount, ValidMolCount, RemainingMolCount, WriteFailedCount) 269 270 def InitializeWorkerProcess(*EncodedArgs): 271 """Initialize data for a worker process.""" 272 273 global Options, OptionsInfo 274 275 MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid()) 276 277 # Decode Options and OptionInfo... 278 Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0]) 279 OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1]) 280 281 # Decode RotBondsPatternMol... 282 OptionsInfo["RotBondsPatternMol"] = RDKitUtil.MolFromBase64EncodedMolString(OptionsInfo["EncodedRotBondsPatternMol"]) 283 284 # Setup torsion library... 285 RetrieveTorsionLibraryInfo(Quiet = True) 286 SetupTorsionLibraryInfo(Quiet = True) 287 288 def WorkerProcess(EncodedMolInfo): 289 """Process data for a worker process.""" 290 291 MolIndex, EncodedMol = EncodedMolInfo 292 293 if EncodedMol is None: 294 return [MolIndex, None, False, None] 295 296 Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol) 297 if RDKitUtil.IsMolEmpty(Mol): 298 MolName = RDKitUtil.GetMolName(Mol, (MolIndex + 1)) 299 MiscUtil.PrintWarning("Ignoring empty molecule: %s" % MolName) 300 return [MolIndex, None, False, None] 301 302 # Check for 3D flag... 303 if not Mol.GetConformer().Is3D(): 304 MolName = RDKitUtil.GetMolName(Mol, (MolIndex + 1)) 305 MiscUtil.PrintWarning("3D tag is not set. Ignoring molecule: %s\n" % MolName) 306 return [MolIndex, None, False, None] 307 308 # Identify torsion library alerts for rotatable bonds.. 309 RotBondsAlertsStatus, RotBondsAlertsInfo = IdentifyTorsionLibraryAlertsForRotatableBonds(Mol, OptionsInfo["RotBondsPatternMol"]) 310 311 return [MolIndex, EncodedMol, RotBondsAlertsStatus, RotBondsAlertsInfo] 312 313 def IdentifyTorsionLibraryAlertsForRotatableBonds(Mol, RotBondsPatternMol): 314 """Identify rotatable bonds for torsion library alerts.""" 315 316 # Identify rotatable bonds... 317 RotBondsStatus, RotBondsInfo = TorsionLibraryUtil.IdentifyRotatableBondsForTorsionLibraryMatch(TorsionLibraryInfo, Mol, RotBondsPatternMol) 318 319 if not RotBondsStatus: 320 return (False, None) 321 322 # Identify alerts for rotatable bonds... 323 RotBondsAlertsStatus, RotBondsAlertsInfo = MatchRotatableBondsToTorsionLibrary(Mol, RotBondsInfo) 324 325 return (RotBondsAlertsStatus, RotBondsAlertsInfo) 326 327 def MatchRotatableBondsToTorsionLibrary(Mol, RotBondsInfo): 328 """Match rotatable bond to torsion library.""" 329 330 # Initialize... 331 RotBondsAlertsInfo = InitializeRotatableBondsAlertsInfo() 332 333 # Match rotatable bonds to torsion library... 334 for ID in RotBondsInfo["IDs"]: 335 AtomIndices = RotBondsInfo["AtomIndices"][ID] 336 HierarchyClass = RotBondsInfo["HierarchyClass"][ID] 337 338 MatchStatus, MatchInfo = MatchRotatableBondToTorsionLibrary(Mol, AtomIndices, HierarchyClass) 339 TrackRotatableBondsAlertsInfo(RotBondsAlertsInfo, ID, AtomIndices, MatchStatus, MatchInfo) 340 341 RotBondsAlertsStatus = SetupRotatableBondsAlertStatusTotalStrainEnergiesInfo(RotBondsAlertsInfo) 342 343 return (RotBondsAlertsStatus, RotBondsAlertsInfo) 344 345 def InitializeRotatableBondsAlertsInfo(): 346 """Initialize alerts information for rotatable bonds.""" 347 348 RotBondsAlertsInfo = {} 349 RotBondsAlertsInfo["IDs"] = [] 350 351 for DataLabel in ["RotBondsAlertsStatus", "TotalEnergy", "TotalEnergyLowerBound", "TotalEnergyUpperBound", "AnglesNotObservedCount", "MaxSingleEnergy", "MaxSingleEnergyAlertsCount"]: 352 RotBondsAlertsInfo[DataLabel] = None 353 354 for DataLabel in ["MatchStatus", "MaxSingleEnergyAlertStatus", "AtomIndices", "TorsionAtomIndices", "TorsionAngle", "HierarchyClassName", "HierarchySubClassName", "TorsionRuleNodeID", "TorsionRuleSMARTS", "EnergyMethod", "AngleNotObserved", "Energy", "EnergyLowerBound", "EnergyUpperBound"]: 355 RotBondsAlertsInfo[DataLabel] = {} 356 357 return RotBondsAlertsInfo 358 359 def TrackRotatableBondsAlertsInfo(RotBondsAlertsInfo, ID, AtomIndices, MatchStatus, MatchInfo): 360 """Track alerts information for rotatable bonds.""" 361 362 if MatchInfo is None or len(MatchInfo) == 0: 363 TorsionAtomIndices, TorsionAngle, HierarchyClassName, HierarchySubClassName, TorsionRuleNodeID, TorsionRuleSMARTS, EnergyMethod, AngleNotObserved, Energy, EnergyLowerBound, EnergyUpperBound = [None] * 11 364 else: 365 TorsionAtomIndices, TorsionAngle, HierarchyClassName, HierarchySubClassName, TorsionRuleNodeID, TorsionRuleSMARTS, EnergyMethod, AngleNotObserved, Energy, EnergyLowerBound, EnergyUpperBound = MatchInfo 366 367 # Track torsion match information... 368 RotBondsAlertsInfo["IDs"].append(ID) 369 RotBondsAlertsInfo["MatchStatus"][ID] = MatchStatus 370 RotBondsAlertsInfo["AtomIndices"][ID] = AtomIndices 371 RotBondsAlertsInfo["TorsionAtomIndices"][ID] = TorsionAtomIndices 372 RotBondsAlertsInfo["TorsionAngle"][ID] = TorsionAngle 373 RotBondsAlertsInfo["HierarchyClassName"][ID] = HierarchyClassName 374 RotBondsAlertsInfo["HierarchySubClassName"][ID] = HierarchySubClassName 375 RotBondsAlertsInfo["TorsionRuleNodeID"][ID] = TorsionRuleNodeID 376 RotBondsAlertsInfo["TorsionRuleSMARTS"][ID] = TorsionRuleSMARTS 377 RotBondsAlertsInfo["EnergyMethod"][ID] = EnergyMethod 378 RotBondsAlertsInfo["AngleNotObserved"][ID] = AngleNotObserved 379 RotBondsAlertsInfo["Energy"][ID] = Energy 380 RotBondsAlertsInfo["EnergyLowerBound"][ID] = EnergyLowerBound 381 RotBondsAlertsInfo["EnergyUpperBound"][ID] = EnergyUpperBound 382 383 def SetupRotatableBondsAlertStatusTotalStrainEnergiesInfo(RotBondsAlertsInfo): 384 """Setup rotatable bonds alert status along with total strain energies.""" 385 386 # Initialize... 387 RotBondsAlertsStatus = False 388 TotalEnergy, TotalEnergyLowerBound, TotalEnergyUpperBound, AnglesNotObservedCount = [None, None, None, None] 389 MaxSingleEnergy, MaxSingleEnergyAlertsCount = [None, None] 390 391 # Initialize max single energy alert status... 392 for ID in RotBondsAlertsInfo["IDs"]: 393 RotBondsAlertsInfo["MaxSingleEnergyAlertStatus"][ID] = None 394 395 # Check for torsion angles not obervered in the strain library... 396 AnglesNotObservedCount = 0 397 for ID in RotBondsAlertsInfo["IDs"]: 398 AngleNotObserved = RotBondsAlertsInfo["AngleNotObserved"][ID] 399 if AngleNotObserved is not None and AngleNotObserved: 400 AnglesNotObservedCount += 1 401 402 # Setup alert status for rotable bonds... 403 if AnglesNotObservedCount > 0: 404 if OptionsInfo["FilterTorsionsNotObserved"]: 405 RotBondsAlertsStatus = True 406 else: 407 TotalEnergy = 0.0 408 for ID in RotBondsAlertsInfo["IDs"]: 409 Energy = RotBondsAlertsInfo["Energy"][ID] 410 TotalEnergy += Energy 411 if OptionsInfo["TotalEnergyMode"]: 412 if TotalEnergy > OptionsInfo["TotalEnergyCutoff"]: 413 RotBondsAlertsStatus = True 414 break 415 elif OptionsInfo["MaxSingleEnergyMode"]: 416 if Energy > OptionsInfo["MaxSingleEnergyCutoff"]: 417 RotBondsAlertsStatus = True 418 break 419 elif OptionsInfo["TotalOrMaxSingleEnergyMode"]: 420 if TotalEnergy > OptionsInfo["TotalEnergyCutoff"] or Energy > OptionsInfo["MaxSingleEnergyCutoff"]: 421 RotBondsAlertsStatus = True 422 break 423 424 # Setup energy infomation... 425 TotalEnergy, TotalEnergyLowerBound, TotalEnergyUpperBound = [0.0, 0.0, 0.0] 426 if OptionsInfo["MaxSingleEnergyMode"] or OptionsInfo["TotalOrMaxSingleEnergyMode"]: 427 MaxSingleEnergy, MaxSingleEnergyAlertsCount = [0.0, 0] 428 429 for ID in RotBondsAlertsInfo["IDs"]: 430 Energy = RotBondsAlertsInfo["Energy"][ID] 431 432 # Setup total energy along with the lower and upper bounds... 433 TotalEnergy += Energy 434 TotalEnergyLowerBound += RotBondsAlertsInfo["EnergyLowerBound"][ID] 435 TotalEnergyUpperBound += RotBondsAlertsInfo["EnergyUpperBound"][ID] 436 437 # Setup max single energy and max single energy alerts count... 438 if OptionsInfo["MaxSingleEnergyMode"] or OptionsInfo["TotalOrMaxSingleEnergyMode"]: 439 MaxSingleEnergyAlertStatus = False 440 441 if Energy > MaxSingleEnergy: 442 MaxSingleEnergy = Energy 443 if Energy > OptionsInfo["MaxSingleEnergyCutoff"]: 444 MaxSingleEnergyAlertStatus = True 445 MaxSingleEnergyAlertsCount += 1 446 447 RotBondsAlertsInfo["MaxSingleEnergyAlertStatus"][ID] = MaxSingleEnergyAlertStatus 448 449 RotBondsAlertsInfo["RotBondsAlertsStatus"] = RotBondsAlertsStatus 450 451 RotBondsAlertsInfo["TotalEnergy"] = TotalEnergy 452 RotBondsAlertsInfo["TotalEnergyLowerBound"] = TotalEnergyLowerBound 453 RotBondsAlertsInfo["TotalEnergyUpperBound"] = TotalEnergyUpperBound 454 455 RotBondsAlertsInfo["AnglesNotObservedCount"] = AnglesNotObservedCount 456 457 RotBondsAlertsInfo["MaxSingleEnergy"] = MaxSingleEnergy 458 RotBondsAlertsInfo["MaxSingleEnergyAlertsCount"] = MaxSingleEnergyAlertsCount 459 460 return RotBondsAlertsStatus 461 462 def MatchRotatableBondToTorsionLibrary(Mol, RotBondAtomIndices, RotBondHierarchyClass): 463 """Match rotatable bond to torsion library.""" 464 465 if TorsionLibraryUtil.IsSpecificHierarchyClass(TorsionLibraryInfo, RotBondHierarchyClass): 466 MatchStatus, MatchInfo = MatchRotatableBondAgainstSpecificHierarchyClass(Mol, RotBondAtomIndices, RotBondHierarchyClass) 467 if not MatchStatus: 468 MatchStatus, MatchInfo = MatchRotatableBondAgainstGenericHierarchyClass(Mol, RotBondAtomIndices, RotBondHierarchyClass) 469 else: 470 MatchStatus, MatchInfo = MatchRotatableBondAgainstGenericHierarchyClass(Mol, RotBondAtomIndices, RotBondHierarchyClass) 471 472 return (MatchStatus, MatchInfo) 473 474 def MatchRotatableBondAgainstSpecificHierarchyClass(Mol, RotBondAtomIndices, RotBondHierarchyClass): 475 """Match rotatable bond against a specific hierarchy class.""" 476 477 HierarchyClassElementNode = None 478 if RotBondHierarchyClass in TorsionLibraryInfo["SpecificClasses"]["ElementNode"]: 479 HierarchyClassElementNode = TorsionLibraryInfo["SpecificClasses"]["ElementNode"][RotBondHierarchyClass] 480 481 if HierarchyClassElementNode is None: 482 return (False, None, None, None) 483 484 TorsionLibraryUtil.TrackHierarchyClassElementNode(TorsionLibraryInfo, HierarchyClassElementNode) 485 MatchStatus, MatchInfo = ProcessElementForRotatableBondMatch(Mol, RotBondAtomIndices, HierarchyClassElementNode) 486 TorsionLibraryUtil.RemoveLastHierarchyClassElementNodeFromTracking(TorsionLibraryInfo) 487 488 return (MatchStatus, MatchInfo) 489 490 def MatchRotatableBondAgainstGenericHierarchyClass(Mol, RotBondAtomIndices, RotBondHierarchyClass): 491 """Match rotatable bond against a generic hierarchy class.""" 492 493 HierarchyClassElementNode = TorsionLibraryUtil.GetGenericHierarchyClassElementNode(TorsionLibraryInfo) 494 if HierarchyClassElementNode is None: 495 return (False, None) 496 497 TorsionLibraryUtil.TrackHierarchyClassElementNode(TorsionLibraryInfo, HierarchyClassElementNode) 498 499 # Match hierarchy subclasses before matching torsion rules... 500 MatchStatus, MatchInfo = MatchRotatableBondAgainstGenericHierarchySubClasses(Mol, RotBondAtomIndices, HierarchyClassElementNode) 501 502 if not MatchStatus: 503 MatchStatus, MatchInfo = MatchRotatableBondAgainstGenericHierarchyTorsionRules(Mol, RotBondAtomIndices, HierarchyClassElementNode) 504 505 TorsionLibraryUtil.RemoveLastHierarchyClassElementNodeFromTracking(TorsionLibraryInfo) 506 507 return (MatchStatus, MatchInfo) 508 509 def MatchRotatableBondAgainstGenericHierarchySubClasses(Mol, RotBondAtomIndices, HierarchyClassElementNode): 510 """Match rotatable bond againat generic hierarchy subclasses.""" 511 512 for ElementChildNode in HierarchyClassElementNode: 513 if ElementChildNode.tag != "hierarchySubClass": 514 continue 515 516 SubClassMatchStatus = ProcessHierarchySubClassElementForRotatableBondMatch(Mol, RotBondAtomIndices, ElementChildNode) 517 518 if SubClassMatchStatus: 519 MatchStatus, MatchInfo = ProcessElementForRotatableBondMatch(Mol, RotBondAtomIndices, ElementChildNode) 520 521 if MatchStatus: 522 return (MatchStatus, MatchInfo) 523 524 return(False, None) 525 526 def MatchRotatableBondAgainstGenericHierarchyTorsionRules(Mol, RotBondAtomIndices, HierarchyClassElementNode): 527 """Match rotatable bond againat torsion rules generic hierarchy class.""" 528 529 for ElementChildNode in HierarchyClassElementNode: 530 if ElementChildNode.tag != "torsionRule": 531 continue 532 533 MatchStatus, MatchInfo = ProcessTorsionRuleElementForRotatableBondMatch(Mol, RotBondAtomIndices, ElementChildNode) 534 535 if MatchStatus: 536 return (MatchStatus, MatchInfo) 537 538 return(False, None) 539 540 def ProcessElementForRotatableBondMatch(Mol, RotBondAtomIndices, ElementNode): 541 """Process element node to recursively match rotatable bond against hierarchy 542 subclasses and torsion rules.""" 543 544 for ElementChildNode in ElementNode: 545 if ElementChildNode.tag == "hierarchySubClass": 546 SubClassMatchStatus = ProcessHierarchySubClassElementForRotatableBondMatch(Mol, RotBondAtomIndices, ElementChildNode) 547 548 if SubClassMatchStatus: 549 TorsionLibraryUtil.TrackHierarchySubClassElementNode(TorsionLibraryInfo, ElementChildNode) 550 551 MatchStatus, MatchInfo = ProcessElementForRotatableBondMatch(Mol, RotBondAtomIndices, ElementChildNode) 552 if MatchStatus: 553 TorsionLibraryUtil.RemoveLastHierarchySubClassElementNodeFromTracking(TorsionLibraryInfo) 554 return (MatchStatus, MatchInfo) 555 556 TorsionLibraryUtil.RemoveLastHierarchySubClassElementNodeFromTracking(TorsionLibraryInfo) 557 558 elif ElementChildNode.tag == "torsionRule": 559 MatchStatus, MatchInfo = ProcessTorsionRuleElementForRotatableBondMatch(Mol, RotBondAtomIndices, ElementChildNode) 560 561 if MatchStatus: 562 return (MatchStatus, MatchInfo) 563 564 return (False, None) 565 566 def ProcessHierarchySubClassElementForRotatableBondMatch(Mol, RotBondAtomIndices, ElementNode): 567 """Process hierarchy subclass element to match rotatable bond.""" 568 569 # Setup subclass SMARTS pattern mol... 570 SubClassPatternMol = TorsionLibraryUtil.SetupHierarchySubClassElementPatternMol(TorsionLibraryInfo, ElementNode) 571 if SubClassPatternMol is None: 572 return False 573 574 # Match SMARTS pattern... 575 SubClassPatternMatches = RDKitUtil.FilterSubstructureMatchesByAtomMapNumbers(Mol, SubClassPatternMol, Mol.GetSubstructMatches(SubClassPatternMol, useChirality = False)) 576 if len(SubClassPatternMatches) == 0: 577 return False 578 579 # Match rotatable bond indices... 580 RotBondAtomIndex1, RotBondAtomIndex2 = RotBondAtomIndices 581 MatchStatus = False 582 for SubClassPatternMatch in SubClassPatternMatches: 583 if len(SubClassPatternMatch) == 2: 584 # Matched to pattern containing map atom numbers ":2" and ":3"... 585 CentralAtomsIndex1, CentralAtomsIndex2 = SubClassPatternMatch 586 elif len(SubClassPatternMatch) == 4: 587 # Matched to pattern containing map atom numbers ":1", ":2", ":3" and ":4"... 588 CentralAtomsIndex1 = SubClassPatternMatch[1] 589 CentralAtomsIndex2 = SubClassPatternMatch[2] 590 elif len(SubClassPatternMatch) == 3: 591 SubClassSMARTSPattern = ElementNode.get("smarts") 592 if TorsionLibraryUtil.DoesSMARTSContainsMappedAtoms(SubClassSMARTSPattern, [":2", ":3", ":4"]): 593 # Matched to pattern containing map atom numbers ":2", ":3" and ":4"... 594 CentralAtomsIndex1 = SubClassPatternMatch[0] 595 CentralAtomsIndex2 = SubClassPatternMatch[1] 596 else: 597 # Matched to pattern containing map atom numbers ":1", ":2" and ":3"... 598 CentralAtomsIndex1 = SubClassPatternMatch[1] 599 CentralAtomsIndex2 = SubClassPatternMatch[2] 600 else: 601 continue 602 603 if CentralAtomsIndex1 != CentralAtomsIndex2: 604 if ((CentralAtomsIndex1 == RotBondAtomIndex1 and CentralAtomsIndex2 == RotBondAtomIndex2) or (CentralAtomsIndex1 == RotBondAtomIndex2 and CentralAtomsIndex2 == RotBondAtomIndex1)): 605 MatchStatus = True 606 break 607 608 return (MatchStatus) 609 610 def ProcessTorsionRuleElementForRotatableBondMatch(Mol, RotBondAtomIndices, ElementNode): 611 """Process torsion rule element to match rotatable bond.""" 612 613 # Retrieve torsions matched to rotatable bond... 614 TorsionAtomIndicesList, TorsionAnglesList = MatchTorsionRuleToRotatableBond(Mol, RotBondAtomIndices, ElementNode) 615 if TorsionAtomIndicesList is None: 616 return (False, None) 617 618 # Setup torsion angles and enery bin information for matched torsion rule... 619 TorsionRuleAnglesInfo = TorsionLibraryUtil.SetupTorsionRuleAnglesInfo(TorsionLibraryInfo, ElementNode) 620 if TorsionRuleAnglesInfo is None: 621 return (False, None) 622 623 # Setup highest strain energy for matched torsions... 624 TorsionAtomIndices, TorsionAngle, EnergyMethod, AngleNotObserved, Energy, EnergyLowerBound, EnergyUpperBound = SelectHighestStrainEnergyTorsionForRotatableBond(TorsionRuleAnglesInfo, TorsionAtomIndicesList, TorsionAnglesList) 625 626 # Setup hierarchy class and subclass names... 627 HierarchyClassName, HierarchySubClassName = TorsionLibraryUtil.SetupHierarchyClassAndSubClassNamesForRotatableBond(TorsionLibraryInfo) 628 629 # Setup rule node ID... 630 TorsionRuleNodeID = ElementNode.get("NodeID") 631 632 # Setup SMARTS... 633 TorsionRuleSMARTS = ElementNode.get("smarts") 634 if " " in TorsionRuleSMARTS: 635 TorsionRuleSMARTS = TorsionRuleSMARTS.replace(" ", "") 636 637 # Setup match info... 638 MatchInfo = [TorsionAtomIndices, TorsionAngle, HierarchyClassName, HierarchySubClassName, TorsionRuleNodeID, TorsionRuleSMARTS, EnergyMethod, AngleNotObserved, Energy, EnergyLowerBound, EnergyUpperBound] 639 640 # Setup match status... 641 MatchStatus = True 642 643 return (MatchStatus, MatchInfo) 644 645 def SelectHighestStrainEnergyTorsionForRotatableBond(TorsionRuleAnglesInfo, TorsionAtomIndicesList, TorsionAnglesList): 646 """Select highest strain energy torsion matched to a rotatable bond.""" 647 648 TorsionAtomIndices, TorsionAngle, EnergyMethod, AngleNotObserved, Energy, EnergyLowerBound, EnergyUpperBound = [None] * 7 649 ValidEnergyValue, ValidCurrentEnergyValue = [False] * 2 650 651 FirstTorsion = True 652 for Index in range(0, len(TorsionAtomIndicesList)): 653 CurrentTorsionAtomIndices = TorsionAtomIndicesList[Index] 654 CurrentTorsionAngle = TorsionAnglesList[Index] 655 656 if FirstTorsion: 657 FirstTorsion = False 658 EnergyMethod, AngleNotObserved, Energy, EnergyLowerBound, EnergyUpperBound = SetupStrainEnergyForRotatableBond(TorsionRuleAnglesInfo, CurrentTorsionAngle) 659 TorsionAtomIndices = CurrentTorsionAtomIndices 660 TorsionAngle = CurrentTorsionAngle 661 ValidEnergyValue = IsEnergyValueValid(Energy) 662 continue 663 664 # Select highest strain energy... 665 CurrentEnergyMethod, CurrentAngleNotObserved, CurrentEnergy, CurrentEnergyLowerBound, CurrentEnergyUpperBound = SetupStrainEnergyForRotatableBond(TorsionRuleAnglesInfo, CurrentTorsionAngle) 666 ValidCurrentEnergyValue = IsEnergyValueValid(CurrentEnergy) 667 668 UpdateValues = False 669 if ValidEnergyValue and ValidCurrentEnergyValue: 670 if CurrentEnergy > Energy: 671 UpdateValues = True 672 elif ValidCurrentEnergyValue: 673 if not ValidEnergyValue: 674 UpdateValues = True 675 676 if UpdateValues: 677 EnergyMethod, AngleNotObserved, Energy, EnergyLowerBound, EnergyUpperBound = [CurrentEnergyMethod, CurrentAngleNotObserved, CurrentEnergy, CurrentEnergyLowerBound, CurrentEnergyUpperBound] 678 TorsionAtomIndices = CurrentTorsionAtomIndices 679 TorsionAngle = CurrentTorsionAngle 680 681 return (TorsionAtomIndices, TorsionAngle, EnergyMethod, AngleNotObserved, Energy, EnergyLowerBound, EnergyUpperBound) 682 683 def IsEnergyValueValid(Value): 684 """Check for valid energy value.""" 685 686 return False if (Value is None or math.isnan(Value) or math.isinf(Value)) else True 687 688 def SetupStrainEnergyForRotatableBond(TorsionRuleAnglesInfo, TorsionAngle): 689 """Setup strain energy for rotatable bond.""" 690 691 if TorsionRuleAnglesInfo["EnergyMethodExact"]: 692 return (SetupStrainEnergyForRotatableBondByExactMethod(TorsionRuleAnglesInfo, TorsionAngle)) 693 elif TorsionRuleAnglesInfo["EnergyMethodApproximate"]: 694 return (SetupStrainEnergyForRotatableBondByApproximateMethod(TorsionRuleAnglesInfo, TorsionAngle)) 695 else: 696 EnergyMethod, AngleNotObserved, Energy, EnergyLowerBound, EnergyUpperBound = [None, None, None, None, None] 697 return (EnergyMethod, AngleNotObserved, Energy, EnergyLowerBound, EnergyUpperBound) 698 699 def SetupStrainEnergyForRotatableBondByExactMethod(TorsionRuleAnglesInfo, TorsionAngle): 700 """Setup strain energy for rotatable bond by exact method.""" 701 702 EnergyMethod, AngleNotObserved, Energy, EnergyLowerBound, EnergyUpperBound = ["Exact", None, None, None, None] 703 704 # Map angle to energy bin numbers... 705 BinNum = math.ceil(TorsionAngle / 10) + 17 706 PreviousBinNum = (BinNum + 35) % 36 707 708 # Bin angle from -170 to 180 by the right end points... 709 BinAngleRightSide = (BinNum - 17) * 10 710 711 # Angle offset towards the left of the bin from the right end point... 712 AngleOffset = TorsionAngle - BinAngleRightSide 713 714 BinEnergy = TorsionRuleAnglesInfo["HistogramEnergy"][BinNum] 715 PreviousBinEnergy = TorsionRuleAnglesInfo["HistogramEnergy"][PreviousBinNum] 716 Energy = BinEnergy + (BinEnergy - PreviousBinEnergy)/10.0 * AngleOffset 717 718 BinEnergyLowerBound = TorsionRuleAnglesInfo["HistogramEnergyLowerBound"][BinNum] 719 PreviousBinEnergyLowerBound = TorsionRuleAnglesInfo["HistogramEnergyLowerBound"][PreviousBinNum] 720 EnergyLowerBound = BinEnergyLowerBound + (BinEnergyLowerBound - PreviousBinEnergyLowerBound)/10.0 * AngleOffset 721 722 BinEnergyUpperBound = TorsionRuleAnglesInfo["HistogramEnergyUpperBound"][BinNum] 723 PreviousBinEnergyUpperBound = TorsionRuleAnglesInfo["HistogramEnergyUpperBound"][PreviousBinNum] 724 EnergyUpperBound = BinEnergyUpperBound + (BinEnergyUpperBound - PreviousBinEnergyUpperBound)/10.0 * AngleOffset 725 726 return (EnergyMethod, AngleNotObserved, Energy, EnergyLowerBound, EnergyUpperBound) 727 728 def SetupStrainEnergyForRotatableBondByApproximateMethod(TorsionRuleAnglesInfo, TorsionAngle): 729 """Setup strain energy for rotatable bond by approximate method.""" 730 731 EnergyMethod, AngleNotObserved, Energy, EnergyLowerBound, EnergyUpperBound = ["Approximate", True, None, None, None] 732 733 for AngleID in TorsionRuleAnglesInfo["IDs"]: 734 Tolerance2 = TorsionRuleAnglesInfo["Tolerance2"][AngleID] 735 Theta0 = TorsionRuleAnglesInfo["Theta0"][AngleID] 736 737 AngleDiff = TorsionLibraryUtil.CalculateTorsionAngleDifference(TorsionAngle, Theta0) 738 if abs(AngleDiff) <= Tolerance2: 739 Beta1 = TorsionRuleAnglesInfo["Beta1"][AngleID] 740 Beta2 = TorsionRuleAnglesInfo["Beta2"][AngleID] 741 742 Energy = Beta1*(AngleDiff ** 2) + Beta2*(AngleDiff** 4) 743 744 # Estimates of lower and upper bound are not available for 745 # approximate method... 746 EnergyLowerBound = Energy 747 EnergyUpperBound = Energy 748 749 AngleNotObserved = False 750 751 break 752 753 return (EnergyMethod, AngleNotObserved, Energy, EnergyLowerBound, EnergyUpperBound) 754 755 def MatchTorsionRuleToRotatableBond(Mol, RotBondAtomIndices, ElementNode): 756 """Retrieve matched torsions for torsion rule matched to rotatable bond.""" 757 758 # Get torsion matches... 759 TorsionMatches = GetMatchesForTorsionRule(Mol, ElementNode) 760 if TorsionMatches is None or len(TorsionMatches) == 0: 761 return (None, None) 762 763 # Identify all torsion matches corresponding to central atoms in RotBondAtomIndices... 764 RotBondAtomIndex1, RotBondAtomIndex2 = RotBondAtomIndices 765 RotBondTorsionMatches, RotBondTorsionAngles = [None] * 2 766 767 for TorsionMatch in TorsionMatches: 768 CentralAtomIndex1 = TorsionMatch[1] 769 CentralAtomIndex2 = TorsionMatch[2] 770 771 if ((CentralAtomIndex1 == RotBondAtomIndex1 and CentralAtomIndex2 == RotBondAtomIndex2) or (CentralAtomIndex1 == RotBondAtomIndex2 and CentralAtomIndex2 == RotBondAtomIndex1)): 772 TorsionAngle = CalculateTorsionAngle(Mol, TorsionMatch) 773 if RotBondTorsionMatches is None: 774 RotBondTorsionMatches = [] 775 RotBondTorsionAngles = [] 776 RotBondTorsionMatches.append(TorsionMatch) 777 RotBondTorsionAngles.append(TorsionAngle) 778 779 return (RotBondTorsionMatches, RotBondTorsionAngles) 780 781 def CalculateTorsionAngle(Mol, TorsionMatch): 782 """Calculate torsion angle.""" 783 784 # Calculate torsion angle using torsion atom indices.. 785 MolConf = Mol.GetConformer(0) 786 TorsionAngle = rdMolTransforms.GetDihedralDeg(MolConf, TorsionMatch[0], TorsionMatch[1], TorsionMatch[2], TorsionMatch[3]) 787 TorsionAngle = round(TorsionAngle, 2) 788 789 return TorsionAngle 790 791 def GetMatchesForTorsionRule(Mol, ElementNode): 792 """Get matches for torsion rule.""" 793 794 # Match torsions... 795 TorsionMatches = GetSubstructureMatchesForTorsionRule(Mol, ElementNode) 796 797 if TorsionMatches is None or len(TorsionMatches) == 0: 798 return TorsionMatches 799 800 # Filter torsion matches... 801 FiltertedTorsionMatches = [] 802 for TorsionMatch in TorsionMatches: 803 if len(TorsionMatch) != 4: 804 continue 805 806 # Ignore matches containing hydrogen atoms as first or last atom... 807 if Mol.GetAtomWithIdx(TorsionMatch[0]).GetAtomicNum() == 1: 808 continue 809 if Mol.GetAtomWithIdx(TorsionMatch[3]).GetAtomicNum() == 1: 810 continue 811 812 FiltertedTorsionMatches.append(TorsionMatch) 813 814 return FiltertedTorsionMatches 815 816 def GetSubstructureMatchesForTorsionRule(Mol, ElementNode): 817 """Get substructure matches for a torsion rule.""" 818 819 # Setup torsion rule SMARTS pattern mol.... 820 TorsionRuleNodeID = ElementNode.get("NodeID") 821 TorsionSMARTSPattern = ElementNode.get("smarts") 822 TorsionPatternMol = TorsionLibraryUtil.SetupTorsionRuleElementPatternMol(TorsionLibraryInfo, ElementNode, TorsionRuleNodeID, TorsionSMARTSPattern) 823 if TorsionPatternMol is None: 824 return None 825 826 # Match torsions... 827 TorsionMatches = RDKitUtil.FilterSubstructureMatchesByAtomMapNumbers(Mol, TorsionPatternMol, Mol.GetSubstructMatches(TorsionPatternMol, useChirality = False)) 828 829 return TorsionMatches 830 831 def InitializeTorsionAlertsSummaryInfo(): 832 """Initialize torsion alerts summary.""" 833 834 if OptionsInfo["CountMode"]: 835 return None 836 837 if not OptionsInfo["TrackAlertsSummaryInfo"]: 838 return None 839 840 TorsionAlertsSummaryInfo = {} 841 TorsionAlertsSummaryInfo["RuleIDs"] = [] 842 843 for DataLabel in ["SMARTSToRuleIDs", "RuleSMARTS", "HierarchyClassName", "HierarchySubClassName", "EnergyMethod", "MaxSingleEnergyAlertTypes", "MaxSingleEnergyAlertTypesMolCount"]: 844 TorsionAlertsSummaryInfo[DataLabel] = {} 845 846 return TorsionAlertsSummaryInfo 847 848 def TrackTorsionAlertsSummaryInfo(TorsionAlertsSummaryInfo, RotBondsAlertsInfo): 849 """Track torsion alerts summary information for matched torsion rules in a 850 molecule.""" 851 852 if OptionsInfo["CountMode"]: 853 return 854 855 if not OptionsInfo["TrackAlertsSummaryInfo"]: 856 return 857 858 if RotBondsAlertsInfo is None: 859 return 860 861 MolAlertsInfo = {} 862 MolAlertsInfo["RuleIDs"] = [] 863 MolAlertsInfo["MaxSingleEnergyAlertTypes"] = {} 864 865 for ID in RotBondsAlertsInfo["IDs"]: 866 if not RotBondsAlertsInfo["MatchStatus"][ID]: 867 continue 868 869 if SkipRotatableBondAlertInfo(ID, RotBondsAlertsInfo): 870 continue 871 872 MaxSingleEnergyAlertType = SetupMaxSingleEnergyAlertStatusValue(RotBondsAlertsInfo["MaxSingleEnergyAlertStatus"][ID]) 873 874 TorsionRuleNodeID = RotBondsAlertsInfo["TorsionRuleNodeID"][ID] 875 TorsionRuleSMARTS = RotBondsAlertsInfo["TorsionRuleSMARTS"][ID] 876 877 # Track data for torsion alert summary information across molecules... 878 if TorsionRuleNodeID not in TorsionAlertsSummaryInfo["RuleSMARTS"]: 879 TorsionAlertsSummaryInfo["RuleIDs"].append(TorsionRuleNodeID) 880 TorsionAlertsSummaryInfo["SMARTSToRuleIDs"][TorsionRuleSMARTS] = TorsionRuleNodeID 881 882 TorsionAlertsSummaryInfo["RuleSMARTS"][TorsionRuleNodeID] = TorsionRuleSMARTS 883 TorsionAlertsSummaryInfo["HierarchyClassName"][TorsionRuleNodeID] = RotBondsAlertsInfo["HierarchyClassName"][ID] 884 TorsionAlertsSummaryInfo["HierarchySubClassName"][TorsionRuleNodeID] = RotBondsAlertsInfo["HierarchySubClassName"][ID] 885 886 TorsionAlertsSummaryInfo["EnergyMethod"][TorsionRuleNodeID] = RotBondsAlertsInfo["EnergyMethod"][ID] 887 888 # Initialize number of alert types across all molecules... 889 TorsionAlertsSummaryInfo["MaxSingleEnergyAlertTypes"][TorsionRuleNodeID] = {} 890 891 # Initialize number of molecules flagged by each alert type... 892 TorsionAlertsSummaryInfo["MaxSingleEnergyAlertTypesMolCount"][TorsionRuleNodeID] = {} 893 894 if MaxSingleEnergyAlertType not in TorsionAlertsSummaryInfo["MaxSingleEnergyAlertTypes"][TorsionRuleNodeID]: 895 TorsionAlertsSummaryInfo["MaxSingleEnergyAlertTypes"][TorsionRuleNodeID][MaxSingleEnergyAlertType] = 0 896 TorsionAlertsSummaryInfo["MaxSingleEnergyAlertTypesMolCount"][TorsionRuleNodeID][MaxSingleEnergyAlertType] = 0 897 898 TorsionAlertsSummaryInfo["MaxSingleEnergyAlertTypes"][TorsionRuleNodeID][MaxSingleEnergyAlertType] += 1 899 900 # Track data for torsion alert information in a molecule... 901 if TorsionRuleNodeID not in MolAlertsInfo["MaxSingleEnergyAlertTypes"]: 902 MolAlertsInfo["RuleIDs"].append(TorsionRuleNodeID) 903 MolAlertsInfo["MaxSingleEnergyAlertTypes"][TorsionRuleNodeID] = {} 904 905 if MaxSingleEnergyAlertType not in MolAlertsInfo["MaxSingleEnergyAlertTypes"][TorsionRuleNodeID]: 906 MolAlertsInfo["MaxSingleEnergyAlertTypes"][TorsionRuleNodeID][MaxSingleEnergyAlertType] = 0 907 MolAlertsInfo["MaxSingleEnergyAlertTypes"][TorsionRuleNodeID][MaxSingleEnergyAlertType] += 1 908 909 # Track number of molecules flagged by a specific torsion alert... 910 for TorsionRuleNodeID in MolAlertsInfo["RuleIDs"]: 911 for MaxSingleEnergyAlertType in MolAlertsInfo["MaxSingleEnergyAlertTypes"][TorsionRuleNodeID]: 912 if MolAlertsInfo["MaxSingleEnergyAlertTypes"][TorsionRuleNodeID][MaxSingleEnergyAlertType]: 913 TorsionAlertsSummaryInfo["MaxSingleEnergyAlertTypesMolCount"][TorsionRuleNodeID][MaxSingleEnergyAlertType] += 1 914 915 def WriteTorsionAlertsSummaryInfo(Writer, TorsionAlertsSummaryInfo): 916 """Write out torsion alerts summary informatio to a CSV file.""" 917 918 if OptionsInfo["CountMode"]: 919 return 920 921 if not OptionsInfo["OutfileSummaryMode"]: 922 return 923 924 if len(TorsionAlertsSummaryInfo["RuleIDs"]) == 0: 925 return 926 927 # Write headers... 928 QuoteValues = True 929 Values = ["TorsionRule", "HierarchyClass", "HierarchySubClass", "EnergyMethod", "MaxSingleEnergyTorsionAlertTypes", "MaxSingleEnergyTorsionAlertCount", "MaxSingleEnergyTorsionAlertMolCount"] 930 Writer.write("%s\n" % MiscUtil.JoinWords(Values, ",", QuoteValues)) 931 932 SortedRuleIDs = GetSortedTorsionAlertsSummaryInfoRuleIDs(TorsionAlertsSummaryInfo) 933 934 # Write alerts information... 935 for ID in SortedRuleIDs: 936 # Remove any double quotes in SMARTS... 937 RuleSMARTS = TorsionAlertsSummaryInfo["RuleSMARTS"][ID] 938 RuleSMARTS = re.sub("\"", "", RuleSMARTS, re.I) 939 940 HierarchyClassName = TorsionAlertsSummaryInfo["HierarchyClassName"][ID] 941 HierarchySubClassName = TorsionAlertsSummaryInfo["HierarchySubClassName"][ID] 942 943 EnergyMethod = TorsionAlertsSummaryInfo["EnergyMethod"][ID] 944 945 MaxSingleEnergyAlertTypes = [] 946 MaxSingleEnergyAlertTypesCount = [] 947 MaxSingleEnergyAlertTypesMolCount = [] 948 for MaxSingleEnergyAlertType in sorted(TorsionAlertsSummaryInfo["MaxSingleEnergyAlertTypes"][ID]): 949 MaxSingleEnergyAlertTypes.append(MaxSingleEnergyAlertType) 950 MaxSingleEnergyAlertTypesCount.append("%s" % TorsionAlertsSummaryInfo["MaxSingleEnergyAlertTypes"][ID][MaxSingleEnergyAlertType]) 951 MaxSingleEnergyAlertTypesMolCount.append("%s" % TorsionAlertsSummaryInfo["MaxSingleEnergyAlertTypesMolCount"][ID][MaxSingleEnergyAlertType]) 952 953 Values = [RuleSMARTS, HierarchyClassName, HierarchySubClassName, EnergyMethod, "%s" % MiscUtil.JoinWords(MaxSingleEnergyAlertTypes, ","), "%s" % (MiscUtil.JoinWords(MaxSingleEnergyAlertTypesCount, ",")), "%s" % (MiscUtil.JoinWords(MaxSingleEnergyAlertTypesMolCount, ","))] 954 Writer.write("%s\n" % MiscUtil.JoinWords(Values, ",", QuoteValues)) 955 956 def GetSortedTorsionAlertsSummaryInfoRuleIDs(TorsionAlertsSummaryInfo): 957 """Sort torsion rule IDs by alert types molecule count in descending order.""" 958 959 SortedRuleIDs = [] 960 961 RuleIDs = TorsionAlertsSummaryInfo["RuleIDs"] 962 if len(RuleIDs) == 0: 963 return SortedRuleIDs 964 965 # Setup a map from AlertTypesMolCount to IDs for sorting alerts... 966 RuleIDs = TorsionAlertsSummaryInfo["RuleIDs"] 967 MolCountMap = {} 968 for ID in RuleIDs: 969 MolCount = 0 970 for AlertType in sorted(TorsionAlertsSummaryInfo["MaxSingleEnergyAlertTypes"][ID]): 971 MolCount += TorsionAlertsSummaryInfo["MaxSingleEnergyAlertTypesMolCount"][ID][AlertType] 972 MolCountMap[ID] = MolCount 973 974 SortedRuleIDs = sorted(RuleIDs, key = lambda ID: MolCountMap[ID], reverse = True) 975 976 return SortedRuleIDs 977 978 def WriteTorsionAlertsFilteredByRulesInfo(TorsionAlertsSummaryInfo): 979 """Write out torsion alerts SD files for individual torsion rules.""" 980 981 if OptionsInfo["CountMode"]: 982 return 983 984 if not OptionsInfo["OutfilesFilteredByRulesMode"]: 985 return 986 987 if len(TorsionAlertsSummaryInfo["RuleIDs"]) == 0: 988 return 989 990 # Setup a molecule reader for filtered molecules... 991 FilteredMols = RDKitUtil.ReadMolecules(OptionsInfo["OutfileFiltered"], **OptionsInfo["InfileParams"]) 992 993 # Get torsion rule IDs for writing out filtered SD files for individual torsion alert rules... 994 TorsionRuleIDs = GetTorsionAlertsFilteredByRuleFilesRuleIDs(TorsionAlertsSummaryInfo) 995 996 # Setup writers... 997 ByRuleOutfilesWriters = SetupByRuleOutfilesWriters(TorsionRuleIDs) 998 999 for Mol in FilteredMols: 1000 # Retrieve torsion alerts info... 1001 TorsionAlertsInfo = RetrieveTorsionAlertsInfo(Mol, TorsionAlertsSummaryInfo) 1002 if TorsionAlertsInfo is None: 1003 continue 1004 1005 for TorsionRuleID in TorsionRuleIDs: 1006 if TorsionRuleID not in TorsionAlertsInfo["RuleSMARTS"]: 1007 continue 1008 1009 WriteMoleculeFilteredByRuleID(ByRuleOutfilesWriters[TorsionRuleID], Mol, TorsionRuleID, TorsionAlertsSummaryInfo, TorsionAlertsInfo) 1010 1011 CloseByRuleOutfilesWriters(ByRuleOutfilesWriters) 1012 1013 def GetTorsionAlertsFilteredByRuleFilesRuleIDs(TorsionAlertsSummaryInfo): 1014 """Get torsion rule IDs for writing out individual SD files filtered by torsion alert rules.""" 1015 1016 # Get torsion rule IDs triggering torsion alerts sorted in the order from the most to 1017 # the least number of unique molecules... 1018 RuleIDs = GetSortedTorsionAlertsSummaryInfoRuleIDs(TorsionAlertsSummaryInfo) 1019 1020 # Select torsion rule IDs for writing out SD files... 1021 if not OptionsInfo["OutfilesFilteredByRulesAllMode"]: 1022 MaxRuleIDs = OptionsInfo["OutfilesFilteredByRulesMaxCount"] 1023 if MaxRuleIDs < len(RuleIDs): 1024 RuleIDs = RuleIDs[0:MaxRuleIDs] 1025 1026 return RuleIDs 1027 1028 def RetrieveTorsionAlertsInfo(Mol, TorsionAlertsSummaryInfo): 1029 """Parse torsion alerts data field value to retrieve alerts information for rotatable bonds.""" 1030 1031 TorsionAlertsLabel = OptionsInfo["SDFieldIDsToLabels"]["TorsionAlertsLabel"] 1032 TorsionAlerts = Mol.GetProp(TorsionAlertsLabel) if Mol.HasProp(TorsionAlertsLabel) else None 1033 1034 if TorsionAlerts is None or len(TorsionAlerts) == 0: 1035 return None 1036 1037 # Initialize for tracking by rule IDs... 1038 TorsionAlertsInfo = {} 1039 TorsionAlertsInfo["RuleIDs"] = [] 1040 1041 TorsionAlertsInfo["RuleSMARTS"] = {} 1042 TorsionAlertsInfo["HierarchyClassName"] = {} 1043 TorsionAlertsInfo["HierarchySubClassName"] = {} 1044 TorsionAlertsInfo["EnergyMethod"] = {} 1045 1046 TorsionAlertsInfo["AtomIndices"] = {} 1047 TorsionAlertsInfo["TorsionAtomIndices"] = {} 1048 TorsionAlertsInfo["TorsionAngle"] = {} 1049 1050 TorsionAlertsInfo["Energy"] = {} 1051 TorsionAlertsInfo["EnergyLowerBound"] = {} 1052 TorsionAlertsInfo["EnergyUpperBound"] = {} 1053 1054 TorsionAlertsInfo["AngleNotObserved"] = {} 1055 TorsionAlertsInfo["MaxSingleEnergyAlertType"] = {} 1056 1057 TorsionAlertsInfo["AnglesNotObservedCount"] = {} 1058 TorsionAlertsInfo["MaxSingleEnergyAlertsCount"] = {} 1059 1060 ValuesDelimiter = OptionsInfo["IntraSetValuesDelim"] 1061 TorsionAlertsSetSize = 12 1062 1063 TorsionAlertsWords = TorsionAlerts.split() 1064 if len(TorsionAlertsWords) % TorsionAlertsSetSize: 1065 MiscUtil.PrintError("The number of space delimited values, %s, for TorsionAlerts data field in filtered SD file must be a multiple of %s." % (len(TorsionAlertsWords), TorsionAlertsSetSize)) 1066 1067 ID = 0 1068 for Index in range(0, len(TorsionAlertsWords), TorsionAlertsSetSize): 1069 ID += 1 1070 1071 RotBondIndices, TorsionIndices, TorsionAngle, Energy, EnergyLowerBound, EnergyUpperBound, HierarchyClass, HierarchySubClass, TorsionRule, EnergyMethod, AngleNotObserved, MaxSingleEnergyAlertType = TorsionAlertsWords[Index: Index + TorsionAlertsSetSize] 1072 RotBondIndices = RotBondIndices.split(ValuesDelimiter) 1073 TorsionIndices = TorsionIndices.split(ValuesDelimiter) 1074 1075 if TorsionRule not in TorsionAlertsSummaryInfo["SMARTSToRuleIDs"]: 1076 MiscUtil.PrintWarning("The SMARTS pattern, %s, for TorsionAlerts data field in filtered SD file doesn't map to any torsion rule..." % TorsionRule) 1077 continue 1078 TorsionRuleNodeID = TorsionAlertsSummaryInfo["SMARTSToRuleIDs"][TorsionRule] 1079 1080 # Track data for torsion alerts in a molecule... 1081 if TorsionRuleNodeID not in TorsionAlertsInfo["RuleSMARTS"]: 1082 TorsionAlertsInfo["RuleIDs"].append(TorsionRuleNodeID) 1083 1084 TorsionAlertsInfo["RuleSMARTS"][TorsionRuleNodeID] = TorsionRule 1085 TorsionAlertsInfo["HierarchyClassName"][TorsionRuleNodeID] = HierarchyClass 1086 TorsionAlertsInfo["HierarchySubClassName"][TorsionRuleNodeID] = HierarchySubClass 1087 TorsionAlertsInfo["EnergyMethod"][TorsionRuleNodeID] = EnergyMethod 1088 1089 TorsionAlertsInfo["AtomIndices"][TorsionRuleNodeID] = [] 1090 TorsionAlertsInfo["TorsionAtomIndices"][TorsionRuleNodeID] = [] 1091 TorsionAlertsInfo["TorsionAngle"][TorsionRuleNodeID] = [] 1092 1093 TorsionAlertsInfo["Energy"][TorsionRuleNodeID] = [] 1094 TorsionAlertsInfo["EnergyLowerBound"][TorsionRuleNodeID] = [] 1095 TorsionAlertsInfo["EnergyUpperBound"][TorsionRuleNodeID] = [] 1096 TorsionAlertsInfo["AngleNotObserved"][TorsionRuleNodeID] = [] 1097 TorsionAlertsInfo["MaxSingleEnergyAlertType"][TorsionRuleNodeID] = [] 1098 1099 TorsionAlertsInfo["AnglesNotObservedCount"][TorsionRuleNodeID] = 0 1100 TorsionAlertsInfo["MaxSingleEnergyAlertsCount"][TorsionRuleNodeID] = 0 1101 1102 # Track multiple values for a rule ID... 1103 TorsionAlertsInfo["AtomIndices"][TorsionRuleNodeID].append(RotBondIndices) 1104 TorsionAlertsInfo["TorsionAtomIndices"][TorsionRuleNodeID].append(TorsionIndices) 1105 TorsionAlertsInfo["TorsionAngle"][TorsionRuleNodeID].append(TorsionAngle) 1106 1107 TorsionAlertsInfo["Energy"][TorsionRuleNodeID].append(Energy) 1108 TorsionAlertsInfo["EnergyLowerBound"][TorsionRuleNodeID].append(EnergyLowerBound) 1109 TorsionAlertsInfo["EnergyUpperBound"][TorsionRuleNodeID].append(EnergyUpperBound) 1110 TorsionAlertsInfo["AngleNotObserved"][TorsionRuleNodeID].append(AngleNotObserved) 1111 1112 TorsionAlertsInfo["MaxSingleEnergyAlertType"][TorsionRuleNodeID].append(MaxSingleEnergyAlertType) 1113 1114 # Count angles not observer for a rule ID... 1115 if AngleNotObserved == 'Yes': 1116 TorsionAlertsInfo["AnglesNotObservedCount"][TorsionRuleNodeID] += 1 1117 1118 # Count max single energy alert for a rule ID... 1119 if MaxSingleEnergyAlertType == 'Yes': 1120 TorsionAlertsInfo["MaxSingleEnergyAlertsCount"][TorsionRuleNodeID] += 1 1121 1122 return TorsionAlertsInfo 1123 1124 def WriteMolecule(Writer, Mol, RotBondsAlertsInfo): 1125 """Write out molecule.""" 1126 1127 if OptionsInfo["CountMode"]: 1128 return True 1129 1130 SetupMolPropertiesForAlertsInformation(Mol, RotBondsAlertsInfo) 1131 1132 try: 1133 Writer.write(Mol) 1134 except Exception as ErrMsg: 1135 MiscUtil.PrintWarning("Failed to write molecule %s:\n%s\n" % (RDKitUtil.GetMolName(Mol), ErrMsg)) 1136 return False 1137 1138 return True 1139 1140 def SetupMolPropertiesForAlertsInformation(Mol, RotBondsAlertsInfo): 1141 """Setup molecule properties containing alerts information for rotatable bonds.""" 1142 1143 if not OptionsInfo["OutfileAlerts"]: 1144 return 1145 1146 SDFieldIDsToLabels = OptionsInfo["SDFieldIDsToLabels"] 1147 Precision = OptionsInfo["Precision"] 1148 1149 # Setup rotatable bonds count... 1150 RotBondsCount = 0 1151 if RotBondsAlertsInfo is not None: 1152 RotBondsCount = len(RotBondsAlertsInfo["IDs"]) 1153 Mol.SetProp(SDFieldIDsToLabels["RotBondsCountLabel"], "%s" % RotBondsCount) 1154 1155 if RotBondsAlertsInfo is not None: 1156 # Setup total energy along with lower and upper bounds... 1157 Mol.SetProp(SDFieldIDsToLabels["TotalEnergyLabel"], "%s" % SetupEnergyValueForSDField(RotBondsAlertsInfo["TotalEnergy"], Precision)) 1158 Mol.SetProp(SDFieldIDsToLabels["TotalEnergyLowerBoundCILabel"], "%s" % SetupEnergyValueForSDField(RotBondsAlertsInfo["TotalEnergyLowerBound"], Precision)) 1159 Mol.SetProp(SDFieldIDsToLabels["TotalEnergyUpperBoundCILabel"], "%s" % SetupEnergyValueForSDField(RotBondsAlertsInfo["TotalEnergyUpperBound"], Precision)) 1160 1161 # Setup max single energy and alert count... 1162 if OptionsInfo["MaxSingleEnergyMode"] or OptionsInfo["TotalOrMaxSingleEnergyMode"]: 1163 Mol.SetProp(SDFieldIDsToLabels["MaxSingleEnergyLabel"], "%s" % SetupEnergyValueForSDField(RotBondsAlertsInfo["MaxSingleEnergy"], Precision)) 1164 Mol.SetProp(SDFieldIDsToLabels["MaxSingleEnergyAlertsCountLabel"], "%s" % ("NA" if RotBondsAlertsInfo["MaxSingleEnergyAlertsCount"] is None else RotBondsAlertsInfo["MaxSingleEnergyAlertsCount"])) 1165 1166 Mol.SetProp(SDFieldIDsToLabels["AnglesNotObservedCountLabel"], "%s" % ("NA" if RotBondsAlertsInfo["AnglesNotObservedCount"] is None else RotBondsAlertsInfo["AnglesNotObservedCount"])) 1167 1168 1169 # Setup alert information for rotatable bonds... 1170 AlertsInfoValues = [] 1171 1172 # Delimiter for multiple values corresponding to specific set of information for 1173 # a rotatable bond. For example: TorsionAtomIndices 1174 ValuesDelim = OptionsInfo["IntraSetValuesDelim"] 1175 1176 # Delimiter for various values for a rotatable bond... 1177 RotBondValuesDelim = OptionsInfo["InterSetValuesDelim"] 1178 1179 # Delimiter for values corresponding to multiple rotatable bonds... 1180 AlertsInfoValuesDelim = OptionsInfo["InterSetValuesDelim"] 1181 1182 if RotBondsAlertsInfo is not None: 1183 for ID in RotBondsAlertsInfo["IDs"]: 1184 if not RotBondsAlertsInfo["MatchStatus"][ID]: 1185 continue 1186 1187 if SkipRotatableBondAlertInfo(ID, RotBondsAlertsInfo): 1188 continue 1189 1190 RotBondValues = [] 1191 1192 # Bond atom indices... 1193 Values = ["%s" % Value for Value in RotBondsAlertsInfo["AtomIndices"][ID]] 1194 RotBondValues.append(ValuesDelim.join(Values)) 1195 1196 # Torsion atom indices... 1197 TorsionAtomIndices = SetupTorsionAtomIndicesValues(RotBondsAlertsInfo["TorsionAtomIndices"][ID], ValuesDelim) 1198 RotBondValues.append(TorsionAtomIndices) 1199 1200 # Torsion angle... 1201 RotBondValues.append("%.2f" % RotBondsAlertsInfo["TorsionAngle"][ID]) 1202 1203 # Energy along with its lower and upper bound confidence interval... 1204 RotBondValues.append(SetupEnergyValueForSDField(RotBondsAlertsInfo["Energy"][ID], Precision)) 1205 RotBondValues.append(SetupEnergyValueForSDField(RotBondsAlertsInfo["EnergyLowerBound"][ID], Precision)) 1206 RotBondValues.append(SetupEnergyValueForSDField(RotBondsAlertsInfo["EnergyUpperBound"][ID], Precision)) 1207 1208 # Hierarchy class and subclass names... 1209 RotBondValues.append("%s" % RotBondsAlertsInfo["HierarchyClassName"][ID]) 1210 RotBondValues.append("%s" % RotBondsAlertsInfo["HierarchySubClassName"][ID]) 1211 1212 # Torsion rule SMARTS... 1213 RotBondValues.append("%s" % RotBondsAlertsInfo["TorsionRuleSMARTS"][ID]) 1214 1215 # Energy method... 1216 RotBondValues.append("%s" % RotBondsAlertsInfo["EnergyMethod"][ID]) 1217 1218 # Angle not observed... 1219 RotBondValues.append("%s" % SetupAngleNotObservedValue(RotBondsAlertsInfo["AngleNotObserved"][ID])) 1220 1221 # Max single energy alert status... 1222 RotBondValues.append("%s" % SetupMaxSingleEnergyAlertStatusValue(RotBondsAlertsInfo["MaxSingleEnergyAlertStatus"][ID])) 1223 1224 # Track joined values for a rotatable bond... 1225 AlertsInfoValues.append("%s" % RotBondValuesDelim.join(RotBondValues)) 1226 1227 if len(AlertsInfoValues): 1228 Mol.SetProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionAlertsLabel"], "%s" % ("%s" % AlertsInfoValuesDelim.join(AlertsInfoValues))) 1229 1230 def WriteMoleculeFilteredByRuleID(Writer, Mol, TorsionRuleID, TorsionAlertsSummaryInfo, TorsionAlertsInfo): 1231 """Write out molecule.""" 1232 1233 if OptionsInfo["CountMode"]: 1234 return 1235 1236 SetupMolPropertiesForFilteredByRuleIDAlertsInformation(Mol, TorsionRuleID, TorsionAlertsSummaryInfo, TorsionAlertsInfo) 1237 1238 Writer.write(Mol) 1239 1240 def SetupMolPropertiesForFilteredByRuleIDAlertsInformation(Mol, TorsionRuleID, TorsionAlertsSummaryInfo, TorsionAlertsInfo): 1241 """Setup molecule properties containing alerts information for torsion alerts 1242 fileted by Rule IDs.""" 1243 1244 # Delete torsion alerts information for rotatable bonds... 1245 if Mol.HasProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionAlertsLabel"]): 1246 Mol.ClearProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionAlertsLabel"]) 1247 1248 # Delimiter for values... 1249 IntraSetValuesDelim = OptionsInfo["IntraSetValuesDelim"] 1250 InterSetValuesDelim = OptionsInfo["InterSetValuesDelim"] 1251 1252 # Setup alert rule information... 1253 AlertRuleInfoValues = [] 1254 1255 AlertRuleInfoValues.append("%s" % TorsionAlertsInfo["HierarchyClassName"][TorsionRuleID]) 1256 AlertRuleInfoValues.append("%s" % TorsionAlertsInfo["HierarchySubClassName"][TorsionRuleID]) 1257 1258 AlertRuleInfoValues.append("%s" % TorsionAlertsInfo["RuleSMARTS"][TorsionRuleID]) 1259 AlertRuleInfoValues.append("%s" % TorsionAlertsInfo["EnergyMethod"][TorsionRuleID]) 1260 1261 Mol.SetProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionRuleLabel"], "%s" % ("%s" % InterSetValuesDelim.join(AlertRuleInfoValues))) 1262 1263 # Setup max single energy alert count for torsion rule... 1264 Mol.SetProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionRuleMaxSingleEnergyAlertsCountLabel"], "%s" % TorsionAlertsInfo["MaxSingleEnergyAlertsCount"][TorsionRuleID]) 1265 1266 # Setup angle not observed count for torsion rule... 1267 Mol.SetProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionRuleAnglesNotObservedCountLabel"], "%s" % TorsionAlertsInfo["AnglesNotObservedCount"][TorsionRuleID]) 1268 1269 # Setup torsion rule alerts... 1270 # "TorsionRuleAlertsLabel": "TorsionRuleAlerts (RotBondIndices TorsionIndices TorsionAngle Energy EnergyLowerBoundCI EnergyUpperBoundCI EnergyMethod AngleNotObserved MaxSingleEnergyAlert) 1271 AlertsInfoValues = [] 1272 for Index in range(0, len(TorsionAlertsInfo["AtomIndices"][TorsionRuleID])): 1273 RotBondInfoValues = [] 1274 1275 # Bond atom indices... 1276 Values = ["%s" % Value for Value in TorsionAlertsInfo["AtomIndices"][TorsionRuleID][Index]] 1277 RotBondInfoValues.append(IntraSetValuesDelim.join(Values)) 1278 1279 # Torsion atom indices retrieved from the filtered SD file and stored as strings... 1280 Values = ["%s" % Value for Value in TorsionAlertsInfo["TorsionAtomIndices"][TorsionRuleID][Index]] 1281 RotBondInfoValues.append(IntraSetValuesDelim.join(Values)) 1282 1283 # Torsion angle... 1284 RotBondInfoValues.append(TorsionAlertsInfo["TorsionAngle"][TorsionRuleID][Index]) 1285 1286 # Energy and its bounds... 1287 RotBondInfoValues.append(TorsionAlertsInfo["Energy"][TorsionRuleID][Index]) 1288 RotBondInfoValues.append(TorsionAlertsInfo["EnergyLowerBound"][TorsionRuleID][Index]) 1289 RotBondInfoValues.append(TorsionAlertsInfo["EnergyUpperBound"][TorsionRuleID][Index]) 1290 1291 # Angle not observed...... 1292 RotBondInfoValues.append(TorsionAlertsInfo["AngleNotObserved"][TorsionRuleID][Index]) 1293 1294 # Max single energy alert type... 1295 RotBondInfoValues.append(TorsionAlertsInfo["MaxSingleEnergyAlertType"][TorsionRuleID][Index]) 1296 1297 # Track alerts informaiton... 1298 AlertsInfoValues.append("%s" % InterSetValuesDelim.join(RotBondInfoValues)) 1299 1300 Mol.SetProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionRuleAlertsLabel"], "%s" % (InterSetValuesDelim.join(AlertsInfoValues))) 1301 1302 def SkipRotatableBondAlertInfo(ID, RotBondsAlertsInfo): 1303 """Skip rotatble bond alert info for a specific bond during writing to output files.""" 1304 1305 if not OptionsInfo["OutfileAlertsOnly"]: 1306 return False 1307 1308 if RotBondsAlertsInfo["RotBondsAlertsStatus"] is None: 1309 return True 1310 1311 Status = False 1312 if OptionsInfo["TotalEnergyMode"]: 1313 if not RotBondsAlertsInfo["RotBondsAlertsStatus"]: 1314 Status = True 1315 elif OptionsInfo["MaxSingleEnergyMode"]: 1316 if RotBondsAlertsInfo["MaxSingleEnergyAlertStatus"][ID] is None or not RotBondsAlertsInfo["MaxSingleEnergyAlertStatus"][ID]: 1317 Status = True 1318 elif OptionsInfo["TotalOrMaxSingleEnergyMode"]: 1319 if not RotBondsAlertsInfo["RotBondsAlertsStatus"]: 1320 Status = True 1321 1322 return Status 1323 1324 def SetupEnergyValueForSDField(Value, Precision): 1325 """Setup energy value for SD field.""" 1326 1327 if Value is None or math.isnan(Value) or math.isinf(Value): 1328 return "NA" 1329 1330 return "%.*f" % (Precision, Value) 1331 1332 def SetupAngleNotObservedValue(Value): 1333 """Setup angle not observed value.""" 1334 1335 if Value is None: 1336 return "NA" 1337 1338 return "Yes" if Value else "No" 1339 1340 def SetupMaxSingleEnergyAlertStatusValue(Value): 1341 """Setup max single energy alert status value.""" 1342 1343 if Value is None: 1344 return "NA" 1345 1346 return "Yes" if Value else "No" 1347 1348 def SetupTorsionAtomIndicesValues(TorsionAtomIndicesList, ValuesDelim): 1349 """Setup torsion atom indices value for output files.""" 1350 1351 Values = ["%s" % Value for Value in TorsionAtomIndicesList] 1352 1353 return ValuesDelim.join(Values) 1354 1355 def SetupOutfilesWriters(): 1356 """Setup molecule and summary writers.""" 1357 1358 OutfilesWriters = {"WriterRemaining": None, "WriterFiltered": None, "WriterAlertSummary": None} 1359 1360 # Writers for SD files... 1361 WriterRemaining, WriterFiltered = SetupMoleculeWriters() 1362 OutfilesWriters["WriterRemaining"] = WriterRemaining 1363 OutfilesWriters["WriterFiltered"] = WriterFiltered 1364 1365 # Writer for alert summary CSV file... 1366 WriterAlertSummary = SetupAlertSummaryWriter() 1367 OutfilesWriters["WriterAlertSummary"] = WriterAlertSummary 1368 1369 return OutfilesWriters 1370 1371 def SetupMoleculeWriters(): 1372 """Setup molecule writers.""" 1373 1374 Writer = None 1375 WriterFiltered = None 1376 1377 if OptionsInfo["CountMode"]: 1378 return (Writer, WriterFiltered) 1379 1380 Writer = RDKitUtil.MoleculesWriter(OptionsInfo["Outfile"], **OptionsInfo["OutfileParams"]) 1381 if Writer is None: 1382 MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OptionsInfo["Outfile"]) 1383 MiscUtil.PrintInfo("\nGenerating file %s..." % OptionsInfo["Outfile"]) 1384 1385 if OptionsInfo["OutfileFilteredMode"]: 1386 WriterFiltered = RDKitUtil.MoleculesWriter(OptionsInfo["OutfileFiltered"], **OptionsInfo["OutfileParams"]) 1387 if WriterFiltered is None: 1388 MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OptionsInfo["OutfileFiltered"]) 1389 MiscUtil.PrintInfo("Generating file %s..." % OptionsInfo["OutfileFiltered"]) 1390 1391 return (Writer, WriterFiltered) 1392 1393 def SetupAlertSummaryWriter(): 1394 """Setup a alert summary writer.""" 1395 1396 Writer = None 1397 1398 if OptionsInfo["CountMode"]: 1399 return Writer 1400 1401 if not OptionsInfo["OutfileSummaryMode"]: 1402 return Writer 1403 1404 Outfile = OptionsInfo["OutfileSummary"] 1405 Writer = open(Outfile, "w") 1406 if Writer is None: 1407 MiscUtil.PrintError("Failed to setup a writer for output fie %s " % Outfile) 1408 1409 MiscUtil.PrintInfo("Generating file %s..." % Outfile) 1410 1411 return Writer 1412 1413 def CloseOutfilesWriters(OutfilesWriters): 1414 """Close outfile writers.""" 1415 1416 for WriterType, Writer in OutfilesWriters.items(): 1417 if Writer is not None: 1418 Writer.close() 1419 1420 def SetupByRuleOutfilesWriters(RuleIDs): 1421 """Setup by rule outfiles writers.""" 1422 1423 # Initialize... 1424 OutfilesWriters = {} 1425 for RuleID in RuleIDs: 1426 OutfilesWriters[RuleID] = None 1427 1428 if OptionsInfo["CountMode"]: 1429 return OutfilesWriters 1430 1431 if not OptionsInfo["OutfilesFilteredByRulesMode"]: 1432 return OutfilesWriters 1433 1434 FileDir, FileName, FileExt = MiscUtil.ParseFileName(Options["--outfile"]) 1435 OutfilesRoot = "%s_Filtered_TopRule" % FileName 1436 OutfilesExt = "sdf" 1437 1438 MsgTxt = "all" if OptionsInfo["OutfilesFilteredByRulesAllMode"] else "top %s" % OptionsInfo["OutfilesFilteredByRulesMaxCount"] 1439 MiscUtil.PrintInfo("\nGenerating output files %s*.%s for %s torsion rules triggering alerts..." % (OutfilesRoot, OutfilesExt, MsgTxt)) 1440 1441 # Delete any existing output files... 1442 Outfiles = glob.glob("%s*.%s" % (OutfilesRoot, OutfilesExt)) 1443 if len(Outfiles): 1444 MiscUtil.PrintInfo("Deleting existing output files %s*.%s..." % (OutfilesRoot, OutfilesExt)) 1445 for Outfile in Outfiles: 1446 try: 1447 os.remove(Outfile) 1448 except Exception as ErrMsg: 1449 MiscUtil.PrintWarning("Failed to delete file: %s" % ErrMsg) 1450 1451 RuleIndex = 0 1452 for RuleID in RuleIDs: 1453 RuleIndex += 1 1454 Outfile = "%s%s.%s" % (OutfilesRoot, RuleIndex, OutfilesExt) 1455 Writer = RDKitUtil.MoleculesWriter(Outfile, **OptionsInfo["OutfileParams"]) 1456 if Writer is None: 1457 MiscUtil.PrintError("Failed to setup a writer for output fie %s " % Outfile) 1458 1459 OutfilesWriters[RuleID] = Writer 1460 1461 return OutfilesWriters 1462 1463 def CloseByRuleOutfilesWriters(OutfilesWriters): 1464 """Close by rule outfile writers.""" 1465 1466 for RuleID, Writer in OutfilesWriters.items(): 1467 if Writer is not None: 1468 Writer.close() 1469 1470 def ProcessTorsionLibraryInfo(): 1471 """Process torsion library information.""" 1472 1473 RetrieveTorsionLibraryInfo() 1474 ListTorsionLibraryInfo() 1475 1476 def RetrieveTorsionLibraryInfo(Quiet = False): 1477 """Retrieve torsion library information.""" 1478 1479 TorsionLibraryFilePath = OptionsInfo["TorsionEnergyLibraryFile"] 1480 if TorsionLibraryFilePath is None: 1481 TorsionLibraryFile = "TorsionStrainEnergyLibrary.xml" 1482 MayaChemToolsDataDir = MiscUtil.GetMayaChemToolsLibDataPath() 1483 TorsionLibraryFilePath = os.path.join(MayaChemToolsDataDir, TorsionLibraryFile) 1484 if not Quiet: 1485 MiscUtil.PrintInfo("\nRetrieving data from default torsion strain energy library file %s..." % TorsionLibraryFile) 1486 else: 1487 TorsionLibraryFilePath = OptionsInfo["TorsionEnergyLibraryFile"] 1488 if not Quiet: 1489 MiscUtil.PrintInfo("\nRetrieving data from torsion strain energy library file %s..." % TorsionLibraryFilePath) 1490 1491 TorsionLibraryInfo["TorsionLibElementTree"] = TorsionLibraryUtil.RetrieveTorsionLibraryInfo(TorsionLibraryFilePath) 1492 1493 def ListTorsionLibraryInfo(): 1494 """List torsion library information.""" 1495 1496 TorsionLibraryUtil.ListTorsionLibraryInfo(TorsionLibraryInfo["TorsionLibElementTree"]) 1497 1498 def SetupTorsionLibraryInfo(Quiet = False): 1499 """Setup torsion library information for matching rotatable bonds.""" 1500 1501 if not Quiet: 1502 MiscUtil.PrintInfo("\nSetting up torsion library information for matching rotatable bonds...") 1503 1504 TorsionLibraryUtil.SetupTorsionLibraryInfoForMatchingRotatableBonds(TorsionLibraryInfo) 1505 1506 def ProcessRotatableBondsSMARTSMode(): 1507 """"Process SMARTS pattern for rotatable bonds.""" 1508 1509 RotBondsMode = OptionsInfo["RotBondsSMARTSMode"] 1510 RotBondsSMARTSPattern = None 1511 if re.match("NonStrict", RotBondsMode, re.I): 1512 RotBondsSMARTSPattern = "[!$(*#*)&!D1]-&!@[!$(*#*)&!D1]" 1513 elif re.match("SemiStrict", RotBondsMode, re.I): 1514 RotBondsSMARTSPattern = "[!$(*#*)&!D1&!$(C(F)(F)F)&!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)&!$(C([CH3])([CH3])[CH3])]-!@[!$(*#*)&!D1&!$(C(F)(F)F)&!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)&!$(C([CH3])([CH3])[CH3])]" 1515 elif re.match("Strict", RotBondsMode, re.I): 1516 RotBondsSMARTSPattern = "[!$(*#*)&!D1&!$(C(F)(F)F)&!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)&!$(C([CH3])([CH3])[CH3])&!$([CD3](=[N,O,S])-!@[#7,O,S!D1])&!$([#7,O,S!D1]-!@[CD3]=[N,O,S])&!$([CD3](=[N+])-!@[#7!D1])&!$([#7!D1]-!@[CD3]=[N+])]-!@[!$(*#*)&!D1&!$(C(F)(F)F)&!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)&!$(C([CH3])([CH3])[CH3])]" 1517 elif re.match("Specify", RotBondsMode, re.I): 1518 RotBondsSMARTSPattern = OptionsInfo["RotBondsSMARTSPatternSpecified"] 1519 RotBondsSMARTSPattern = RotBondsSMARTSPattern.strip() 1520 if not len(RotBondsSMARTSPattern): 1521 MiscUtil.PrintError("Empty value specified for SMILES/SMARTS pattern in \"--rotBondsSMARTSPattern\" option, %s." % RotBondsMode) 1522 else: 1523 MiscUtil.PrintError("The value, %s, specified for option \"-r, --rotBondsSMARTSMode\" is not valid. " % RotBondsMode) 1524 1525 RotBondsPatternMol = Chem.MolFromSmarts(RotBondsSMARTSPattern) 1526 if RotBondsPatternMol is None: 1527 if re.match("Specify", RotBondsMode, re.I): 1528 MiscUtil.PrintError("Failed to create rotatable bonds pattern molecule. The rotatable bonds SMARTS pattern, \"%s\", specified using \"--rotBondsSMARTSPattern\" option is not valid." % (RotBondsSMARTSPattern)) 1529 else: 1530 MiscUtil.PrintError("Failed to create rotatable bonds pattern molecule. The default rotatable bonds SMARTS pattern, \"%s\", used for, \"%s\", value of \"-r, --rotBondsSMARTSMode\" option is not valid." % (RotBondsSMARTSPattern, RotBondsMode)) 1531 1532 OptionsInfo["RotBondsSMARTSPattern"] = RotBondsSMARTSPattern 1533 1534 def ProcessSDFieldLabelsOption(): 1535 """Process SD data field label option.""" 1536 1537 ParamsOptionName = "--outfileSDFieldLabels" 1538 ParamsOptionValue = Options["--outfileSDFieldLabels"] 1539 1540 ParamsIDsToLabels = {"RotBondsCountLabel": "RotBondsCount", "TotalEnergyLabel": "TotalEnergy", "TotalEnergyLowerBoundCILabel": "TotalEnergyLowerBoundCI", "TotalEnergyUpperBoundCILabel": "TotalEnergyUpperBoundCI", "MaxSingleEnergyLabel": "MaxSingleEnergy", "MaxSingleEnergyAlertsCountLabel": "MaxSingleEnergyAlertsCount", "AnglesNotObservedCountLabel": "AnglesNotObservedCount", "TorsionAlertsLabel": "TorsionAlerts(RotBondIndices TorsionIndices TorsionAngle Energy EnergyLowerBoundCI EnergyUpperBoundCI HierarchyClass HierarchySubClass TorsionRule EnergyMethod AngleNotObserved MaxSingleEnergyAlert)", "TorsionRuleLabel": "TorsionRule (HierarchyClass HierarchySubClass TorsionRule EnergyMethod)", "TorsionRuleMaxSingleEnergyAlertsCountLabel": "TorsionRuleMaxSingleEnergyAlertsCount", "TorsionRuleAnglesNotObservedCountLabel": "TorsionRuleAnglesNotObservedCount", "TorsionRuleAlertsLabel": "TorsionRuleAlerts (RotBondIndices TorsionIndices TorsionAngle Energy EnergyLowerBoundCI EnergyUpperBoundCI AngleNotObserved MaxSingleEnergyAlert)"} 1541 1542 if re.match("^auto$", ParamsOptionValue, re.I): 1543 OptionsInfo["SDFieldIDsToLabels"] = ParamsIDsToLabels 1544 return 1545 1546 # Setup a canonical paramater names... 1547 ValidParamNames = [] 1548 CanonicalParamNamesMap = {} 1549 for ParamName in sorted(ParamsIDsToLabels): 1550 ValidParamNames.append(ParamName) 1551 CanonicalParamNamesMap[ParamName.lower()] = ParamName 1552 1553 ParamsOptionValue = ParamsOptionValue.strip() 1554 if not ParamsOptionValue: 1555 PrintError("No valid parameter name and value pairs specified using \"%s\" option" % ParamsOptionName) 1556 1557 ParamsOptionValueWords = ParamsOptionValue.split(",") 1558 if len(ParamsOptionValueWords) % 2: 1559 MiscUtil.PrintError("The number of comma delimited paramater names and values, %d, specified using \"%s\" option must be an even number." % (len(ParamsOptionValueWords), ParamsOptionName)) 1560 1561 # Validate paramater name and value pairs... 1562 for Index in range(0, len(ParamsOptionValueWords), 2): 1563 Name = ParamsOptionValueWords[Index].strip() 1564 Value = ParamsOptionValueWords[Index + 1].strip() 1565 1566 CanonicalName = Name.lower() 1567 if not CanonicalName in CanonicalParamNamesMap: 1568 MiscUtil.PrintError("The parameter name, %s, specified using \"%s\" is not a valid name. Supported parameter names: %s" % (Name, ParamsOptionName, " ".join(ValidParamNames))) 1569 1570 ParamName = CanonicalParamNamesMap[CanonicalName] 1571 ParamValue = Value 1572 1573 # Set value... 1574 ParamsIDsToLabels[ParamName] = ParamValue 1575 1576 OptionsInfo["SDFieldIDsToLabels"] = ParamsIDsToLabels 1577 1578 def ProcessOptions(): 1579 """Process and validate command line arguments and options.""" 1580 1581 MiscUtil.PrintInfo("Processing options...") 1582 1583 # Validate options... 1584 ValidateOptions() 1585 1586 TotalEnergyMode, MaxSingleEnergyMode, TotalOrMaxSingleEnergyMode = [False] * 3 1587 AlertsMode = Options["--alertsMode"] 1588 if re.match("^TotalEnergy$", AlertsMode, re.I): 1589 TotalEnergyMode = True 1590 elif re.match("^MaxSingleEnergy$", AlertsMode, re.I): 1591 MaxSingleEnergyMode = True 1592 elif re.match("^TotalOrMaxSingleEnergy$", AlertsMode, re.I): 1593 TotalOrMaxSingleEnergyMode = True 1594 OptionsInfo["AlertsMode"] = AlertsMode 1595 OptionsInfo["TotalEnergyMode"] = TotalEnergyMode 1596 OptionsInfo["MaxSingleEnergyMode"] = MaxSingleEnergyMode 1597 OptionsInfo["TotalOrMaxSingleEnergyMode"] = TotalOrMaxSingleEnergyMode 1598 1599 OptionsInfo["FilterTorsionsNotObserved"] = True if re.match("^yes$", Options["--filterTorsionsNotObserved"], re.I) else False 1600 1601 OptionsInfo["MaxSingleEnergyCutoff"] = float(Options["--alertsMaxSingleEnergyCutoff"]) 1602 OptionsInfo["TotalEnergyCutoff"] = float(Options["--alertsTotalEnergyCutoff"]) 1603 1604 OptionsInfo["Infile"] = Options["--infile"] 1605 ParamsDefaultInfoOverride = {"RemoveHydrogens": False} 1606 OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters("--infileParams", Options["--infileParams"], InfileName = Options["--infile"], ParamsDefaultInfo = ParamsDefaultInfoOverride) 1607 1608 OptionsInfo["Outfile"] = Options["--outfile"] 1609 OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters("--outfileParams", Options["--outfileParams"], Options["--infile"], Options["--outfile"]) 1610 1611 FileDir, FileName, FileExt = MiscUtil.ParseFileName(Options["--outfile"]) 1612 OutfileFiltered = "%s_Filtered.%s" % (FileName, FileExt) 1613 OptionsInfo["OutfileFiltered"] = OutfileFiltered 1614 OptionsInfo["OutfileFilteredMode"] = True if re.match("^yes$", Options["--outfileFiltered"], re.I) else False 1615 1616 OptionsInfo["OutfileSummary"] = "%s_AlertsSummary.csv" % (FileName) 1617 1618 OutfileSummaryMode = Options["--outfileSummary"] 1619 if re.match("^auto$", OutfileSummaryMode, re.I): 1620 OutfileSummaryMode = 'yes' if re.match("^MaxSingleEnergy$", Options["--alertsMode"], re.I) else 'no' 1621 OptionsInfo["OutfileSummaryMode"] = True if re.match("^yes$", OutfileSummaryMode, re.I) else False 1622 1623 if re.match("^yes$", Options["--outfileSummary"], re.I): 1624 if not re.match("^MaxSingleEnergy$", Options["--alertsMode"], re.I): 1625 MiscUtil.PrintError("The value \"%s\" specified for \"--outfileSummary\" option is not valid. The specified value is only allowed during \"MaxSingleEnergy\" value of \"-a, --alertsMode\" option." % (Options["--outfileSummary"])) 1626 1627 OutfilesFilteredByRulesMode = Options["--outfilesFilteredByRules"] 1628 if re.match("^auto$", OutfilesFilteredByRulesMode, re.I): 1629 OutfilesFilteredByRulesMode = 'yes' if re.match("^MaxSingleEnergy$", Options["--alertsMode"], re.I) else 'no' 1630 OptionsInfo["OutfilesFilteredByRulesMode"] = True if re.match("^yes$", OutfilesFilteredByRulesMode, re.I) else False 1631 1632 if re.match("^yes$", Options["--outfilesFilteredByRules"], re.I): 1633 if not re.match("^MaxSingleEnergy$", Options["--alertsMode"], re.I): 1634 MiscUtil.PrintError("The value \"%s\" specified for \"--outfilesFilteredByRules\" option is not valid. The specified value is only allowed during \"MaxSingleEnergy\" value of \"-a, --alertsMode\" option." % (Options["--outfileSummary"])) 1635 1636 OptionsInfo["TrackAlertsSummaryInfo"] = True if (OptionsInfo["OutfileSummaryMode"] or OptionsInfo["OutfilesFilteredByRulesMode"]) else False 1637 1638 OutfilesFilteredByRulesMaxCount = Options["--outfilesFilteredByRulesMaxCount"] 1639 if not re.match("^All$", OutfilesFilteredByRulesMaxCount, re.I): 1640 OutfilesFilteredByRulesMaxCount = int(OutfilesFilteredByRulesMaxCount) 1641 OptionsInfo["OutfilesFilteredByRulesMaxCount"] = OutfilesFilteredByRulesMaxCount 1642 OptionsInfo["OutfilesFilteredByRulesAllMode"] = True if re.match("^All$", Options["--outfilesFilteredByRulesMaxCount"], re.I) else False 1643 1644 OptionsInfo["OutfileAlerts"] = True if re.match("^yes$", Options["--outfileAlerts"], re.I) else False 1645 1646 if re.match("^yes$", Options["--outfilesFilteredByRules"], re.I): 1647 if not re.match("^yes$", Options["--outfileAlerts"], re.I): 1648 MiscUtil.PrintError("The value \"%s\" specified for \"--outfilesFilteredByRules\" option is not valid. The specified value is only allowed during \"yes\" value of \"--outfileAlerts\" option." % (Options["--outfilesFilteredByRules"])) 1649 1650 OptionsInfo["OutfileAlertsMode"] = Options["--outfileAlertsMode"] 1651 OptionsInfo["OutfileAlertsOnly"] = True if re.match("^AlertsOnly$", Options["--outfileAlertsMode"], re.I) else False 1652 1653 ProcessSDFieldLabelsOption() 1654 1655 OptionsInfo["Overwrite"] = Options["--overwrite"] 1656 OptionsInfo["CountMode"] = True if re.match("^count$", Options["--mode"], re.I) else False 1657 1658 OptionsInfo["Precision"] = int(Options["--precision"]) 1659 1660 OptionsInfo["MPMode"] = True if re.match("^yes$", Options["--mp"], re.I) else False 1661 OptionsInfo["MPParams"] = MiscUtil.ProcessOptionMultiprocessingParameters("--mpParams", Options["--mpParams"]) 1662 1663 OptionsInfo["RotBondsSMARTSMode"] = Options["--rotBondsSMARTSMode"] 1664 OptionsInfo["RotBondsSMARTSPatternSpecified"] = Options["--rotBondsSMARTSPattern"] 1665 ProcessRotatableBondsSMARTSMode() 1666 1667 OptionsInfo["TorsionEnergyLibraryFile"] = None 1668 if not re.match("^auto$", Options["--torsionEnergyLibraryFile"], re.I): 1669 OptionsInfo["TorsionEnergyLibraryFile"] = Options["--torsionEnergyLibraryFile"] 1670 1671 # Setup delimiter for writing out torsion alert information to output files... 1672 OptionsInfo["IntraSetValuesDelim"] = "," 1673 OptionsInfo["InterSetValuesDelim"] = " " 1674 1675 def RetrieveOptions(): 1676 """Retrieve command line arguments and options.""" 1677 1678 # Get options... 1679 global Options 1680 Options = docopt(_docoptUsage_) 1681 1682 # Set current working directory to the specified directory... 1683 WorkingDir = Options["--workingdir"] 1684 if WorkingDir: 1685 os.chdir(WorkingDir) 1686 1687 # Handle examples option... 1688 if "--examples" in Options and Options["--examples"]: 1689 MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_)) 1690 sys.exit(0) 1691 1692 def ProcessListTorsionLibraryOption(): 1693 """Process list torsion library information.""" 1694 1695 # Validate and process dataFile option for listing torsion library information... 1696 OptionsInfo["TorsionEnergyLibraryFile"] = None 1697 if not re.match("^auto$", Options["--torsionEnergyLibraryFile"], re.I): 1698 MiscUtil.ValidateOptionFilePath("-t, --torsionEnergyLibraryFile", Options["--torsionEnergyLibraryFile"]) 1699 OptionsInfo["TorsionEnergyLibraryFile"] = Options["--torsionEnergyLibraryFile"] 1700 1701 RetrieveTorsionLibraryInfo() 1702 ListTorsionLibraryInfo() 1703 1704 def ValidateOptions(): 1705 """Validate option values.""" 1706 1707 MiscUtil.ValidateOptionTextValue("-a, --alertsMode", Options["--alertsMode"], "TotalEnergy MaxSingleEnergy TotalOrMaxSingleEnergy") 1708 1709 MiscUtil.ValidateOptionFloatValue("--alertsMaxSingleEnergyCutoff", Options["--alertsMaxSingleEnergyCutoff"], {">": 0.0}) 1710 MiscUtil.ValidateOptionFloatValue("--alertsTotalEnergyCutoff", Options["--alertsTotalEnergyCutoff"], {">": 0.0}) 1711 1712 MiscUtil.ValidateOptionTextValue("--filterTorsionsNotObserved", Options["--filterTorsionsNotObserved"], "yes no") 1713 1714 MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"]) 1715 MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "sdf sd mol") 1716 1717 MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "sdf sd") 1718 if re.match("^filter$", Options["--mode"], re.I): 1719 MiscUtil.ValidateOptionsOutputFileOverwrite("-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"]) 1720 MiscUtil.ValidateOptionsDistinctFileNames("-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"]) 1721 1722 MiscUtil.ValidateOptionTextValue("--outfileFiltered", Options["--outfileFiltered"], "yes no") 1723 1724 MiscUtil.ValidateOptionTextValue("--outfilesFilteredByRules", Options["--outfilesFilteredByRules"], "yes no auto") 1725 if not re.match("^All$", Options["--outfilesFilteredByRulesMaxCount"], re.I): 1726 MiscUtil.ValidateOptionIntegerValue("--outfilesFilteredByRulesMaxCount", Options["--outfilesFilteredByRulesMaxCount"], {">": 0}) 1727 1728 MiscUtil.ValidateOptionTextValue("--outfileSummary", Options["--outfileSummary"], "yes no auto") 1729 MiscUtil.ValidateOptionTextValue("--outfileAlerts", Options["--outfileAlerts"], "yes no") 1730 MiscUtil.ValidateOptionTextValue("--outfileAlertsMode", Options["--outfileAlertsMode"], "All AlertsOnly") 1731 1732 MiscUtil.ValidateOptionTextValue("-m, --mode", Options["--mode"], "filter count") 1733 if re.match("^filter$", Options["--mode"], re.I): 1734 if not Options["--outfile"]: 1735 MiscUtil.PrintError("The outfile must be specified using \"-o, --outfile\" during \"filter\" value of \"-m, --mode\" option") 1736 1737 MiscUtil.ValidateOptionTextValue("--mp", Options["--mp"], "yes no") 1738 1739 MiscUtil.ValidateOptionIntegerValue("-p, --precision", Options["--precision"], {">": 0}) 1740 1741 MiscUtil.ValidateOptionTextValue("-r, --rotBondsSMARTSMode", Options["--rotBondsSMARTSMode"], "NonStrict SemiStrict Strict Specify") 1742 if re.match("^Specify$", Options["--rotBondsSMARTSMode"], re.I): 1743 if not Options["--rotBondsSMARTSPattern"]: 1744 MiscUtil.PrintError("The SMARTS pattern must be specified using \"--rotBondsSMARTSPattern\" during \"Specify\" value of \"-r, --rotBondsSMARTS\" option") 1745 1746 if not re.match("^auto$", Options["--torsionEnergyLibraryFile"], re.I): 1747 MiscUtil.ValidateOptionFilePath("-t, --torsionEnergyLibraryFile", Options["--torsionEnergyLibraryFile"]) 1748 1749 # Setup a usage string for docopt... 1750 _docoptUsage_ = """ 1751 RDKitFilterTorsionStrainEnergyAlerts.py - Filter torsion strain energy library alerts 1752 1753 Usage: 1754 RDKitFilterTorsionStrainEnergyAlerts.py [--alertsMode <TotalEnergy, MaxSingleEnergy, or TotalOrMaxSingleEnergy>] 1755 [--alertsMaxSingleEnergyCutoff <Number>] [--alertsTotalEnergyCutoff <Number>] 1756 [--filterTorsionsNotObserved <yes or no>] [--infileParams <Name,Value,...>] [--mode <filter or count>] 1757 [--mp <yes or no>] [--mpParams <Name,Value,...>] [--outfileAlerts <yes or no>] 1758 [--outfileAlertsMode <All or AlertsOnly>] [--outfileFiltered <yes or no>] 1759 [--outfilesFilteredByRules <yes or no>] [--outfilesFilteredByRulesMaxCount <All or number>] 1760 [--outfileSummary <yes or no>] [--outfileSDFieldLabels <Type,Label,...>] [--outfileParams <Name,Value,...>] 1761 [--overwrite] [--precision <number>] [ --rotBondsSMARTSMode <NonStrict, SemiStrict,...>] 1762 [--rotBondsSMARTSPattern <SMARTS>] [--torsionEnergyLibraryFile <FileName or auto>] 1763 [-w <dir>] -i <infile> -o <outfile> 1764 RDKitFilterTorsionStrainEnergyAlerts.py [--torsionEnergyLibraryFile <FileName or auto>] -l | --list 1765 RDKitFilterTorsionStrainEnergyAlerts.py -h | --help | -e | --examples 1766 1767 Description: 1768 Filter strained molecules from an input file for torsion strain energy library 1769 [ Ref 153 ] alerts by matching rotatable bonds against SMARTS patterns specified 1770 for torsion rules in a torsion energy library file and write out appropriate 1771 molecules to output files. The molecules must have 3D coordinates in input file. 1772 The default torsion strain energy library file, TorsionStrainEnergyLibrary.xml, 1773 is available under MAYACHEMTOOLS/lib/data directory. 1774 1775 The data in torsion strain energy library file is organized in a hierarchical 1776 manner. It consists of one generic class and six specific classes at the highest 1777 level. Each class contains multiple subclasses corresponding to named functional 1778 groups or substructure patterns. The subclasses consist of torsion rules sorted 1779 from specific to generic torsion patterns. The torsion rule, in turn, contains a 1780 list of peak values for torsion angles and two tolerance values. A pair of tolerance 1781 values define torsion bins around a torsion peak value. 1782 1783 A strain energy calculation method, 'exact' or 'approximate' [ Ref 153 ], is 1784 associated with each torsion rule for calculating torsion strain energy. The 'exact' 1785 stain energy calculation relies on the energy bins available under the energy histogram 1786 consisting of 36 bins covering angles from -180 to 180. The width of each bin is 10 1787 degree. The energy bins are are defined at the right end points. The first and the 1788 last energy bins correspond to -170 and 180 respectively. The torsion angle is mapped 1789 to a energy bin. An angle offset is calculated for the torsion angle from the the right 1790 end point angle of the bin. The strain energy is estimated for the angle offset based 1791 on the energy difference between the current and previous bins. The torsion strain 1792 energy, in terms of torsion energy units (TEUs), corresponds to the sum of bin strain 1793 energy and the angle offset strain energy. 1794 1795 Energy = BinEnergyDiff/10.0 * BinAngleOffset + BinEnergy[BinNum] 1796 1797 Where: 1798 1799 BinEnergyDiff = BinEnergy[BinNum] - BinEnergy[PreviousBinNum] 1800 BinAngleOffset = TorsionAngle - BinAngleRightSide 1801 1802 The 'approximate' strain energy calculation relies on the angle difference between a 1803 torsion angle and the torsion peaks observed for the torsion rules in the torsion 1804 energy library. The torsion angle is matched to a torsion peak based on the value of 1805 torsion angle difference. It must be less than or equal to the value for the second 1806 tolerance 'tolerance2'. Otherwise, the torsion angle is not observed in the torsion 1807 energy library and a value of 'NA' is assigned for torsion energy along with the lower 1808 and upper bounds on energy at 95% confidence interval. The 'approximate' torsion 1809 energy (TEUs) for observed torsion angle is calculated using the following formula: 1810 1811 Energy = beta_1 * (AngleDiff ** 2) + beta_2 * (AngleDiff ** 4) 1812 1813 The coefficients 'beta_1' and 'beta_2' are available for the observed angles in 1814 the torsion strain energy library. The 'AngleDiff' is the difference between the 1815 torsion angle and the matched torsion peak. 1816 1817 For example: 1818 1819 <library> 1820 <hierarchyClass id1="G" id2="G" name="GG"> 1821 ... 1822 </hierarchyClass> 1823 <hierarchyClass id1="C" id2="O" name="CO"> 1824 <hierarchySubClass name="Ester bond I" smarts="O=[C:2][O:3]"> 1825 <torsionRule method="exact" smarts= 1826 "[O:1]=[C:2]!@[O:3]~[CH0:4]"> 1827 <angleList> 1828 <angle score="56.52" tolerance1="20.00" 1829 tolerance2="25.00" value="0.0"/> 1830 </angleList> 1831 <histogram> 1832 <bin count="1"/> 1833 ... 1834 </histogram> 1835 <histogram_shifted> 1836 <bin count="0"/> 1837 ... 1838 </histogram_shifted> 1839 <histogram_converted> 1840 <bin energy="4.67... lower="2.14..." upper="Inf"/> 1841 ... 1842 <bin energy="1.86..." lower="1.58..." upper="2.40..."/> 1843 ... 1844 </histogram_converted> 1845 </torsionRule> 1846 <torsionRule method="approximate" smarts= 1847 "[cH0:1][c:2]([cH0])!@[O:3][p:4]"> 1848 <angleList> 1849 <angle beta_1="0.002..." beta_2="-7.843...e-07" 1850 score="27.14" theta_0="-90.0" tolerance1="30.00" 1851 tolerance2="45.00" value="-90.0"/> 1852 ... 1853 </angleList> 1854 <histogram> 1855 <bin count="0"/> 1856 ... 1857 </histogram> 1858 <histogram_shifted> 1859 <bin count="0"/> 1860 ... 1861 </histogram_shifted> 1862 </torsionRule> 1863 ... 1864 ... 1865 </hierarchyClass> 1866 <hierarchyClass id1="N" id2="C" name="NC"> 1867 ... 1868 </hierarchyClass> 1869 <hierarchyClass id1="S" id2="N" name="SN"> 1870 ... 1871 </hierarchyClass> 1872 <hierarchyClass id1="C" id2="S" name="CS"> 1873 ... 1874 </hierarchyClass> 1875 <hierarchyClass id1="C" id2="C" name="CC"> 1876 ... 1877 </hierarchyClass> 1878 <hierarchyClass id1="S" id2="S" name="SS"> 1879 ... 1880 </hierarchyClass> 1881 </library> 1882 1883 The rotatable bonds in a 3D molecule are identified using a default SMARTS pattern. 1884 A custom SMARTS pattern may be optionally specified to detect rotatable bonds. 1885 Each rotatable bond is matched to a torsion rule in the torsion strain energy library. 1886 The strain energy is calculated for each rotatable bond using the calculation 1887 method, 'exact' or 'approximate', associated with the matched torsion rule. 1888 1889 The total strain energy (TEUs) of a molecule corresponds to the sum of 'exact' and 1890 'approximate' strain energies calculated for all matched rotatable bonds in the 1891 molecule. The total strain energy is set to 'NA' for molecules containing a 'approximate' 1892 energy estimate for a torsion angle not observed in the torsion energy library. In 1893 addition, the lower and upper bounds on energy at 95% confidence interval are 1894 set to 'NA'. 1895 1896 The following output files are generated after the filtering: 1897 1898 <OutfileRoot>.sdf 1899 <OutfileRoot>_Filtered.sdf 1900 <OutfileRoot>_AlertsSummary.csv 1901 <OutfileRoot>_Filtered_TopRule*.sdf 1902 1903 The last two set of outfile files, <OutfileRoot>_AlertsSummary.csv and 1904 <OutfileRoot>_<OutfileRoot>_AlertsSummary.csv, are only generated during filtering 1905 by 'MaxSingleEnergy'. 1906 1907 The supported input file formats are: Mol (.mol), SD (.sdf, .sd) 1908 1909 The supported output file formats are: SD (.sdf, .sd) 1910 1911 Options: 1912 -a, --alertsMode <TotalEnergy,...> [default: TotalEnergy] 1913 Torsion strain energy library alert types to use for filtering molecules 1914 containing rotatable bonds based on the calculated values for the total 1915 torsion strain energy of a molecule and the maximum single strain 1916 energy of a rotatable bond in a molecule. 1917 1918 Possible values: TotalEnergy, MaxSingleEnergy, or TotalOrMaxSingleEnergy 1919 1920 The strain energy cutoff values in terms of torsion energy units (TEUs) are 1921 used to filter molecules as shown below: 1922 1923 AlertsMode AlertsEnergyCutoffs (TEUs) 1924 1925 TotalEnergy >= TotalEnergyCutoff 1926 1927 MaxSingleEnergy >= MaxSingleEnergyCutoff 1928 1929 TotalOrMaxSingleEnergy >= TotalEnergyCutoff 1930 or >= MaxSingleEnergyCutoff 1931 1932 --alertsMaxSingleEnergyCutoff <Number> [default: 1.8] 1933 Maximum single strain energy (TEUs) cutoff [ Ref 153 ] for filtering molecules 1934 based on the maximum value of a single strain energy of a rotatable bond 1935 in a molecule. This option is used during 'MaxSingleEnergy' or 1936 'TotalOrMaxSingleEnergy' values of '-a, --alertsMode' option. 1937 1938 The maximum single strain energy must be greater than or equal to the 1939 specified cutoff value for filtering molecules. 1940 --alertsTotalEnergyCutoff <Number> [default: 6.0] 1941 Total strain strain energy (TEUs) cutoff [ Ref 153 ] for filtering molecules 1942 based on total strain energy for all rotatable bonds in a molecule. This 1943 option is used during 'TotalEnergy' or 'TotalOrMaxSingleEnergy' 1944 values of '-a, --alertsMode' option. 1945 1946 The total strain energy must be greater than or equal to the specified 1947 cutoff value for filtering molecules. 1948 --filterTorsionsNotObserved <yes or no> [default: no] 1949 Filter molecules containing torsion angles not observed in torsion strain 1950 energy library. It's not possible to calculate torsion strain energies for 1951 these torsions during 'approximate' match to a specified torsion in the 1952 library. 1953 1954 The 'approximate' strain energy calculation relies on the angle difference 1955 between a torsion angle and the torsion peaks observed for the torsion 1956 rules in the torsion energy library. The torsion angle is matched to a 1957 torsion peak based on the value of torsion angle difference. It must be 1958 less than or equal to the value for the second tolerance 'tolerance2'. 1959 Otherwise, the torsion angle is not observed in the torsion energy library 1960 and a value of 'NA' is assigned for torsion energy along with the lower and 1961 upper bounds on energy at 95% confidence interval. 1962 -e, --examples 1963 Print examples. 1964 -h, --help 1965 Print this help message. 1966 -i, --infile <infile> 1967 Input file name. 1968 --infileParams <Name,Value,...> [default: auto] 1969 A comma delimited list of parameter name and value pairs for reading 1970 molecules from files. The supported parameter names for different file 1971 formats, along with their default values, are shown below: 1972 1973 SD, MOL: removeHydrogens,no,sanitize,yes,strictParsing,yes 1974 1975 -l, --list 1976 List torsion library information without performing any filtering. 1977 -m, --mode <filter or count> [default: filter] 1978 Specify whether to filter molecules for torsion strain energy library [ Ref 153 ] 1979 alerts by matching rotatable bonds against SMARTS patterns specified for 1980 torsion rules to calculate torsion strain energies and write out the rest 1981 of the molecules to an outfile or simply count the number of matched 1982 molecules marked for filtering. 1983 --mp <yes or no> [default: no] 1984 Use multiprocessing. 1985 1986 By default, input data is retrieved in a lazy manner via mp.Pool.imap() 1987 function employing lazy RDKit data iterable. This allows processing of 1988 arbitrary large data sets without any additional requirements memory. 1989 1990 All input data may be optionally loaded into memory by mp.Pool.map() 1991 before starting worker processes in a process pool by setting the value 1992 of 'inputDataMode' to 'InMemory' in '--mpParams' option. 1993 1994 A word to the wise: The default 'chunkSize' value of 1 during 'Lazy' input 1995 data mode may adversely impact the performance. The '--mpParams' section 1996 provides additional information to tune the value of 'chunkSize'. 1997 --mpParams <Name,Value,...> [default: auto] 1998 A comma delimited list of parameter name and value pairs to configure 1999 multiprocessing. 2000 2001 The supported parameter names along with their default and possible 2002 values are shown below: 2003 2004 chunkSize, auto 2005 inputDataMode, Lazy [ Possible values: InMemory or Lazy ] 2006 numProcesses, auto [ Default: mp.cpu_count() ] 2007 2008 These parameters are used by the following functions to configure and 2009 control the behavior of multiprocessing: mp.Pool(), mp.Pool.map(), and 2010 mp.Pool.imap(). 2011 2012 The chunkSize determines chunks of input data passed to each worker 2013 process in a process pool by mp.Pool.map() and mp.Pool.imap() functions. 2014 The default value of chunkSize is dependent on the value of 'inputDataMode'. 2015 2016 The mp.Pool.map() function, invoked during 'InMemory' input data mode, 2017 automatically converts RDKit data iterable into a list, loads all data into 2018 memory, and calculates the default chunkSize using the following method 2019 as shown in its code: 2020 2021 chunkSize, extra = divmod(len(dataIterable), len(numProcesses) * 4) 2022 if extra: chunkSize += 1 2023 2024 For example, the default chunkSize will be 7 for a pool of 4 worker processes 2025 and 100 data items. 2026 2027 The mp.Pool.imap() function, invoked during 'Lazy' input data mode, employs 2028 'lazy' RDKit data iterable to retrieve data as needed, without loading all the 2029 data into memory. Consequently, the size of input data is not known a priori. 2030 It's not possible to estimate an optimal value for the chunkSize. The default 2031 chunkSize is set to 1. 2032 2033 The default value for the chunkSize during 'Lazy' data mode may adversely 2034 impact the performance due to the overhead associated with exchanging 2035 small chunks of data. It is generally a good idea to explicitly set chunkSize to 2036 a larger value during 'Lazy' input data mode, based on the size of your input 2037 data and number of processes in the process pool. 2038 2039 The mp.Pool.map() function waits for all worker processes to process all 2040 the data and return the results. The mp.Pool.imap() function, however, 2041 returns the the results obtained from worker processes as soon as the 2042 results become available for specified chunks of data. 2043 2044 The order of data in the results returned by both mp.Pool.map() and 2045 mp.Pool.imap() functions always corresponds to the input data. 2046 -o, --outfile <outfile> 2047 Output file name. 2048 --outfileAlerts <yes or no> [default: yes] 2049 Write out alerts information to SD output files. 2050 --outfileAlertsMode <All or AlertsOnly> [default: AlertsOnly] 2051 Write alerts information to SD output files for all alerts or only for alerts 2052 specified by '--AlertsMode' option. Possible values: All or AlertsOnly 2053 This option is only valid for 'Yes' value of '--outfileAlerts' option. 2054 2055 The following alerts information is added to SD output files using 2056 'TorsionAlerts' data field: 2057 2058 RotBondIndices TorsionIndices TorsionAngle 2059 Energy EnergyLowerBoundCI EnergyUpperBoundCI 2060 HierarchyClass HierarchySubClass TorsionRule 2061 EnergyMethod AngleNotObserved MaxSingleEnergyAlert 2062 2063 The following data filelds are added to SD output files based on the value of 2064 '--AlertsMode' option: 2065 2066 TotalEnergy 2067 TotalEnergyLowerBoundCI 2068 TotalEnergyUpperBoundCI 2069 2070 MaxSingleEnergy 2071 MaxSingleEnergyAlertsCount 2072 2073 AnglesNotObservedCount 2074 2075 The 'RotBondsCount' is always added to SD output files containing both 2076 remaining and filtered molecules. 2077 2078 Format: 2079 2080 > <RotBondsCount> 2081 Number 2082 2083 > <TotalEnergy> 2084 Number 2085 2086 > <TotalEnergyLowerBoundCI> 2087 Number 2088 2089 > <TotalEnergyUpperBoundCI> 2090 Number 2091 2092 > <MaxSingleEnergy> 2093 Number 2094 2095 > <MaxSingleEnergyAlertsCount> 2096 Number 2097 2098 > <AnglesNotObservedCount> 2099 Number 2100 2101 > <TorsionAlerts (RotBondIndices TorsionIndices TorsionAngle 2102 Energy EnergyLowerBoundCI EnergyUpperBoundCI 2103 HierarchyClass HierarchySubClass TorsionRule 2104 EnergyMethod AngleNotObserved MaxSingleEnergyAlert)> 2105 AtomIndex2,AtomIndex3 AtomIndex1,AtomIndex2,AtomIndex3,AtomIndex4 2106 Angle Energy EnergyLowerBoundCI EnergyUpperBoundCI 2107 ClassName SubClassName SMARTS EnergyMethod Yes|No|NA Yes|No|NA 2108 ... ... ... 2109 ... ... ... 2110 2111 A set of 12 values is written out as value of 'TorsionAlerts' data field for 2112 each torsion in a molecule. The space character is used as a delimiter 2113 to separate values with in a set and across set. The comma character 2114 is used to delimit multiple values for each value in a set. 2115 2116 The 'RotBondIndices' and 'TorsionIndices' contain 2 and 4 comma delimited 2117 values representing atom indices for a rotatable bond and the matched 2118 torsion. 2119 2120 The 'Energy' value is the estimated strain energy for the matched torsion. 2121 The 'EnergyLowerBoundCI' and 'EnergyUpperBoundCI' represent lower and 2122 bound energy estimates at 95% confidence interval. The 'EnergyMethod', 2123 exact or approximate, corresponds to the method employed to estimate 2124 torsion strain energy. 2125 2126 The 'AngleNotObserved' is only valid for 'approximate' value of 'EnergyMethod'. 2127 It has three possible values: Yes, No, or NA. The 'Yes' value indicates that 2128 the 'TorsionAngle' is outside the 'tolerance2' of all peaks for the matched 2129 torsion rule in the torsion library. 2130 2131 The 'MaxSingleEnergyAlert' is valid for the following values of '-a, --alertsMode' 2132 option: 'MaxSingleEnergy' or 'TotalOrMaxSingleEnergy'. It has three possible 2133 values: Yes, No, or NA. It's set to 'NA' for 'Yes' or 'NA' values of 2134 'AngleNotObserved'. The 'Yes' value indicates that the estimated torsion 2135 energy is greater than the specified value for '--alertsMaxSingleEnergyCutoff' 2136 option. 2137 2138 For example: 2139 2140 > <RotBondsCount> (1) 2141 14 2142 2143 > <TotalEnergy> (1) 2144 6.8065 2145 2146 > <TotalEnergyLowerBoundCI> (1) 2147 5.9340 2148 2149 > <TotalEnergyUpperBoundCI> (1) 2150 NA 2151 2152 > <MaxSingleEnergy> (1) 2153 1.7108 2154 2155 > <MaxSingleEnergyAlertsCount> (1) 2156 0 2157 2158 > <AnglesNotObservedCount> (1) 2159 0 2160 2161 > <TorsionAlerts(RotBondIndices TorsionIndices TorsionAngle Energy 2162 EnergyLowerBoundCI EnergyUpperBoundCI HierarchyClass 2163 HierarchySubClass TorsionRule EnergyMethod AngleNotObserved 2164 MaxSingleEnergyAlert)> (1) 2165 2,1 48,2,1,0 61.90 0.0159 -0.0320 0.0674 CO Ether [O:1][CX4:2]! 2166 @[O:3][CX4:4] Exact NA No 2,3 1,2,3,4 109.12 1.5640 1.1175 NA CC 2167 None/[CX4][CX3] [O:1][CX4:2]!@[CX3:3]=[O:4] Exact NA No 2168 ... ... ... 2169 2170 --outfileFiltered <yes or no> [default: yes] 2171 Write out a file containing filtered molecules. Its name is automatically 2172 generated from the specified output file. Default: <OutfileRoot>_ 2173 Filtered.<OutfileExt>. 2174 --outfilesFilteredByRules <yes or no> [default: auto] 2175 Write out SD files containing filtered molecules for individual torsion 2176 rules triggering alerts in molecules. The name of SD files are automatically 2177 generated from the specified output file. Default file names: <OutfileRoot>_ 2178 Filtered_TopRule*.sdf. 2179 2180 Default value: 'yes' for 'MaxSingleEnergy' of '-a, --alertsMode' option'; 2181 otherwise, 'no'. 2182 2183 The output files are only generated for 'MaxSingleEnergy' of 2184 '-a, --alertsMode' option. 2185 2186 The following alerts information is added to SD output files: 2187 2188 > <RotBondsCount> 2189 Number 2190 2191 > <TotalEnergy> 2192 Number 2193 2194 > <TotalEnergyLowerBoundCI> 2195 Number 2196 2197 > <TotalEnergyUpperBoundCI> 2198 Number 2199 2200 > <MaxSingleEnergy> 2201 Number 2202 2203 > <MaxSingleEnergyAlertsCount> 2204 Number 2205 2206 > <AnglesNotObservedCount> 2207 Number 2208 2209 > <TorsionRule (HierarchyClass HierarchySubClass TorsionRule 2210 EnergyMethod)> 2211 ClassName SubClassName EnergyMethod SMARTS 2212 ... ... ... 2213 2214 > <TorsionRuleMaxSingleEnergyAlertsCount> 2215 Number 2216 2217 > <TorsionRuleAnglesNotObservedCount> 2218 Number 2219 2220 > <TorsionRuleAlerts (RotBondIndices TorsionIndices TorsionAngle 2221 Energy EnergyLowerBoundCI EnergyUpperBoundCI 2222 AngleNotObserved MaxSingleEnergyAlert)> 2223 AtomIndex2,AtomIndex3 AtomIndex1,AtomIndex2,AtomIndex3,AtomIndex4 2224 Angle Energy EnergyLowerBoundCI EnergyUpperBoundCI EnergyMethod 2225 Yes|No|NA Yes|No|NA 2226 ... ... ... 2227 2228 For example: 2229 2230 > <RotBondsCount> (1) 2231 8 2232 2233 > <TotalEnergy> (1) 2234 6.1889 2235 2236 > <TotalEnergyLowerBoundCI> (1) 2237 5.1940 2238 2239 > <TotalEnergyUpperBoundCI> (1) 2240 NA 2241 2242 > <MaxSingleEnergy> (1) 2243 1.9576 2244 2245 > <MaxSingleEnergyAlertsCount> (1) 2246 1 2247 2248 > <AnglesNotObservedCount> (1) 2249 0 2250 2251 > <TorsionRule (HierarchyClass HierarchySubClass TorsionRule 2252 EnergyMethod)> (1) 2253 CC None/[CX4:2][CX4:3] [!#1:1][CX4:2]!@[CX4:3][!#1:4] Exact 2254 2255 > <TorsionRuleMaxSingleEnergyAlertsCount> (1) 2256 0 2257 2258 > <TorsionRuleAnglesNotObservedCount> (1) 2259 0 2260 2261 > <TorsionRuleAlerts (RotBondIndices TorsionIndices TorsionAngle 2262 Energy EnergyLowerBoundCI EnergyUpperBoundCI AngleNotObserved 2263 MaxSingleEnergyAlert)> (1) 2264 1,3 0,1,3,4 72.63 0.8946 0.8756 0.9145 NA No 2265 2266 --outfilesFilteredByRulesMaxCount <All or number> [default: 10] 2267 Write out SD files containing filtered molecules for specified number of 2268 top N torsion rules triggering alerts for the largest number of molecules 2269 or for all torsion rules triggering alerts across all molecules. 2270 2271 These output files are only generated for 'MaxSingleEnergy' value of 2272 '-a, --alertsMode' option. 2273 --outfileSummary <yes or no> [default: auto] 2274 Write out a CVS text file containing summary of torsions rules responsible 2275 for triggering torsion alerts. Its name is automatically generated from the 2276 specified output file. Default: <OutfileRoot>_AlertsSummary.csv. 2277 2278 Default value: 'yes' for 'MaxSingleEnergy' of '-a, --alertsMode' option'; 2279 otherwise, 'no'. 2280 2281 The summary output file is only generated for 'MaxSingleEnergy' of 2282 '-a, --alertsMode' option. 2283 2284 The following alerts information is written to summary text file: 2285 2286 TorsionRule, HierarchyClass, HierarchySubClass, EnergyMethod, 2287 MaxSingleEnergyTorsionAlertTypes, MaxSingleEnergyTorsionAlertCount, 2288 MaxSingleEnergyTorsionAlertMolCount 2289 2290 The double quotes characters are removed from SMART patterns before 2291 before writing them to a CSV file. In addition, the torsion rules are sorted by 2292 TorsionAlertMolCount. 2293 --outfileSDFieldLabels <Type,Label,...> [default: auto] 2294 A comma delimited list of SD data field type and label value pairs for writing 2295 torsion alerts information along with molecules to SD files. 2296 2297 The supported SD data field label type along with their default values are 2298 shown below: 2299 2300 For all SD files: 2301 2302 RotBondsCountLabel, RotBondsCount, 2303 2304 TotalEnergyLabel, TotalEnergy, 2305 TotalEnergyLowerBoundCILabel, TotalEnergyLowerBoundCI, 2306 TotalEnergyUpperBoundCILabel, TotalEnergyUpperBoundCI, 2307 2308 MaxSingleEnergyLabel, MaxSingleEnergy, 2309 MaxSingleEnergyAlertsCountLabel, 2310 MaxSingleEnergyAlertsCount 2311 2312 AnglesNotObservedCountLabel, 2313 AnglesNotObservedCount 2314 2315 TorsionAlertsLabel, TorsionAlerts(RotBondIndices TorsionIndices 2316 TorsionAngle Energy EnergyLowerBoundCI EnergyUpperBoundCI 2317 HierarchyClass HierarchySubClass TorsionRule 2318 EnergyMethod AngleNotObserved) 2319 2320 For individual SD files filtered by torsion rules: 2321 2322 TorsionRuleLabel, TorsionRule (HierarchyClass HierarchySubClass 2323 EnergyMethod TorsionRule) 2324 TorsionRuleMaxSingleEnergyAlertsCountLabel, 2325 TorsionRuleMaxSingleEnergyAlertsCount, 2326 TorsionRuleAnglesNotObservedCountLabel, 2327 TorsionRuleAnglesNotObservedCount, 2328 TorsionRuleAlertsLabel, TorsionRuleAlerts (RotBondIndices 2329 TorsionIndices TorsionAngle Energy EnergyLowerBoundCI 2330 EnergyUpperBoundCI EnergyMethod AngleObserved) 2331 2332 --outfileParams <Name,Value,...> [default: auto] 2333 A comma delimited list of parameter name and value pairs for writing 2334 molecules to files. The supported parameter names for different file 2335 formats, along with their default values, are shown below: 2336 2337 SD: kekulize,yes,forceV3000,no 2338 2339 --overwrite 2340 Overwrite existing files. 2341 --precision <number> [default: 4] 2342 Floating point precision for writing torsion strain energy values. 2343 -r, --rotBondsSMARTSMode <NonStrict, SemiStrict,...> [default: SemiStrict] 2344 SMARTS pattern to use for identifying rotatable bonds in a molecule 2345 for matching against torsion rules in the torsion library. Possible values: 2346 NonStrict, SemiStrict, Strict or Specify. The rotatable bond SMARTS matches 2347 are filtered to ensure that each atom in the rotatable bond is attached to 2348 at least two heavy atoms. 2349 2350 The following SMARTS patterns are used to identify rotatable bonds for 2351 different modes: 2352 2353 NonStrict: [!$(*#*)&!D1]-&!@[!$(*#*)&!D1] 2354 2355 SemiStrict: 2356 [!$(*#*)&!D1&!$(C(F)(F)F)&!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br) 2357 &!$(C([CH3])([CH3])[CH3])]-!@[!$(*#*)&!D1&!$(C(F)(F)F) 2358 &!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)&!$(C([CH3])([CH3])[CH3])] 2359 2360 Strict: 2361 [!$(*#*)&!D1&!$(C(F)(F)F)&!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br) 2362 &!$(C([CH3])([CH3])[CH3])&!$([CD3](=[N,O,S])-!@[#7,O,S!D1]) 2363 &!$([#7,O,S!D1]-!@[CD3]=[N,O,S])&!$([CD3](=[N+])-!@[#7!D1]) 2364 &!$([#7!D1]-!@[CD3]=[N+])]-!@[!$(*#*)&!D1&!$(C(F)(F)F) 2365 &!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)&!$(C([CH3])([CH3])[CH3])] 2366 2367 The 'NonStrict' and 'Strict' SMARTS patterns are available in RDKit. The 2368 'NonStrict' SMARTS pattern corresponds to original Daylight SMARTS 2369 specification for rotatable bonds. The 'SemiStrict' SMARTS pattern is 2370 derived from 'Strict' SMARTS patterns for its usage in this script. 2371 2372 You may use any arbitrary SMARTS pattern to identify rotatable bonds by 2373 choosing 'Specify' value for '-r, --rotBondsSMARTSMode' option and providing its 2374 value via '--rotBondsSMARTSPattern' option. 2375 --rotBondsSMARTSPattern <SMARTS> 2376 SMARTS pattern for identifying rotatable bonds. This option is only valid 2377 for 'Specify' value of '-r, --rotBondsSMARTSMode' option. 2378 -t, --torsionEnergyLibraryFile <FileName or auto> [default: auto] 2379 Specify a XML file name containing data for torsion starin energy library 2380 hierarchy or use default file, TorsionEnergyLibrary.xml, available in 2381 MAYACHEMTOOLS/lib/data directory. 2382 2383 The format of data in local XML file must match format of the data in Torsion 2384 Library [ Ref 153 ] file available in MAYACHEMTOOLS data directory. 2385 -w, --workingdir <dir> 2386 Location of working directory which defaults to the current directory. 2387 2388 Examples: 2389 To filter molecules containing rotatable bonds with total strain energy value 2390 of >= 6.0 (TEUs) based on torsion rules in the torsion energy library and write 2391 write out SD files containing remaining and filtered molecules, type: 2392 2393 % RDKitFilterTorsionStrainEnergyAlerts.py -i Sample3D.sdf 2394 -o Sample3DOut.sdf 2395 2396 To filter molecules containing any rotatable bonds with strain energy value of 2397 >= 1.8 (TEUs) based on torsion rules in the torsion energy library and write out 2398 SD files containing remaining and filtered molecules, and individual SD files for 2399 torsion rules triggering alerts along with appropriate torsion information for 2400 red alerts, type: 2401 2402 % RDKitFilterTorsionStrainEnergyAlerts.py -a MaxSingleEnergy 2403 -i Sample3D.sdf -o Sample3DOut.sdf 2404 2405 To filter molecules containing rotatable bonds with total strain energy value 2406 of >= 6.0 (TEUs) or any single strain energy value of >= 1.8 (TEUs) and write out 2407 SD files containing remaining and filtered molecules, type: 2408 2409 % RDKitFilterTorsionStrainEnergyAlerts.py -a TotalOrMaxSingleEnergy 2410 -i Sample3D.sdf -o Sample3DOut.sdf 2411 2412 To filter molecules containing rotatable bonds with specific cutoff values for 2413 total or single torsion strain energy and write out SD files containing 2414 remaining and filtered molecules, type: 2415 2416 % RDKitFilterTorsionStrainEnergyAlerts.py -a TotalOrMaxSingleEnergy 2417 -i Sample3D.sdf -o Sample3DOut.sdf --alertsTotalEnergyCutoff 6.0 2418 --alertsMaxSingleEnergyCutoff 1.8 2419 2420 To run the first example for filtering molecules and writing out torsion 2421 information for all alert types to SD files, type: 2422 2423 % RDKitFilterTorsionStrainEnergyAlerts.py -i Sample3D.sdf 2424 -o Sample3DOut.sdf --outfileAlertsMode All 2425 2426 To run the first example for filtering molecules in multiprocessing mode on 2427 all available CPUs without loading all data into memory and write out SD files, 2428 type: 2429 2430 % RDKitFilterTorsionStrainEnergyAlerts.py --mp yes -i Sample3D.sdf 2431 -o Sample3DOut.sdf 2432 2433 To run the first example for filtering molecules in multiprocessing mode on 2434 all available CPUs by loading all data into memory and write out a SD files, 2435 type: 2436 2437 % RDKitFilterTorsionStrainEnergyAlerts.py --mp yes --mpParams 2438 "inputDataMode, InMemory" -i Sample3D.sdf -o Sample3DOut.sdf 2439 2440 To run the first example for filtering molecules in multiprocessing mode on 2441 specific number of CPUs and chunksize without loading all data into memory 2442 and write out SD files, type: 2443 2444 % RDKitFilterTorsionStrainEnergyAlerts.py --mp yes --mpParams 2445 "inputDataMode,lazy,numProcesses,4,chunkSize,8" -i Sample3D.sdf 2446 -o Sample3DOut.sdf 2447 2448 To list information about default torsion library file without performing any 2449 filtering, type: 2450 2451 % RDKitFilterTorsionStrainEnergyAlerts.py -l 2452 2453 To list information about a local torsion library XML file without performing 2454 any, filtering, type: 2455 2456 % RDKitFilterTorsionStrainEnergyAlerts.py --torsionEnergyLibraryFile 2457 TorsionStrainEnergyLibrary.xml -l 2458 2459 Author: 2460 Manish Sud (msud@san.rr.com) 2461 2462 Collaborator: 2463 Pat Walters 2464 2465 See also: 2466 RDKitFilterChEMBLAlerts.py, RDKitFilterPAINS.py, RDKitFilterTorsionLibraryAlerts.py, 2467 RDKitConvertFileFormat.py, RDKitSearchSMARTS.py 2468 2469 Copyright: 2470 Copyright (C) 2023 Manish Sud. All rights reserved. 2471 2472 This script uses the torsion strain energy library developed by Gu, S.; 2473 Smith, M. S.; Yang, Y.; Irwin, J. J.; Shoichet, B. K. [ Ref 153 ]. 2474 2475 The torsion strain enegy library is based on the Torsion Library jointly 2476 developed by the University of Hamburg, Center for Bioinformatics, 2477 Hamburg, Germany and F. Hoffmann-La-Roche Ltd., Basel, Switzerland. 2478 2479 The functionality available in this script is implemented using RDKit, an 2480 open source toolkit for cheminformatics developed by Greg Landrum. 2481 2482 This file is part of MayaChemTools. 2483 2484 MayaChemTools is free software; you can redistribute it and/or modify it under 2485 the terms of the GNU Lesser General Public License as published by the Free 2486 Software Foundation; either version 3 of the License, or (at your option) any 2487 later version. 2488 2489 """ 2490 2491 if __name__ == "__main__": 2492 main()