1 #!/bin/env python 2 # 3 # File: RDKitFilterTorsionLibraryAlerts.py 4 # Author: Manish Sud <msud@san.rr.com> 5 # 6 # Collaborator: Pat Walters 7 # 8 # Acknowledgments: Wolfgang Guba, Patrick Penner, Levi Pierce 9 # 10 # Copyright (C) 2023 Manish Sud. All rights reserved. 11 # 12 # This script uses the Torsion Library jointly developed by the University 13 # of Hamburg, Center for Bioinformatics, Hamburg, Germany and 14 # F. Hoffmann-La-Roche Ltd., Basel, Switzerland. 15 # 16 # The functionality available in this script is implemented using RDKit, an 17 # open source toolkit for cheminformatics developed by Greg Landrum. 18 # 19 # This file is part of MayaChemTools. 20 # 21 # MayaChemTools is free software; you can redistribute it and/or modify it under 22 # the terms of the GNU Lesser General Public License as published by the Free 23 # Software Foundation; either version 3 of the License, or (at your option) any 24 # later version. 25 # 26 # MayaChemTools is distributed in the hope that it will be useful, but without 27 # any warranty; without even the implied warranty of merchantability of fitness 28 # for a particular purpose. See the GNU Lesser General Public License for more 29 # details. 30 # 31 # You should have received a copy of the GNU Lesser General Public License 32 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or 33 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, 34 # Boston, MA, 02111-1307, USA. 35 # 36 37 from __future__ import print_function 38 39 # Add local python path to the global path and import standard library modules... 40 import os 41 import sys; sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python")) 42 import time 43 import re 44 import glob 45 import multiprocessing as mp 46 import math 47 import numpy as np 48 49 # RDKit imports... 50 try: 51 from rdkit import rdBase 52 from rdkit import Chem 53 from rdkit.Chem import rdMolTransforms 54 except ImportError as ErrMsg: 55 sys.stderr.write("\nFailed to import RDKit module/package: %s\n" % ErrMsg) 56 sys.stderr.write("Check/update your RDKit environment and try again.\n\n") 57 sys.exit(1) 58 59 # MayaChemTools imports... 60 try: 61 from docopt import docopt 62 import MiscUtil 63 import RDKitUtil 64 import TorsionLibraryUtil 65 except ImportError as ErrMsg: 66 sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg) 67 sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n") 68 sys.exit(1) 69 70 ScriptName = os.path.basename(sys.argv[0]) 71 Options = {} 72 OptionsInfo = {} 73 74 TorsionLibraryInfo = {} 75 76 def main(): 77 """Start execution of the script.""" 78 79 MiscUtil.PrintInfo("\n%s (RDKit v%s; MayaChemTools v%s; %s): Starting...\n" % (ScriptName, rdBase.rdkitVersion, MiscUtil.GetMayaChemToolsVersion(), time.asctime())) 80 81 (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime() 82 83 # Retrieve command line arguments and options... 84 RetrieveOptions() 85 86 if Options["--list"]: 87 # Handle listing of torsion library information... 88 ProcessListTorsionLibraryOption() 89 else: 90 # Process and validate command line arguments and options... 91 ProcessOptions() 92 93 # Perform actions required by the script... 94 PerformFiltering() 95 96 MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName) 97 MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime)) 98 99 def PerformFiltering(): 100 """Filter molecules using SMARTS torsion rules in the torsion library file.""" 101 102 # Process torsion library info... 103 ProcessTorsionLibraryInfo() 104 105 # Set up a pattern molecule for rotatable bonds... 106 RotBondsPatternMol = Chem.MolFromSmarts(OptionsInfo["RotBondsSMARTSPattern"]) 107 108 # Setup a molecule reader... 109 MiscUtil.PrintInfo("\nProcessing file %s..." % OptionsInfo["Infile"]) 110 Mols = RDKitUtil.ReadMolecules(OptionsInfo["Infile"], **OptionsInfo["InfileParams"]) 111 112 MolCount, ValidMolCount, RemainingMolCount, WriteFailedCount = ProcessMolecules(Mols, RotBondsPatternMol) 113 114 MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount) 115 MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount) 116 MiscUtil.PrintInfo("Number of molecules failed during writing: %d" % WriteFailedCount) 117 MiscUtil.PrintInfo("Number of ignored molecules: %d" % (MolCount - ValidMolCount + WriteFailedCount)) 118 119 MiscUtil.PrintInfo("\nNumber of remaining molecules: %d" % RemainingMolCount) 120 MiscUtil.PrintInfo("Number of filtered molecules: %d" % (ValidMolCount - RemainingMolCount)) 121 122 def ProcessMolecules(Mols, RotBondsPatternMol): 123 """Process and filter molecules.""" 124 125 if OptionsInfo["MPMode"]: 126 return ProcessMoleculesUsingMultipleProcesses(Mols, RotBondsPatternMol) 127 else: 128 return ProcessMoleculesUsingSingleProcess(Mols, RotBondsPatternMol) 129 130 def ProcessMoleculesUsingSingleProcess(Mols, RotBondsPatternMol): 131 """Process and filter molecules using a single process.""" 132 133 SetupTorsionLibraryInfo() 134 135 MiscUtil.PrintInfo("\nFiltering molecules...") 136 137 OutfileFilteredMode = OptionsInfo["OutfileFilteredMode"] 138 139 # Set up writers... 140 OutfilesWriters = SetupOutfilesWriters() 141 142 WriterRemaining = OutfilesWriters["WriterRemaining"] 143 WriterFiltered = OutfilesWriters["WriterFiltered"] 144 WriterAlertSummary = OutfilesWriters["WriterAlertSummary"] 145 146 # Initialize alerts summary info... 147 TorsionAlertsSummaryInfo = InitializeTorsionAlertsSummaryInfo() 148 149 (MolCount, ValidMolCount, RemainingMolCount, WriteFailedCount, FilteredMolWriteCount) = [0] * 5 150 for Mol in Mols: 151 MolCount += 1 152 153 if Mol is None: 154 continue 155 156 if RDKitUtil.IsMolEmpty(Mol): 157 MiscUtil.PrintWarning("Ignoring empty molecule: %s" % RDKitUtil.GetMolName(Mol, MolCount)) 158 continue 159 160 # Check for 3D flag... 161 if not Mol.GetConformer().Is3D(): 162 MiscUtil.PrintWarning("3D tag is not set. Ignoring molecule: %s\n" % RDKitUtil.GetMolName(Mol, MolCount)) 163 continue 164 165 ValidMolCount += 1 166 167 # Identify torsion library alerts for rotatable bonds.. 168 RotBondsAlertsStatus, RotBondsAlertsInfo = IdentifyTorsionLibraryAlertsForRotatableBonds(Mol, RotBondsPatternMol) 169 170 TrackTorsionAlertsSummaryInfo(TorsionAlertsSummaryInfo, RotBondsAlertsInfo) 171 172 # Write out filtered and remaining molecules... 173 WriteStatus = True 174 if RotBondsAlertsStatus: 175 if OutfileFilteredMode: 176 WriteStatus = WriteMolecule(WriterFiltered, Mol, RotBondsAlertsInfo) 177 if WriteStatus: 178 FilteredMolWriteCount += 1 179 else: 180 RemainingMolCount += 1 181 WriteStatus = WriteMolecule(WriterRemaining, Mol, RotBondsAlertsInfo) 182 183 if not WriteStatus: 184 WriteFailedCount += 1 185 186 WriteTorsionAlertsSummaryInfo(WriterAlertSummary, TorsionAlertsSummaryInfo) 187 CloseOutfilesWriters(OutfilesWriters) 188 189 if FilteredMolWriteCount: 190 WriteTorsionAlertsFilteredByRulesInfo(TorsionAlertsSummaryInfo) 191 192 return (MolCount, ValidMolCount, RemainingMolCount, WriteFailedCount) 193 194 def ProcessMoleculesUsingMultipleProcesses(Mols, RotBondsPatternMol): 195 """Process and filter molecules using multiprocessing.""" 196 197 MiscUtil.PrintInfo("\nFiltering molecules using multiprocessing...") 198 199 MPParams = OptionsInfo["MPParams"] 200 OutfileFilteredMode = OptionsInfo["OutfileFilteredMode"] 201 202 # Set up writers... 203 OutfilesWriters = SetupOutfilesWriters() 204 205 WriterRemaining = OutfilesWriters["WriterRemaining"] 206 WriterFiltered = OutfilesWriters["WriterFiltered"] 207 WriterAlertSummary = OutfilesWriters["WriterAlertSummary"] 208 209 # Initialize alerts summary info... 210 TorsionAlertsSummaryInfo = InitializeTorsionAlertsSummaryInfo() 211 212 # Setup data for initializing a worker process... 213 MiscUtil.PrintInfo("Encoding options info and rotatable bond pattern molecule...") 214 OptionsInfo["EncodedRotBondsPatternMol"] = RDKitUtil.MolToBase64EncodedMolString(RotBondsPatternMol) 215 InitializeWorkerProcessArgs = (MiscUtil.ObjectToBase64EncodedString(Options), MiscUtil.ObjectToBase64EncodedString(OptionsInfo)) 216 217 # Setup a encoded mols data iterable for a worker process... 218 WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStrings(Mols) 219 220 # Setup process pool along with data initialization for each process... 221 MiscUtil.PrintInfo("\nConfiguring multiprocessing using %s method..." % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()")) 222 MiscUtil.PrintInfo("NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n" % (MPParams["NumProcesses"], MPParams["InputDataMode"], ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"]))) 223 224 ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeWorkerProcess, InitializeWorkerProcessArgs) 225 226 # Start processing... 227 if re.match("^Lazy$", MPParams["InputDataMode"], re.I): 228 Results = ProcessPool.imap(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"]) 229 elif re.match("^InMemory$", MPParams["InputDataMode"], re.I): 230 Results = ProcessPool.map(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"]) 231 else: 232 MiscUtil.PrintError("The value, %s, specified for \"--inputDataMode\" is not supported." % (MPParams["InputDataMode"])) 233 234 (MolCount, ValidMolCount, RemainingMolCount, WriteFailedCount, FilteredMolWriteCount) = [0] * 5 235 for Result in Results: 236 MolCount += 1 237 MolIndex, EncodedMol, RotBondsAlertsStatus, RotBondsAlertsInfo = Result 238 239 if EncodedMol is None: 240 continue 241 ValidMolCount += 1 242 243 Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol) 244 245 TrackTorsionAlertsSummaryInfo(TorsionAlertsSummaryInfo, RotBondsAlertsInfo) 246 247 # Write out filtered and remaining molecules... 248 WriteStatus = True 249 if RotBondsAlertsStatus: 250 if OutfileFilteredMode: 251 WriteStatus = WriteMolecule(WriterFiltered, Mol, RotBondsAlertsInfo) 252 if WriteStatus: 253 FilteredMolWriteCount += 1 254 else: 255 RemainingMolCount += 1 256 WriteStatus = WriteMolecule(WriterRemaining, Mol, RotBondsAlertsInfo) 257 258 if not WriteStatus: 259 WriteFailedCount += 1 260 261 WriteTorsionAlertsSummaryInfo(WriterAlertSummary, TorsionAlertsSummaryInfo) 262 CloseOutfilesWriters(OutfilesWriters) 263 264 if FilteredMolWriteCount: 265 WriteTorsionAlertsFilteredByRulesInfo(TorsionAlertsSummaryInfo) 266 267 return (MolCount, ValidMolCount, RemainingMolCount, WriteFailedCount) 268 269 def InitializeWorkerProcess(*EncodedArgs): 270 """Initialize data for a worker process.""" 271 272 global Options, OptionsInfo 273 274 MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid()) 275 276 # Decode Options and OptionInfo... 277 Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0]) 278 OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1]) 279 280 # Decode RotBondsPatternMol... 281 OptionsInfo["RotBondsPatternMol"] = RDKitUtil.MolFromBase64EncodedMolString(OptionsInfo["EncodedRotBondsPatternMol"]) 282 283 # Setup torsion library... 284 RetrieveTorsionLibraryInfo(Quiet = True) 285 SetupTorsionLibraryInfo(Quiet = True) 286 287 def WorkerProcess(EncodedMolInfo): 288 """Process data for a worker process.""" 289 290 MolIndex, EncodedMol = EncodedMolInfo 291 292 if EncodedMol is None: 293 return [MolIndex, None, False, None] 294 295 Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol) 296 if RDKitUtil.IsMolEmpty(Mol): 297 MolName = RDKitUtil.GetMolName(Mol, (MolIndex + 1)) 298 MiscUtil.PrintWarning("Ignoring empty molecule: %s" % MolName) 299 return [MolIndex, None, False, None] 300 301 # Check for 3D flag... 302 if not Mol.GetConformer().Is3D(): 303 MolName = RDKitUtil.GetMolName(Mol, (MolIndex + 1)) 304 MiscUtil.PrintWarning("3D tag is not set. Ignoring molecule: %s\n" % MolName) 305 return [MolIndex, None, False, None] 306 307 # Identify torsion library alerts for rotatable bonds.. 308 RotBondsAlertsStatus, RotBondsAlertsInfo = IdentifyTorsionLibraryAlertsForRotatableBonds(Mol, OptionsInfo["RotBondsPatternMol"]) 309 310 return [MolIndex, EncodedMol, RotBondsAlertsStatus, RotBondsAlertsInfo] 311 312 def IdentifyTorsionLibraryAlertsForRotatableBonds(Mol, RotBondsPatternMol): 313 """Identify rotatable bonds for torsion library alerts.""" 314 315 # Identify rotatable bonds... 316 RotBondsStatus, RotBondsInfo = TorsionLibraryUtil.IdentifyRotatableBondsForTorsionLibraryMatch(TorsionLibraryInfo, Mol, RotBondsPatternMol) 317 if not RotBondsStatus: 318 return (False, None) 319 320 # Identify alerts for rotatable bonds... 321 RotBondsAlertsStatus, RotBondsAlertsInfo = MatchRotatableBondsToTorsionLibrary(Mol, RotBondsInfo) 322 323 return (RotBondsAlertsStatus, RotBondsAlertsInfo) 324 325 def MatchRotatableBondsToTorsionLibrary(Mol, RotBondsInfo): 326 """Match rotatable bond to torsion library.""" 327 328 # Initialize... 329 RotBondsAlertsInfo = InitializeRotatableBondsAlertsInfo() 330 331 # Match rotatable bonds to torsion library... 332 for ID in RotBondsInfo["IDs"]: 333 AtomIndices = RotBondsInfo["AtomIndices"][ID] 334 HierarchyClass = RotBondsInfo["HierarchyClass"][ID] 335 336 MatchStatus, MatchInfo = MatchRotatableBondToTorsionLibrary(Mol, AtomIndices, HierarchyClass) 337 338 if MatchInfo is None or len(MatchInfo) == 0: 339 AlertType, TorsionAtomIndices, TorsionAngle, TorsionAngleViolation, HierarchyClassName, HierarchySubClassName, TorsionRuleNodeID, TorsionRulePeaks, TorsionRuleTolerances1, TorsionRuleTolerances2, TorsionRuleSMARTS = [None] * 11 340 else: 341 AlertType, TorsionAtomIndices, TorsionAngle, TorsionAngleViolation, HierarchyClassName, HierarchySubClassName, TorsionRuleNodeID, TorsionRulePeaks, TorsionRuleTolerances1, TorsionRuleTolerances2, TorsionRuleSMARTS = MatchInfo 342 343 # Track alerts information... 344 RotBondsAlertsInfo["IDs"].append(ID) 345 RotBondsAlertsInfo["MatchStatus"][ID] = MatchStatus 346 RotBondsAlertsInfo["AlertTypes"][ID] = AlertType 347 RotBondsAlertsInfo["AtomIndices"][ID] = AtomIndices 348 RotBondsAlertsInfo["TorsionAtomIndices"][ID] = TorsionAtomIndices 349 RotBondsAlertsInfo["TorsionAngles"][ID] = TorsionAngle 350 RotBondsAlertsInfo["TorsionAngleViolations"][ID] = TorsionAngleViolation 351 RotBondsAlertsInfo["HierarchyClassNames"][ID] = HierarchyClassName 352 RotBondsAlertsInfo["HierarchySubClassNames"][ID] = HierarchySubClassName 353 RotBondsAlertsInfo["TorsionRuleNodeID"][ID] = TorsionRuleNodeID 354 RotBondsAlertsInfo["TorsionRulePeaks"][ID] = TorsionRulePeaks 355 RotBondsAlertsInfo["TorsionRuleTolerances1"][ID] = TorsionRuleTolerances1 356 RotBondsAlertsInfo["TorsionRuleTolerances2"][ID] = TorsionRuleTolerances2 357 RotBondsAlertsInfo["TorsionRuleSMARTS"][ID] = TorsionRuleSMARTS 358 359 # Count alert types... 360 if AlertType is not None: 361 if AlertType not in RotBondsAlertsInfo["Count"]: 362 RotBondsAlertsInfo["Count"][AlertType] = 0 363 RotBondsAlertsInfo["Count"][AlertType] += 1 364 365 # Setup alert status for rotatable bonds... 366 RotBondsAlertsStatus = False 367 AlertsCount = 0 368 for ID in RotBondsInfo["IDs"]: 369 if RotBondsAlertsInfo["AlertTypes"][ID] in OptionsInfo["SpecifiedAlertsModeList"]: 370 AlertsCount += 1 371 if AlertsCount >= OptionsInfo["MinAlertsCount"]: 372 RotBondsAlertsStatus = True 373 break 374 375 return (RotBondsAlertsStatus, RotBondsAlertsInfo) 376 377 def InitializeRotatableBondsAlertsInfo(): 378 """Initialize alerts information for rotatable bonds.""" 379 380 RotBondsAlertsInfo = {} 381 RotBondsAlertsInfo["IDs"] = [] 382 383 for DataLabel in ["MatchStatus", "AlertTypes", "AtomIndices", "TorsionAtomIndices", "TorsionAngles", "TorsionAngleViolations", "HierarchyClassNames", "HierarchySubClassNames", "TorsionRuleNodeID", "TorsionRulePeaks", "TorsionRuleTolerances1", "TorsionRuleTolerances2", "TorsionRuleSMARTS", "Count"]: 384 RotBondsAlertsInfo[DataLabel] = {} 385 386 return RotBondsAlertsInfo 387 388 def MatchRotatableBondToTorsionLibrary(Mol, RotBondAtomIndices, RotBondHierarchyClass): 389 """Match rotatable bond to torsion library.""" 390 391 if TorsionLibraryUtil.IsSpecificHierarchyClass(TorsionLibraryInfo, RotBondHierarchyClass): 392 MatchStatus, MatchInfo = MatchRotatableBondAgainstSpecificHierarchyClass(Mol, RotBondAtomIndices, RotBondHierarchyClass) 393 if not MatchStatus: 394 MatchStatus, MatchInfo = MatchRotatableBondAgainstGenericHierarchyClass(Mol, RotBondAtomIndices, RotBondHierarchyClass) 395 else: 396 MatchStatus, MatchInfo = MatchRotatableBondAgainstGenericHierarchyClass(Mol, RotBondAtomIndices, RotBondHierarchyClass) 397 398 return (MatchStatus, MatchInfo) 399 400 def MatchRotatableBondAgainstSpecificHierarchyClass(Mol, RotBondAtomIndices, RotBondHierarchyClass): 401 """Match rotatable bond against a specific hierarchy class.""" 402 403 HierarchyClassElementNode = None 404 if RotBondHierarchyClass in TorsionLibraryInfo["SpecificClasses"]["ElementNode"]: 405 HierarchyClassElementNode = TorsionLibraryInfo["SpecificClasses"]["ElementNode"][RotBondHierarchyClass] 406 407 if HierarchyClassElementNode is None: 408 return (False, None, None, None) 409 410 TorsionLibraryUtil.TrackHierarchyClassElementNode(TorsionLibraryInfo, HierarchyClassElementNode) 411 MatchStatus, MatchInfo = ProcessElementForRotatableBondMatch(Mol, RotBondAtomIndices, HierarchyClassElementNode) 412 TorsionLibraryUtil.RemoveLastHierarchyClassElementNodeFromTracking(TorsionLibraryInfo) 413 414 return (MatchStatus, MatchInfo) 415 416 def MatchRotatableBondAgainstGenericHierarchyClass(Mol, RotBondAtomIndices, RotBondHierarchyClass): 417 """Match rotatable bond against a generic hierarchy class.""" 418 419 HierarchyClassElementNode = TorsionLibraryUtil.GetGenericHierarchyClassElementNode(TorsionLibraryInfo) 420 if HierarchyClassElementNode is None: 421 return (False, None) 422 423 TorsionLibraryUtil.TrackHierarchyClassElementNode(TorsionLibraryInfo, HierarchyClassElementNode) 424 425 # Match hierarchy subclasses before matching torsion rules... 426 MatchStatus, MatchInfo = MatchRotatableBondAgainstGenericHierarchySubClasses(Mol, RotBondAtomIndices, HierarchyClassElementNode) 427 428 if not MatchStatus: 429 MatchStatus, MatchInfo = MatchRotatableBondAgainstGenericHierarchyTorsionRules(Mol, RotBondAtomIndices, HierarchyClassElementNode) 430 431 TorsionLibraryUtil.RemoveLastHierarchyClassElementNodeFromTracking(TorsionLibraryInfo) 432 433 return (MatchStatus, MatchInfo) 434 435 def MatchRotatableBondAgainstGenericHierarchySubClasses(Mol, RotBondAtomIndices, HierarchyClassElementNode): 436 """Match rotatable bond againat generic hierarchy subclasses.""" 437 438 for ElementChildNode in HierarchyClassElementNode: 439 if ElementChildNode.tag != "hierarchySubClass": 440 continue 441 442 SubClassMatchStatus = ProcessHierarchySubClassElementForRotatableBondMatch(Mol, RotBondAtomIndices, ElementChildNode) 443 444 if SubClassMatchStatus: 445 MatchStatus, MatchInfo = ProcessElementForRotatableBondMatch(Mol, RotBondAtomIndices, ElementChildNode) 446 447 if MatchStatus: 448 return (MatchStatus, MatchInfo) 449 450 return(False, None) 451 452 def MatchRotatableBondAgainstGenericHierarchyTorsionRules(Mol, RotBondAtomIndices, HierarchyClassElementNode): 453 """Match rotatable bond againat torsion rules generic hierarchy class.""" 454 455 for ElementChildNode in HierarchyClassElementNode: 456 if ElementChildNode.tag != "torsionRule": 457 continue 458 459 MatchStatus, MatchInfo = ProcessTorsionRuleElementForRotatableBondMatch(Mol, RotBondAtomIndices, ElementChildNode) 460 461 if MatchStatus: 462 return (MatchStatus, MatchInfo) 463 464 return(False, None) 465 466 def ProcessElementForRotatableBondMatch(Mol, RotBondAtomIndices, ElementNode): 467 """Process element node to recursively match rotatable bond against hierarchy 468 subclasses and torsion rules.""" 469 470 for ElementChildNode in ElementNode: 471 if ElementChildNode.tag == "hierarchySubClass": 472 SubClassMatchStatus = ProcessHierarchySubClassElementForRotatableBondMatch(Mol, RotBondAtomIndices, ElementChildNode) 473 474 if SubClassMatchStatus: 475 TorsionLibraryUtil.TrackHierarchySubClassElementNode(TorsionLibraryInfo, ElementChildNode) 476 477 MatchStatus, MatchInfo = ProcessElementForRotatableBondMatch(Mol, RotBondAtomIndices, ElementChildNode) 478 if MatchStatus: 479 TorsionLibraryUtil.RemoveLastHierarchySubClassElementNodeFromTracking(TorsionLibraryInfo) 480 return (MatchStatus, MatchInfo) 481 482 TorsionLibraryUtil.RemoveLastHierarchySubClassElementNodeFromTracking(TorsionLibraryInfo) 483 484 elif ElementChildNode.tag == "torsionRule": 485 MatchStatus, MatchInfo = ProcessTorsionRuleElementForRotatableBondMatch(Mol, RotBondAtomIndices, ElementChildNode) 486 487 if MatchStatus: 488 return (MatchStatus, MatchInfo) 489 490 return (False, None) 491 492 def ProcessHierarchySubClassElementForRotatableBondMatch(Mol, RotBondAtomIndices, ElementNode): 493 """Process hierarchy subclass element to match rotatable bond.""" 494 495 # Setup subclass SMARTS pattern mol... 496 SubClassPatternMol = TorsionLibraryUtil.SetupHierarchySubClassElementPatternMol(TorsionLibraryInfo, ElementNode) 497 if SubClassPatternMol is None: 498 return False 499 500 # Match SMARTS pattern... 501 SubClassPatternMatches = RDKitUtil.FilterSubstructureMatchesByAtomMapNumbers(Mol, SubClassPatternMol, Mol.GetSubstructMatches(SubClassPatternMol, useChirality = False)) 502 if len(SubClassPatternMatches) == 0: 503 return False 504 505 # Match rotatable bond indices... 506 RotBondAtomIndex1, RotBondAtomIndex2 = RotBondAtomIndices 507 MatchStatus = False 508 for SubClassPatternMatch in SubClassPatternMatches: 509 if len(SubClassPatternMatch) == 2: 510 # Matched to pattern containing map atom numbers ":2" and ":3"... 511 CentralAtomsIndex1, CentralAtomsIndex2 = SubClassPatternMatch 512 elif len(SubClassPatternMatch) == 4: 513 # Matched to pattern containing map atom numbers ":1", ":2", ":3" and ":4"... 514 CentralAtomsIndex1 = SubClassPatternMatch[1] 515 CentralAtomsIndex2 = SubClassPatternMatch[2] 516 elif len(SubClassPatternMatch) == 3: 517 SubClassSMARTSPattern = ElementNode.get("smarts") 518 if TorsionLibraryUtil.DoesSMARTSContainsMappedAtoms(SubClassSMARTSPattern, [":2", ":3", ":4"]): 519 # Matched to pattern containing map atom numbers ":2", ":3" and ":4"... 520 CentralAtomsIndex1 = SubClassPatternMatch[0] 521 CentralAtomsIndex2 = SubClassPatternMatch[1] 522 else: 523 # Matched to pattern containing map atom numbers ":1", ":2" and ":3"... 524 CentralAtomsIndex1 = SubClassPatternMatch[1] 525 CentralAtomsIndex2 = SubClassPatternMatch[2] 526 else: 527 continue 528 529 if CentralAtomsIndex1 != CentralAtomsIndex2: 530 if ((CentralAtomsIndex1 == RotBondAtomIndex1 and CentralAtomsIndex2 == RotBondAtomIndex2) or (CentralAtomsIndex1 == RotBondAtomIndex2 and CentralAtomsIndex2 == RotBondAtomIndex1)): 531 MatchStatus = True 532 break 533 534 return (MatchStatus) 535 536 def ProcessTorsionRuleElementForRotatableBondMatch(Mol, RotBondAtomIndices, ElementNode): 537 """Process torsion rule element to match rotatable bond.""" 538 539 # Retrieve torsion matched to rotatable bond... 540 TorsionAtomIndices, TorsionAngle = MatchTorsionRuleToRotatableBond(Mol, RotBondAtomIndices, ElementNode) 541 if TorsionAtomIndices is None: 542 return (False, None) 543 544 # Setup torsion angles info for matched torsion rule... 545 TorsionAnglesInfo = TorsionLibraryUtil.SetupTorsionRuleAnglesInfo(TorsionLibraryInfo, ElementNode) 546 if TorsionAnglesInfo is None: 547 return (False, None) 548 549 # Setup torsion alert type and angle violation... 550 AlertType, TorsionAngleViolation = SetupTorsionAlertTypeForRotatableBond(TorsionAnglesInfo, TorsionAngle) 551 552 # Setup hierarchy class and subclass names... 553 HierarchyClassName, HierarchySubClassName = TorsionLibraryUtil.SetupHierarchyClassAndSubClassNamesForRotatableBond(TorsionLibraryInfo) 554 555 # Setup rule node ID... 556 TorsionRuleNodeID = ElementNode.get("NodeID") 557 558 # Setup SMARTS... 559 TorsionRuleSMARTS = ElementNode.get("smarts") 560 if " " in TorsionRuleSMARTS: 561 TorsionRuleSMARTS = TorsionRuleSMARTS.replace(" ", "") 562 563 # Setup torsion peaks and tolerances... 564 TorsionRulePeaks = TorsionAnglesInfo["ValuesList"] 565 TorsionRuleTolerances1 = TorsionAnglesInfo["Tolerances1List"] 566 TorsionRuleTolerances2 = TorsionAnglesInfo["Tolerances2List"] 567 568 MatchInfo = [AlertType, TorsionAtomIndices, TorsionAngle, TorsionAngleViolation, HierarchyClassName, HierarchySubClassName, TorsionRuleNodeID, TorsionRulePeaks, TorsionRuleTolerances1, TorsionRuleTolerances2, TorsionRuleSMARTS] 569 570 # Setup match status... 571 MatchStatus = True 572 573 return (MatchStatus, MatchInfo) 574 575 def MatchTorsionRuleToRotatableBond(Mol, RotBondAtomIndices, ElementNode): 576 """Retrieve matched torsion for torsion rule matched to rotatable bond.""" 577 578 # Get torsion matches... 579 TorsionMatches = GetMatchesForTorsionRule(Mol, ElementNode) 580 if TorsionMatches is None or len(TorsionMatches) == 0: 581 return (None, None) 582 583 # Identify the first torsion match corresponding to central atoms in RotBondAtomIndices... 584 RotBondAtomIndex1, RotBondAtomIndex2 = RotBondAtomIndices 585 for TorsionMatch in TorsionMatches: 586 CentralAtomIndex1 = TorsionMatch[1] 587 CentralAtomIndex2 = TorsionMatch[2] 588 589 if ((CentralAtomIndex1 == RotBondAtomIndex1 and CentralAtomIndex2 == RotBondAtomIndex2) or (CentralAtomIndex1 == RotBondAtomIndex2 and CentralAtomIndex2 == RotBondAtomIndex1)): 590 TorsionAngle = CalculateTorsionAngle(Mol, TorsionMatch) 591 592 return (TorsionMatch, TorsionAngle) 593 594 return (None, None) 595 596 def CalculateTorsionAngle(Mol, TorsionMatch): 597 """Calculate torsion angle.""" 598 599 if type(TorsionMatch[3]) is list: 600 return CalculateTorsionAngleUsingNitrogenLonePairPosition(Mol, TorsionMatch) 601 602 # Calculate torsion angle using torsion atom indices.. 603 MolConf = Mol.GetConformer(0) 604 TorsionAngle = rdMolTransforms.GetDihedralDeg(MolConf, TorsionMatch[0], TorsionMatch[1], TorsionMatch[2], TorsionMatch[3]) 605 TorsionAngle = round(TorsionAngle, 2) 606 607 return TorsionAngle 608 609 def CalculateTorsionAngleUsingNitrogenLonePairPosition(Mol, TorsionMatch): 610 """Calculate torsion angle using nitrogen lone pair positon.""" 611 612 # Setup a carbon atom as position holder for lone pair position... 613 TmpMol = Chem.RWMol(Mol) 614 LonePairAtomIndex = TmpMol.AddAtom(Chem.Atom(6)) 615 616 TmpMolConf = TmpMol.GetConformer(0) 617 TmpMolConf.SetAtomPosition(LonePairAtomIndex, TorsionMatch[3]) 618 619 TorsionAngle = rdMolTransforms.GetDihedralDeg(TmpMolConf, TorsionMatch[0], TorsionMatch[1], TorsionMatch[2], LonePairAtomIndex) 620 TorsionAngle = round(TorsionAngle, 2) 621 622 return TorsionAngle 623 624 def GetMatchesForTorsionRule(Mol, ElementNode): 625 """Get matches for torsion rule.""" 626 627 # Match torsions... 628 TorsionMatches = None 629 if IsNitogenLonePairTorsionRule(ElementNode): 630 TorsionMatches = GetSubstructureMatchesForNitrogenLonePairTorsionRule(Mol, ElementNode) 631 else: 632 TorsionMatches = GetSubstructureMatchesForTorsionRule(Mol, ElementNode) 633 634 if TorsionMatches is None or len(TorsionMatches) == 0: 635 return TorsionMatches 636 637 # Filter torsion matches... 638 FiltertedTorsionMatches = [] 639 for TorsionMatch in TorsionMatches: 640 if len(TorsionMatch) != 4: 641 continue 642 643 # Ignore matches containing hydrogen atoms as first or last atom... 644 if Mol.GetAtomWithIdx(TorsionMatch[0]).GetAtomicNum() == 1: 645 continue 646 if type(TorsionMatch[3]) is int: 647 # May contains a list for type two nitrogen lone pair match... 648 if Mol.GetAtomWithIdx(TorsionMatch[3]).GetAtomicNum() == 1: 649 continue 650 FiltertedTorsionMatches.append(TorsionMatch) 651 652 return FiltertedTorsionMatches 653 654 def GetSubstructureMatchesForTorsionRule(Mol, ElementNode): 655 """Get substructure matches for a torsion rule.""" 656 657 # Setup torsion rule SMARTS pattern mol.... 658 TorsionRuleNodeID = ElementNode.get("NodeID") 659 TorsionSMARTSPattern = ElementNode.get("smarts") 660 TorsionPatternMol = TorsionLibraryUtil.SetupTorsionRuleElementPatternMol(TorsionLibraryInfo, ElementNode, TorsionRuleNodeID, TorsionSMARTSPattern) 661 if TorsionPatternMol is None: 662 return None 663 664 # Match torsions... 665 TorsionMatches = RDKitUtil.FilterSubstructureMatchesByAtomMapNumbers(Mol, TorsionPatternMol, Mol.GetSubstructMatches(TorsionPatternMol, useChirality = False)) 666 667 return TorsionMatches 668 669 def GetSubstructureMatchesForNitrogenLonePairTorsionRule(Mol, ElementNode): 670 """Get substructure matches for a torsion rule containing N_lp.""" 671 672 if IsTypeOneNitogenLonePairTorsionRule(ElementNode): 673 return GetSubstructureMatchesForTypeOneNitrogenLonePairTorsionRule(Mol, ElementNode) 674 elif IsTypeTwoNitogenLonePairTorsionRule(ElementNode): 675 return GetSubstructureMatchesForTypeTwoNitrogenLonePairTorsionRule(Mol, ElementNode) 676 677 return None 678 679 def GetSubstructureMatchesForTypeOneNitrogenLonePairTorsionRule(Mol, ElementNode): 680 """Get substructure matches for a torsion rule containing N_lp and four mapped atoms.""" 681 682 # For example: 683 # [CX4:1][CX4H2:2]!@[NX3;"N_lp":3][CX4:4] 684 # [C:1][CX4H2:2]!@[NX3;"N_lp":3][C:4] 685 # ... ... ... 686 687 TorsionRuleNodeID = ElementNode.get("NodeID") 688 TorsionPatternMol, LonePairMapNumber = SetupNitrogenLonePairTorsionRuleElementInfo(ElementNode, TorsionRuleNodeID) 689 690 if TorsionPatternMol is None: 691 return None 692 693 if LonePairMapNumber is None: 694 return None 695 696 # Match torsions... 697 TorsionMatches = RDKitUtil.FilterSubstructureMatchesByAtomMapNumbers(Mol, TorsionPatternMol, Mol.GetSubstructMatches(TorsionPatternMol, useChirality = False)) 698 699 # Filter matches... 700 FiltertedTorsionMatches = [] 701 for TorsionMatch in TorsionMatches: 702 if len(TorsionMatch) != 4: 703 continue 704 705 # Check for Nitogen atom at LonePairMapNumber... 706 LonePairNitrogenAtom = Mol.GetAtomWithIdx(TorsionMatch[LonePairMapNumber - 1]) 707 if LonePairNitrogenAtom.GetSymbol() != "N": 708 continue 709 710 # Make sure LonePairNitrogenAtom is planar... 711 # test 712 PlanarityStatus = IsLonePairNitrogenAtomPlanar(Mol, LonePairNitrogenAtom) 713 if PlanarityStatus is None: 714 continue 715 716 if not PlanarityStatus: 717 continue 718 719 FiltertedTorsionMatches.append(TorsionMatch) 720 721 return FiltertedTorsionMatches 722 723 def GetSubstructureMatchesForTypeTwoNitrogenLonePairTorsionRule(Mol, ElementNode): 724 """Get substructure matches for a torsion rule containing N_lp and three mapped atoms.""" 725 726 # For example: 727 # [!#1:1][CX4:2]!@[NX3;"N_lp":3] 728 # [!#1:1][$(S(=O)=O):2]!@["N_lp":3]@[Cr3] 729 # [C:1][$(S(=O)=O):2]!@["N_lp":3] 730 # [c:1][$(S(=O)=O):2]!@["N_lp":3] 731 # [!#1:1][$(S(=O)=O):2]!@["N_lp":3] 732 # ... ... ... 733 734 TorsionRuleNodeID = ElementNode.get("NodeID") 735 TorsionPatternMol, LonePairMapNumber = SetupNitrogenLonePairTorsionRuleElementInfo(ElementNode, TorsionRuleNodeID) 736 737 if TorsionPatternMol is None: 738 return None 739 740 if not IsValidTypeTwoNitrogenLonePairMapNumber(LonePairMapNumber): 741 return None 742 743 # Match torsions... 744 TorsionMatches = RDKitUtil.FilterSubstructureMatchesByAtomMapNumbers(Mol, TorsionPatternMol, Mol.GetSubstructMatches(TorsionPatternMol, useChirality = False)) 745 746 # Filter matches... 747 FiltertedTorsionMatches = [] 748 for TorsionMatch in TorsionMatches: 749 if len(TorsionMatch) != 3: 750 continue 751 752 # Check for Nitogen atom at LonePairMapNumber... 753 LonePairNitrogenAtom = Mol.GetAtomWithIdx(TorsionMatch[LonePairMapNumber - 1]) 754 if LonePairNitrogenAtom.GetSymbol() != "N": 755 continue 756 757 # Make sure LonePairNitrogenAtom is not planar... 758 PlanarityStatus = IsLonePairNitrogenAtomPlanar(Mol, LonePairNitrogenAtom) 759 760 if PlanarityStatus is None: 761 continue 762 763 if PlanarityStatus: 764 continue 765 766 # Calculate lone pair coordinates for a non-planar nitrogen... 767 LonePairPosition = CalculateLonePairCoordinatesForNitrogenAtom(Mol, LonePairNitrogenAtom) 768 if LonePairPosition is None: 769 continue 770 771 # Append lone pair coodinate list to list of torsion match containing atom indices... 772 TorsionMatch.append(LonePairPosition) 773 774 # Track torsion matches... 775 FiltertedTorsionMatches.append(TorsionMatch) 776 777 return FiltertedTorsionMatches 778 779 def SetupNitrogenLonePairTorsionRuleElementInfo(ElementNode, TorsionRuleNodeID): 780 """Setup pattern molecule and lone pair map number for type one and type 781 two nitrogen lone pair rules.""" 782 783 TorsionPatternMol, LonePairMapNumber = [None] * 2 784 785 if TorsionRuleNodeID in TorsionLibraryInfo["DataCache"]["TorsionRulePatternMol"]: 786 TorsionPatternMol = TorsionLibraryInfo["DataCache"]["TorsionRulePatternMol"][TorsionRuleNodeID] 787 LonePairMapNumber = TorsionLibraryInfo["DataCache"]["TorsionRuleLonePairMapNumber"][TorsionRuleNodeID] 788 else: 789 # Setup torsion pattern... 790 TorsionSMARTSPattern = ElementNode.get("smarts") 791 TorsionSMARTSPattern, LonePairMapNumber = ProcessSMARTSForNitrogenLonePairTorsionRule(TorsionSMARTSPattern) 792 793 # Setup torsion pattern mol... 794 TorsionPatternMol = Chem.MolFromSmarts(TorsionSMARTSPattern) 795 if TorsionPatternMol is None: 796 MiscUtil.PrintWarning("Ignoring torsion rule element containing invalid map atoms numbers in SMARTS pattern %s" % TorsionSMARTSPattern) 797 798 # Cache data... 799 TorsionLibraryInfo["DataCache"]["TorsionRulePatternMol"][TorsionRuleNodeID] = TorsionPatternMol 800 TorsionLibraryInfo["DataCache"]["TorsionRuleLonePairMapNumber"][TorsionRuleNodeID] = LonePairMapNumber 801 802 return (TorsionPatternMol, LonePairMapNumber) 803 804 def IsLonePairNitrogenAtomPlanar(Mol, NitrogenAtom): 805 """Check for the planarity of nitrogen atom and its three neighbors.""" 806 807 AllowHydrogenNbrs = OptionsInfo["NitrogenLonePairParams"]["AllowHydrogenNbrs"] 808 Tolerance = OptionsInfo["NitrogenLonePairParams"]["PlanarityTolerance"] 809 810 # Get neighbors... 811 if AllowHydrogenNbrs: 812 AtomNeighbors = NitrogenAtom.GetNeighbors() 813 else: 814 AtomNeighbors = RDKitUtil.GetHeavyAtomNeighbors(NitrogenAtom) 815 816 if len(AtomNeighbors) != 3: 817 return None 818 819 # Setup atom positions... 820 AtomPositions = [] 821 MolAtomsPositions = RDKitUtil.GetAtomPositions(Mol) 822 823 # Neighbor positions... 824 for AtomNbr in AtomNeighbors: 825 AtomNbrIndex = AtomNbr.GetIdx() 826 AtomPositions.append(MolAtomsPositions[AtomNbrIndex]) 827 828 # Nitrogen position... 829 NitrogenAtomIndex = NitrogenAtom.GetIdx() 830 AtomPositions.append(MolAtomsPositions[NitrogenAtomIndex]) 831 832 Status = AreFourPointsCoplanar(AtomPositions[0], AtomPositions[1], AtomPositions[2], AtomPositions[3], Tolerance) 833 834 return Status 835 836 def AreFourPointsCoplanar(Point1, Point2, Point3, Point4, Tolerance = 1.0): 837 """Check whether four points are coplanar with in the threshold of 1 degree.""" 838 839 # Setup normalized direction vectors... 840 VectorP2P1 = NormalizeVector(np.subtract(Point2, Point1)) 841 VectorP3P1 = NormalizeVector(np.subtract(Point3, Point1)) 842 VectorP1P4 = NormalizeVector(np.subtract(Point1, Point4)) 843 844 # Calculate angle between VectorP1P4 and normal to vectors VectorP2P1 and VectorP3P1... 845 PlaneP1P2P3Normal = NormalizeVector(np.cross(VectorP2P1, VectorP3P1)) 846 PlanarityAngle = np.arccos(np.clip(np.dot(PlaneP1P2P3Normal, VectorP1P4), -1.0, 1.0)) 847 848 Status = math.isclose(PlanarityAngle, math.radians(90), abs_tol=math.radians(Tolerance)) 849 850 return Status 851 852 def NormalizeVector(Vector): 853 """Normalize vector.""" 854 855 Norm = np.linalg.norm(Vector) 856 857 return Vector if math.isclose(Norm, 0.0, abs_tol = 1e-08) else Vector/Norm 858 859 def CalculateLonePairCoordinatesForNitrogenAtom(Mol, NitrogenAtom): 860 """Calculate approximate lone pair coordinates for non-plannar nitrogen atom.""" 861 862 AllowHydrogenNbrs = OptionsInfo["NitrogenLonePairParams"]["AllowHydrogenNbrs"] 863 864 # Get neighbors... 865 if AllowHydrogenNbrs: 866 AtomNeighbors = NitrogenAtom.GetNeighbors() 867 else: 868 AtomNeighbors = RDKitUtil.GetHeavyAtomNeighbors(NitrogenAtom) 869 870 if len(AtomNeighbors) != 3: 871 return None 872 873 # Setup positions for nitrogen and its neghbors... 874 MolAtomsPositions = RDKitUtil.GetAtomPositions(Mol) 875 876 NitrogenPosition = MolAtomsPositions[NitrogenAtom.GetIdx()] 877 NbrPositions = [] 878 for AtomNbr in AtomNeighbors: 879 NbrPositions.append(MolAtomsPositions[AtomNbr.GetIdx()]) 880 Nbr1Position, Nbr2Position, Nbr3Position = NbrPositions 881 882 # Setup normalized direction vectors... 883 VectorP2P1 = NormalizeVector(np.subtract(Nbr2Position, Nbr1Position)) 884 VectorP3P1 = NormalizeVector(np.subtract(Nbr3Position, Nbr1Position)) 885 VectorP1P4 = NormalizeVector(np.subtract(Nbr1Position, NitrogenPosition)) 886 887 # Calculate angle between VectorP1P4 and normal to vectors VectorP2P1 and VectorP3P1... 888 PlaneP1P2P3Normal = NormalizeVector(np.cross(VectorP2P1, VectorP3P1)) 889 PlanarityAngle = np.arccos(np.clip(np.dot(PlaneP1P2P3Normal, VectorP1P4), -1.0, 1.0)) 890 891 # Check for reversing the direction of the normal... 892 if PlanarityAngle < math.radians(90): 893 PlaneP1P2P3Normal = PlaneP1P2P3Normal * -1 894 895 # Add normal to nitrogen cooridnates for the approximate coordinates of the 896 # one pair. The exact VSEPR coordinates of the lone pair are not necessary to 897 # calculate the torsion angle... 898 LonePairPosition = NitrogenPosition + PlaneP1P2P3Normal 899 900 return list(LonePairPosition) 901 902 def ProcessSMARTSForNitrogenLonePairTorsionRule(SMARTSPattern): 903 """Process SMARTS pattern for a torion rule containing N_lp.""" 904 905 LonePairMapNumber = GetNitrogenLonePairMapNumber(SMARTSPattern) 906 907 # Remove double quotes around N_lp.. 908 SMARTSPattern = re.sub("\"N_lp\"", "N_lp", SMARTSPattern, re.I) 909 910 # Remove N_lp specification from SMARTS pattern for torsion rule... 911 if re.search("\[N_lp", SMARTSPattern, re.I): 912 # Handle missing NX3... 913 SMARTSPattern = re.sub("\[N_lp", "[NX3", SMARTSPattern) 914 else: 915 SMARTSPattern = re.sub(";N_lp", "", SMARTSPattern) 916 917 return (SMARTSPattern, LonePairMapNumber) 918 919 def GetNitrogenLonePairMapNumber(SMARTSPattern): 920 """Get atom map number for nitrogen involved in N_lp.""" 921 922 LonePairMapNumber = None 923 924 SMARTSPattern = re.sub("\"N_lp\"", "N_lp", SMARTSPattern, re.I) 925 MatchedMappedAtoms = re.findall("N_lp:[0-9]", SMARTSPattern, re.I) 926 927 if len(MatchedMappedAtoms) == 1: 928 LonePairMapNumber = int(re.sub("N_lp:", "", MatchedMappedAtoms[0])) 929 930 return LonePairMapNumber 931 932 def IsNitogenLonePairTorsionRule(ElementNode): 933 """Check for the presence of N_lp in SMARTS pattern for a torsion rule.""" 934 935 if "N_lp" not in ElementNode.get("smarts"): 936 return False 937 938 LonePairMatches = re.findall("N_lp", ElementNode.get("smarts"), re.I) 939 940 return True if len(LonePairMatches) == 1 else False 941 942 def IsTypeOneNitogenLonePairTorsionRule(ElementNode): 943 """Check for the presence four mapped atoms in a SMARTS pattern containing 944 N_lp for a torsion rule.""" 945 946 # For example: 947 # [CX4:1][CX4H2:2]!@[NX3;"N_lp":3][CX4:4] 948 # [C:1][CX4H2:2]!@[NX3;"N_lp":3][C:4] 949 # ... ... ... 950 951 MatchedMappedAtoms = re.findall(":[0-9]", ElementNode.get("smarts"), re.I) 952 953 return True if len(MatchedMappedAtoms) == 4 else False 954 955 def IsTypeTwoNitogenLonePairTorsionRule(ElementNode): 956 """Check for the presence three mapped atoms in a SMARTS pattern containing 957 N_lp for a torsion rule.""" 958 959 # For example: 960 # [!#1:1][CX4:2]!@[NX3;"N_lp":3] 961 # [!#1:1][$(S(=O)=O):2]!@["N_lp":3]@[Cr3] 962 # [C:1][$(S(=O)=O):2]!@["N_lp":3] 963 # [c:1][$(S(=O)=O):2]!@["N_lp":3] 964 # [!#1:1][$(S(=O)=O):2]!@["N_lp":3] 965 # 966 967 MatchedMappedAtoms = re.findall(":[0-9]", ElementNode.get("smarts"), re.I) 968 969 return True if len(MatchedMappedAtoms) == 3 else False 970 971 def IsValidTypeTwoNitogenLonePairTorsionRule(ElementNode): 972 """Validate atom map number for nitrogen involved in N_lp for type two nitrogen 973 lone pair torsion rule.""" 974 975 LonePairMapNumber = GetNitrogenLonePairMapNumber(ElementNode.get("smarts")) 976 977 return IsValidTypeTwoNitrogenLonePairMapNumber(LonePairMapNumber) 978 979 def IsValidTypeTwoNitrogenLonePairMapNumber(LonePairMapNumber): 980 """Check that the atom map number is 3.""" 981 982 return True if LonePairMapNumber is not None and LonePairMapNumber == 3 else False 983 984 def SetupTorsionAlertTypeForRotatableBond(TorsionAnglesInfo, TorsionAngle): 985 """Setup torsion alert type and angle violation for a rotatable bond.""" 986 987 TorsionCategory, TorsionAngleViolation = [None, None] 988 989 for ID in TorsionAnglesInfo["IDs"]: 990 if IsTorsionAngleInWithinTolerance(TorsionAngle, TorsionAnglesInfo["Value"][ID], TorsionAnglesInfo["Tolerance1"][ID]): 991 TorsionCategory = "Green" 992 TorsionAngleViolation = 0.0 993 break 994 995 if IsTorsionAngleInWithinTolerance(TorsionAngle, TorsionAnglesInfo["Value"][ID], TorsionAnglesInfo["Tolerance2"][ID]): 996 TorsionCategory = "Orange" 997 TorsionAngleViolation = CalculateTorsionAngleViolation(TorsionAngle, TorsionAnglesInfo["ValuesIn360RangeList"], TorsionAnglesInfo["Tolerances1List"]) 998 break 999 1000 if TorsionCategory is None: 1001 TorsionCategory = "Red" 1002 TorsionAngleViolation = CalculateTorsionAngleViolation(TorsionAngle, TorsionAnglesInfo["ValuesIn360RangeList"], TorsionAnglesInfo["Tolerances2List"]) 1003 1004 return (TorsionCategory, TorsionAngleViolation) 1005 1006 def IsTorsionAngleInWithinTolerance(TorsionAngle, TorsionPeak, TorsionTolerance): 1007 """Check torsion angle against torsion tolerance.""" 1008 1009 TorsionAngleDiff = TorsionLibraryUtil.CalculateTorsionAngleDifference(TorsionPeak, TorsionAngle) 1010 1011 return True if (abs(TorsionAngleDiff) <= TorsionTolerance) else False 1012 1013 def CalculateTorsionAngleViolation(TorsionAngle, TorsionPeaks, TorsionTolerances): 1014 """Calculate torsion angle violation.""" 1015 1016 TorsionAngleViolation = None 1017 1018 # Map angle to 0 to 360 range. TorsionPeaks values must be in this range... 1019 if TorsionAngle < 0: 1020 TorsionAngle = TorsionAngle + 360 1021 1022 # Identify the closest torsion peak index... 1023 if len(TorsionPeaks) == 1: 1024 NearestPeakIndex = 0 1025 else: 1026 NearestPeakIndex = min(range(len(TorsionPeaks)), key=lambda Index: abs(TorsionPeaks[Index] - TorsionAngle)) 1027 1028 # Calculate torsion angle violation from the nearest peak and its tolerance value... 1029 TorsionAngleDiff = TorsionLibraryUtil.CalculateTorsionAngleDifference(TorsionPeaks[NearestPeakIndex], TorsionAngle) 1030 TorsionAngleViolation = abs(abs(TorsionAngleDiff) - TorsionTolerances[NearestPeakIndex]) 1031 1032 return TorsionAngleViolation 1033 1034 def InitializeTorsionAlertsSummaryInfo(): 1035 """Initialize torsion alerts summary.""" 1036 1037 if OptionsInfo["CountMode"]: 1038 return None 1039 1040 if not OptionsInfo["TrackAlertsSummaryInfo"]: 1041 return None 1042 1043 TorsionAlertsSummaryInfo = {} 1044 TorsionAlertsSummaryInfo["RuleIDs"] = [] 1045 1046 for DataLabel in ["SMARTSToRuleIDs", "RuleSMARTS", "HierarchyClassName", "HierarchySubClassName", "TorsionRulePeaks", "TorsionRuleTolerances1", "TorsionRuleTolerances2", "AlertTypes", "AlertTypesMolCount"]: 1047 TorsionAlertsSummaryInfo[DataLabel] = {} 1048 1049 return TorsionAlertsSummaryInfo 1050 1051 def TrackTorsionAlertsSummaryInfo(TorsionAlertsSummaryInfo, RotBondsAlertsInfo): 1052 """Track torsion alerts summary information for matched torsion rules in a 1053 molecule.""" 1054 1055 if OptionsInfo["CountMode"]: 1056 return 1057 1058 if not OptionsInfo["TrackAlertsSummaryInfo"]: 1059 return 1060 1061 if RotBondsAlertsInfo is None: 1062 return 1063 1064 MolAlertsInfo = {} 1065 MolAlertsInfo["RuleIDs"] = [] 1066 MolAlertsInfo["AlertTypes"] = {} 1067 1068 for ID in RotBondsAlertsInfo["IDs"]: 1069 if not RotBondsAlertsInfo["MatchStatus"][ID]: 1070 continue 1071 1072 if OptionsInfo["OutfileAlertsOnly"]: 1073 if RotBondsAlertsInfo["AlertTypes"][ID] not in OptionsInfo["SpecifiedAlertsModeList"]: 1074 continue 1075 1076 AlertType = RotBondsAlertsInfo["AlertTypes"][ID] 1077 TorsionRuleNodeID = RotBondsAlertsInfo["TorsionRuleNodeID"][ID] 1078 TorsionRuleSMARTS = RotBondsAlertsInfo["TorsionRuleSMARTS"][ID] 1079 1080 # Track data for torsion alert summary information across molecules... 1081 if TorsionRuleNodeID not in TorsionAlertsSummaryInfo["RuleSMARTS"]: 1082 TorsionAlertsSummaryInfo["RuleIDs"].append(TorsionRuleNodeID) 1083 TorsionAlertsSummaryInfo["SMARTSToRuleIDs"][TorsionRuleSMARTS] = TorsionRuleNodeID 1084 1085 TorsionAlertsSummaryInfo["RuleSMARTS"][TorsionRuleNodeID] = TorsionRuleSMARTS 1086 TorsionAlertsSummaryInfo["HierarchyClassName"][TorsionRuleNodeID] = RotBondsAlertsInfo["HierarchyClassNames"][ID] 1087 TorsionAlertsSummaryInfo["HierarchySubClassName"][TorsionRuleNodeID] = RotBondsAlertsInfo["HierarchySubClassNames"][ID] 1088 1089 TorsionAlertsSummaryInfo["TorsionRulePeaks"][TorsionRuleNodeID] = RotBondsAlertsInfo["TorsionRulePeaks"][ID] 1090 TorsionAlertsSummaryInfo["TorsionRuleTolerances1"][TorsionRuleNodeID] = RotBondsAlertsInfo["TorsionRuleTolerances1"][ID] 1091 TorsionAlertsSummaryInfo["TorsionRuleTolerances2"][TorsionRuleNodeID] = RotBondsAlertsInfo["TorsionRuleTolerances2"][ID] 1092 1093 # Initialize number of alert types across all molecules... 1094 TorsionAlertsSummaryInfo["AlertTypes"][TorsionRuleNodeID] = {} 1095 1096 # Initialize number of molecules flagged by each alert type... 1097 TorsionAlertsSummaryInfo["AlertTypesMolCount"][TorsionRuleNodeID] = {} 1098 1099 if AlertType not in TorsionAlertsSummaryInfo["AlertTypes"][TorsionRuleNodeID]: 1100 TorsionAlertsSummaryInfo["AlertTypes"][TorsionRuleNodeID][AlertType] = 0 1101 TorsionAlertsSummaryInfo["AlertTypesMolCount"][TorsionRuleNodeID][AlertType] = 0 1102 1103 TorsionAlertsSummaryInfo["AlertTypes"][TorsionRuleNodeID][AlertType] += 1 1104 1105 # Track data for torsion alert information in a molecule... 1106 if TorsionRuleNodeID not in MolAlertsInfo["AlertTypes"]: 1107 MolAlertsInfo["RuleIDs"].append(TorsionRuleNodeID) 1108 MolAlertsInfo["AlertTypes"][TorsionRuleNodeID] = {} 1109 1110 if AlertType not in MolAlertsInfo["AlertTypes"][TorsionRuleNodeID]: 1111 MolAlertsInfo["AlertTypes"][TorsionRuleNodeID][AlertType] = 0 1112 MolAlertsInfo["AlertTypes"][TorsionRuleNodeID][AlertType] += 1 1113 1114 # Track number of molecules flagged by a specific torsion alert... 1115 for TorsionRuleNodeID in MolAlertsInfo["RuleIDs"]: 1116 for AlertType in MolAlertsInfo["AlertTypes"][TorsionRuleNodeID]: 1117 if MolAlertsInfo["AlertTypes"][TorsionRuleNodeID][AlertType]: 1118 TorsionAlertsSummaryInfo["AlertTypesMolCount"][TorsionRuleNodeID][AlertType] += 1 1119 1120 def WriteTorsionAlertsSummaryInfo(Writer, TorsionAlertsSummaryInfo): 1121 """Write out torsion alerts summary informatio to a CSV file.""" 1122 1123 if OptionsInfo["CountMode"]: 1124 return 1125 1126 if not OptionsInfo["OutfileSummaryMode"]: 1127 return 1128 1129 if len(TorsionAlertsSummaryInfo["RuleIDs"]) == 0: 1130 return 1131 1132 # Write headers... 1133 QuoteValues = True 1134 Values = ["TorsionRule", "TorsionPeaks", "Tolerances1", "Tolerances2", "HierarchyClass", "HierarchySubClass", "TorsionAlertTypes", "TorsionAlertCount", "TorsionAlertMolCount"] 1135 Writer.write("%s\n" % MiscUtil.JoinWords(Values, ",", QuoteValues)) 1136 1137 SortedRuleIDs = GetSortedTorsionAlertsSummaryInfoRuleIDs(TorsionAlertsSummaryInfo) 1138 1139 # Write alerts information... 1140 for ID in SortedRuleIDs: 1141 # Remove any double quotes in SMARTS... 1142 RuleSMARTS = TorsionAlertsSummaryInfo["RuleSMARTS"][ID] 1143 RuleSMARTS = re.sub("\"", "", RuleSMARTS, re.I) 1144 1145 HierarchyClassName = TorsionAlertsSummaryInfo["HierarchyClassName"][ID] 1146 HierarchySubClassName = TorsionAlertsSummaryInfo["HierarchySubClassName"][ID] 1147 1148 TorsionPeaks = MiscUtil.JoinWords(["%s" % Value for Value in TorsionAlertsSummaryInfo["TorsionRulePeaks"][ID]], ",") 1149 TorsionRuleTolerances1 = MiscUtil.JoinWords(["%s" % Value for Value in TorsionAlertsSummaryInfo["TorsionRuleTolerances1"][ID]], ",") 1150 TorsionRuleTolerances2 = MiscUtil.JoinWords(["%s" % Value for Value in TorsionAlertsSummaryInfo["TorsionRuleTolerances2"][ID]], ",") 1151 1152 AlertTypes = [] 1153 AlertTypeCount = [] 1154 AlertTypeMolCount = [] 1155 for AlertType in sorted(TorsionAlertsSummaryInfo["AlertTypes"][ID]): 1156 AlertTypes.append(AlertType) 1157 AlertTypeCount.append("%s" % TorsionAlertsSummaryInfo["AlertTypes"][ID][AlertType]) 1158 AlertTypeMolCount.append("%s" % TorsionAlertsSummaryInfo["AlertTypesMolCount"][ID][AlertType]) 1159 1160 Values = [RuleSMARTS, TorsionPeaks, TorsionRuleTolerances1, TorsionRuleTolerances2, HierarchyClassName, HierarchySubClassName, "%s" % MiscUtil.JoinWords(AlertTypes, ","), "%s" % (MiscUtil.JoinWords(AlertTypeCount, ",")), "%s" % (MiscUtil.JoinWords(AlertTypeMolCount, ","))] 1161 Writer.write("%s\n" % MiscUtil.JoinWords(Values, ",", QuoteValues)) 1162 1163 def GetSortedTorsionAlertsSummaryInfoRuleIDs(TorsionAlertsSummaryInfo): 1164 """Sort torsion rule IDs by alert types molecule count in descending order.""" 1165 1166 SortedRuleIDs = [] 1167 1168 RuleIDs = TorsionAlertsSummaryInfo["RuleIDs"] 1169 if len(RuleIDs) == 0: 1170 return SortedRuleIDs 1171 1172 # Setup a map from AlertTypesMolCount to IDs for sorting alerts... 1173 RuleIDs = TorsionAlertsSummaryInfo["RuleIDs"] 1174 MolCountMap = {} 1175 for ID in RuleIDs: 1176 MolCount = 0 1177 for AlertType in sorted(TorsionAlertsSummaryInfo["AlertTypes"][ID]): 1178 MolCount += TorsionAlertsSummaryInfo["AlertTypesMolCount"][ID][AlertType] 1179 MolCountMap[ID] = MolCount 1180 1181 SortedRuleIDs = sorted(RuleIDs, key = lambda ID: MolCountMap[ID], reverse = True) 1182 1183 return SortedRuleIDs 1184 1185 def WriteTorsionAlertsFilteredByRulesInfo(TorsionAlertsSummaryInfo): 1186 """Write out torsion alerts SD files for individual torsion rules.""" 1187 1188 if OptionsInfo["CountMode"]: 1189 return 1190 1191 if not OptionsInfo["OutfilesFilteredByRulesMode"]: 1192 return 1193 1194 if len(TorsionAlertsSummaryInfo["RuleIDs"]) == 0: 1195 return 1196 1197 # Setup a molecule reader for filtered molecules... 1198 FilteredMols = RDKitUtil.ReadMolecules(OptionsInfo["OutfileFiltered"], **OptionsInfo["InfileParams"]) 1199 1200 # Get torsion rule IDs for writing out filtered SD files for individual torsion alert rules... 1201 TorsionRuleIDs = GetTorsionAlertsFilteredByRuleFilesRuleIDs(TorsionAlertsSummaryInfo) 1202 1203 # Setup writers... 1204 ByRuleOutfilesWriters = SetupByRuleOutfilesWriters(TorsionRuleIDs) 1205 1206 for Mol in FilteredMols: 1207 # Retrieve torsion alerts info... 1208 TorsionAlertsInfo = RetrieveTorsionAlertsInfo(Mol, TorsionAlertsSummaryInfo) 1209 if TorsionAlertsInfo is None: 1210 continue 1211 1212 for TorsionRuleID in TorsionRuleIDs: 1213 if TorsionRuleID not in TorsionAlertsInfo["RuleSMARTS"]: 1214 continue 1215 1216 WriteMoleculeFilteredByRuleID(ByRuleOutfilesWriters[TorsionRuleID], Mol, TorsionRuleID, TorsionAlertsSummaryInfo, TorsionAlertsInfo) 1217 1218 CloseByRuleOutfilesWriters(ByRuleOutfilesWriters) 1219 1220 def GetTorsionAlertsFilteredByRuleFilesRuleIDs(TorsionAlertsSummaryInfo): 1221 """Get torsion rule IDs for writing out individual SD files filtered by torsion alert rules.""" 1222 1223 # Get torsion rule IDs triggering torsion alerts sorted in the order from the most to 1224 # the least number of unique molecules... 1225 RuleIDs = GetSortedTorsionAlertsSummaryInfoRuleIDs(TorsionAlertsSummaryInfo) 1226 1227 # Select torsion rule IDs for writing out SD files... 1228 if not OptionsInfo["OutfilesFilteredByRulesAllMode"]: 1229 MaxRuleIDs = OptionsInfo["OutfilesFilteredByRulesMaxCount"] 1230 if MaxRuleIDs < len(RuleIDs): 1231 RuleIDs = RuleIDs[0:MaxRuleIDs] 1232 1233 return RuleIDs 1234 1235 def RetrieveTorsionAlertsInfo(Mol, TorsionAlertsSummaryInfo): 1236 """Parse torsion alerts data field value to retrieve alerts information for rotatable bonds.""" 1237 1238 TorsionAlertsLabel = OptionsInfo["SDFieldIDsToLabels"]["TorsionAlertsLabel"] 1239 TorsionAlerts = Mol.GetProp(TorsionAlertsLabel) if Mol.HasProp(TorsionAlertsLabel) else None 1240 1241 if TorsionAlerts is None or len(TorsionAlerts) == 0: 1242 return None 1243 1244 # Initialize for tracking by rule IDs... 1245 TorsionAlertsInfo = {} 1246 TorsionAlertsInfo["RuleIDs"] = [] 1247 1248 for DataLabel in ["RuleSMARTS", "HierarchyClassName", "HierarchySubClassName", "TorsionRulePeaks", "TorsionRuleTolerances1", "TorsionRuleTolerances2", "AlertTypes", "AtomIndices", "TorsionAtomIndices", "TorsionAngles", "TorsionAngleViolations", "AlertTypesCount"]: 1249 TorsionAlertsInfo[DataLabel] = {} 1250 1251 ValuesDelimiter = OptionsInfo["IntraSetValuesDelim"] 1252 TorsionAlertsSetSize = 11 1253 1254 TorsionAlertsWords = TorsionAlerts.split() 1255 if len(TorsionAlertsWords) % TorsionAlertsSetSize: 1256 MiscUtil.PrintError("The number of space delimited values, %s, for TorsionAlerts data field in filtered SD file must be a multiple of %s." % (len(TorsionAlertsWords), TorsionAlertsSetSize)) 1257 1258 ID = 0 1259 for Index in range(0, len(TorsionAlertsWords), TorsionAlertsSetSize): 1260 ID += 1 1261 1262 RotBondIndices, TorsionAlertType, TorsionIndices, TorsionAngle, TorsionAngleViolation, HierarchyClass, HierarchySubClass, TorsionPeaks, Tolerances1, Tolerances2, TorsionRule = TorsionAlertsWords[Index: Index + TorsionAlertsSetSize] 1263 RotBondIndices = RotBondIndices.split(ValuesDelimiter) 1264 TorsionIndices = TorsionIndices.split(ValuesDelimiter) 1265 TorsionPeaks = TorsionPeaks.split(ValuesDelimiter) 1266 Tolerances1 = Tolerances1.split(ValuesDelimiter) 1267 Tolerances2 = Tolerances2.split(ValuesDelimiter) 1268 1269 if TorsionRule not in TorsionAlertsSummaryInfo["SMARTSToRuleIDs"]: 1270 MiscUtil.PrintWarning("The SMARTS pattern, %s, for TorsionAlerts data field in filtered SD file doesn't map to any torsion rule..." % TorsionRule) 1271 continue 1272 TorsionRuleNodeID = TorsionAlertsSummaryInfo["SMARTSToRuleIDs"][TorsionRule] 1273 1274 # Track data for torsion alerts in a molecule... 1275 if TorsionRuleNodeID not in TorsionAlertsInfo["RuleSMARTS"]: 1276 TorsionAlertsInfo["RuleIDs"].append(TorsionRuleNodeID) 1277 1278 TorsionAlertsInfo["RuleSMARTS"][TorsionRuleNodeID] = TorsionRule 1279 TorsionAlertsInfo["HierarchyClassName"][TorsionRuleNodeID] = HierarchyClass 1280 TorsionAlertsInfo["HierarchySubClassName"][TorsionRuleNodeID] = HierarchySubClass 1281 TorsionAlertsInfo["TorsionRulePeaks"][TorsionRuleNodeID] = TorsionPeaks 1282 TorsionAlertsInfo["TorsionRuleTolerances1"][TorsionRuleNodeID] = Tolerances1 1283 TorsionAlertsInfo["TorsionRuleTolerances2"][TorsionRuleNodeID] = Tolerances2 1284 1285 TorsionAlertsInfo["AlertTypes"][TorsionRuleNodeID] = [] 1286 TorsionAlertsInfo["AtomIndices"][TorsionRuleNodeID] = [] 1287 TorsionAlertsInfo["TorsionAtomIndices"][TorsionRuleNodeID] = [] 1288 TorsionAlertsInfo["TorsionAngles"][TorsionRuleNodeID] = [] 1289 TorsionAlertsInfo["TorsionAngleViolations"][TorsionRuleNodeID] = [] 1290 1291 TorsionAlertsInfo["AlertTypesCount"][TorsionRuleNodeID] = {} 1292 1293 # Track multiple values for a rule ID... 1294 TorsionAlertsInfo["AlertTypes"][TorsionRuleNodeID].append(TorsionAlertType) 1295 TorsionAlertsInfo["AtomIndices"][TorsionRuleNodeID].append(RotBondIndices) 1296 TorsionAlertsInfo["TorsionAtomIndices"][TorsionRuleNodeID].append(TorsionIndices) 1297 TorsionAlertsInfo["TorsionAngles"][TorsionRuleNodeID].append(TorsionAngle) 1298 TorsionAlertsInfo["TorsionAngleViolations"][TorsionRuleNodeID].append(TorsionAngleViolation) 1299 1300 # Count alert type for a rule ID... 1301 if TorsionAlertType not in TorsionAlertsInfo["AlertTypesCount"][TorsionRuleNodeID]: 1302 TorsionAlertsInfo["AlertTypesCount"][TorsionRuleNodeID][TorsionAlertType] = 0 1303 TorsionAlertsInfo["AlertTypesCount"][TorsionRuleNodeID][TorsionAlertType] += 1 1304 1305 return TorsionAlertsInfo 1306 1307 def WriteMolecule(Writer, Mol, RotBondsAlertsInfo): 1308 """Write out molecule.""" 1309 1310 if OptionsInfo["CountMode"]: 1311 return True 1312 1313 SetupMolPropertiesForAlertsInformation(Mol, RotBondsAlertsInfo) 1314 1315 try: 1316 Writer.write(Mol) 1317 except Exception as ErrMsg: 1318 MiscUtil.PrintWarning("Failed to write molecule %s:\n%s\n" % (RDKitUtil.GetMolName(Mol), ErrMsg)) 1319 return False 1320 1321 return True 1322 1323 def SetupMolPropertiesForAlertsInformation(Mol, RotBondsAlertsInfo): 1324 """Setup molecule properties containing alerts information for rotatable bonds.""" 1325 1326 if not OptionsInfo["OutfileAlerts"]: 1327 return 1328 1329 # Setup rotatable bonds count... 1330 RotBondsCount = 0 1331 if RotBondsAlertsInfo is not None: 1332 RotBondsCount = len(RotBondsAlertsInfo["IDs"]) 1333 Mol.SetProp(OptionsInfo["SDFieldIDsToLabels"]["RotBondsCountLabel"], "%s" % RotBondsCount) 1334 1335 # Setup alert counts for rotatable bonds... 1336 AlertsCount = [] 1337 if RotBondsAlertsInfo is not None: 1338 for AlertType in ["Green", "Orange", "Red"]: 1339 if AlertType in RotBondsAlertsInfo["Count"]: 1340 AlertsCount.append("%s" % RotBondsAlertsInfo["Count"][AlertType]) 1341 else: 1342 AlertsCount.append("0") 1343 1344 if len(AlertsCount): 1345 Mol.SetProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionAlertsCountLabel"], "%s" % MiscUtil.JoinWords(AlertsCount, " ")) 1346 1347 # Setup alert information for rotatable bonds... 1348 AlertsInfoValues = [] 1349 1350 # Delimiter for multiple values corresponding to specific set of information for 1351 # a rotatable bond. For example: TorsionAtomIndices 1352 ValuesDelim = OptionsInfo["IntraSetValuesDelim"] 1353 1354 # Delimiter for various values for a rotatable bond... 1355 RotBondValuesDelim = OptionsInfo["InterSetValuesDelim"] 1356 1357 # Delimiter for values corresponding to multiple rotatable bonds... 1358 AlertsInfoValuesDelim = OptionsInfo["InterSetValuesDelim"] 1359 1360 if RotBondsAlertsInfo is not None: 1361 for ID in RotBondsAlertsInfo["IDs"]: 1362 if not RotBondsAlertsInfo["MatchStatus"][ID]: 1363 continue 1364 1365 if OptionsInfo["OutfileAlertsOnly"]: 1366 if RotBondsAlertsInfo["AlertTypes"][ID] not in OptionsInfo["SpecifiedAlertsModeList"]: 1367 continue 1368 1369 RotBondValues = [] 1370 1371 # Bond atom indices... 1372 Values = ["%s" % Value for Value in RotBondsAlertsInfo["AtomIndices"][ID]] 1373 RotBondValues.append(ValuesDelim.join(Values)) 1374 1375 # Alert type... 1376 RotBondValues.append(RotBondsAlertsInfo["AlertTypes"][ID]) 1377 1378 # Torsion atom indices... 1379 TorsionAtomIndices = SetupTorsionAtomIndicesValues(RotBondsAlertsInfo["TorsionAtomIndices"][ID], ValuesDelim) 1380 RotBondValues.append(TorsionAtomIndices) 1381 1382 # Torsion angle... 1383 RotBondValues.append("%.2f" % RotBondsAlertsInfo["TorsionAngles"][ID]) 1384 1385 # Torsion angle violation... 1386 RotBondValues.append("%.2f" % RotBondsAlertsInfo["TorsionAngleViolations"][ID]) 1387 1388 # Hierarchy class and subclass names... 1389 RotBondValues.append("%s" % RotBondsAlertsInfo["HierarchyClassNames"][ID]) 1390 RotBondValues.append("%s" % RotBondsAlertsInfo["HierarchySubClassNames"][ID]) 1391 1392 # Torsion rule peaks... 1393 Values = ["%s" % Value for Value in RotBondsAlertsInfo["TorsionRulePeaks"][ID]] 1394 RotBondValues.append(ValuesDelim.join(Values)) 1395 1396 # Torsion rule tolerances... 1397 Values = ["%s" % Value for Value in RotBondsAlertsInfo["TorsionRuleTolerances1"][ID]] 1398 RotBondValues.append(ValuesDelim.join(Values)) 1399 Values = ["%s" % Value for Value in RotBondsAlertsInfo["TorsionRuleTolerances2"][ID]] 1400 RotBondValues.append(ValuesDelim.join(Values)) 1401 1402 # Torsion rule SMARTS... 1403 RotBondValues.append("%s" % RotBondsAlertsInfo["TorsionRuleSMARTS"][ID]) 1404 1405 # Track joined values for a rotatable bond... 1406 AlertsInfoValues.append("%s" % RotBondValuesDelim.join(RotBondValues)) 1407 1408 if len(AlertsInfoValues): 1409 Mol.SetProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionAlertsLabel"], "%s" % ("%s" % AlertsInfoValuesDelim.join(AlertsInfoValues))) 1410 1411 def WriteMoleculeFilteredByRuleID(Writer, Mol, TorsionRuleID, TorsionAlertsSummaryInfo, TorsionAlertsInfo): 1412 """Write out molecule.""" 1413 1414 if OptionsInfo["CountMode"]: 1415 return 1416 1417 SetupMolPropertiesForFilteredByRuleIDAlertsInformation(Mol, TorsionRuleID, TorsionAlertsSummaryInfo, TorsionAlertsInfo) 1418 1419 Writer.write(Mol) 1420 1421 def SetupMolPropertiesForFilteredByRuleIDAlertsInformation(Mol, TorsionRuleID, TorsionAlertsSummaryInfo, TorsionAlertsInfo): 1422 """Setup molecule properties containing alerts information for torsion alerts 1423 fileted by Rule IDs.""" 1424 1425 # Delete torsion alerts information for rotatable bonds... 1426 if Mol.HasProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionAlertsLabel"]): 1427 Mol.ClearProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionAlertsLabel"]) 1428 1429 # Delimiter for values... 1430 IntraSetValuesDelim = OptionsInfo["IntraSetValuesDelim"] 1431 InterSetValuesDelim = OptionsInfo["InterSetValuesDelim"] 1432 1433 # Setup alert rule information... 1434 AlertRuleInfoValues = [] 1435 1436 AlertRuleInfoValues.append("%s" % TorsionAlertsInfo["HierarchyClassName"][TorsionRuleID]) 1437 AlertRuleInfoValues.append("%s" % TorsionAlertsInfo["HierarchySubClassName"][TorsionRuleID]) 1438 1439 Values = ["%s" % Value for Value in TorsionAlertsInfo["TorsionRulePeaks"][TorsionRuleID]] 1440 AlertRuleInfoValues.append(IntraSetValuesDelim.join(Values)) 1441 1442 Values = ["%s" % Value for Value in TorsionAlertsInfo["TorsionRuleTolerances1"][TorsionRuleID]] 1443 AlertRuleInfoValues.append(IntraSetValuesDelim.join(Values)) 1444 Values = ["%s" % Value for Value in TorsionAlertsInfo["TorsionRuleTolerances2"][TorsionRuleID]] 1445 AlertRuleInfoValues.append(IntraSetValuesDelim.join(Values)) 1446 1447 AlertRuleInfoValues.append("%s" % TorsionAlertsInfo["RuleSMARTS"][TorsionRuleID]) 1448 1449 Mol.SetProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionRuleLabel"], "%s" % ("%s" % InterSetValuesDelim.join(AlertRuleInfoValues))) 1450 1451 # Setup alerts count for torsion rule... 1452 AlertsCount = [] 1453 for AlertType in ["Green", "Orange", "Red"]: 1454 if AlertType in TorsionAlertsInfo["AlertTypesCount"][TorsionRuleID]: 1455 AlertsCount.append("%s" % TorsionAlertsInfo["AlertTypesCount"][TorsionRuleID][AlertType]) 1456 else: 1457 AlertsCount.append("0") 1458 1459 Mol.SetProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionRuleAlertsCountLabel"], "%s" % (InterSetValuesDelim.join(AlertsCount))) 1460 1461 # Setup torsion rule alerts... 1462 AlertsInfoValues = [] 1463 for Index in range(0, len(TorsionAlertsInfo["AlertTypes"][TorsionRuleID])): 1464 RotBondInfoValues = [] 1465 1466 # Bond atom indices... 1467 Values = ["%s" % Value for Value in TorsionAlertsInfo["AtomIndices"][TorsionRuleID][Index]] 1468 RotBondInfoValues.append(IntraSetValuesDelim.join(Values)) 1469 1470 # Alert type... 1471 RotBondInfoValues.append(TorsionAlertsInfo["AlertTypes"][TorsionRuleID][Index]) 1472 1473 # Torsion atom indices retrieved from the filtered SD file and stored as strings... 1474 Values = ["%s" % Value for Value in TorsionAlertsInfo["TorsionAtomIndices"][TorsionRuleID][Index]] 1475 RotBondInfoValues.append(IntraSetValuesDelim.join(Values)) 1476 1477 # Torsion angle... 1478 RotBondInfoValues.append(TorsionAlertsInfo["TorsionAngles"][TorsionRuleID][Index]) 1479 1480 # Torsion angle violation... 1481 RotBondInfoValues.append(TorsionAlertsInfo["TorsionAngleViolations"][TorsionRuleID][Index]) 1482 1483 # Track alerts informaiton... 1484 AlertsInfoValues.append("%s" % InterSetValuesDelim.join(RotBondInfoValues)) 1485 1486 Mol.SetProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionRuleAlertsLabel"], "%s" % (InterSetValuesDelim.join(AlertsInfoValues))) 1487 1488 # Setup torsion rule alert max angle violation... 1489 TorsionAngleViolations = [float(Angle) for Angle in TorsionAlertsInfo["TorsionAngleViolations"][TorsionRuleID]] 1490 Mol.SetProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionRuleMaxAngleViolationLabel"], "%.2f" % (max(TorsionAngleViolations))) 1491 1492 def SetupTorsionAtomIndicesValues(TorsionAtomIndicesList, ValuesDelim): 1493 """Setup torsion atom indices value for output files.""" 1494 1495 # Check for any list values in the list of torsion atom indices used as placeholders 1496 # for positions of lone pairs in torsion rules containing N_lp... 1497 TorsionAtomsInfo = [] 1498 for Value in TorsionAtomIndicesList: 1499 if type(Value) is list: 1500 TorsionAtomsInfo.append("N_lp") 1501 else: 1502 TorsionAtomsInfo.append(Value) 1503 1504 Values = ["%s" % Value for Value in TorsionAtomsInfo] 1505 1506 return ValuesDelim.join(Values) 1507 1508 def SetupOutfilesWriters(): 1509 """Setup molecule and summary writers.""" 1510 1511 OutfilesWriters = {"WriterRemaining": None, "WriterFiltered": None, "WriterAlertSummary": None} 1512 1513 # Writers for SD files... 1514 WriterRemaining, WriterFiltered = SetupMoleculeWriters() 1515 OutfilesWriters["WriterRemaining"] = WriterRemaining 1516 OutfilesWriters["WriterFiltered"] = WriterFiltered 1517 1518 # Writer for alert summary CSV file... 1519 WriterAlertSummary = SetupAlertSummaryWriter() 1520 OutfilesWriters["WriterAlertSummary"] = WriterAlertSummary 1521 1522 return OutfilesWriters 1523 1524 def SetupMoleculeWriters(): 1525 """Setup molecule writers.""" 1526 1527 Writer = None 1528 WriterFiltered = None 1529 1530 if OptionsInfo["CountMode"]: 1531 return (Writer, WriterFiltered) 1532 1533 Writer = RDKitUtil.MoleculesWriter(OptionsInfo["Outfile"], **OptionsInfo["OutfileParams"]) 1534 if Writer is None: 1535 MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OptionsInfo["Outfile"]) 1536 MiscUtil.PrintInfo("\nGenerating file %s..." % OptionsInfo["Outfile"]) 1537 1538 if OptionsInfo["OutfileFilteredMode"]: 1539 WriterFiltered = RDKitUtil.MoleculesWriter(OptionsInfo["OutfileFiltered"], **OptionsInfo["OutfileParams"]) 1540 if WriterFiltered is None: 1541 MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OptionsInfo["OutfileFiltered"]) 1542 MiscUtil.PrintInfo("Generating file %s..." % OptionsInfo["OutfileFiltered"]) 1543 1544 return (Writer, WriterFiltered) 1545 1546 def SetupAlertSummaryWriter(): 1547 """Setup a alert summary writer.""" 1548 1549 Writer = None 1550 1551 if OptionsInfo["CountMode"]: 1552 return Writer 1553 1554 if not OptionsInfo["OutfileSummaryMode"]: 1555 return Writer 1556 1557 Outfile = OptionsInfo["OutfileSummary"] 1558 Writer = open(Outfile, "w") 1559 if Writer is None: 1560 MiscUtil.PrintError("Failed to setup a writer for output fie %s " % Outfile) 1561 1562 MiscUtil.PrintInfo("Generating file %s..." % Outfile) 1563 1564 return Writer 1565 1566 def CloseOutfilesWriters(OutfilesWriters): 1567 """Close outfile writers.""" 1568 1569 for WriterType, Writer in OutfilesWriters.items(): 1570 if Writer is not None: 1571 Writer.close() 1572 1573 def SetupByRuleOutfilesWriters(RuleIDs): 1574 """Setup by rule outfiles writers.""" 1575 1576 # Initialize... 1577 OutfilesWriters = {} 1578 for RuleID in RuleIDs: 1579 OutfilesWriters[RuleID] = None 1580 1581 if OptionsInfo["CountMode"]: 1582 return OutfilesWriters 1583 1584 if not OptionsInfo["OutfilesFilteredByRulesMode"]: 1585 return OutfilesWriters 1586 1587 FileDir, FileName, FileExt = MiscUtil.ParseFileName(Options["--outfile"]) 1588 OutfilesRoot = "%s_Filtered_TopRule" % FileName 1589 OutfilesExt = "sdf" 1590 1591 MsgTxt = "all" if OptionsInfo["OutfilesFilteredByRulesAllMode"] else "top %s" % OptionsInfo["OutfilesFilteredByRulesMaxCount"] 1592 MiscUtil.PrintInfo("\nGenerating output files %s*.%s for %s torsion rules triggering alerts..." % (OutfilesRoot, OutfilesExt, MsgTxt)) 1593 1594 # Delete any existing output files... 1595 Outfiles = glob.glob("%s*.%s" % (OutfilesRoot, OutfilesExt)) 1596 if len(Outfiles): 1597 MiscUtil.PrintInfo("Deleting existing output files %s*.%s..." % (OutfilesRoot, OutfilesExt)) 1598 for Outfile in Outfiles: 1599 try: 1600 os.remove(Outfile) 1601 except Exception as ErrMsg: 1602 MiscUtil.PrintWarning("Failed to delete file: %s" % ErrMsg) 1603 1604 RuleIndex = 0 1605 for RuleID in RuleIDs: 1606 RuleIndex += 1 1607 Outfile = "%s%s.%s" % (OutfilesRoot, RuleIndex, OutfilesExt) 1608 Writer = RDKitUtil.MoleculesWriter(Outfile, **OptionsInfo["OutfileParams"]) 1609 if Writer is None: 1610 MiscUtil.PrintError("Failed to setup a writer for output fie %s " % Outfile) 1611 1612 OutfilesWriters[RuleID] = Writer 1613 1614 return OutfilesWriters 1615 1616 def CloseByRuleOutfilesWriters(OutfilesWriters): 1617 """Close by rule outfile writers.""" 1618 1619 for RuleID, Writer in OutfilesWriters.items(): 1620 if Writer is not None: 1621 Writer.close() 1622 1623 def ProcessTorsionLibraryInfo(): 1624 """Process torsion library information.""" 1625 1626 RetrieveTorsionLibraryInfo() 1627 ListTorsionLibraryInfo() 1628 1629 def RetrieveTorsionLibraryInfo(Quiet = False): 1630 """Retrieve torsion library information.""" 1631 1632 TorsionLibraryFilePath = OptionsInfo["TorsionLibraryFile"] 1633 if TorsionLibraryFilePath is None: 1634 TorsionLibraryFile = "TorsionLibrary.xml" 1635 MayaChemToolsDataDir = MiscUtil.GetMayaChemToolsLibDataPath() 1636 TorsionLibraryFilePath = os.path.join(MayaChemToolsDataDir, TorsionLibraryFile) 1637 if not Quiet: 1638 MiscUtil.PrintInfo("\nRetrieving data from default torsion library file %s..." % TorsionLibraryFile) 1639 else: 1640 TorsionLibraryFilePath = OptionsInfo["TorsionLibraryFile"] 1641 if not Quiet: 1642 MiscUtil.PrintInfo("\nRetrieving data from torsion library file %s..." % TorsionLibraryFilePath) 1643 1644 TorsionLibraryInfo["TorsionLibElementTree"] = TorsionLibraryUtil.RetrieveTorsionLibraryInfo(TorsionLibraryFilePath) 1645 1646 def ListTorsionLibraryInfo(): 1647 """List torsion library information.""" 1648 1649 TorsionLibraryUtil.ListTorsionLibraryInfo(TorsionLibraryInfo["TorsionLibElementTree"]) 1650 1651 def SetupTorsionLibraryInfo(Quiet = False): 1652 """Setup torsion library information for matching rotatable bonds.""" 1653 1654 if not Quiet: 1655 MiscUtil.PrintInfo("\nSetting up torsion library information for matching rotatable bonds...") 1656 1657 TorsionLibraryUtil.SetupTorsionLibraryInfoForMatchingRotatableBonds(TorsionLibraryInfo) 1658 1659 def ProcessRotatableBondsSMARTSMode(): 1660 """"Process SMARTS pattern for rotatable bonds.""" 1661 1662 RotBondsMode = OptionsInfo["RotBondsSMARTSMode"] 1663 RotBondsSMARTSPattern = None 1664 if re.match("NonStrict", RotBondsMode, re.I): 1665 RotBondsSMARTSPattern = "[!$(*#*)&!D1]-&!@[!$(*#*)&!D1]" 1666 elif re.match("SemiStrict", RotBondsMode, re.I): 1667 RotBondsSMARTSPattern = "[!$(*#*)&!D1&!$(C(F)(F)F)&!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)&!$(C([CH3])([CH3])[CH3])]-!@[!$(*#*)&!D1&!$(C(F)(F)F)&!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)&!$(C([CH3])([CH3])[CH3])]" 1668 elif re.match("Strict", RotBondsMode, re.I): 1669 RotBondsSMARTSPattern = "[!$(*#*)&!D1&!$(C(F)(F)F)&!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)&!$(C([CH3])([CH3])[CH3])&!$([CD3](=[N,O,S])-!@[#7,O,S!D1])&!$([#7,O,S!D1]-!@[CD3]=[N,O,S])&!$([CD3](=[N+])-!@[#7!D1])&!$([#7!D1]-!@[CD3]=[N+])]-!@[!$(*#*)&!D1&!$(C(F)(F)F)&!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)&!$(C([CH3])([CH3])[CH3])]" 1670 elif re.match("Specify", RotBondsMode, re.I): 1671 RotBondsSMARTSPattern = OptionsInfo["RotBondsSMARTSPatternSpecified"] 1672 RotBondsSMARTSPattern = RotBondsSMARTSPattern.strip() 1673 if not len(RotBondsSMARTSPattern): 1674 MiscUtil.PrintError("Empty value specified for SMILES/SMARTS pattern in \"--rotBondsSMARTSPattern\" option, %s." % RotBondsMode) 1675 else: 1676 MiscUtil.PrintError("The value, %s, specified for option \"-r, --rotBondsSMARTSMode\" is not valid. " % RotBondsMode) 1677 1678 RotBondsPatternMol = Chem.MolFromSmarts(RotBondsSMARTSPattern) 1679 if RotBondsPatternMol is None: 1680 if re.match("Specify", RotBondsMode, re.I): 1681 MiscUtil.PrintError("Failed to create rotatable bonds pattern molecule. The rotatable bonds SMARTS pattern, \"%s\", specified using \"--rotBondsSMARTSPattern\" option is not valid." % (RotBondsSMARTSPattern)) 1682 else: 1683 MiscUtil.PrintError("Failed to create rotatable bonds pattern molecule. The default rotatable bonds SMARTS pattern, \"%s\", used for, \"%s\", value of \"-r, --rotBondsSMARTSMode\" option is not valid." % (RotBondsSMARTSPattern, RotBondsMode)) 1684 1685 OptionsInfo["RotBondsSMARTSPattern"] = RotBondsSMARTSPattern 1686 1687 def ProcessSDFieldLabelsOption(): 1688 """Process SD data field label option.""" 1689 1690 ParamsOptionName = "--outfileSDFieldLabels" 1691 ParamsOptionValue = Options["--outfileSDFieldLabels"] 1692 1693 ParamsIDsToLabels = {"RotBondsCountLabel": "RotBondsCount", "TorsionAlertsCountLabel": "TorsionAlertsCount (Green Orange Red)", "TorsionAlertsLabel": "TorsionAlerts (RotBondIndices TorsionAlert TorsionIndices TorsionAngle TorsionAngleViolation HierarchyClass HierarchySubClass TorsionPeaks Tolerances1 Tolerances2 TorsionRule)", "TorsionRuleLabel": "TorsionRule (HierarchyClass HierarchySubClass TorsionPeaks Tolerances1 Tolerances2 TorsionRule)", "TorsionRuleAlertsCountLabel": "TorsionRuleAlertsCount (Green Orange Red)", "TorsionRuleAlertsLabel": "TorsionRuleAlerts (RotBondIndices TorsionAlert TorsionIndices TorsionAngle TorsionAngleViolation)", "TorsionRuleMaxAngleViolationLabel": "TorsionRuleMaxAngleViolation"} 1694 1695 if re.match("^auto$", ParamsOptionValue, re.I): 1696 OptionsInfo["SDFieldIDsToLabels"] = ParamsIDsToLabels 1697 return 1698 1699 # Setup a canonical paramater names... 1700 ValidParamNames = [] 1701 CanonicalParamNamesMap = {} 1702 for ParamName in sorted(ParamsIDsToLabels): 1703 ValidParamNames.append(ParamName) 1704 CanonicalParamNamesMap[ParamName.lower()] = ParamName 1705 1706 ParamsOptionValue = ParamsOptionValue.strip() 1707 if not ParamsOptionValue: 1708 PrintError("No valid parameter name and value pairs specified using \"%s\" option" % ParamsOptionName) 1709 1710 ParamsOptionValueWords = ParamsOptionValue.split(",") 1711 if len(ParamsOptionValueWords) % 2: 1712 MiscUtil.PrintError("The number of comma delimited paramater names and values, %d, specified using \"%s\" option must be an even number." % (len(ParamsOptionValueWords), ParamsOptionName)) 1713 1714 # Validate paramater name and value pairs... 1715 for Index in range(0, len(ParamsOptionValueWords), 2): 1716 Name = ParamsOptionValueWords[Index].strip() 1717 Value = ParamsOptionValueWords[Index + 1].strip() 1718 1719 CanonicalName = Name.lower() 1720 if not CanonicalName in CanonicalParamNamesMap: 1721 MiscUtil.PrintError("The parameter name, %s, specified using \"%s\" is not a valid name. Supported parameter names: %s" % (Name, ParamsOptionName, " ".join(ValidParamNames))) 1722 1723 ParamName = CanonicalParamNamesMap[CanonicalName] 1724 ParamValue = Value 1725 1726 # Set value... 1727 ParamsIDsToLabels[ParamName] = ParamValue 1728 1729 OptionsInfo["SDFieldIDsToLabels"] = ParamsIDsToLabels 1730 1731 def ProcessOptionNitrogenLonePairParameters(): 1732 """Process nitrogen lone pair parameters option.""" 1733 1734 ParamsOptionName = "--nitrogenLonePairParams" 1735 ParamsOptionValue = Options["--nitrogenLonePairParams"] 1736 1737 ParamsInfo = {"AllowHydrogenNbrs": True, "PlanarityTolerance": 1.0,} 1738 1739 if re.match("^auto$", ParamsOptionValue, re.I): 1740 OptionsInfo["NitrogenLonePairParams"] = ParamsInfo 1741 return 1742 1743 # Setup a canonical paramater names... 1744 ValidParamNames = [] 1745 CanonicalParamNamesMap = {} 1746 for ParamName in sorted(ParamsInfo): 1747 ValidParamNames.append(ParamName) 1748 CanonicalParamNamesMap[ParamName.lower()] = ParamName 1749 1750 ParamsOptionValue = ParamsOptionValue.strip() 1751 if not ParamsOptionValue: 1752 PrintError("No valid parameter name and value pairs specified using \"%s\" option" % ParamsOptionName) 1753 1754 ParamsOptionValueWords = ParamsOptionValue.split(",") 1755 if len(ParamsOptionValueWords) % 2: 1756 MiscUtil.PrintError("The number of comma delimited paramater names and values, %d, specified using \"%s\" option must be an even number." % (len(ParamsOptionValueWords), ParamsOptionName)) 1757 1758 # Validate paramater name and value pairs... 1759 for Index in range(0, len(ParamsOptionValueWords), 2): 1760 Name = ParamsOptionValueWords[Index].strip() 1761 Value = ParamsOptionValueWords[Index + 1].strip() 1762 1763 CanonicalName = Name.lower() 1764 if not CanonicalName in CanonicalParamNamesMap: 1765 MiscUtil.PrintError("The parameter name, %s, specified using \"%s\" is not a valid name. Supported parameter names: %s" % (Name, ParamsOptionName, " ".join(ValidParamNames))) 1766 1767 ParamName = CanonicalParamNamesMap[CanonicalName] 1768 ParamValue = Value 1769 1770 if re.match("^PlanarityTolerance$", ParamName, re.I): 1771 Value = float(Value) 1772 if Value < 0: 1773 MiscUtil.PrintError("The parameter value, %s, specified for parameter name, %s, using \"%s\" option is not a valid value. Supported values: >= 0" % (Value, Name, ParamsOptionName)) 1774 ParamValue = Value 1775 elif re.match("^AllowHydrogenNbrs$", ParamName, re.I): 1776 if not re.match("^(yes|no)$", Value, re.I): 1777 MiscUtil.PrintError("The parameter value, %s, specified for parameter name, %s, using \"%s\" option is not a valid value. Supported values: yes or no" % (Value, Name, ParamsOptionName)) 1778 ParamValue = True if re.match("^yes$", Value, re.I) else False 1779 1780 # Set value... 1781 ParamsInfo[ParamName] = ParamValue 1782 1783 OptionsInfo["NitrogenLonePairParams"] = ParamsInfo 1784 1785 def ProcessOptions(): 1786 """Process and validate command line arguments and options.""" 1787 1788 MiscUtil.PrintInfo("Processing options...") 1789 1790 # Validate options... 1791 ValidateOptions() 1792 1793 OptionsInfo["Infile"] = Options["--infile"] 1794 ParamsDefaultInfoOverride = {"RemoveHydrogens": False} 1795 OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters("--infileParams", Options["--infileParams"], InfileName = Options["--infile"], ParamsDefaultInfo = ParamsDefaultInfoOverride) 1796 1797 OptionsInfo["Outfile"] = Options["--outfile"] 1798 OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters("--outfileParams", Options["--outfileParams"], Options["--infile"], Options["--outfile"]) 1799 1800 FileDir, FileName, FileExt = MiscUtil.ParseFileName(Options["--outfile"]) 1801 OutfileFiltered = "%s_Filtered.%s" % (FileName, FileExt) 1802 OptionsInfo["OutfileFiltered"] = OutfileFiltered 1803 OptionsInfo["OutfileFilteredMode"] = True if re.match("^yes$", Options["--outfileFiltered"], re.I) else False 1804 1805 OutfileSummary = "%s_AlertsSummary.csv" % (FileName) 1806 OptionsInfo["OutfileSummary"] = OutfileSummary 1807 OptionsInfo["OutfileSummaryMode"] = True if re.match("^yes$", Options["--outfileSummary"], re.I) else False 1808 1809 OptionsInfo["OutfilesFilteredByRulesMode"] = True if re.match("^yes$", Options["--outfilesFilteredByRules"], re.I) else False 1810 OptionsInfo["TrackAlertsSummaryInfo"] = True if (OptionsInfo["OutfileSummaryMode"] or OptionsInfo["OutfilesFilteredByRulesMode"]) else False 1811 1812 OutfilesFilteredByRulesMaxCount = Options["--outfilesFilteredByRulesMaxCount"] 1813 if not re.match("^All$", OutfilesFilteredByRulesMaxCount, re.I): 1814 OutfilesFilteredByRulesMaxCount = int(OutfilesFilteredByRulesMaxCount) 1815 OptionsInfo["OutfilesFilteredByRulesMaxCount"] = OutfilesFilteredByRulesMaxCount 1816 OptionsInfo["OutfilesFilteredByRulesAllMode"] = True if re.match("^All$", Options["--outfilesFilteredByRulesMaxCount"], re.I) else False 1817 1818 OptionsInfo["OutfileAlerts"] = True if re.match("^yes$", Options["--outfileAlerts"], re.I) else False 1819 1820 if re.match("^yes$", Options["--outfilesFilteredByRules"], re.I): 1821 if not re.match("^yes$", Options["--outfileAlerts"], re.I): 1822 MiscUtil.PrintError("The value \"%s\" specified for \"--outfilesFilteredByRules\" option is not valid. The specified value is only allowed during \"yes\" value of \"--outfileAlerts\" option." % (Options["--outfilesFilteredByRules"])) 1823 1824 OptionsInfo["OutfileAlertsMode"] = Options["--outfileAlertsMode"] 1825 OptionsInfo["OutfileAlertsOnly"] = True if re.match("^AlertsOnly$", Options["--outfileAlertsMode"], re.I) else False 1826 1827 ProcessSDFieldLabelsOption() 1828 1829 OptionsInfo["Overwrite"] = Options["--overwrite"] 1830 OptionsInfo["CountMode"] = True if re.match("^count$", Options["--mode"], re.I) else False 1831 1832 OptionsInfo["MPMode"] = True if re.match("^yes$", Options["--mp"], re.I) else False 1833 OptionsInfo["MPParams"] = MiscUtil.ProcessOptionMultiprocessingParameters("--mpParams", Options["--mpParams"]) 1834 1835 ProcessOptionNitrogenLonePairParameters() 1836 1837 OptionsInfo["AlertsMode"] = Options["--alertsMode"] 1838 OptionsInfo["SpecifiedAlertsModeList"] = [] 1839 if re.match("^Red$", Options["--alertsMode"], re.I): 1840 OptionsInfo["SpecifiedAlertsModeList"].append("Red") 1841 elif re.match("^RedAndOrange$", Options["--alertsMode"], re.I): 1842 OptionsInfo["SpecifiedAlertsModeList"].append("Red") 1843 OptionsInfo["SpecifiedAlertsModeList"].append("Orange") 1844 1845 OptionsInfo["MinAlertsCount"] = int(Options["--alertsMinCount"]) 1846 1847 OptionsInfo["RotBondsSMARTSMode"] = Options["--rotBondsSMARTSMode"] 1848 OptionsInfo["RotBondsSMARTSPatternSpecified"] = Options["--rotBondsSMARTSPattern"] 1849 ProcessRotatableBondsSMARTSMode() 1850 1851 OptionsInfo["TorsionLibraryFile"] = None 1852 if not re.match("^auto$", Options["--torsionLibraryFile"], re.I): 1853 OptionsInfo["TorsionLibraryFile"] = Options["--torsionLibraryFile"] 1854 1855 # Setup delimiter for writing out torsion alert information to output files... 1856 OptionsInfo["IntraSetValuesDelim"] = "," 1857 OptionsInfo["InterSetValuesDelim"] = " " 1858 1859 def RetrieveOptions(): 1860 """Retrieve command line arguments and options.""" 1861 1862 # Get options... 1863 global Options 1864 Options = docopt(_docoptUsage_) 1865 1866 # Set current working directory to the specified directory... 1867 WorkingDir = Options["--workingdir"] 1868 if WorkingDir: 1869 os.chdir(WorkingDir) 1870 1871 # Handle examples option... 1872 if "--examples" in Options and Options["--examples"]: 1873 MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_)) 1874 sys.exit(0) 1875 1876 def ProcessListTorsionLibraryOption(): 1877 """Process list torsion library information.""" 1878 1879 # Validate and process dataFile option for listing torsion library information... 1880 OptionsInfo["TorsionLibraryFile"] = None 1881 if not re.match("^auto$", Options["--torsionLibraryFile"], re.I): 1882 MiscUtil.ValidateOptionFilePath("-t, --torsionLibraryFile", Options["--torsionLibraryFile"]) 1883 OptionsInfo["TorsionLibraryFile"] = Options["--torsionLibraryFile"] 1884 1885 RetrieveTorsionLibraryInfo() 1886 ListTorsionLibraryInfo() 1887 1888 def ValidateOptions(): 1889 """Validate option values.""" 1890 1891 MiscUtil.ValidateOptionTextValue("-a, --alertsMode", Options["--alertsMode"], "Red RedAndOrange") 1892 MiscUtil.ValidateOptionIntegerValue("--alertsMinCount", Options["--alertsMinCount"], {">=": 1}) 1893 1894 MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"]) 1895 MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "sdf sd mol") 1896 1897 MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "sdf sd") 1898 if re.match("^filter$", Options["--mode"], re.I): 1899 MiscUtil.ValidateOptionsOutputFileOverwrite("-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"]) 1900 MiscUtil.ValidateOptionsDistinctFileNames("-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"]) 1901 1902 MiscUtil.ValidateOptionTextValue("--outfileFiltered", Options["--outfileFiltered"], "yes no") 1903 1904 MiscUtil.ValidateOptionTextValue("--outfilesFilteredByRules", Options["--outfilesFilteredByRules"], "yes no") 1905 if not re.match("^All$", Options["--outfilesFilteredByRulesMaxCount"], re.I): 1906 MiscUtil.ValidateOptionIntegerValue("--outfilesFilteredByRulesMaxCount", Options["--outfilesFilteredByRulesMaxCount"], {">": 0}) 1907 1908 MiscUtil.ValidateOptionTextValue("--outfileSummary", Options["--outfileSummary"], "yes no") 1909 MiscUtil.ValidateOptionTextValue("--outfileAlerts", Options["--outfileAlerts"], "yes no") 1910 MiscUtil.ValidateOptionTextValue("--outfileAlertsMode", Options["--outfileAlertsMode"], "All AlertsOnly") 1911 1912 MiscUtil.ValidateOptionTextValue("-m, --mode", Options["--mode"], "filter count") 1913 if re.match("^filter$", Options["--mode"], re.I): 1914 if not Options["--outfile"]: 1915 MiscUtil.PrintError("The outfile must be specified using \"-o, --outfile\" during \"filter\" value of \"-m, --mode\" option") 1916 1917 MiscUtil.ValidateOptionTextValue("--mp", Options["--mp"], "yes no") 1918 1919 MiscUtil.ValidateOptionTextValue("-r, --rotBondsSMARTSMode", Options["--rotBondsSMARTSMode"], "NonStrict SemiStrict Strict Specify") 1920 if re.match("^Specify$", Options["--rotBondsSMARTSMode"], re.I): 1921 if not Options["--rotBondsSMARTSPattern"]: 1922 MiscUtil.PrintError("The SMARTS pattern must be specified using \"--rotBondsSMARTSPattern\" during \"Specify\" value of \"-r, --rotBondsSMARTS\" option") 1923 1924 if not re.match("^auto$", Options["--torsionLibraryFile"], re.I): 1925 MiscUtil.ValidateOptionFilePath("-t, --torsionLibraryFile", Options["--torsionLibraryFile"]) 1926 1927 # Setup a usage string for docopt... 1928 _docoptUsage_ = """ 1929 RDKitFilterTorsionLibraryAlerts.py - Filter torsion library alerts 1930 1931 Usage: 1932 RDKitFilterTorsionLibraryAlerts.py [--alertsMode <Red, RedAndOrange>] [--alertsMinCount <Number>] 1933 [--infileParams <Name,Value,...>] [--mode <filter or count>] [--mp <yes or no>] [--mpParams <Name,Value,...>] 1934 [--nitrogenLonePairParams <Name,Value,...>] [--outfileAlerts <yes or no>] 1935 [--outfileAlertsMode <All or AlertsOnly>] [--outfileFiltered <yes or no>] 1936 [--outfilesFilteredByRules <yes or no>] [--outfilesFilteredByRulesMaxCount <All or number>] 1937 [--outfileSummary <yes or no>] [--outfileSDFieldLabels <Type,Label,...>] 1938 [--outfileParams <Name,Value,...>] [--overwrite] [ --rotBondsSMARTSMode <NonStrict, SemiStrict,...>] 1939 [--rotBondsSMARTSPattern <SMARTS>] [--torsionLibraryFile <FileName or auto>] [-w <dir>] -i <infile> -o <outfile> 1940 RDKitFilterTorsionLibraryAlerts.py [--torsionLibraryFile <FileName or auto>] -l | --list 1941 RDKitFilterTorsionLibraryAlerts.py -h | --help | -e | --examples 1942 1943 Description: 1944 Filter strained molecules from an input file for torsion library [ Ref 146, 152, 159 ] 1945 alerts by matching rotatable bonds against SMARTS patterns specified for torsion 1946 rules in a torsion library file and write out appropriate molecules to output 1947 files. The molecules must have 3D coordinates in input file. The default torsion 1948 library file, TorsionLibrary.xml, is available under MAYACHEMTOOLS/lib/data 1949 directory. 1950 1951 The data in torsion library file is organized in a hierarchical manner. It consists 1952 of one generic class and six specific classes at the highest level. Each class 1953 contains multiple subclasses corresponding to named functional groups or 1954 substructure patterns. The subclasses consist of torsion rules sorted from 1955 specific to generic torsion patterns. The torsion rule, in turn, contains a list 1956 of peak values for torsion angles and two tolerance values. A pair of tolerance 1957 values define torsion bins around a torsion peak value. For example: 1958 1959 <library> 1960 <hierarchyClass name="GG" id1="G" id2="G"> 1961 ... 1962 </hierarchyClass> 1963 <hierarchyClass name="CO" id1="C" id2="O"> 1964 <hierarchySubClass name="Ester bond I" smarts="O=[C:2][O:3]"> 1965 <torsionRule smarts="[O:1]=[C:2]!@[O:3]~[CH0:4]"> 1966 <angleList> 1967 <angle value="0.0" tolerance1="20.00" 1968 tolerance2="25.00" score="56.52"/> 1969 </angleList> 1970 </torsionRule> 1971 ... 1972 ... 1973 ... 1974 </hierarchyClass> 1975 <hierarchyClass name="NC" id1="N" id2="C"> 1976 ... 1977 </hierarchyClass> 1978 <hierarchyClass name="SN" id1="S" id2="N"> 1979 ... 1980 </hierarchyClass> 1981 <hierarchyClass name="CS" id1="C" id2="S"> 1982 ... 1983 </hierarchyClass> 1984 <hierarchyClass name="CC" id1="C" id2="C"> 1985 ... 1986 </hierarchyClass> 1987 <hierarchyClass name="SS" id1="S" id2="S"> 1988 ... 1989 </hierarchyClass> 1990 </library> 1991 1992 The rotatable bonds in a 3D molecule are identified using a default SMARTS pattern. 1993 A custom SMARTS pattern may be optionally specified to detect rotatable bonds. 1994 Each rotatable bond is matched to a torsion rule in the torsion library and 1995 assigned one of the following three alert categories: Green, Orange or Red. The 1996 rotatable bond is marked Green or Orange for the measured angle of the torsion 1997 pattern within the first or second tolerance bins around a torsion peak. 1998 Otherwise, it's marked Red implying that the measured angle is not observed in 1999 the structure databases employed to generate the torsion library. 2000 2001 The following output files are generated after the filtering: 2002 2003 <OutfileRoot>.sdf 2004 <OutfileRoot>_Filtered.sdf 2005 <OutfileRoot>_AlertsSummary.csv 2006 <OutfileRoot>_Filtered_TopRule*.sdf 2007 2008 The supported input file formats are: Mol (.mol), SD (.sdf, .sd) 2009 2010 The supported output file formats are: SD (.sdf, .sd) 2011 2012 Options: 2013 -a, --alertsMode <Red, RedAndOrange> [default: Red] 2014 Torsion library alert types to use for filtering molecules containing 2015 rotatable bonds marked with Green, Orange, or Red alerts. Possible 2016 values: Red or RedAndOrange. 2017 --alertsMinCount <Number> [default: 1] 2018 Minimum number of rotatable bond alerts in a molecule for filtering the 2019 molecule. 2020 -e, --examples 2021 Print examples. 2022 -h, --help 2023 Print this help message. 2024 -i, --infile <infile> 2025 Input file name. 2026 --infileParams <Name,Value,...> [default: auto] 2027 A comma delimited list of parameter name and value pairs for reading 2028 molecules from files. The supported parameter names for different file 2029 formats, along with their default values, are shown below: 2030 2031 SD, MOL: removeHydrogens,no,sanitize,yes,strictParsing,yes 2032 2033 -l, --list 2034 List torsion library information without performing any filtering. 2035 -m, --mode <filter or count> [default: filter] 2036 Specify whether to filter molecules for torsion library [ Ref 146, 152, 159 ] alerts 2037 by matching rotatable bonds against SMARTS patterns specified for torsion 2038 rules and write out the rest of the molecules to an outfile or simply count 2039 the number of matched molecules marked for filtering. 2040 --mp <yes or no> [default: no] 2041 Use multiprocessing. 2042 2043 By default, input data is retrieved in a lazy manner via mp.Pool.imap() 2044 function employing lazy RDKit data iterable. This allows processing of 2045 arbitrary large data sets without any additional requirements memory. 2046 2047 All input data may be optionally loaded into memory by mp.Pool.map() 2048 before starting worker processes in a process pool by setting the value 2049 of 'inputDataMode' to 'InMemory' in '--mpParams' option. 2050 2051 A word to the wise: The default 'chunkSize' value of 1 during 'Lazy' input 2052 data mode may adversely impact the performance. The '--mpParams' section 2053 provides additional information to tune the value of 'chunkSize'. 2054 --mpParams <Name,Value,...> [default: auto] 2055 A comma delimited list of parameter name and value pairs to configure 2056 multiprocessing. 2057 2058 The supported parameter names along with their default and possible 2059 values are shown below: 2060 2061 chunkSize, auto 2062 inputDataMode, Lazy [ Possible values: InMemory or Lazy ] 2063 numProcesses, auto [ Default: mp.cpu_count() ] 2064 2065 These parameters are used by the following functions to configure and 2066 control the behavior of multiprocessing: mp.Pool(), mp.Pool.map(), and 2067 mp.Pool.imap(). 2068 2069 The chunkSize determines chunks of input data passed to each worker 2070 process in a process pool by mp.Pool.map() and mp.Pool.imap() functions. 2071 The default value of chunkSize is dependent on the value of 'inputDataMode'. 2072 2073 The mp.Pool.map() function, invoked during 'InMemory' input data mode, 2074 automatically converts RDKit data iterable into a list, loads all data into 2075 memory, and calculates the default chunkSize using the following method 2076 as shown in its code: 2077 2078 chunkSize, extra = divmod(len(dataIterable), len(numProcesses) * 4) 2079 if extra: chunkSize += 1 2080 2081 For example, the default chunkSize will be 7 for a pool of 4 worker processes 2082 and 100 data items. 2083 2084 The mp.Pool.imap() function, invoked during 'Lazy' input data mode, employs 2085 'lazy' RDKit data iterable to retrieve data as needed, without loading all the 2086 data into memory. Consequently, the size of input data is not known a priori. 2087 It's not possible to estimate an optimal value for the chunkSize. The default 2088 chunkSize is set to 1. 2089 2090 The default value for the chunkSize during 'Lazy' data mode may adversely 2091 impact the performance due to the overhead associated with exchanging 2092 small chunks of data. It is generally a good idea to explicitly set chunkSize to 2093 a larger value during 'Lazy' input data mode, based on the size of your input 2094 data and number of processes in the process pool. 2095 2096 The mp.Pool.map() function waits for all worker processes to process all 2097 the data and return the results. The mp.Pool.imap() function, however, 2098 returns the the results obtained from worker processes as soon as the 2099 results become available for specified chunks of data. 2100 2101 The order of data in the results returned by both mp.Pool.map() and 2102 mp.Pool.imap() functions always corresponds to the input data. 2103 -n, --nitrogenLonePairParams <Name,Value,...> [default: auto] 2104 A comma delimited list of parameter name and value pairs to match 2105 torsion SMARTS patterns containing non-standard construct 'N_lp' 2106 corresponding to nitrogen lone pair. 2107 2108 The supported parameter names along with their default and possible 2109 values are shown below: 2110 2111 allowHydrogenNbrs, yes [ Possible values: yes or no ] 2112 planarityTolerance, 1 [Possible values: >=0] 2113 2114 These parameters are used during the matching of torsion rules containing 2115 'N_lp' in their SMARTS patterns. The 'allowHydrogensNbrs' allows the use 2116 hydrogen neighbors attached to nitrogen during the determination of its 2117 planarity. The 'planarityTolerance' in degrees represents the tolerance 2118 allowed for nitrogen to be considered coplanar with its three neighbors. 2119 2120 The torsion rules containing 'N_lp' in their SMARTS patterns are categorized 2121 into the following two types of rules: 2122 2123 TypeOne: 2124 2125 [CX4:1][CX4H2:2]!@[NX3;"N_lp":3][CX4:4] 2126 [C:1][CX4H2:2]!@[NX3;"N_lp":3][C:4] 2127 ... ... ... 2128 2129 TypeTwo: 2130 2131 [!#1:1][CX4:2]!@[NX3;"N_lp":3] 2132 [C:1][$(S(=O)=O):2]!@["N_lp":3] 2133 ... ... ... 2134 2135 The torsions are matched to torsion rules containing 'N_lp' using specified 2136 SMARTS patterns without the 'N_lp' along with additional constraints using 2137 the following methodology: 2138 2139 TypeOne: 2140 2141 . SMARTS pattern must contain four mapped atoms and the third 2142 mapped atom must be a nitrogen matched with 'NX3:3' 2143 . Nitrogen atom must have 3 neighbors. The 'allowHydrogens' 2144 parameter controls inclusion of hydrogens as its neighbors. 2145 . Nitrogen atom and its 3 neighbors must be coplanar. 2146 'planarityTolerance' parameter provides tolerance in degrees 2147 for nitrogen to be considered coplanar with its 3 neighbors. 2148 2149 TypeTwo: 2150 2151 . SMARTS pattern must contain three mapped atoms and the third 2152 mapped atom must be a nitrogen matched with 'NX3:3'. The 2153 third mapped atom may contain only 'N_lp:3' The missing 'NX3' 2154 is automatically detected. 2155 . Nitrogen atom must have 3 neighbors. 'allowHydrogens' 2156 parameter controls inclusion of hydrogens as neighbors. 2157 . Nitrogen atom and its 3 neighbors must not be coplanar. 2158 'planarityTolerance' parameter provides tolerance in degrees 2159 for nitrogen to be considered coplanar with its 3 neighbors. 2160 . Nitrogen lone pair position equivalent to VSEPR theory is 2161 determined based on the position of nitrogen and its neighbors. 2162 A vector normal to 3 nitrogen neighbors is calculated and added 2163 to the coordinates of nitrogen atom to determine the approximate 2164 position of the lone pair. It is used as the fourth position to 2165 calculate the torsion angle. 2166 2167 -o, --outfile <outfile> 2168 Output file name. 2169 --outfileAlerts <yes or no> [default: yes] 2170 Write out alerts information to SD output files. 2171 --outfileAlertsMode <All or AlertsOnly> [default: AlertsOnly] 2172 Write alerts information to SD output files for all alerts or only for alerts 2173 specified by '--AlertsMode' option. Possible values: All or AlertsOnly 2174 This option is only valid for 'Yes' value of '--outfileAlerts' option. 2175 2176 The following alerts information is added to SD output files using 2177 'TorsionAlerts' data field: 2178 2179 RotBondIndices TorsionAlert TorsionIndices TorsionAngle 2180 TorsionAngleViolation HierarchyClass HierarchySubClass 2181 TorsionRule TorsionPeaks Tolerances1 Tolerances2 2182 2183 The 'RotBondsCount' and 'TorsionAlertsCount' data fields are always added 2184 to SD output files containing both remaining and filtered molecules. 2185 2186 Format: 2187 2188 > <RotBondsCount> 2189 Number 2190 2191 > <TorsionAlertsCount (Green Orange Red)> 2192 Number Number Number 2193 2194 > <TorsionAlerts (RotBondIndices TorsionAlert TorsionIndices 2195 TorsionAngle TorsionAngleViolation HierarchyClass 2196 HierarchySubClass TorsionPeaks Tolerances1 Tolerances2 2197 TorsionRule)> 2198 AtomIndex2,AtomIndex3 AlertType AtomIndex1,AtomIndex2,AtomIndex3, 2199 AtomIndex4 Angle AngleViolation ClassName SubClassName 2200 CommaDelimPeakValues CommaDelimTol1Values CommDelimTol2Values 2201 SMARTS ... ... ... 2202 ... ... ... 2203 2204 A set of 11 values is written out as value of 'TorsionAlerts' data field for 2205 each torsion in a molecule. The space character is used as a delimiter 2206 to separate values with in a set and across set. The comma character 2207 is used to delimit multiple values for each value in a set. 2208 2209 The 'RotBondIndices' and 'TorsionIndices' contain 2 and 4 comma delimited 2210 values representing atom indices for a rotatable bond and matched torsion. 2211 The 'TorsionPeaks', 'Tolerances1', and 'Tolerances2' contain same number 2212 of comma delimited values corresponding to torsion angle peaks and 2213 tolerance intervals specified in torsion library. For example: 2214 2215 ... ... ... 2216 > <RotBondsCount> (1) 2217 7 2218 2219 > <TorsionAlertsCount (Green Orange Red)> (1) 2220 3 2 2 2221 2222 > <TorsionAlerts (RotBondIndices TorsionAlert TorsionIndices 2223 TorsionAngle TorsionAngleViolation HierarchyClass 2224 HierarchySubClass TorsionPeaks Tolerances1 Tolerances2 2225 TorsionRule)> 2226 1,2 Red 32,2,1,0 0.13 149.87 NC Anilines 180.0 10.0 30.0 [cH0:1][c:2] 2227 ([cH,nX2H0])!@[NX3H1:3][CX4:4] 8,9 Red 10,9,8,28 -0.85 GG 2228 None -90.0,90.0 30.0,30.0 60.0,60.0 [cH1:1][a:2]([cH1])!@[a:3] 2229 ([cH0])[cH0:4] 2230 ... ... ... 2231 2232 --outfileFiltered <yes or no> [default: yes] 2233 Write out a file containing filtered molecules. Its name is automatically 2234 generated from the specified output file. Default: <OutfileRoot>_ 2235 Filtered.<OutfileExt>. 2236 --outfilesFilteredByRules <yes or no> [default: yes] 2237 Write out SD files containing filtered molecules for individual torsion 2238 rules triggering alerts in molecules. The name of SD files are automatically 2239 generated from the specified output file. Default file names: <OutfileRoot>_ 2240 Filtered_TopRule*.sdf 2241 2242 The following alerts information is added to SD output files: 2243 2244 > <RotBondsCount> 2245 Number 2246 2247 > <TorsionAlertsCount (Green Orange Red)> 2248 Number Number Number 2249 2250 > <TorsionRule (HierarchyClass HierarchySubClass TorsionPeaks 2251 Tolerances1 Tolerances2 TorsionRule)> 2252 ClassName SubClassName CommaDelimPeakValues CommaDelimTol1Values 2253 CommDelimTol2Values SMARTS ... ... ... 2254 ... ... ... 2255 2256 > <TorsionRuleAlertsCount (Green Orange Red)> 2257 Number Number Number 2258 2259 > <TorsionRuleAlerts (RotBondIndices TorsionAlert TorsionIndices 2260 TorsionAngle TorsionAngleViolation)> 2261 AtomIndex2,AtomIndex3 AlertType AtomIndex1,AtomIndex2,AtomIndex3, 2262 AtomIndex4 Angle AngleViolation ... ... ... 2263 2264 > <TorsionRuleMaxAngleViolation> 2265 Number 2266 ... ... ... 2267 2268 For example: 2269 2270 ... ... ... 2271 > <RotBondsCount> (1) 2272 7 2273 2274 > <TorsionAlertsCount (Green Orange Red)> (1) 2275 3 2 2 2276 2277 > <TorsionRule (HierarchyClass HierarchySubClass TorsionPeaks 2278 Tolerances1 Tolerances2 TorsionRule)> (1) 2279 NC Anilines 180.0 10.0 30.0 [cH0:1][c:2]([cH,nX2H0])!@[NX3H1:3][CX4:4] 2280 2281 > <TorsionRuleAlertsCount (Green Orange Red)> (1) 2282 0 0 1 2283 2284 > <TorsionRuleAlerts (RotBondIndices TorsionAlert TorsionIndices 2285 TorsionAngle TorsionAngleViolation)> (1) 2286 1,2 Red 32,2,1,0 0.13 149.87 2287 2288 > <TorsionRuleMaxAngleViolation> (1) 2289 149.87 2290 ... ... ... 2291 2292 --outfilesFilteredByRulesMaxCount <All or number> [default: 10] 2293 Write out SD files containing filtered molecules for specified number of 2294 top N torsion rules triggering alerts for the largest number of molecules 2295 or for all torsion rules triggering alerts across all molecules. 2296 --outfileSummary <yes or no> [default: yes] 2297 Write out a CVS text file containing summary of torsions rules responsible 2298 for triggering torsion alerts. Its name is automatically generated from the 2299 specified output file. Default: <OutfileRoot>_AlertsSummary.csv. 2300 2301 The following alerts information is written to summary text file: 2302 2303 TorsionRule, TorsionPeaks, Tolerances1, Tolerances2, 2304 HierarchyClass, HierarchySubClass, TorsionAlertType, 2305 TorsionAlertCount, TorsionAlertMolCount 2306 2307 The double quotes characters are removed from SMART patterns before 2308 before writing them to a CSV file. In addition, the torsion rules are sorted by 2309 TorsionAlertMolCount. For example: 2310 2311 "TorsionRule","TorsionPeaks","Tolerances1","Tolerances2", 2312 "HierarchyClass","HierarchySubClass","TorsionAlertTypes", 2313 "TorsionAlertCount","TorsionAlertMolCount" 2314 "[!#1:1][CX4H2:2]!@[CX4H2:3][!#1:4]","-60.0,60.0,180.0", 2315 "20.0,20.0,20.0","30.0,30.0,30.0","CC","None/[CX4:2][CX4:3]", 2316 "Red","16","11" 2317 ... ... ... 2318 2319 --outfileSDFieldLabels <Type,Label,...> [default: auto] 2320 A comma delimited list of SD data field type and label value pairs for writing 2321 torsion alerts information along with molecules to SD files. 2322 2323 The supported SD data field label type along with their default values are 2324 shown below: 2325 2326 For all SD files: 2327 2328 RotBondsCountLabel, RotBondsCount 2329 TorsionAlertsCountLabel, TorsionAlertsCount (Green Orange Red) 2330 TorsionAlertsLabel, TorsionAlerts (RotBondIndices TorsionAlert 2331 TorsionIndices TorsionAngle TorsionAngleViolation 2332 HierarchyClass HierarchySubClass TorsionPeaks Tolerances1 2333 Tolerances2 TorsionRule) 2334 2335 For individual SD files filtered by torsion rules: 2336 2337 TorsionRuleLabel, TorsionRule (HierarchyClass HierarchySubClass 2338 TorsionPeaks Tolerances1 Tolerances2 TorsionRule) 2339 TorsionRuleAlertsCountLabel, TorsionRuleAlertsCount (Green Orange 2340 Red) 2341 TorsionRuleAlertsLabel, TorsionRuleAlerts (RotBondIndices 2342 TorsionAlert TorsionIndices TorsionAngle TorsionAngleViolation) 2343 TorsionRuleMaxAngleViolationLabel, TorsionRuleMaxAngleViolation 2344 2345 --outfileParams <Name,Value,...> [default: auto] 2346 A comma delimited list of parameter name and value pairs for writing 2347 molecules to files. The supported parameter names for different file 2348 formats, along with their default values, are shown below: 2349 2350 SD: kekulize,yes,forceV3000,no 2351 2352 --overwrite 2353 Overwrite existing files. 2354 -r, --rotBondsSMARTSMode <NonStrict, SemiStrict,...> [default: SemiStrict] 2355 SMARTS pattern to use for identifying rotatable bonds in a molecule 2356 for matching against torsion rules in the torsion library. Possible values: 2357 NonStrict, SemiStrict, Strict or Specify. The rotatable bond SMARTS matches 2358 are filtered to ensure that each atom in the rotatable bond is attached to 2359 at least two heavy atoms. 2360 2361 The following SMARTS patterns are used to identify rotatable bonds for 2362 different modes: 2363 2364 NonStrict: [!$(*#*)&!D1]-&!@[!$(*#*)&!D1] 2365 2366 SemiStrict: 2367 [!$(*#*)&!D1&!$(C(F)(F)F)&!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br) 2368 &!$(C([CH3])([CH3])[CH3])]-!@[!$(*#*)&!D1&!$(C(F)(F)F) 2369 &!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)&!$(C([CH3])([CH3])[CH3])] 2370 2371 Strict: 2372 [!$(*#*)&!D1&!$(C(F)(F)F)&!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br) 2373 &!$(C([CH3])([CH3])[CH3])&!$([CD3](=[N,O,S])-!@[#7,O,S!D1]) 2374 &!$([#7,O,S!D1]-!@[CD3]=[N,O,S])&!$([CD3](=[N+])-!@[#7!D1]) 2375 &!$([#7!D1]-!@[CD3]=[N+])]-!@[!$(*#*)&!D1&!$(C(F)(F)F) 2376 &!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)&!$(C([CH3])([CH3])[CH3])] 2377 2378 The 'NonStrict' and 'Strict' SMARTS patterns are available in RDKit. The 2379 'NonStrict' SMARTS pattern corresponds to original Daylight SMARTS 2380 specification for rotatable bonds. The 'SemiStrict' SMARTS pattern is 2381 derived from 'Strict' SMARTS patterns for its usage in this script. 2382 2383 You may use any arbitrary SMARTS pattern to identify rotatable bonds by 2384 choosing 'Specify' value for '-r, --rotBondsSMARTSMode' option and providing its 2385 value via '--rotBondsSMARTSPattern' option. 2386 --rotBondsSMARTSPattern <SMARTS> 2387 SMARTS pattern for identifying rotatable bonds. This option is only valid 2388 for 'Specify' value of '-r, --rotBondsSMARTSMode' option. 2389 -t, --torsionLibraryFile <FileName or auto> [default: auto] 2390 Specify a XML file name containing data for torsion library hierarchy or use 2391 default file, TorsionLibrary.xml, available in MAYACHEMTOOLS/lib/data 2392 directory. 2393 2394 The format of data in local XML file must match format of the data in Torsion 2395 Library [ Ref 146, 152, 159 ] file available in MAYACHEMTOOLS data directory. 2396 -w, --workingdir <dir> 2397 Location of working directory which defaults to the current directory. 2398 2399 Examples: 2400 To filter molecules containing any rotatable bonds marked with Red alerts 2401 based on torsion rules in the torsion library and write out SD files containing 2402 remaining and filtered molecules, and individual SD files for torsion rules 2403 triggering alerts along with appropriate torsion information for red alerts, 2404 type: 2405 2406 % RDKitFilterTorsionLibraryAlerts.py -i Sample3D.sdf -o Sample3DOut.sdf 2407 2408 To run the first example for only counting number of alerts without writing 2409 out any SD files, type: 2410 2411 % RDKitFilterTorsionLibraryAlerts.py -m count -i Sample3D.sdf -o 2412 Sample3DOut.sdf 2413 2414 To run the first example for filtertering molecules marked with Orange or 2415 Red alerts and write out SD files, tye: 2416 2417 % RDKitFilterTorsionLibraryAlerts.py -m Filter --alertsMode RedAndOrange 2418 -i Sample3D.sdf -o Sample3DOut.sdf 2419 2420 To run the first example for filtering molecules and writing out torsion 2421 information for all alert types to SD files, type: 2422 2423 % RDKitFilterTorsionLibraryAlerts.py --outfileAlertsMode All 2424 -i Sample3D.sdf -o Sample3DOut.sdf 2425 2426 To run the first example for filtering molecules in multiprocessing mode on 2427 all available CPUs without loading all data into memory and write out SD files, 2428 type: 2429 2430 % RDKitFilterTorsionLibraryAlerts.py --mp yes -i Sample3D.sdf 2431 -o Sample3DOut.sdf 2432 2433 To run the first example for filtering molecules in multiprocessing mode on 2434 all available CPUs by loading all data into memory and write out a SD files, 2435 type: 2436 2437 % RDKitFilterTorsionLibraryAlerts.py --mp yes --mpParams 2438 "inputDataMode, InMemory" -i Sample3D.sdf -o Sample3DOut.sdf 2439 2440 To run the first example for filtering molecules in multiprocessing mode on 2441 specific number of CPUs and chunksize without loading all data into memory 2442 and write out SD files, type: 2443 2444 % RDKitFilterTorsionLibraryAlerts.py --mp yes --mpParams 2445 "inputDataMode,lazy,numProcesses,4,chunkSize,8" -i Sample3D.sdf 2446 -o Sample3DOut.sdf 2447 2448 To list information about default torsion library file without performing any 2449 filtering, type: 2450 2451 % RDKitFilterTorsionLibraryAlerts.py -l 2452 2453 To list information about a local torsion library XML file without performing 2454 any, filtering, type: 2455 2456 % RDKitFilterTorsionLibraryAlerts.py --torsionLibraryFile 2457 TorsionLibrary.xml -l 2458 2459 Author: 2460 Manish Sud (msud@san.rr.com) 2461 2462 Collaborator: 2463 Pat Walters 2464 2465 Acknowledgments: 2466 Wolfgang Guba, Patrick Penner, Levi Pierce 2467 2468 See also: 2469 RDKitFilterChEMBLAlerts.py, RDKitFilterPAINS.py, RDKitFilterTorsionStrainEnergyAlerts.py, 2470 RDKitConvertFileFormat.py, RDKitSearchSMARTS.py 2471 2472 Copyright: 2473 Copyright (C) 2023 Manish Sud. All rights reserved. 2474 2475 This script uses the Torsion Library jointly developed by the University 2476 of Hamburg, Center for Bioinformatics, Hamburg, Germany and 2477 F. Hoffmann-La-Roche Ltd., Basel, Switzerland. 2478 2479 The functionality available in this script is implemented using RDKit, an 2480 open source toolkit for cheminformatics developed by Greg Landrum. 2481 2482 This file is part of MayaChemTools. 2483 2484 MayaChemTools is free software; you can redistribute it and/or modify it under 2485 the terms of the GNU Lesser General Public License as published by the Free 2486 Software Foundation; either version 3 of the License, or (at your option) any 2487 later version. 2488 2489 """ 2490 2491 if __name__ == "__main__": 2492 main()