MayaChemTools

    1 #!/bin/env python
    2 #
    3 # File: RDKitFilterTorsionLibraryAlerts.py
    4 # Author: Manish Sud <msud@san.rr.com>
    5 #
    6 # Collaborator: Pat Walters
    7 #
    8 # Acknowledgments: Wolfgang Guba, Patrick Penner, Levi Pierce
    9 #
   10 # Copyright (C) 2023 Manish Sud. All rights reserved.
   11 #
   12 # This script uses the Torsion Library jointly developed by the University
   13 # of Hamburg, Center for Bioinformatics, Hamburg, Germany and
   14 # F. Hoffmann-La-Roche Ltd., Basel, Switzerland.
   15 #
   16 # The functionality available in this script is implemented using RDKit, an
   17 # open source toolkit for cheminformatics developed by Greg Landrum.
   18 #
   19 # This file is part of MayaChemTools.
   20 #
   21 # MayaChemTools is free software; you can redistribute it and/or modify it under
   22 # the terms of the GNU Lesser General Public License as published by the Free
   23 # Software Foundation; either version 3 of the License, or (at your option) any
   24 # later version.
   25 #
   26 # MayaChemTools is distributed in the hope that it will be useful, but without
   27 # any warranty; without even the implied warranty of merchantability of fitness
   28 # for a particular purpose.  See the GNU Lesser General Public License for more
   29 # details.
   30 #
   31 # You should have received a copy of the GNU Lesser General Public License
   32 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
   33 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
   34 # Boston, MA, 02111-1307, USA.
   35 #
   36 
   37 from __future__ import print_function
   38 
   39 # Add local python path to the global path and import standard library modules...
   40 import os
   41 import sys;  sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python"))
   42 import time
   43 import re
   44 import glob
   45 import multiprocessing as mp
   46 import math
   47 import numpy as np
   48 
   49 # RDKit imports...
   50 try:
   51     from rdkit import rdBase
   52     from rdkit import Chem
   53     from rdkit.Chem import rdMolTransforms
   54 except ImportError as ErrMsg:
   55     sys.stderr.write("\nFailed to import RDKit module/package: %s\n" % ErrMsg)
   56     sys.stderr.write("Check/update your RDKit environment and try again.\n\n")
   57     sys.exit(1)
   58 
   59 # MayaChemTools imports...
   60 try:
   61     from docopt import docopt
   62     import MiscUtil
   63     import RDKitUtil
   64     import TorsionLibraryUtil
   65 except ImportError as ErrMsg:
   66     sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg)
   67     sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n")
   68     sys.exit(1)
   69 
   70 ScriptName = os.path.basename(sys.argv[0])
   71 Options = {}
   72 OptionsInfo = {}
   73 
   74 TorsionLibraryInfo = {}
   75 
   76 def main():
   77     """Start execution of the script."""
   78     
   79     MiscUtil.PrintInfo("\n%s (RDKit v%s; MayaChemTools v%s; %s): Starting...\n" % (ScriptName, rdBase.rdkitVersion, MiscUtil.GetMayaChemToolsVersion(), time.asctime()))
   80     
   81     (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime()
   82     
   83     # Retrieve command line arguments and options...
   84     RetrieveOptions()
   85     
   86     if  Options["--list"]:
   87         # Handle listing of torsion library information...
   88         ProcessListTorsionLibraryOption()
   89     else:
   90         # Process and validate command line arguments and options...
   91         ProcessOptions()
   92         
   93         # Perform actions required by the script...
   94         PerformFiltering()
   95     
   96     MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName)
   97     MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime))
   98 
   99 def PerformFiltering():
  100     """Filter molecules using SMARTS torsion rules in the torsion library file."""
  101 
  102     # Process torsion library info...
  103     ProcessTorsionLibraryInfo()
  104     
  105     # Set up a pattern molecule for rotatable bonds...
  106     RotBondsPatternMol = Chem.MolFromSmarts(OptionsInfo["RotBondsSMARTSPattern"])
  107     
  108     # Setup a molecule reader...
  109     MiscUtil.PrintInfo("\nProcessing file %s..." % OptionsInfo["Infile"])
  110     Mols  = RDKitUtil.ReadMolecules(OptionsInfo["Infile"], **OptionsInfo["InfileParams"])
  111     
  112     MolCount, ValidMolCount, RemainingMolCount, WriteFailedCount = ProcessMolecules(Mols, RotBondsPatternMol)
  113 
  114     MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount)
  115     MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount)
  116     MiscUtil.PrintInfo("Number of molecules failed during writing: %d" % WriteFailedCount)
  117     MiscUtil.PrintInfo("Number of ignored molecules: %d" % (MolCount - ValidMolCount + WriteFailedCount))
  118 
  119     MiscUtil.PrintInfo("\nNumber of remaining molecules: %d" % RemainingMolCount)
  120     MiscUtil.PrintInfo("Number of filtered molecules: %d" % (ValidMolCount - RemainingMolCount))
  121 
  122 def ProcessMolecules(Mols, RotBondsPatternMol):
  123     """Process and filter molecules."""
  124     
  125     if OptionsInfo["MPMode"]:
  126         return ProcessMoleculesUsingMultipleProcesses(Mols, RotBondsPatternMol)
  127     else:
  128         return ProcessMoleculesUsingSingleProcess(Mols, RotBondsPatternMol)
  129 
  130 def ProcessMoleculesUsingSingleProcess(Mols, RotBondsPatternMol):
  131     """Process and filter molecules using a single process."""
  132     
  133     SetupTorsionLibraryInfo()
  134     
  135     MiscUtil.PrintInfo("\nFiltering molecules...")
  136     
  137     OutfileFilteredMode = OptionsInfo["OutfileFilteredMode"]
  138 
  139     # Set up writers...
  140     OutfilesWriters = SetupOutfilesWriters()
  141     
  142     WriterRemaining = OutfilesWriters["WriterRemaining"]
  143     WriterFiltered = OutfilesWriters["WriterFiltered"]
  144     WriterAlertSummary = OutfilesWriters["WriterAlertSummary"]
  145     
  146     # Initialize alerts summary info...
  147     TorsionAlertsSummaryInfo = InitializeTorsionAlertsSummaryInfo()
  148 
  149     (MolCount, ValidMolCount, RemainingMolCount, WriteFailedCount, FilteredMolWriteCount) = [0] * 5
  150     for Mol in Mols:
  151         MolCount += 1
  152         
  153         if Mol is None:
  154             continue
  155         
  156         if RDKitUtil.IsMolEmpty(Mol):
  157             MiscUtil.PrintWarning("Ignoring empty molecule: %s" % RDKitUtil.GetMolName(Mol, MolCount))
  158             continue
  159 
  160         # Check for 3D flag...
  161         if not Mol.GetConformer().Is3D():
  162             MiscUtil.PrintWarning("3D tag is not set. Ignoring molecule: %s\n" % RDKitUtil.GetMolName(Mol, MolCount))
  163             continue
  164         
  165         ValidMolCount += 1
  166         
  167         # Identify torsion library alerts for rotatable bonds..
  168         RotBondsAlertsStatus, RotBondsAlertsInfo = IdentifyTorsionLibraryAlertsForRotatableBonds(Mol, RotBondsPatternMol)
  169 
  170         TrackTorsionAlertsSummaryInfo(TorsionAlertsSummaryInfo, RotBondsAlertsInfo)
  171         
  172         # Write out filtered and remaining molecules...
  173         WriteStatus = True
  174         if RotBondsAlertsStatus:
  175             if OutfileFilteredMode:
  176                 WriteStatus = WriteMolecule(WriterFiltered, Mol, RotBondsAlertsInfo)
  177                 if WriteStatus:
  178                     FilteredMolWriteCount += 1
  179         else:
  180             RemainingMolCount += 1
  181             WriteStatus = WriteMolecule(WriterRemaining, Mol, RotBondsAlertsInfo)
  182         
  183         if not WriteStatus:
  184             WriteFailedCount += 1
  185 
  186     WriteTorsionAlertsSummaryInfo(WriterAlertSummary, TorsionAlertsSummaryInfo)
  187     CloseOutfilesWriters(OutfilesWriters)
  188 
  189     if FilteredMolWriteCount:
  190         WriteTorsionAlertsFilteredByRulesInfo(TorsionAlertsSummaryInfo)
  191     
  192     return (MolCount, ValidMolCount, RemainingMolCount, WriteFailedCount)
  193 
  194 def ProcessMoleculesUsingMultipleProcesses(Mols, RotBondsPatternMol):
  195     """Process and filter molecules using multiprocessing."""
  196 
  197     MiscUtil.PrintInfo("\nFiltering molecules using multiprocessing...")
  198     
  199     MPParams = OptionsInfo["MPParams"]
  200     OutfileFilteredMode = OptionsInfo["OutfileFilteredMode"]
  201     
  202     # Set up writers...
  203     OutfilesWriters = SetupOutfilesWriters()
  204     
  205     WriterRemaining = OutfilesWriters["WriterRemaining"]
  206     WriterFiltered = OutfilesWriters["WriterFiltered"]
  207     WriterAlertSummary = OutfilesWriters["WriterAlertSummary"]
  208     
  209     # Initialize alerts summary info...
  210     TorsionAlertsSummaryInfo = InitializeTorsionAlertsSummaryInfo()
  211     
  212     # Setup data for initializing a worker process...
  213     MiscUtil.PrintInfo("Encoding options info and rotatable bond pattern molecule...")
  214     OptionsInfo["EncodedRotBondsPatternMol"] = RDKitUtil.MolToBase64EncodedMolString(RotBondsPatternMol)
  215     InitializeWorkerProcessArgs = (MiscUtil.ObjectToBase64EncodedString(Options), MiscUtil.ObjectToBase64EncodedString(OptionsInfo))
  216 
  217     # Setup a encoded mols data iterable for a worker process...
  218     WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStrings(Mols)
  219     
  220     # Setup process pool along with data initialization for each process...
  221     MiscUtil.PrintInfo("\nConfiguring multiprocessing using %s method..." % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()"))
  222     MiscUtil.PrintInfo("NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n" % (MPParams["NumProcesses"], MPParams["InputDataMode"], ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"])))
  223     
  224     ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeWorkerProcess, InitializeWorkerProcessArgs)
  225     
  226     # Start processing...
  227     if re.match("^Lazy$", MPParams["InputDataMode"], re.I):
  228         Results = ProcessPool.imap(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
  229     elif re.match("^InMemory$", MPParams["InputDataMode"], re.I):
  230         Results = ProcessPool.map(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
  231     else:
  232         MiscUtil.PrintError("The value, %s, specified for \"--inputDataMode\" is not supported." % (MPParams["InputDataMode"]))
  233     
  234     (MolCount, ValidMolCount, RemainingMolCount, WriteFailedCount, FilteredMolWriteCount) = [0] * 5
  235     for Result in Results:
  236         MolCount += 1
  237         MolIndex, EncodedMol, RotBondsAlertsStatus, RotBondsAlertsInfo = Result
  238         
  239         if EncodedMol is None:
  240             continue
  241         ValidMolCount += 1
  242         
  243         Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
  244         
  245         TrackTorsionAlertsSummaryInfo(TorsionAlertsSummaryInfo, RotBondsAlertsInfo)
  246         
  247         # Write out filtered and remaining molecules...
  248         WriteStatus = True
  249         if RotBondsAlertsStatus:
  250             if OutfileFilteredMode:
  251                 WriteStatus = WriteMolecule(WriterFiltered, Mol, RotBondsAlertsInfo)
  252                 if WriteStatus:
  253                     FilteredMolWriteCount += 1
  254         else:
  255             RemainingMolCount += 1
  256             WriteStatus = WriteMolecule(WriterRemaining, Mol, RotBondsAlertsInfo)
  257         
  258         if not WriteStatus:
  259             WriteFailedCount += 1
  260     
  261     WriteTorsionAlertsSummaryInfo(WriterAlertSummary, TorsionAlertsSummaryInfo)
  262     CloseOutfilesWriters(OutfilesWriters)
  263     
  264     if FilteredMolWriteCount:
  265         WriteTorsionAlertsFilteredByRulesInfo(TorsionAlertsSummaryInfo)
  266     
  267     return (MolCount, ValidMolCount, RemainingMolCount, WriteFailedCount)
  268 
  269 def InitializeWorkerProcess(*EncodedArgs):
  270     """Initialize data for a worker process."""
  271     
  272     global Options, OptionsInfo
  273 
  274     MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid())
  275 
  276     # Decode Options and OptionInfo...
  277     Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0])
  278     OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1])
  279 
  280     # Decode RotBondsPatternMol...
  281     OptionsInfo["RotBondsPatternMol"] = RDKitUtil.MolFromBase64EncodedMolString(OptionsInfo["EncodedRotBondsPatternMol"])
  282 
  283     # Setup torsion library...
  284     RetrieveTorsionLibraryInfo(Quiet = True)
  285     SetupTorsionLibraryInfo(Quiet = True)
  286 
  287 def WorkerProcess(EncodedMolInfo):
  288     """Process data for a worker process."""
  289 
  290     MolIndex, EncodedMol = EncodedMolInfo
  291     
  292     if EncodedMol is None:
  293         return [MolIndex, None, False, None]
  294     
  295     Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
  296     if RDKitUtil.IsMolEmpty(Mol):
  297         MolName = RDKitUtil.GetMolName(Mol, (MolIndex + 1))
  298         MiscUtil.PrintWarning("Ignoring empty molecule: %s" % MolName)
  299         return [MolIndex, None, False, None]
  300         
  301     # Check for 3D flag...
  302     if not Mol.GetConformer().Is3D():
  303         MolName = RDKitUtil.GetMolName(Mol, (MolIndex + 1))
  304         MiscUtil.PrintWarning("3D tag is not set. Ignoring molecule: %s\n" % MolName)
  305         return [MolIndex, None, False, None]
  306     
  307     # Identify torsion library alerts for rotatable bonds..
  308     RotBondsAlertsStatus, RotBondsAlertsInfo = IdentifyTorsionLibraryAlertsForRotatableBonds(Mol, OptionsInfo["RotBondsPatternMol"])
  309 
  310     return [MolIndex, EncodedMol, RotBondsAlertsStatus, RotBondsAlertsInfo]
  311 
  312 def  IdentifyTorsionLibraryAlertsForRotatableBonds(Mol, RotBondsPatternMol):
  313     """Identify rotatable bonds for torsion library alerts."""
  314     
  315     # Identify rotatable bonds...
  316     RotBondsStatus, RotBondsInfo = TorsionLibraryUtil.IdentifyRotatableBondsForTorsionLibraryMatch(TorsionLibraryInfo, Mol, RotBondsPatternMol)
  317     if not RotBondsStatus:
  318         return (False, None)
  319 
  320     # Identify alerts for rotatable bonds...
  321     RotBondsAlertsStatus, RotBondsAlertsInfo = MatchRotatableBondsToTorsionLibrary(Mol, RotBondsInfo)
  322 
  323     return (RotBondsAlertsStatus, RotBondsAlertsInfo)
  324 
  325 def MatchRotatableBondsToTorsionLibrary(Mol, RotBondsInfo):
  326     """Match rotatable bond to torsion library."""
  327 
  328     # Initialize...
  329     RotBondsAlertsInfo = InitializeRotatableBondsAlertsInfo()
  330     
  331     # Match rotatable bonds to torsion library...
  332     for ID in RotBondsInfo["IDs"]:
  333         AtomIndices = RotBondsInfo["AtomIndices"][ID]
  334         HierarchyClass = RotBondsInfo["HierarchyClass"][ID]
  335 
  336         MatchStatus, MatchInfo = MatchRotatableBondToTorsionLibrary(Mol, AtomIndices, HierarchyClass)
  337 
  338         if MatchInfo is None or len(MatchInfo) == 0:
  339             AlertType, TorsionAtomIndices, TorsionAngle, TorsionAngleViolation, HierarchyClassName, HierarchySubClassName, TorsionRuleNodeID, TorsionRulePeaks, TorsionRuleTolerances1, TorsionRuleTolerances2, TorsionRuleSMARTS = [None] * 11
  340         else:
  341             AlertType, TorsionAtomIndices, TorsionAngle, TorsionAngleViolation, HierarchyClassName, HierarchySubClassName, TorsionRuleNodeID, TorsionRulePeaks, TorsionRuleTolerances1, TorsionRuleTolerances2, TorsionRuleSMARTS = MatchInfo
  342 
  343         # Track alerts information...
  344         RotBondsAlertsInfo["IDs"].append(ID)
  345         RotBondsAlertsInfo["MatchStatus"][ID] = MatchStatus
  346         RotBondsAlertsInfo["AlertTypes"][ID] = AlertType
  347         RotBondsAlertsInfo["AtomIndices"][ID] = AtomIndices
  348         RotBondsAlertsInfo["TorsionAtomIndices"][ID] = TorsionAtomIndices
  349         RotBondsAlertsInfo["TorsionAngles"][ID] = TorsionAngle
  350         RotBondsAlertsInfo["TorsionAngleViolations"][ID] = TorsionAngleViolation
  351         RotBondsAlertsInfo["HierarchyClassNames"][ID] = HierarchyClassName
  352         RotBondsAlertsInfo["HierarchySubClassNames"][ID] = HierarchySubClassName
  353         RotBondsAlertsInfo["TorsionRuleNodeID"][ID] = TorsionRuleNodeID
  354         RotBondsAlertsInfo["TorsionRulePeaks"][ID] = TorsionRulePeaks
  355         RotBondsAlertsInfo["TorsionRuleTolerances1"][ID] = TorsionRuleTolerances1
  356         RotBondsAlertsInfo["TorsionRuleTolerances2"][ID] = TorsionRuleTolerances2
  357         RotBondsAlertsInfo["TorsionRuleSMARTS"][ID] = TorsionRuleSMARTS
  358 
  359         #  Count alert types...
  360         if AlertType is not None:
  361             if AlertType not in RotBondsAlertsInfo["Count"]:
  362                 RotBondsAlertsInfo["Count"][AlertType] = 0
  363             RotBondsAlertsInfo["Count"][AlertType] += 1
  364     
  365     # Setup alert status for rotatable bonds...
  366     RotBondsAlertsStatus = False
  367     AlertsCount = 0
  368     for ID in RotBondsInfo["IDs"]:
  369         if RotBondsAlertsInfo["AlertTypes"][ID] in OptionsInfo["SpecifiedAlertsModeList"]:
  370             AlertsCount += 1
  371             if AlertsCount >= OptionsInfo["MinAlertsCount"]:
  372                 RotBondsAlertsStatus = True
  373                 break
  374     
  375     return (RotBondsAlertsStatus, RotBondsAlertsInfo)
  376 
  377 def InitializeRotatableBondsAlertsInfo():
  378     """Initialize alerts information for rotatable bonds."""
  379     
  380     RotBondsAlertsInfo = {}
  381     RotBondsAlertsInfo["IDs"] = []
  382 
  383     for DataLabel in ["MatchStatus", "AlertTypes", "AtomIndices", "TorsionAtomIndices", "TorsionAngles", "TorsionAngleViolations", "HierarchyClassNames", "HierarchySubClassNames", "TorsionRuleNodeID", "TorsionRulePeaks", "TorsionRuleTolerances1", "TorsionRuleTolerances2", "TorsionRuleSMARTS", "Count"]:
  384         RotBondsAlertsInfo[DataLabel] = {}
  385         
  386     return RotBondsAlertsInfo
  387     
  388 def MatchRotatableBondToTorsionLibrary(Mol, RotBondAtomIndices, RotBondHierarchyClass):
  389     """Match rotatable bond to torsion library."""
  390 
  391     if TorsionLibraryUtil.IsSpecificHierarchyClass(TorsionLibraryInfo, RotBondHierarchyClass):
  392         MatchStatus, MatchInfo = MatchRotatableBondAgainstSpecificHierarchyClass(Mol, RotBondAtomIndices, RotBondHierarchyClass)
  393         if not MatchStatus:
  394             MatchStatus, MatchInfo = MatchRotatableBondAgainstGenericHierarchyClass(Mol, RotBondAtomIndices, RotBondHierarchyClass)
  395     else:
  396         MatchStatus, MatchInfo = MatchRotatableBondAgainstGenericHierarchyClass(Mol, RotBondAtomIndices, RotBondHierarchyClass)
  397 
  398     return (MatchStatus, MatchInfo)
  399 
  400 def MatchRotatableBondAgainstSpecificHierarchyClass(Mol, RotBondAtomIndices, RotBondHierarchyClass):
  401     """Match rotatable bond against a specific hierarchy class."""
  402 
  403     HierarchyClassElementNode = None
  404     if RotBondHierarchyClass in TorsionLibraryInfo["SpecificClasses"]["ElementNode"]:
  405         HierarchyClassElementNode = TorsionLibraryInfo["SpecificClasses"]["ElementNode"][RotBondHierarchyClass]
  406     
  407     if HierarchyClassElementNode is None:
  408         return (False, None, None, None)
  409 
  410     TorsionLibraryUtil.TrackHierarchyClassElementNode(TorsionLibraryInfo, HierarchyClassElementNode)
  411     MatchStatus, MatchInfo = ProcessElementForRotatableBondMatch(Mol, RotBondAtomIndices, HierarchyClassElementNode)
  412     TorsionLibraryUtil.RemoveLastHierarchyClassElementNodeFromTracking(TorsionLibraryInfo)
  413     
  414     return (MatchStatus, MatchInfo)
  415 
  416 def MatchRotatableBondAgainstGenericHierarchyClass(Mol, RotBondAtomIndices, RotBondHierarchyClass):
  417     """Match rotatable bond against a generic hierarchy class."""
  418     
  419     HierarchyClassElementNode = TorsionLibraryUtil.GetGenericHierarchyClassElementNode(TorsionLibraryInfo)
  420     if HierarchyClassElementNode is None:
  421         return (False, None)
  422 
  423     TorsionLibraryUtil.TrackHierarchyClassElementNode(TorsionLibraryInfo, HierarchyClassElementNode)
  424     
  425     #  Match hierarchy subclasses before matching torsion rules...
  426     MatchStatus, MatchInfo = MatchRotatableBondAgainstGenericHierarchySubClasses(Mol, RotBondAtomIndices, HierarchyClassElementNode)
  427     
  428     if not MatchStatus:
  429         MatchStatus, MatchInfo = MatchRotatableBondAgainstGenericHierarchyTorsionRules(Mol, RotBondAtomIndices, HierarchyClassElementNode)
  430     
  431     TorsionLibraryUtil.RemoveLastHierarchyClassElementNodeFromTracking(TorsionLibraryInfo)
  432     
  433     return (MatchStatus, MatchInfo)
  434 
  435 def MatchRotatableBondAgainstGenericHierarchySubClasses(Mol, RotBondAtomIndices, HierarchyClassElementNode):
  436     """Match rotatable bond againat generic hierarchy subclasses."""
  437 
  438     for ElementChildNode in HierarchyClassElementNode:
  439         if ElementChildNode.tag != "hierarchySubClass":
  440             continue
  441         
  442         SubClassMatchStatus = ProcessHierarchySubClassElementForRotatableBondMatch(Mol, RotBondAtomIndices, ElementChildNode)
  443         
  444         if SubClassMatchStatus:
  445             MatchStatus, MatchInfo = ProcessElementForRotatableBondMatch(Mol, RotBondAtomIndices, ElementChildNode)
  446             
  447             if MatchStatus:
  448                 return (MatchStatus, MatchInfo)
  449         
  450     return(False, None)
  451 
  452 def MatchRotatableBondAgainstGenericHierarchyTorsionRules(Mol, RotBondAtomIndices, HierarchyClassElementNode):
  453     """Match rotatable bond againat torsion rules generic hierarchy class."""
  454     
  455     for ElementChildNode in HierarchyClassElementNode:
  456         if ElementChildNode.tag != "torsionRule":
  457             continue
  458         
  459         MatchStatus, MatchInfo = ProcessTorsionRuleElementForRotatableBondMatch(Mol, RotBondAtomIndices, ElementChildNode)
  460         
  461         if MatchStatus:
  462             return (MatchStatus, MatchInfo)
  463 
  464     return(False, None)
  465     
  466 def ProcessElementForRotatableBondMatch(Mol, RotBondAtomIndices, ElementNode):
  467     """Process element node to recursively match rotatable bond against hierarchy
  468     subclasses and torsion rules."""
  469     
  470     for ElementChildNode in ElementNode:
  471         if ElementChildNode.tag == "hierarchySubClass":
  472             SubClassMatchStatus = ProcessHierarchySubClassElementForRotatableBondMatch(Mol, RotBondAtomIndices, ElementChildNode)
  473             
  474             if SubClassMatchStatus:
  475                 TorsionLibraryUtil.TrackHierarchySubClassElementNode(TorsionLibraryInfo, ElementChildNode)
  476                 
  477                 MatchStatus, MatchInfo = ProcessElementForRotatableBondMatch(Mol, RotBondAtomIndices, ElementChildNode)
  478                 if MatchStatus:
  479                     TorsionLibraryUtil.RemoveLastHierarchySubClassElementNodeFromTracking(TorsionLibraryInfo)
  480                     return (MatchStatus, MatchInfo)
  481             
  482                 TorsionLibraryUtil.RemoveLastHierarchySubClassElementNodeFromTracking(TorsionLibraryInfo)
  483             
  484         elif ElementChildNode.tag == "torsionRule":
  485             MatchStatus, MatchInfo = ProcessTorsionRuleElementForRotatableBondMatch(Mol, RotBondAtomIndices, ElementChildNode)
  486             
  487             if MatchStatus:
  488                 return (MatchStatus, MatchInfo)
  489     
  490     return (False, None)
  491 
  492 def ProcessHierarchySubClassElementForRotatableBondMatch(Mol, RotBondAtomIndices, ElementNode):
  493     """Process hierarchy subclass element to match rotatable bond."""
  494     
  495     # Setup subclass SMARTS pattern mol...
  496     SubClassPatternMol = TorsionLibraryUtil.SetupHierarchySubClassElementPatternMol(TorsionLibraryInfo, ElementNode)
  497     if SubClassPatternMol is None:
  498         return False
  499 
  500     # Match SMARTS pattern...
  501     SubClassPatternMatches = RDKitUtil.FilterSubstructureMatchesByAtomMapNumbers(Mol, SubClassPatternMol, Mol.GetSubstructMatches(SubClassPatternMol, useChirality = False))
  502     if len(SubClassPatternMatches) == 0:
  503         return False
  504 
  505     # Match rotatable bond indices...
  506     RotBondAtomIndex1, RotBondAtomIndex2 = RotBondAtomIndices
  507     MatchStatus = False
  508     for SubClassPatternMatch in SubClassPatternMatches:
  509         if len(SubClassPatternMatch) == 2:
  510             # Matched to pattern containing map atom numbers ":2" and ":3"...
  511             CentralAtomsIndex1, CentralAtomsIndex2 = SubClassPatternMatch
  512         elif len(SubClassPatternMatch) == 4:
  513             # Matched to pattern containing map atom numbers ":1", ":2", ":3" and ":4"...
  514             CentralAtomsIndex1 = SubClassPatternMatch[1]
  515             CentralAtomsIndex2 = SubClassPatternMatch[2]
  516         elif len(SubClassPatternMatch) == 3:
  517             SubClassSMARTSPattern = ElementNode.get("smarts")
  518             if TorsionLibraryUtil.DoesSMARTSContainsMappedAtoms(SubClassSMARTSPattern, [":2", ":3", ":4"]):
  519                 # Matched to pattern containing map atom numbers ":2", ":3" and ":4"...
  520                 CentralAtomsIndex1 = SubClassPatternMatch[0]
  521                 CentralAtomsIndex2 = SubClassPatternMatch[1]
  522             else:
  523                 # Matched to pattern containing map atom numbers ":1", ":2" and ":3"...
  524                 CentralAtomsIndex1 = SubClassPatternMatch[1]
  525                 CentralAtomsIndex2 = SubClassPatternMatch[2]
  526         else:
  527             continue
  528 
  529         if CentralAtomsIndex1 != CentralAtomsIndex2:
  530             if ((CentralAtomsIndex1 == RotBondAtomIndex1 and CentralAtomsIndex2 == RotBondAtomIndex2) or (CentralAtomsIndex1 == RotBondAtomIndex2 and CentralAtomsIndex2 == RotBondAtomIndex1)):
  531                 MatchStatus = True
  532                 break
  533     
  534     return (MatchStatus)
  535 
  536 def ProcessTorsionRuleElementForRotatableBondMatch(Mol, RotBondAtomIndices, ElementNode):
  537     """Process torsion rule element to match rotatable bond."""
  538     
  539     #  Retrieve torsion matched to rotatable bond...
  540     TorsionAtomIndices, TorsionAngle = MatchTorsionRuleToRotatableBond(Mol, RotBondAtomIndices, ElementNode)
  541     if TorsionAtomIndices is None:
  542         return (False, None)
  543     
  544     # Setup torsion angles info for matched torsion rule...
  545     TorsionAnglesInfo = TorsionLibraryUtil.SetupTorsionRuleAnglesInfo(TorsionLibraryInfo, ElementNode)
  546     if TorsionAnglesInfo is None:
  547         return (False, None)
  548     
  549     #   Setup torsion alert type and angle violation...
  550     AlertType, TorsionAngleViolation = SetupTorsionAlertTypeForRotatableBond(TorsionAnglesInfo, TorsionAngle)
  551 
  552     # Setup hierarchy class and subclass names...
  553     HierarchyClassName, HierarchySubClassName = TorsionLibraryUtil.SetupHierarchyClassAndSubClassNamesForRotatableBond(TorsionLibraryInfo)
  554     
  555     # Setup rule node ID...
  556     TorsionRuleNodeID = ElementNode.get("NodeID")
  557     
  558     # Setup SMARTS...
  559     TorsionRuleSMARTS = ElementNode.get("smarts")
  560     if " " in TorsionRuleSMARTS:
  561         TorsionRuleSMARTS = TorsionRuleSMARTS.replace(" ", "")
  562     
  563     # Setup torsion peaks and tolerances...
  564     TorsionRulePeaks = TorsionAnglesInfo["ValuesList"]
  565     TorsionRuleTolerances1 = TorsionAnglesInfo["Tolerances1List"]
  566     TorsionRuleTolerances2 = TorsionAnglesInfo["Tolerances2List"]
  567     
  568     MatchInfo = [AlertType, TorsionAtomIndices, TorsionAngle, TorsionAngleViolation, HierarchyClassName, HierarchySubClassName, TorsionRuleNodeID, TorsionRulePeaks, TorsionRuleTolerances1, TorsionRuleTolerances2, TorsionRuleSMARTS]
  569 
  570     # Setup match status...
  571     MatchStatus = True
  572     
  573     return (MatchStatus, MatchInfo)
  574 
  575 def MatchTorsionRuleToRotatableBond(Mol, RotBondAtomIndices, ElementNode):
  576     """Retrieve matched torsion for torsion rule matched to rotatable bond."""
  577 
  578     # Get torsion matches...
  579     TorsionMatches = GetMatchesForTorsionRule(Mol, ElementNode)
  580     if TorsionMatches is None or len(TorsionMatches) == 0:
  581         return (None, None)
  582 
  583     # Identify the first torsion match corresponding to central atoms in RotBondAtomIndices...
  584     RotBondAtomIndex1, RotBondAtomIndex2 = RotBondAtomIndices
  585     for TorsionMatch in TorsionMatches:
  586         CentralAtomIndex1 = TorsionMatch[1]
  587         CentralAtomIndex2 = TorsionMatch[2]
  588         
  589         if ((CentralAtomIndex1 == RotBondAtomIndex1 and CentralAtomIndex2 == RotBondAtomIndex2) or (CentralAtomIndex1 == RotBondAtomIndex2 and CentralAtomIndex2 == RotBondAtomIndex1)):
  590             TorsionAngle = CalculateTorsionAngle(Mol, TorsionMatch)
  591             
  592             return (TorsionMatch, TorsionAngle)
  593             
  594     return (None, None)
  595 
  596 def CalculateTorsionAngle(Mol, TorsionMatch):
  597     """Calculate torsion angle."""
  598 
  599     if type(TorsionMatch[3]) is list:
  600         return CalculateTorsionAngleUsingNitrogenLonePairPosition(Mol, TorsionMatch)
  601 
  602     # Calculate torsion angle using torsion atom indices..
  603     MolConf = Mol.GetConformer(0)
  604     TorsionAngle = rdMolTransforms.GetDihedralDeg(MolConf, TorsionMatch[0], TorsionMatch[1], TorsionMatch[2], TorsionMatch[3])
  605     TorsionAngle = round(TorsionAngle, 2)
  606     
  607     return TorsionAngle
  608 
  609 def CalculateTorsionAngleUsingNitrogenLonePairPosition(Mol, TorsionMatch):
  610     """Calculate torsion angle using nitrogen lone pair positon."""
  611 
  612     # Setup a carbon atom as position holder for lone pair position...
  613     TmpMol = Chem.RWMol(Mol)
  614     LonePairAtomIndex = TmpMol.AddAtom(Chem.Atom(6))
  615     
  616     TmpMolConf = TmpMol.GetConformer(0)
  617     TmpMolConf.SetAtomPosition(LonePairAtomIndex, TorsionMatch[3])
  618 
  619     TorsionAngle = rdMolTransforms.GetDihedralDeg(TmpMolConf, TorsionMatch[0], TorsionMatch[1], TorsionMatch[2], LonePairAtomIndex)
  620     TorsionAngle = round(TorsionAngle, 2)
  621 
  622     return TorsionAngle
  623 
  624 def GetMatchesForTorsionRule(Mol, ElementNode):
  625     """Get matches for torsion rule."""
  626 
  627     # Match torsions...
  628     TorsionMatches = None
  629     if IsNitogenLonePairTorsionRule(ElementNode):
  630         TorsionMatches = GetSubstructureMatchesForNitrogenLonePairTorsionRule(Mol, ElementNode)
  631     else:
  632         TorsionMatches = GetSubstructureMatchesForTorsionRule(Mol, ElementNode)
  633     
  634     if TorsionMatches is None or len(TorsionMatches) == 0:
  635         return TorsionMatches
  636 
  637     # Filter torsion matches...
  638     FiltertedTorsionMatches = []
  639     for TorsionMatch in TorsionMatches:
  640         if len(TorsionMatch) != 4:
  641             continue
  642 
  643         # Ignore matches containing hydrogen atoms as first or last atom...
  644         if Mol.GetAtomWithIdx(TorsionMatch[0]).GetAtomicNum() == 1:
  645             continue
  646         if type(TorsionMatch[3]) is int:
  647             # May contains a list for type two nitrogen lone pair match...
  648             if Mol.GetAtomWithIdx(TorsionMatch[3]).GetAtomicNum() == 1:
  649                 continue
  650         FiltertedTorsionMatches.append(TorsionMatch)
  651     
  652     return FiltertedTorsionMatches
  653 
  654 def GetSubstructureMatchesForTorsionRule(Mol, ElementNode):
  655     """Get substructure matches for a torsion rule."""
  656     
  657     # Setup torsion rule SMARTS pattern mol....
  658     TorsionRuleNodeID = ElementNode.get("NodeID")
  659     TorsionSMARTSPattern = ElementNode.get("smarts")
  660     TorsionPatternMol = TorsionLibraryUtil.SetupTorsionRuleElementPatternMol(TorsionLibraryInfo, ElementNode, TorsionRuleNodeID, TorsionSMARTSPattern)
  661     if TorsionPatternMol is None:
  662         return None
  663     
  664     # Match torsions...
  665     TorsionMatches = RDKitUtil.FilterSubstructureMatchesByAtomMapNumbers(Mol, TorsionPatternMol, Mol.GetSubstructMatches(TorsionPatternMol, useChirality = False))
  666     
  667     return TorsionMatches
  668 
  669 def GetSubstructureMatchesForNitrogenLonePairTorsionRule(Mol, ElementNode):
  670     """Get substructure matches for a torsion rule containing N_lp."""
  671 
  672     if IsTypeOneNitogenLonePairTorsionRule(ElementNode):
  673         return GetSubstructureMatchesForTypeOneNitrogenLonePairTorsionRule(Mol, ElementNode)
  674     elif IsTypeTwoNitogenLonePairTorsionRule(ElementNode):
  675         return GetSubstructureMatchesForTypeTwoNitrogenLonePairTorsionRule(Mol, ElementNode)
  676     
  677     return None
  678 
  679 def GetSubstructureMatchesForTypeOneNitrogenLonePairTorsionRule(Mol, ElementNode):
  680     """Get substructure matches for a torsion rule containing N_lp and four mapped atoms."""
  681 
  682     # For example:
  683     #    [CX4:1][CX4H2:2]!@[NX3;"N_lp":3][CX4:4]
  684     #    [C:1][CX4H2:2]!@[NX3;"N_lp":3][C:4]
  685     #    ... ... ...
  686 
  687     TorsionRuleNodeID = ElementNode.get("NodeID")
  688     TorsionPatternMol, LonePairMapNumber = SetupNitrogenLonePairTorsionRuleElementInfo(ElementNode, TorsionRuleNodeID)
  689     
  690     if TorsionPatternMol is None:
  691         return None
  692     
  693     if LonePairMapNumber is None:
  694         return None
  695     
  696     # Match torsions...
  697     TorsionMatches = RDKitUtil.FilterSubstructureMatchesByAtomMapNumbers(Mol, TorsionPatternMol, Mol.GetSubstructMatches(TorsionPatternMol, useChirality = False))
  698 
  699     # Filter matches...
  700     FiltertedTorsionMatches = []
  701     for TorsionMatch in TorsionMatches:
  702         if len(TorsionMatch) != 4:
  703             continue
  704         
  705         # Check for Nitogen atom at LonePairMapNumber...
  706         LonePairNitrogenAtom = Mol.GetAtomWithIdx(TorsionMatch[LonePairMapNumber - 1])
  707         if LonePairNitrogenAtom.GetSymbol() != "N":
  708             continue
  709         
  710         # Make sure LonePairNitrogenAtom is planar...
  711         #  test
  712         PlanarityStatus = IsLonePairNitrogenAtomPlanar(Mol, LonePairNitrogenAtom)
  713         if PlanarityStatus is None:
  714             continue
  715         
  716         if  not PlanarityStatus:
  717             continue
  718         
  719         FiltertedTorsionMatches.append(TorsionMatch)
  720         
  721     return FiltertedTorsionMatches
  722     
  723 def GetSubstructureMatchesForTypeTwoNitrogenLonePairTorsionRule(Mol, ElementNode):
  724     """Get substructure matches for a torsion rule containing N_lp and three mapped atoms."""
  725 
  726     # For example:
  727     # [!#1:1][CX4:2]!@[NX3;"N_lp":3]
  728     # [!#1:1][$(S(=O)=O):2]!@["N_lp":3]@[Cr3]
  729     # [C:1][$(S(=O)=O):2]!@["N_lp":3]
  730     # [c:1][$(S(=O)=O):2]!@["N_lp":3]
  731     # [!#1:1][$(S(=O)=O):2]!@["N_lp":3]
  732     #    ... ... ...
  733     
  734     TorsionRuleNodeID = ElementNode.get("NodeID")
  735     TorsionPatternMol, LonePairMapNumber = SetupNitrogenLonePairTorsionRuleElementInfo(ElementNode, TorsionRuleNodeID)
  736     
  737     if TorsionPatternMol is None:
  738         return None
  739     
  740     if not IsValidTypeTwoNitrogenLonePairMapNumber(LonePairMapNumber):
  741         return None
  742 
  743     # Match torsions...
  744     TorsionMatches = RDKitUtil.FilterSubstructureMatchesByAtomMapNumbers(Mol, TorsionPatternMol, Mol.GetSubstructMatches(TorsionPatternMol, useChirality = False))
  745 
  746     # Filter matches...
  747     FiltertedTorsionMatches = []
  748     for TorsionMatch in TorsionMatches:
  749         if len(TorsionMatch) != 3:
  750             continue
  751         
  752         # Check for Nitogen atom at LonePairMapNumber...
  753         LonePairNitrogenAtom = Mol.GetAtomWithIdx(TorsionMatch[LonePairMapNumber - 1])
  754         if LonePairNitrogenAtom.GetSymbol() != "N":
  755             continue
  756 
  757         # Make sure LonePairNitrogenAtom is not planar...
  758         PlanarityStatus = IsLonePairNitrogenAtomPlanar(Mol, LonePairNitrogenAtom)
  759         
  760         if PlanarityStatus is None:
  761             continue
  762         
  763         if  PlanarityStatus:
  764             continue
  765         
  766         # Calculate lone pair coordinates for a non-planar nitrogen...
  767         LonePairPosition = CalculateLonePairCoordinatesForNitrogenAtom(Mol, LonePairNitrogenAtom)
  768         if LonePairPosition is None:
  769             continue
  770 
  771         # Append lone pair coodinate list to list of torsion match containing atom indices...
  772         TorsionMatch.append(LonePairPosition)
  773 
  774         # Track torsion matches...
  775         FiltertedTorsionMatches.append(TorsionMatch)
  776         
  777     return FiltertedTorsionMatches
  778 
  779 def SetupNitrogenLonePairTorsionRuleElementInfo(ElementNode, TorsionRuleNodeID):
  780     """Setup pattern molecule and lone pair map number for type one and type
  781     two nitrogen lone pair rules."""
  782 
  783     TorsionPatternMol, LonePairMapNumber = [None] * 2
  784     
  785     if TorsionRuleNodeID in TorsionLibraryInfo["DataCache"]["TorsionRulePatternMol"]:
  786         TorsionPatternMol = TorsionLibraryInfo["DataCache"]["TorsionRulePatternMol"][TorsionRuleNodeID]
  787         LonePairMapNumber = TorsionLibraryInfo["DataCache"]["TorsionRuleLonePairMapNumber"][TorsionRuleNodeID]
  788     else:
  789         # Setup torsion pattern...
  790         TorsionSMARTSPattern = ElementNode.get("smarts")
  791         TorsionSMARTSPattern, LonePairMapNumber = ProcessSMARTSForNitrogenLonePairTorsionRule(TorsionSMARTSPattern)
  792         
  793         # Setup torsion pattern mol...
  794         TorsionPatternMol = Chem.MolFromSmarts(TorsionSMARTSPattern)
  795         if TorsionPatternMol is None:
  796             MiscUtil.PrintWarning("Ignoring torsion rule element containing invalid map atoms numbers in SMARTS pattern %s" % TorsionSMARTSPattern)
  797 
  798         # Cache data...
  799         TorsionLibraryInfo["DataCache"]["TorsionRulePatternMol"][TorsionRuleNodeID] = TorsionPatternMol
  800         TorsionLibraryInfo["DataCache"]["TorsionRuleLonePairMapNumber"][TorsionRuleNodeID] = LonePairMapNumber
  801 
  802     return (TorsionPatternMol, LonePairMapNumber)
  803 
  804 def IsLonePairNitrogenAtomPlanar(Mol, NitrogenAtom):
  805     """Check for the planarity of nitrogen atom and its three neighbors."""
  806 
  807     AllowHydrogenNbrs = OptionsInfo["NitrogenLonePairParams"]["AllowHydrogenNbrs"]
  808     Tolerance = OptionsInfo["NitrogenLonePairParams"]["PlanarityTolerance"]
  809     
  810     # Get neighbors...
  811     if AllowHydrogenNbrs:
  812         AtomNeighbors = NitrogenAtom.GetNeighbors()
  813     else:
  814         AtomNeighbors = RDKitUtil.GetHeavyAtomNeighbors(NitrogenAtom)
  815 
  816     if len(AtomNeighbors) != 3:
  817         return None
  818 
  819     # Setup atom positions...
  820     AtomPositions = []
  821     MolAtomsPositions = RDKitUtil.GetAtomPositions(Mol)
  822 
  823     # Neighbor positions...
  824     for AtomNbr in AtomNeighbors:
  825         AtomNbrIndex = AtomNbr.GetIdx()
  826         AtomPositions.append(MolAtomsPositions[AtomNbrIndex])
  827 
  828     # Nitrogen position...
  829     NitrogenAtomIndex = NitrogenAtom.GetIdx()
  830     AtomPositions.append(MolAtomsPositions[NitrogenAtomIndex])
  831     
  832     Status =  AreFourPointsCoplanar(AtomPositions[0], AtomPositions[1], AtomPositions[2], AtomPositions[3], Tolerance)
  833     
  834     return Status
  835 
  836 def AreFourPointsCoplanar(Point1, Point2, Point3, Point4, Tolerance = 1.0):
  837     """Check whether four points are coplanar with in the threshold of 1 degree."""
  838 
  839     # Setup  normalized direction vectors...
  840     VectorP2P1 = NormalizeVector(np.subtract(Point2, Point1))
  841     VectorP3P1 = NormalizeVector(np.subtract(Point3, Point1))
  842     VectorP1P4 = NormalizeVector(np.subtract(Point1, Point4))
  843 
  844     # Calculate angle between VectorP1P4 and normal to vectors VectorP2P1 and VectorP3P1...
  845     PlaneP1P2P3Normal = NormalizeVector(np.cross(VectorP2P1, VectorP3P1))
  846     PlanarityAngle = np.arccos(np.clip(np.dot(PlaneP1P2P3Normal, VectorP1P4), -1.0, 1.0))
  847     
  848     Status = math.isclose(PlanarityAngle, math.radians(90), abs_tol=math.radians(Tolerance))
  849 
  850     return Status
  851 
  852 def NormalizeVector(Vector):
  853     """Normalize vector."""
  854 
  855     Norm = np.linalg.norm(Vector)
  856     
  857     return Vector if math.isclose(Norm, 0.0, abs_tol = 1e-08) else Vector/Norm
  858 
  859 def CalculateLonePairCoordinatesForNitrogenAtom(Mol, NitrogenAtom):
  860     """Calculate approximate lone pair coordinates for non-plannar nitrogen atom."""
  861 
  862     AllowHydrogenNbrs = OptionsInfo["NitrogenLonePairParams"]["AllowHydrogenNbrs"]
  863     
  864     # Get neighbors...
  865     if AllowHydrogenNbrs:
  866         AtomNeighbors = NitrogenAtom.GetNeighbors()
  867     else:
  868         AtomNeighbors = RDKitUtil.GetHeavyAtomNeighbors(NitrogenAtom)
  869         
  870     if len(AtomNeighbors) != 3:
  871         return None
  872     
  873     # Setup positions for nitrogen and its neghbors...
  874     MolAtomsPositions = RDKitUtil.GetAtomPositions(Mol)
  875 
  876     NitrogenPosition = MolAtomsPositions[NitrogenAtom.GetIdx()]
  877     NbrPositions = []
  878     for AtomNbr in AtomNeighbors:
  879         NbrPositions.append(MolAtomsPositions[AtomNbr.GetIdx()])
  880     Nbr1Position, Nbr2Position, Nbr3Position = NbrPositions
  881 
  882     # Setup  normalized direction vectors...
  883     VectorP2P1 = NormalizeVector(np.subtract(Nbr2Position, Nbr1Position))
  884     VectorP3P1 = NormalizeVector(np.subtract(Nbr3Position, Nbr1Position))
  885     VectorP1P4 = NormalizeVector(np.subtract(Nbr1Position, NitrogenPosition))
  886 
  887     # Calculate angle between VectorP1P4 and normal to vectors VectorP2P1 and VectorP3P1...
  888     PlaneP1P2P3Normal = NormalizeVector(np.cross(VectorP2P1, VectorP3P1))
  889     PlanarityAngle = np.arccos(np.clip(np.dot(PlaneP1P2P3Normal, VectorP1P4), -1.0, 1.0))
  890 
  891     # Check for reversing the direction of the normal...
  892     if PlanarityAngle < math.radians(90):
  893         PlaneP1P2P3Normal = PlaneP1P2P3Normal * -1
  894 
  895     # Add normal to nitrogen cooridnates for the approximate coordinates of the
  896     # one pair. The exact VSEPR coordinates of the lone pair are not necessary to
  897     # calculate the torsion angle...
  898     LonePairPosition = NitrogenPosition + PlaneP1P2P3Normal
  899     
  900     return list(LonePairPosition)
  901 
  902 def ProcessSMARTSForNitrogenLonePairTorsionRule(SMARTSPattern):
  903     """Process SMARTS pattern for a torion rule containing N_lp."""
  904 
  905     LonePairMapNumber = GetNitrogenLonePairMapNumber(SMARTSPattern)
  906     
  907     # Remove double quotes around N_lp..
  908     SMARTSPattern = re.sub("\"N_lp\"", "N_lp", SMARTSPattern, re.I)
  909     
  910     # Remove N_lp specification from SMARTS pattern for torsion rule...
  911     if re.search("\[N_lp", SMARTSPattern, re.I):
  912         # Handle missing NX3...
  913         SMARTSPattern = re.sub("\[N_lp", "[NX3", SMARTSPattern)
  914     else:
  915         SMARTSPattern = re.sub(";N_lp", "", SMARTSPattern)
  916         
  917     return (SMARTSPattern, LonePairMapNumber)
  918 
  919 def GetNitrogenLonePairMapNumber(SMARTSPattern):
  920     """Get atom map number for nitrogen involved in N_lp."""
  921     
  922     LonePairMapNumber = None
  923     
  924     SMARTSPattern = re.sub("\"N_lp\"", "N_lp", SMARTSPattern, re.I)
  925     MatchedMappedAtoms = re.findall("N_lp:[0-9]", SMARTSPattern, re.I)
  926     
  927     if len(MatchedMappedAtoms) == 1:
  928         LonePairMapNumber = int(re.sub("N_lp:", "", MatchedMappedAtoms[0]))
  929 
  930     return LonePairMapNumber
  931 
  932 def IsNitogenLonePairTorsionRule(ElementNode):
  933     """Check for the presence of N_lp in SMARTS pattern for a torsion rule."""
  934 
  935     if "N_lp" not  in ElementNode.get("smarts"):
  936         return False
  937     
  938     LonePairMatches = re.findall("N_lp", ElementNode.get("smarts"), re.I)
  939     
  940     return True if len(LonePairMatches) == 1 else False
  941     
  942 def IsTypeOneNitogenLonePairTorsionRule(ElementNode):
  943     """Check for the presence four mapped atoms in a SMARTS pattern containing
  944     N_lp for a torsion rule."""
  945 
  946     # For example:
  947     #    [CX4:1][CX4H2:2]!@[NX3;"N_lp":3][CX4:4]
  948     #    [C:1][CX4H2:2]!@[NX3;"N_lp":3][C:4]
  949     #    ... ... ...
  950     
  951     MatchedMappedAtoms = re.findall(":[0-9]", ElementNode.get("smarts"), re.I)
  952 
  953     return True if len(MatchedMappedAtoms) == 4 else False
  954     
  955 def IsTypeTwoNitogenLonePairTorsionRule(ElementNode):
  956     """Check for the presence three mapped atoms in a SMARTS pattern containing
  957     N_lp for a torsion rule."""
  958 
  959     # For example:
  960     # [!#1:1][CX4:2]!@[NX3;"N_lp":3]
  961     # [!#1:1][$(S(=O)=O):2]!@["N_lp":3]@[Cr3]
  962     # [C:1][$(S(=O)=O):2]!@["N_lp":3]
  963     # [c:1][$(S(=O)=O):2]!@["N_lp":3]
  964     # [!#1:1][$(S(=O)=O):2]!@["N_lp":3]
  965     #
  966     
  967     MatchedMappedAtoms = re.findall(":[0-9]", ElementNode.get("smarts"), re.I)
  968 
  969     return True if len(MatchedMappedAtoms) == 3 else False
  970 
  971 def IsValidTypeTwoNitogenLonePairTorsionRule(ElementNode):
  972     """Validate atom map number for nitrogen involved in N_lp for type two nitrogen
  973     lone pair torsion rule."""
  974 
  975     LonePairMapNumber = GetNitrogenLonePairMapNumber(ElementNode.get("smarts"))
  976     
  977     return IsValidTypeTwoNitrogenLonePairMapNumber(LonePairMapNumber)
  978 
  979 def IsValidTypeTwoNitrogenLonePairMapNumber(LonePairMapNumber):
  980     """Check that  the atom map number is 3."""
  981 
  982     return True if LonePairMapNumber is not None and LonePairMapNumber == 3 else False
  983 
  984 def SetupTorsionAlertTypeForRotatableBond(TorsionAnglesInfo, TorsionAngle):
  985     """Setup torsion alert type and angle violation for a rotatable bond."""
  986 
  987     TorsionCategory, TorsionAngleViolation = [None, None]
  988     
  989     for ID in TorsionAnglesInfo["IDs"]:
  990         if IsTorsionAngleInWithinTolerance(TorsionAngle, TorsionAnglesInfo["Value"][ID], TorsionAnglesInfo["Tolerance1"][ID]):
  991             TorsionCategory = "Green"
  992             TorsionAngleViolation = 0.0
  993             break
  994         
  995         if IsTorsionAngleInWithinTolerance(TorsionAngle, TorsionAnglesInfo["Value"][ID], TorsionAnglesInfo["Tolerance2"][ID]):
  996             TorsionCategory = "Orange"
  997             TorsionAngleViolation = CalculateTorsionAngleViolation(TorsionAngle, TorsionAnglesInfo["ValuesIn360RangeList"], TorsionAnglesInfo["Tolerances1List"])
  998             break
  999 
 1000     if TorsionCategory is None:
 1001         TorsionCategory = "Red"
 1002         TorsionAngleViolation = CalculateTorsionAngleViolation(TorsionAngle, TorsionAnglesInfo["ValuesIn360RangeList"], TorsionAnglesInfo["Tolerances2List"])
 1003         
 1004     return (TorsionCategory, TorsionAngleViolation)
 1005 
 1006 def IsTorsionAngleInWithinTolerance(TorsionAngle, TorsionPeak, TorsionTolerance):
 1007     """Check torsion angle against torsion tolerance."""
 1008 
 1009     TorsionAngleDiff = TorsionLibraryUtil.CalculateTorsionAngleDifference(TorsionPeak, TorsionAngle)
 1010     
 1011     return True if (abs(TorsionAngleDiff) <= TorsionTolerance) else False
 1012 
 1013 def CalculateTorsionAngleViolation(TorsionAngle, TorsionPeaks, TorsionTolerances):
 1014     """Calculate torsion angle violation."""
 1015 
 1016     TorsionAngleViolation = None
 1017 
 1018     # Map angle to 0 to 360 range. TorsionPeaks values must be in this range...
 1019     if TorsionAngle < 0:
 1020         TorsionAngle = TorsionAngle + 360
 1021 
 1022     # Identify the closest torsion peak index...
 1023     if len(TorsionPeaks) == 1:
 1024         NearestPeakIndex = 0
 1025     else:
 1026         NearestPeakIndex = min(range(len(TorsionPeaks)), key=lambda Index: abs(TorsionPeaks[Index] - TorsionAngle))
 1027 
 1028     # Calculate torsion angle violation from the nearest peak and its tolerance value...
 1029     TorsionAngleDiff = TorsionLibraryUtil.CalculateTorsionAngleDifference(TorsionPeaks[NearestPeakIndex], TorsionAngle)
 1030     TorsionAngleViolation = abs(abs(TorsionAngleDiff) - TorsionTolerances[NearestPeakIndex])
 1031     
 1032     return TorsionAngleViolation
 1033 
 1034 def InitializeTorsionAlertsSummaryInfo():
 1035     """Initialize torsion alerts summary."""
 1036 
 1037     if OptionsInfo["CountMode"]:
 1038         return None
 1039     
 1040     if not OptionsInfo["TrackAlertsSummaryInfo"]:
 1041         return None
 1042     
 1043     TorsionAlertsSummaryInfo = {}
 1044     TorsionAlertsSummaryInfo["RuleIDs"] = []
 1045 
 1046     for DataLabel in ["SMARTSToRuleIDs", "RuleSMARTS", "HierarchyClassName", "HierarchySubClassName", "TorsionRulePeaks", "TorsionRuleTolerances1", "TorsionRuleTolerances2", "AlertTypes", "AlertTypesMolCount"]:
 1047         TorsionAlertsSummaryInfo[DataLabel] = {}
 1048         
 1049     return TorsionAlertsSummaryInfo
 1050 
 1051 def TrackTorsionAlertsSummaryInfo(TorsionAlertsSummaryInfo, RotBondsAlertsInfo):
 1052     """Track torsion alerts summary information for matched torsion rules in a
 1053     molecule."""
 1054     
 1055     if OptionsInfo["CountMode"]:
 1056         return
 1057     
 1058     if not OptionsInfo["TrackAlertsSummaryInfo"]:
 1059         return
 1060 
 1061     if RotBondsAlertsInfo is None:
 1062         return
 1063 
 1064     MolAlertsInfo = {}
 1065     MolAlertsInfo["RuleIDs"] = []
 1066     MolAlertsInfo["AlertTypes"] = {}
 1067     
 1068     for ID in RotBondsAlertsInfo["IDs"]:
 1069         if not RotBondsAlertsInfo["MatchStatus"][ID]:
 1070             continue
 1071 
 1072         if OptionsInfo["OutfileAlertsOnly"]:
 1073             if RotBondsAlertsInfo["AlertTypes"][ID] not in OptionsInfo["SpecifiedAlertsModeList"]:
 1074                 continue
 1075         
 1076         AlertType = RotBondsAlertsInfo["AlertTypes"][ID]
 1077         TorsionRuleNodeID = RotBondsAlertsInfo["TorsionRuleNodeID"][ID]
 1078         TorsionRuleSMARTS = RotBondsAlertsInfo["TorsionRuleSMARTS"][ID]
 1079 
 1080         # Track data for torsion alert summary information across molecules...
 1081         if TorsionRuleNodeID not in TorsionAlertsSummaryInfo["RuleSMARTS"]:
 1082             TorsionAlertsSummaryInfo["RuleIDs"].append(TorsionRuleNodeID)
 1083             TorsionAlertsSummaryInfo["SMARTSToRuleIDs"][TorsionRuleSMARTS] = TorsionRuleNodeID
 1084             
 1085             TorsionAlertsSummaryInfo["RuleSMARTS"][TorsionRuleNodeID] = TorsionRuleSMARTS
 1086             TorsionAlertsSummaryInfo["HierarchyClassName"][TorsionRuleNodeID] = RotBondsAlertsInfo["HierarchyClassNames"][ID]
 1087             TorsionAlertsSummaryInfo["HierarchySubClassName"][TorsionRuleNodeID] = RotBondsAlertsInfo["HierarchySubClassNames"][ID]
 1088 
 1089             TorsionAlertsSummaryInfo["TorsionRulePeaks"][TorsionRuleNodeID] = RotBondsAlertsInfo["TorsionRulePeaks"][ID]
 1090             TorsionAlertsSummaryInfo["TorsionRuleTolerances1"][TorsionRuleNodeID] = RotBondsAlertsInfo["TorsionRuleTolerances1"][ID]
 1091             TorsionAlertsSummaryInfo["TorsionRuleTolerances2"][TorsionRuleNodeID] = RotBondsAlertsInfo["TorsionRuleTolerances2"][ID]
 1092             
 1093             # Initialize number of alert types across all molecules...
 1094             TorsionAlertsSummaryInfo["AlertTypes"][TorsionRuleNodeID] = {}
 1095             
 1096             # Initialize number of molecules flagged by each alert type...
 1097             TorsionAlertsSummaryInfo["AlertTypesMolCount"][TorsionRuleNodeID] = {}
 1098         
 1099         if AlertType not in TorsionAlertsSummaryInfo["AlertTypes"][TorsionRuleNodeID]:
 1100             TorsionAlertsSummaryInfo["AlertTypes"][TorsionRuleNodeID][AlertType] = 0
 1101             TorsionAlertsSummaryInfo["AlertTypesMolCount"][TorsionRuleNodeID][AlertType] = 0
 1102         
 1103         TorsionAlertsSummaryInfo["AlertTypes"][TorsionRuleNodeID][AlertType] += 1
 1104 
 1105         # Track data for torsion alert information in a molecule...
 1106         if TorsionRuleNodeID not in MolAlertsInfo["AlertTypes"]:
 1107             MolAlertsInfo["RuleIDs"].append(TorsionRuleNodeID)
 1108             MolAlertsInfo["AlertTypes"][TorsionRuleNodeID] = {}
 1109         
 1110         if AlertType not in MolAlertsInfo["AlertTypes"][TorsionRuleNodeID]:
 1111             MolAlertsInfo["AlertTypes"][TorsionRuleNodeID][AlertType] = 0
 1112         MolAlertsInfo["AlertTypes"][TorsionRuleNodeID][AlertType] += 1
 1113 
 1114     # Track number of molecules flagged by a specific torsion alert...
 1115     for TorsionRuleNodeID in MolAlertsInfo["RuleIDs"]:
 1116         for AlertType in MolAlertsInfo["AlertTypes"][TorsionRuleNodeID]:
 1117             if MolAlertsInfo["AlertTypes"][TorsionRuleNodeID][AlertType]:
 1118                 TorsionAlertsSummaryInfo["AlertTypesMolCount"][TorsionRuleNodeID][AlertType] += 1
 1119 
 1120 def WriteTorsionAlertsSummaryInfo(Writer, TorsionAlertsSummaryInfo):
 1121     """Write out torsion alerts summary informatio to a CSV file."""
 1122     
 1123     if OptionsInfo["CountMode"]:
 1124         return
 1125     
 1126     if not OptionsInfo["OutfileSummaryMode"]:
 1127         return
 1128 
 1129     if len(TorsionAlertsSummaryInfo["RuleIDs"]) == 0:
 1130         return
 1131     
 1132     # Write headers...
 1133     QuoteValues = True
 1134     Values = ["TorsionRule", "TorsionPeaks", "Tolerances1", "Tolerances2", "HierarchyClass", "HierarchySubClass", "TorsionAlertTypes", "TorsionAlertCount", "TorsionAlertMolCount"]
 1135     Writer.write("%s\n" % MiscUtil.JoinWords(Values, ",", QuoteValues))
 1136 
 1137     SortedRuleIDs = GetSortedTorsionAlertsSummaryInfoRuleIDs(TorsionAlertsSummaryInfo)
 1138 
 1139     # Write alerts information...
 1140     for ID in SortedRuleIDs:
 1141         # Remove any double quotes in SMARTS...
 1142         RuleSMARTS = TorsionAlertsSummaryInfo["RuleSMARTS"][ID]
 1143         RuleSMARTS = re.sub("\"", "", RuleSMARTS, re.I)
 1144         
 1145         HierarchyClassName = TorsionAlertsSummaryInfo["HierarchyClassName"][ID]
 1146         HierarchySubClassName = TorsionAlertsSummaryInfo["HierarchySubClassName"][ID]
 1147 
 1148         TorsionPeaks = MiscUtil.JoinWords(["%s" % Value for Value in TorsionAlertsSummaryInfo["TorsionRulePeaks"][ID]], ",")
 1149         TorsionRuleTolerances1 = MiscUtil.JoinWords(["%s" % Value for Value in TorsionAlertsSummaryInfo["TorsionRuleTolerances1"][ID]], ",")
 1150         TorsionRuleTolerances2 = MiscUtil.JoinWords(["%s" % Value for Value in TorsionAlertsSummaryInfo["TorsionRuleTolerances2"][ID]], ",")
 1151         
 1152         AlertTypes = []
 1153         AlertTypeCount = []
 1154         AlertTypeMolCount = []
 1155         for AlertType in sorted(TorsionAlertsSummaryInfo["AlertTypes"][ID]):
 1156             AlertTypes.append(AlertType)
 1157             AlertTypeCount.append("%s" % TorsionAlertsSummaryInfo["AlertTypes"][ID][AlertType])
 1158             AlertTypeMolCount.append("%s" % TorsionAlertsSummaryInfo["AlertTypesMolCount"][ID][AlertType])
 1159         
 1160         Values = [RuleSMARTS, TorsionPeaks, TorsionRuleTolerances1, TorsionRuleTolerances2, HierarchyClassName, HierarchySubClassName, "%s" % MiscUtil.JoinWords(AlertTypes, ","), "%s" % (MiscUtil.JoinWords(AlertTypeCount, ",")), "%s" % (MiscUtil.JoinWords(AlertTypeMolCount, ","))]
 1161         Writer.write("%s\n" % MiscUtil.JoinWords(Values, ",", QuoteValues))
 1162 
 1163 def GetSortedTorsionAlertsSummaryInfoRuleIDs(TorsionAlertsSummaryInfo):
 1164     """Sort torsion rule IDs by  alert types molecule count in descending order."""
 1165 
 1166     SortedRuleIDs = []
 1167     
 1168     RuleIDs = TorsionAlertsSummaryInfo["RuleIDs"]
 1169     if len(RuleIDs) == 0:
 1170         return SortedRuleIDs
 1171     
 1172     # Setup a map from AlertTypesMolCount to IDs for sorting alerts...
 1173     RuleIDs = TorsionAlertsSummaryInfo["RuleIDs"]
 1174     MolCountMap = {}
 1175     for ID in RuleIDs:
 1176         MolCount = 0
 1177         for AlertType in sorted(TorsionAlertsSummaryInfo["AlertTypes"][ID]):
 1178             MolCount += TorsionAlertsSummaryInfo["AlertTypesMolCount"][ID][AlertType]
 1179         MolCountMap[ID] = MolCount
 1180 
 1181     SortedRuleIDs = sorted(RuleIDs, key = lambda ID: MolCountMap[ID], reverse = True)
 1182     
 1183     return SortedRuleIDs
 1184 
 1185 def WriteTorsionAlertsFilteredByRulesInfo(TorsionAlertsSummaryInfo):
 1186     """Write out torsion alerts SD files for individual torsion rules."""
 1187     
 1188     if OptionsInfo["CountMode"]:
 1189         return
 1190     
 1191     if not OptionsInfo["OutfilesFilteredByRulesMode"]:
 1192         return
 1193 
 1194     if len(TorsionAlertsSummaryInfo["RuleIDs"]) == 0:
 1195         return
 1196 
 1197     # Setup a molecule reader for filtered molecules...
 1198     FilteredMols  = RDKitUtil.ReadMolecules(OptionsInfo["OutfileFiltered"], **OptionsInfo["InfileParams"])
 1199 
 1200     # Get torsion rule IDs for writing out filtered SD files for individual torsion alert rules... 
 1201     TorsionRuleIDs = GetTorsionAlertsFilteredByRuleFilesRuleIDs(TorsionAlertsSummaryInfo)
 1202 
 1203     # Setup writers...
 1204     ByRuleOutfilesWriters = SetupByRuleOutfilesWriters(TorsionRuleIDs)
 1205     
 1206     for Mol in FilteredMols:
 1207         # Retrieve torsion alerts info...
 1208         TorsionAlertsInfo = RetrieveTorsionAlertsInfo(Mol, TorsionAlertsSummaryInfo)
 1209         if TorsionAlertsInfo is None:
 1210             continue
 1211         
 1212         for TorsionRuleID in TorsionRuleIDs:
 1213             if TorsionRuleID not in TorsionAlertsInfo["RuleSMARTS"]:
 1214                 continue
 1215             
 1216             WriteMoleculeFilteredByRuleID(ByRuleOutfilesWriters[TorsionRuleID], Mol, TorsionRuleID, TorsionAlertsSummaryInfo, TorsionAlertsInfo)
 1217         
 1218     CloseByRuleOutfilesWriters(ByRuleOutfilesWriters)
 1219     
 1220 def GetTorsionAlertsFilteredByRuleFilesRuleIDs(TorsionAlertsSummaryInfo):
 1221     """Get torsion rule IDs for writing out individual SD files filtered by torsion alert rules."""
 1222     
 1223     # Get torsion rule IDs triggering torsion alerts sorted in the order from the most to
 1224     # the least number of unique molecules...
 1225     RuleIDs = GetSortedTorsionAlertsSummaryInfoRuleIDs(TorsionAlertsSummaryInfo)
 1226 
 1227     # Select torsion rule IDs for writing out SD files...
 1228     if not OptionsInfo["OutfilesFilteredByRulesAllMode"]:
 1229         MaxRuleIDs = OptionsInfo["OutfilesFilteredByRulesMaxCount"]
 1230         if MaxRuleIDs < len(RuleIDs):
 1231             RuleIDs = RuleIDs[0:MaxRuleIDs]
 1232     
 1233     return RuleIDs
 1234 
 1235 def RetrieveTorsionAlertsInfo(Mol, TorsionAlertsSummaryInfo):
 1236     """Parse torsion alerts data field value to retrieve alerts information for rotatable bonds."""
 1237     
 1238     TorsionAlertsLabel = OptionsInfo["SDFieldIDsToLabels"]["TorsionAlertsLabel"]
 1239     TorsionAlerts = Mol.GetProp(TorsionAlertsLabel) if Mol.HasProp(TorsionAlertsLabel) else None
 1240     
 1241     if TorsionAlerts is None or len(TorsionAlerts) == 0:
 1242         return None
 1243 
 1244     # Initialize for tracking by rule IDs...
 1245     TorsionAlertsInfo = {}
 1246     TorsionAlertsInfo["RuleIDs"] = []
 1247     
 1248     for DataLabel in ["RuleSMARTS", "HierarchyClassName", "HierarchySubClassName", "TorsionRulePeaks", "TorsionRuleTolerances1", "TorsionRuleTolerances2", "AlertTypes", "AtomIndices", "TorsionAtomIndices", "TorsionAngles", "TorsionAngleViolations", "AlertTypesCount"]:
 1249         TorsionAlertsInfo[DataLabel] = {}
 1250         
 1251     ValuesDelimiter = OptionsInfo["IntraSetValuesDelim"]
 1252     TorsionAlertsSetSize = 11
 1253     
 1254     TorsionAlertsWords = TorsionAlerts.split()
 1255     if len(TorsionAlertsWords) % TorsionAlertsSetSize:
 1256         MiscUtil.PrintError("The number of space delimited values, %s, for TorsionAlerts data field in filtered SD file must be a multiple of %s." % (len(TorsionAlertsWords), TorsionAlertsSetSize))
 1257 
 1258     ID = 0
 1259     for Index in range(0, len(TorsionAlertsWords), TorsionAlertsSetSize):
 1260         ID += 1
 1261         
 1262         RotBondIndices, TorsionAlertType, TorsionIndices, TorsionAngle, TorsionAngleViolation, HierarchyClass, HierarchySubClass, TorsionPeaks, Tolerances1, Tolerances2, TorsionRule = TorsionAlertsWords[Index: Index + TorsionAlertsSetSize]
 1263         RotBondIndices = RotBondIndices.split(ValuesDelimiter)
 1264         TorsionIndices = TorsionIndices.split(ValuesDelimiter)
 1265         TorsionPeaks = TorsionPeaks.split(ValuesDelimiter)
 1266         Tolerances1 = Tolerances1.split(ValuesDelimiter)
 1267         Tolerances2 = Tolerances2.split(ValuesDelimiter)
 1268 
 1269         if TorsionRule not in TorsionAlertsSummaryInfo["SMARTSToRuleIDs"]:
 1270             MiscUtil.PrintWarning("The SMARTS pattern, %s, for TorsionAlerts data field in filtered SD file doesn't map to any torsion rule..." % TorsionRule)
 1271             continue
 1272         TorsionRuleNodeID = TorsionAlertsSummaryInfo["SMARTSToRuleIDs"][TorsionRule]
 1273 
 1274         # Track data for torsion alerts in a molecule...
 1275         if TorsionRuleNodeID not in TorsionAlertsInfo["RuleSMARTS"]:
 1276             TorsionAlertsInfo["RuleIDs"].append(TorsionRuleNodeID)
 1277 
 1278             TorsionAlertsInfo["RuleSMARTS"][TorsionRuleNodeID] = TorsionRule
 1279             TorsionAlertsInfo["HierarchyClassName"][TorsionRuleNodeID] = HierarchyClass
 1280             TorsionAlertsInfo["HierarchySubClassName"][TorsionRuleNodeID] = HierarchySubClass
 1281             TorsionAlertsInfo["TorsionRulePeaks"][TorsionRuleNodeID] = TorsionPeaks
 1282             TorsionAlertsInfo["TorsionRuleTolerances1"][TorsionRuleNodeID] = Tolerances1
 1283             TorsionAlertsInfo["TorsionRuleTolerances2"][TorsionRuleNodeID] = Tolerances2
 1284             
 1285             TorsionAlertsInfo["AlertTypes"][TorsionRuleNodeID] = []
 1286             TorsionAlertsInfo["AtomIndices"][TorsionRuleNodeID] = []
 1287             TorsionAlertsInfo["TorsionAtomIndices"][TorsionRuleNodeID] = []
 1288             TorsionAlertsInfo["TorsionAngles"][TorsionRuleNodeID] = []
 1289             TorsionAlertsInfo["TorsionAngleViolations"][TorsionRuleNodeID] = []
 1290 
 1291             TorsionAlertsInfo["AlertTypesCount"][TorsionRuleNodeID] = {}
 1292             
 1293         # Track multiple values for a rule ID...
 1294         TorsionAlertsInfo["AlertTypes"][TorsionRuleNodeID].append(TorsionAlertType)
 1295         TorsionAlertsInfo["AtomIndices"][TorsionRuleNodeID].append(RotBondIndices)
 1296         TorsionAlertsInfo["TorsionAtomIndices"][TorsionRuleNodeID].append(TorsionIndices)
 1297         TorsionAlertsInfo["TorsionAngles"][TorsionRuleNodeID].append(TorsionAngle)
 1298         TorsionAlertsInfo["TorsionAngleViolations"][TorsionRuleNodeID].append(TorsionAngleViolation)
 1299         
 1300         # Count alert type for a rule ID...
 1301         if TorsionAlertType not in TorsionAlertsInfo["AlertTypesCount"][TorsionRuleNodeID]:
 1302             TorsionAlertsInfo["AlertTypesCount"][TorsionRuleNodeID][TorsionAlertType] = 0
 1303         TorsionAlertsInfo["AlertTypesCount"][TorsionRuleNodeID][TorsionAlertType] += 1
 1304         
 1305     return TorsionAlertsInfo
 1306     
 1307 def WriteMolecule(Writer, Mol, RotBondsAlertsInfo):
 1308     """Write out molecule."""
 1309     
 1310     if OptionsInfo["CountMode"]:
 1311         return True
 1312 
 1313     SetupMolPropertiesForAlertsInformation(Mol, RotBondsAlertsInfo)
 1314 
 1315     try:
 1316         Writer.write(Mol)
 1317     except Exception as ErrMsg:
 1318         MiscUtil.PrintWarning("Failed to write molecule %s:\n%s\n" % (RDKitUtil.GetMolName(Mol), ErrMsg))
 1319         return False
 1320     
 1321     return True
 1322 
 1323 def SetupMolPropertiesForAlertsInformation(Mol, RotBondsAlertsInfo):
 1324     """Setup molecule properties containing alerts information for rotatable bonds."""
 1325 
 1326     if not OptionsInfo["OutfileAlerts"]:
 1327         return
 1328     
 1329     # Setup rotatable bonds count...
 1330     RotBondsCount = 0
 1331     if RotBondsAlertsInfo is not None:
 1332         RotBondsCount =  len(RotBondsAlertsInfo["IDs"])
 1333     Mol.SetProp(OptionsInfo["SDFieldIDsToLabels"]["RotBondsCountLabel"],  "%s" % RotBondsCount)
 1334     
 1335     # Setup alert counts for rotatable bonds...
 1336     AlertsCount = []
 1337     if RotBondsAlertsInfo is not None:
 1338         for AlertType in ["Green", "Orange", "Red"]:
 1339             if AlertType in RotBondsAlertsInfo["Count"]:
 1340                 AlertsCount.append("%s" % RotBondsAlertsInfo["Count"][AlertType])
 1341             else:
 1342                 AlertsCount.append("0")
 1343     
 1344     if len(AlertsCount):
 1345         Mol.SetProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionAlertsCountLabel"],  "%s" % MiscUtil.JoinWords(AlertsCount, " "))
 1346 
 1347     # Setup alert information for rotatable bonds...
 1348     AlertsInfoValues = []
 1349 
 1350     # Delimiter for multiple values corresponding to specific set of information for
 1351     # a rotatable bond. For example: TorsionAtomIndices
 1352     ValuesDelim = OptionsInfo["IntraSetValuesDelim"]
 1353 
 1354     # Delimiter for various values for a rotatable bond...
 1355     RotBondValuesDelim = OptionsInfo["InterSetValuesDelim"]
 1356     
 1357     # Delimiter for values corresponding to multiple rotatable bonds...
 1358     AlertsInfoValuesDelim = OptionsInfo["InterSetValuesDelim"]
 1359     
 1360     if RotBondsAlertsInfo is not None:
 1361         for ID in RotBondsAlertsInfo["IDs"]:
 1362             if not RotBondsAlertsInfo["MatchStatus"][ID]:
 1363                 continue
 1364             
 1365             if OptionsInfo["OutfileAlertsOnly"]:
 1366                 if RotBondsAlertsInfo["AlertTypes"][ID] not in OptionsInfo["SpecifiedAlertsModeList"]:
 1367                     continue
 1368             
 1369             RotBondValues = []
 1370             
 1371             # Bond atom indices...
 1372             Values = ["%s" % Value for Value in RotBondsAlertsInfo["AtomIndices"][ID]]
 1373             RotBondValues.append(ValuesDelim.join(Values))
 1374 
 1375             # Alert type...
 1376             RotBondValues.append(RotBondsAlertsInfo["AlertTypes"][ID])
 1377 
 1378             # Torsion atom indices...
 1379             TorsionAtomIndices = SetupTorsionAtomIndicesValues(RotBondsAlertsInfo["TorsionAtomIndices"][ID], ValuesDelim)
 1380             RotBondValues.append(TorsionAtomIndices)
 1381 
 1382             # Torsion angle...
 1383             RotBondValues.append("%.2f" % RotBondsAlertsInfo["TorsionAngles"][ID])
 1384 
 1385             # Torsion angle violation...
 1386             RotBondValues.append("%.2f" % RotBondsAlertsInfo["TorsionAngleViolations"][ID])
 1387 
 1388             # Hierarchy class and subclass names...
 1389             RotBondValues.append("%s" % RotBondsAlertsInfo["HierarchyClassNames"][ID])
 1390             RotBondValues.append("%s" % RotBondsAlertsInfo["HierarchySubClassNames"][ID])
 1391 
 1392             # Torsion rule peaks...
 1393             Values = ["%s" % Value for Value in RotBondsAlertsInfo["TorsionRulePeaks"][ID]]
 1394             RotBondValues.append(ValuesDelim.join(Values))
 1395             
 1396             # Torsion rule tolerances...
 1397             Values = ["%s" % Value for Value in RotBondsAlertsInfo["TorsionRuleTolerances1"][ID]]
 1398             RotBondValues.append(ValuesDelim.join(Values))
 1399             Values = ["%s" % Value for Value in RotBondsAlertsInfo["TorsionRuleTolerances2"][ID]]
 1400             RotBondValues.append(ValuesDelim.join(Values))
 1401             
 1402             # Torsion rule SMARTS...
 1403             RotBondValues.append("%s" % RotBondsAlertsInfo["TorsionRuleSMARTS"][ID])
 1404 
 1405             # Track joined values for a rotatable bond...
 1406             AlertsInfoValues.append("%s" % RotBondValuesDelim.join(RotBondValues))
 1407 
 1408     if len(AlertsInfoValues):
 1409         Mol.SetProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionAlertsLabel"], "%s" % ("%s" % AlertsInfoValuesDelim.join(AlertsInfoValues)))
 1410     
 1411 def WriteMoleculeFilteredByRuleID(Writer, Mol, TorsionRuleID, TorsionAlertsSummaryInfo, TorsionAlertsInfo):
 1412     """Write out molecule."""
 1413     
 1414     if OptionsInfo["CountMode"]:
 1415         return
 1416 
 1417     SetupMolPropertiesForFilteredByRuleIDAlertsInformation(Mol, TorsionRuleID, TorsionAlertsSummaryInfo, TorsionAlertsInfo)
 1418         
 1419     Writer.write(Mol)
 1420 
 1421 def SetupMolPropertiesForFilteredByRuleIDAlertsInformation(Mol, TorsionRuleID, TorsionAlertsSummaryInfo, TorsionAlertsInfo):
 1422     """Setup molecule properties containing alerts information for torsion alerts
 1423     fileted by Rule IDs."""
 1424 
 1425     # Delete torsion alerts information for rotatable bonds...
 1426     if Mol.HasProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionAlertsLabel"]):
 1427         Mol.ClearProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionAlertsLabel"])
 1428 
 1429     # Delimiter for values...
 1430     IntraSetValuesDelim = OptionsInfo["IntraSetValuesDelim"]
 1431     InterSetValuesDelim = OptionsInfo["InterSetValuesDelim"]
 1432     
 1433     # Setup alert rule information...
 1434     AlertRuleInfoValues = []
 1435     
 1436     AlertRuleInfoValues.append("%s" % TorsionAlertsInfo["HierarchyClassName"][TorsionRuleID])
 1437     AlertRuleInfoValues.append("%s" % TorsionAlertsInfo["HierarchySubClassName"][TorsionRuleID])
 1438     
 1439     Values = ["%s" % Value for Value in TorsionAlertsInfo["TorsionRulePeaks"][TorsionRuleID]]
 1440     AlertRuleInfoValues.append(IntraSetValuesDelim.join(Values))
 1441     
 1442     Values = ["%s" % Value for Value in TorsionAlertsInfo["TorsionRuleTolerances1"][TorsionRuleID]]
 1443     AlertRuleInfoValues.append(IntraSetValuesDelim.join(Values))
 1444     Values = ["%s" % Value for Value in TorsionAlertsInfo["TorsionRuleTolerances2"][TorsionRuleID]]
 1445     AlertRuleInfoValues.append(IntraSetValuesDelim.join(Values))
 1446     
 1447     AlertRuleInfoValues.append("%s" % TorsionAlertsInfo["RuleSMARTS"][TorsionRuleID])
 1448     
 1449     Mol.SetProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionRuleLabel"], "%s" % ("%s" % InterSetValuesDelim.join(AlertRuleInfoValues)))
 1450 
 1451     # Setup alerts count for torsion rule...
 1452     AlertsCount = []
 1453     for AlertType in ["Green", "Orange", "Red"]:
 1454         if AlertType in TorsionAlertsInfo["AlertTypesCount"][TorsionRuleID]:
 1455             AlertsCount.append("%s" % TorsionAlertsInfo["AlertTypesCount"][TorsionRuleID][AlertType])
 1456         else:
 1457             AlertsCount.append("0")
 1458     
 1459     Mol.SetProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionRuleAlertsCountLabel"],  "%s" % (InterSetValuesDelim.join(AlertsCount)))
 1460     
 1461     # Setup torsion rule alerts...
 1462     AlertsInfoValues = []
 1463     for Index in range(0, len(TorsionAlertsInfo["AlertTypes"][TorsionRuleID])):
 1464         RotBondInfoValues = []
 1465         
 1466         # Bond atom indices...
 1467         Values = ["%s" % Value for Value in TorsionAlertsInfo["AtomIndices"][TorsionRuleID][Index]]
 1468         RotBondInfoValues.append(IntraSetValuesDelim.join(Values))
 1469         
 1470         # Alert type...
 1471         RotBondInfoValues.append(TorsionAlertsInfo["AlertTypes"][TorsionRuleID][Index])
 1472         
 1473         # Torsion atom indices retrieved from the filtered SD file and stored as strings...
 1474         Values = ["%s" % Value for Value in TorsionAlertsInfo["TorsionAtomIndices"][TorsionRuleID][Index]]
 1475         RotBondInfoValues.append(IntraSetValuesDelim.join(Values))
 1476         
 1477         # Torsion angle...
 1478         RotBondInfoValues.append(TorsionAlertsInfo["TorsionAngles"][TorsionRuleID][Index])
 1479         
 1480         # Torsion angle violation...
 1481         RotBondInfoValues.append(TorsionAlertsInfo["TorsionAngleViolations"][TorsionRuleID][Index])
 1482 
 1483         # Track alerts informaiton...
 1484         AlertsInfoValues.append("%s" % InterSetValuesDelim.join(RotBondInfoValues))
 1485     
 1486     Mol.SetProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionRuleAlertsLabel"],  "%s" % (InterSetValuesDelim.join(AlertsInfoValues)))
 1487     
 1488     # Setup torsion rule alert max angle violation...
 1489     TorsionAngleViolations = [float(Angle) for Angle in TorsionAlertsInfo["TorsionAngleViolations"][TorsionRuleID]]
 1490     Mol.SetProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionRuleMaxAngleViolationLabel"],  "%.2f" % (max(TorsionAngleViolations)))
 1491 
 1492 def SetupTorsionAtomIndicesValues(TorsionAtomIndicesList, ValuesDelim):
 1493     """Setup torsion atom indices value for output files."""
 1494 
 1495     # Check for any list values in the list of torsion atom indices used as placeholders
 1496     # for positions of lone pairs in torsion rules containing  N_lp...
 1497     TorsionAtomsInfo = []
 1498     for Value in TorsionAtomIndicesList:
 1499         if type(Value) is list:
 1500             TorsionAtomsInfo.append("N_lp")
 1501         else:
 1502             TorsionAtomsInfo.append(Value)
 1503             
 1504     Values = ["%s" % Value for Value in TorsionAtomsInfo]
 1505     
 1506     return ValuesDelim.join(Values)
 1507     
 1508 def SetupOutfilesWriters():
 1509     """Setup molecule and summary writers."""
 1510 
 1511     OutfilesWriters = {"WriterRemaining": None, "WriterFiltered": None, "WriterAlertSummary": None}
 1512 
 1513     # Writers for SD files...
 1514     WriterRemaining, WriterFiltered = SetupMoleculeWriters()
 1515     OutfilesWriters["WriterRemaining"] = WriterRemaining
 1516     OutfilesWriters["WriterFiltered"] = WriterFiltered
 1517     
 1518     # Writer for alert summary CSV file...
 1519     WriterAlertSummary = SetupAlertSummaryWriter()
 1520     OutfilesWriters["WriterAlertSummary"] = WriterAlertSummary
 1521 
 1522     return OutfilesWriters
 1523 
 1524 def SetupMoleculeWriters():
 1525     """Setup molecule writers."""
 1526     
 1527     Writer = None
 1528     WriterFiltered = None
 1529 
 1530     if OptionsInfo["CountMode"]:
 1531         return (Writer, WriterFiltered)
 1532 
 1533     Writer = RDKitUtil.MoleculesWriter(OptionsInfo["Outfile"], **OptionsInfo["OutfileParams"])
 1534     if Writer is None:
 1535         MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OptionsInfo["Outfile"])
 1536     MiscUtil.PrintInfo("\nGenerating file %s..." % OptionsInfo["Outfile"])
 1537     
 1538     if OptionsInfo["OutfileFilteredMode"]:
 1539         WriterFiltered = RDKitUtil.MoleculesWriter(OptionsInfo["OutfileFiltered"], **OptionsInfo["OutfileParams"])
 1540         if WriterFiltered is None:
 1541             MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OptionsInfo["OutfileFiltered"])
 1542         MiscUtil.PrintInfo("Generating file %s..." % OptionsInfo["OutfileFiltered"])
 1543     
 1544     return (Writer, WriterFiltered)
 1545 
 1546 def SetupAlertSummaryWriter():
 1547     """Setup a alert summary writer."""
 1548     
 1549     Writer = None
 1550     
 1551     if OptionsInfo["CountMode"]:
 1552         return Writer
 1553         
 1554     if not OptionsInfo["OutfileSummaryMode"]:
 1555         return Writer
 1556     
 1557     Outfile = OptionsInfo["OutfileSummary"]
 1558     Writer = open(Outfile, "w")
 1559     if Writer is None:
 1560         MiscUtil.PrintError("Failed to setup a writer for output fie %s " % Outfile)
 1561     
 1562     MiscUtil.PrintInfo("Generating file %s..." % Outfile)
 1563     
 1564     return Writer
 1565     
 1566 def CloseOutfilesWriters(OutfilesWriters):
 1567     """Close outfile writers."""
 1568 
 1569     for WriterType, Writer in OutfilesWriters.items():
 1570         if Writer is not None:
 1571             Writer.close()
 1572 
 1573 def SetupByRuleOutfilesWriters(RuleIDs):
 1574     """Setup by rule outfiles writers."""
 1575 
 1576     # Initialize...
 1577     OutfilesWriters = {}
 1578     for RuleID in RuleIDs:
 1579         OutfilesWriters[RuleID] = None
 1580     
 1581     if OptionsInfo["CountMode"]:
 1582         return OutfilesWriters
 1583         
 1584     if not OptionsInfo["OutfilesFilteredByRulesMode"]:
 1585         return OutfilesWriters
 1586     
 1587     FileDir, FileName, FileExt = MiscUtil.ParseFileName(Options["--outfile"])
 1588     OutfilesRoot = "%s_Filtered_TopRule" % FileName
 1589     OutfilesExt = "sdf"
 1590 
 1591     MsgTxt = "all" if OptionsInfo["OutfilesFilteredByRulesAllMode"] else "top %s" % OptionsInfo["OutfilesFilteredByRulesMaxCount"]
 1592     MiscUtil.PrintInfo("\nGenerating output files %s*.%s for %s torsion rules triggering alerts..." % (OutfilesRoot, OutfilesExt, MsgTxt))
 1593     
 1594     # Delete any existing output files...
 1595     Outfiles = glob.glob("%s*.%s" % (OutfilesRoot, OutfilesExt))
 1596     if len(Outfiles):
 1597         MiscUtil.PrintInfo("Deleting existing output files %s*.%s..." % (OutfilesRoot, OutfilesExt))
 1598         for Outfile in Outfiles:
 1599             try:
 1600                 os.remove(Outfile)
 1601             except Exception as ErrMsg:
 1602                 MiscUtil.PrintWarning("Failed to delete file: %s" % ErrMsg)
 1603     
 1604     RuleIndex = 0
 1605     for RuleID in RuleIDs:
 1606         RuleIndex += 1
 1607         Outfile = "%s%s.%s" % (OutfilesRoot, RuleIndex, OutfilesExt)
 1608         Writer = RDKitUtil.MoleculesWriter(Outfile, **OptionsInfo["OutfileParams"])
 1609         if Writer is None:
 1610             MiscUtil.PrintError("Failed to setup a writer for output fie %s " % Outfile)
 1611             
 1612         OutfilesWriters[RuleID] = Writer
 1613 
 1614     return OutfilesWriters
 1615 
 1616 def CloseByRuleOutfilesWriters(OutfilesWriters):
 1617     """Close by rule outfile writers."""
 1618 
 1619     for RuleID, Writer in OutfilesWriters.items():
 1620         if Writer is not None:
 1621             Writer.close()
 1622 
 1623 def ProcessTorsionLibraryInfo():
 1624     """Process torsion library information."""
 1625 
 1626     RetrieveTorsionLibraryInfo()
 1627     ListTorsionLibraryInfo()
 1628 
 1629 def RetrieveTorsionLibraryInfo(Quiet = False):
 1630     """Retrieve torsion library information."""
 1631 
 1632     TorsionLibraryFilePath = OptionsInfo["TorsionLibraryFile"]
 1633     if TorsionLibraryFilePath is None:
 1634         TorsionLibraryFile = "TorsionLibrary.xml"
 1635         MayaChemToolsDataDir = MiscUtil.GetMayaChemToolsLibDataPath()
 1636         TorsionLibraryFilePath = os.path.join(MayaChemToolsDataDir, TorsionLibraryFile)
 1637         if not Quiet:
 1638             MiscUtil.PrintInfo("\nRetrieving data from default torsion library file %s..." % TorsionLibraryFile)
 1639     else:
 1640         TorsionLibraryFilePath = OptionsInfo["TorsionLibraryFile"]
 1641         if not Quiet:
 1642             MiscUtil.PrintInfo("\nRetrieving data from torsion library file %s..." % TorsionLibraryFilePath)
 1643         
 1644     TorsionLibraryInfo["TorsionLibElementTree"] = TorsionLibraryUtil.RetrieveTorsionLibraryInfo(TorsionLibraryFilePath)
 1645 
 1646 def ListTorsionLibraryInfo():
 1647     """List torsion library information."""
 1648     
 1649     TorsionLibraryUtil.ListTorsionLibraryInfo(TorsionLibraryInfo["TorsionLibElementTree"])
 1650 
 1651 def SetupTorsionLibraryInfo(Quiet = False):
 1652     """Setup torsion library information for matching rotatable bonds."""
 1653 
 1654     if not Quiet:
 1655         MiscUtil.PrintInfo("\nSetting up torsion library information for matching rotatable bonds...")
 1656 
 1657     TorsionLibraryUtil.SetupTorsionLibraryInfoForMatchingRotatableBonds(TorsionLibraryInfo)
 1658 
 1659 def ProcessRotatableBondsSMARTSMode():
 1660     """"Process SMARTS pattern for rotatable bonds."""
 1661 
 1662     RotBondsMode = OptionsInfo["RotBondsSMARTSMode"]
 1663     RotBondsSMARTSPattern = None
 1664     if re.match("NonStrict", RotBondsMode, re.I):
 1665         RotBondsSMARTSPattern = "[!$(*#*)&!D1]-&!@[!$(*#*)&!D1]"
 1666     elif re.match("SemiStrict", RotBondsMode, re.I):
 1667         RotBondsSMARTSPattern = "[!$(*#*)&!D1&!$(C(F)(F)F)&!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)&!$(C([CH3])([CH3])[CH3])]-!@[!$(*#*)&!D1&!$(C(F)(F)F)&!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)&!$(C([CH3])([CH3])[CH3])]"
 1668     elif re.match("Strict", RotBondsMode, re.I):
 1669         RotBondsSMARTSPattern = "[!$(*#*)&!D1&!$(C(F)(F)F)&!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)&!$(C([CH3])([CH3])[CH3])&!$([CD3](=[N,O,S])-!@[#7,O,S!D1])&!$([#7,O,S!D1]-!@[CD3]=[N,O,S])&!$([CD3](=[N+])-!@[#7!D1])&!$([#7!D1]-!@[CD3]=[N+])]-!@[!$(*#*)&!D1&!$(C(F)(F)F)&!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)&!$(C([CH3])([CH3])[CH3])]"
 1670     elif re.match("Specify", RotBondsMode, re.I):
 1671         RotBondsSMARTSPattern = OptionsInfo["RotBondsSMARTSPatternSpecified"]
 1672         RotBondsSMARTSPattern = RotBondsSMARTSPattern.strip()
 1673         if not len(RotBondsSMARTSPattern):
 1674             MiscUtil.PrintError("Empty value specified for SMILES/SMARTS pattern in  \"--rotBondsSMARTSPattern\" option, %s." % RotBondsMode)
 1675     else:
 1676         MiscUtil.PrintError("The value, %s, specified for option \"-r, --rotBondsSMARTSMode\" is not valid. " % RotBondsMode)
 1677         
 1678     RotBondsPatternMol = Chem.MolFromSmarts(RotBondsSMARTSPattern)
 1679     if RotBondsPatternMol is None:
 1680         if re.match("Specify", RotBondsMode, re.I):
 1681             MiscUtil.PrintError("Failed to create rotatable bonds pattern molecule. The rotatable bonds SMARTS pattern, \"%s\", specified using \"--rotBondsSMARTSPattern\" option is not valid." % (RotBondsSMARTSPattern))
 1682         else:
 1683             MiscUtil.PrintError("Failed to create rotatable bonds pattern molecule. The default rotatable bonds SMARTS pattern, \"%s\", used for, \"%s\", value of  \"-r, --rotBondsSMARTSMode\" option is not valid." % (RotBondsSMARTSPattern, RotBondsMode))
 1684     
 1685     OptionsInfo["RotBondsSMARTSPattern"] = RotBondsSMARTSPattern
 1686 
 1687 def ProcessSDFieldLabelsOption():
 1688     """Process SD data field label option."""
 1689 
 1690     ParamsOptionName = "--outfileSDFieldLabels"
 1691     ParamsOptionValue = Options["--outfileSDFieldLabels"]
 1692     
 1693     ParamsIDsToLabels = {"RotBondsCountLabel": "RotBondsCount", "TorsionAlertsCountLabel": "TorsionAlertsCount (Green Orange Red)", "TorsionAlertsLabel": "TorsionAlerts (RotBondIndices TorsionAlert TorsionIndices TorsionAngle TorsionAngleViolation HierarchyClass HierarchySubClass TorsionPeaks Tolerances1 Tolerances2 TorsionRule)", "TorsionRuleLabel": "TorsionRule (HierarchyClass HierarchySubClass TorsionPeaks Tolerances1 Tolerances2 TorsionRule)", "TorsionRuleAlertsCountLabel": "TorsionRuleAlertsCount (Green Orange Red)", "TorsionRuleAlertsLabel": "TorsionRuleAlerts (RotBondIndices TorsionAlert TorsionIndices TorsionAngle TorsionAngleViolation)", "TorsionRuleMaxAngleViolationLabel": "TorsionRuleMaxAngleViolation"}
 1694     
 1695     if re.match("^auto$", ParamsOptionValue, re.I):
 1696         OptionsInfo["SDFieldIDsToLabels"] = ParamsIDsToLabels
 1697         return
 1698     
 1699     # Setup a canonical paramater names...
 1700     ValidParamNames = []
 1701     CanonicalParamNamesMap = {}
 1702     for ParamName in sorted(ParamsIDsToLabels):
 1703         ValidParamNames.append(ParamName)
 1704         CanonicalParamNamesMap[ParamName.lower()] = ParamName
 1705     
 1706     ParamsOptionValue = ParamsOptionValue.strip()
 1707     if not ParamsOptionValue:
 1708         PrintError("No valid parameter name and value pairs specified using \"%s\" option" % ParamsOptionName)
 1709     
 1710     ParamsOptionValueWords = ParamsOptionValue.split(",")
 1711     if len(ParamsOptionValueWords) % 2:
 1712         MiscUtil.PrintError("The number of comma delimited paramater names and values, %d, specified using \"%s\" option must be an even number." % (len(ParamsOptionValueWords), ParamsOptionName))
 1713     
 1714     # Validate paramater name and value pairs...
 1715     for Index in range(0, len(ParamsOptionValueWords), 2):
 1716         Name = ParamsOptionValueWords[Index].strip()
 1717         Value = ParamsOptionValueWords[Index + 1].strip()
 1718 
 1719         CanonicalName = Name.lower()
 1720         if  not CanonicalName in CanonicalParamNamesMap:
 1721             MiscUtil.PrintError("The parameter name, %s, specified using \"%s\" is not a valid name. Supported parameter names: %s" % (Name, ParamsOptionName, " ".join(ValidParamNames)))
 1722 
 1723         ParamName = CanonicalParamNamesMap[CanonicalName]
 1724         ParamValue = Value
 1725         
 1726         # Set value...
 1727         ParamsIDsToLabels[ParamName] = ParamValue
 1728     
 1729     OptionsInfo["SDFieldIDsToLabels"] = ParamsIDsToLabels
 1730 
 1731 def ProcessOptionNitrogenLonePairParameters():
 1732     """Process nitrogen lone pair parameters option."""
 1733 
 1734     ParamsOptionName = "--nitrogenLonePairParams"
 1735     ParamsOptionValue = Options["--nitrogenLonePairParams"]
 1736     
 1737     ParamsInfo = {"AllowHydrogenNbrs": True, "PlanarityTolerance": 1.0,}
 1738     
 1739     if re.match("^auto$", ParamsOptionValue, re.I):
 1740         OptionsInfo["NitrogenLonePairParams"] = ParamsInfo
 1741         return
 1742     
 1743     # Setup a canonical paramater names...
 1744     ValidParamNames = []
 1745     CanonicalParamNamesMap = {}
 1746     for ParamName in sorted(ParamsInfo):
 1747         ValidParamNames.append(ParamName)
 1748         CanonicalParamNamesMap[ParamName.lower()] = ParamName
 1749     
 1750     ParamsOptionValue = ParamsOptionValue.strip()
 1751     if not ParamsOptionValue:
 1752         PrintError("No valid parameter name and value pairs specified using \"%s\" option" % ParamsOptionName)
 1753     
 1754     ParamsOptionValueWords = ParamsOptionValue.split(",")
 1755     if len(ParamsOptionValueWords) % 2:
 1756         MiscUtil.PrintError("The number of comma delimited paramater names and values, %d, specified using \"%s\" option must be an even number." % (len(ParamsOptionValueWords), ParamsOptionName))
 1757     
 1758     # Validate paramater name and value pairs...
 1759     for Index in range(0, len(ParamsOptionValueWords), 2):
 1760         Name = ParamsOptionValueWords[Index].strip()
 1761         Value = ParamsOptionValueWords[Index + 1].strip()
 1762 
 1763         CanonicalName = Name.lower()
 1764         if  not CanonicalName in CanonicalParamNamesMap:
 1765             MiscUtil.PrintError("The parameter name, %s, specified using \"%s\" is not a valid name. Supported parameter names: %s" % (Name, ParamsOptionName, " ".join(ValidParamNames)))
 1766 
 1767         ParamName = CanonicalParamNamesMap[CanonicalName]
 1768         ParamValue = Value
 1769         
 1770         if re.match("^PlanarityTolerance$", ParamName, re.I):
 1771             Value = float(Value)
 1772             if Value < 0:
 1773                 MiscUtil.PrintError("The parameter value, %s, specified for parameter name, %s, using \"%s\" option is not a valid value. Supported values: >= 0" % (Value, Name, ParamsOptionName))
 1774             ParamValue = Value
 1775         elif re.match("^AllowHydrogenNbrs$", ParamName, re.I):
 1776             if not re.match("^(yes|no)$", Value, re.I):
 1777                 MiscUtil.PrintError("The parameter value, %s, specified for parameter name, %s, using \"%s\" option is not a valid value. Supported values: yes or no" % (Value, Name, ParamsOptionName))
 1778             ParamValue = True if re.match("^yes$", Value, re.I) else False
 1779             
 1780         # Set value...
 1781         ParamsInfo[ParamName] = ParamValue
 1782     
 1783     OptionsInfo["NitrogenLonePairParams"] = ParamsInfo
 1784     
 1785 def ProcessOptions():
 1786     """Process and validate command line arguments and options."""
 1787     
 1788     MiscUtil.PrintInfo("Processing options...")
 1789 
 1790     # Validate options...
 1791     ValidateOptions()
 1792     
 1793     OptionsInfo["Infile"] = Options["--infile"]
 1794     ParamsDefaultInfoOverride = {"RemoveHydrogens": False}
 1795     OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters("--infileParams", Options["--infileParams"], InfileName = Options["--infile"], ParamsDefaultInfo = ParamsDefaultInfoOverride)
 1796     
 1797     OptionsInfo["Outfile"] = Options["--outfile"]
 1798     OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters("--outfileParams", Options["--outfileParams"], Options["--infile"], Options["--outfile"])
 1799     
 1800     FileDir, FileName, FileExt = MiscUtil.ParseFileName(Options["--outfile"])
 1801     OutfileFiltered = "%s_Filtered.%s" % (FileName, FileExt)
 1802     OptionsInfo["OutfileFiltered"] = OutfileFiltered
 1803     OptionsInfo["OutfileFilteredMode"] = True if re.match("^yes$", Options["--outfileFiltered"], re.I) else False
 1804     
 1805     OutfileSummary = "%s_AlertsSummary.csv" % (FileName)
 1806     OptionsInfo["OutfileSummary"] = OutfileSummary
 1807     OptionsInfo["OutfileSummaryMode"] = True if re.match("^yes$", Options["--outfileSummary"], re.I) else False
 1808 
 1809     OptionsInfo["OutfilesFilteredByRulesMode"] = True if re.match("^yes$", Options["--outfilesFilteredByRules"], re.I) else False
 1810     OptionsInfo["TrackAlertsSummaryInfo"] = True if (OptionsInfo["OutfileSummaryMode"] or OptionsInfo["OutfilesFilteredByRulesMode"]) else False
 1811 
 1812     OutfilesFilteredByRulesMaxCount = Options["--outfilesFilteredByRulesMaxCount"]
 1813     if not re.match("^All$", OutfilesFilteredByRulesMaxCount, re.I):
 1814         OutfilesFilteredByRulesMaxCount = int(OutfilesFilteredByRulesMaxCount)
 1815     OptionsInfo["OutfilesFilteredByRulesMaxCount"] = OutfilesFilteredByRulesMaxCount
 1816     OptionsInfo["OutfilesFilteredByRulesAllMode"] = True if re.match("^All$", Options["--outfilesFilteredByRulesMaxCount"], re.I) else False
 1817     
 1818     OptionsInfo["OutfileAlerts"] = True if re.match("^yes$", Options["--outfileAlerts"], re.I) else False
 1819 
 1820     if re.match("^yes$", Options["--outfilesFilteredByRules"], re.I):
 1821         if not re.match("^yes$", Options["--outfileAlerts"], re.I):
 1822             MiscUtil.PrintError("The value \"%s\" specified for \"--outfilesFilteredByRules\" option is not valid. The specified value is only allowed during \"yes\" value of \"--outfileAlerts\" option." % (Options["--outfilesFilteredByRules"]))
 1823     
 1824     OptionsInfo["OutfileAlertsMode"] = Options["--outfileAlertsMode"]
 1825     OptionsInfo["OutfileAlertsOnly"] = True if re.match("^AlertsOnly$", Options["--outfileAlertsMode"], re.I) else False
 1826 
 1827     ProcessSDFieldLabelsOption()
 1828     
 1829     OptionsInfo["Overwrite"] = Options["--overwrite"]
 1830     OptionsInfo["CountMode"] = True if re.match("^count$", Options["--mode"], re.I) else False
 1831 
 1832     OptionsInfo["MPMode"] = True if re.match("^yes$", Options["--mp"], re.I) else False
 1833     OptionsInfo["MPParams"] = MiscUtil.ProcessOptionMultiprocessingParameters("--mpParams", Options["--mpParams"])
 1834 
 1835     ProcessOptionNitrogenLonePairParameters()
 1836     
 1837     OptionsInfo["AlertsMode"] = Options["--alertsMode"]
 1838     OptionsInfo["SpecifiedAlertsModeList"] = []
 1839     if re.match("^Red$", Options["--alertsMode"], re.I):
 1840         OptionsInfo["SpecifiedAlertsModeList"].append("Red")
 1841     elif re.match("^RedAndOrange$", Options["--alertsMode"], re.I):
 1842         OptionsInfo["SpecifiedAlertsModeList"].append("Red")
 1843         OptionsInfo["SpecifiedAlertsModeList"].append("Orange")
 1844     
 1845     OptionsInfo["MinAlertsCount"] = int(Options["--alertsMinCount"])
 1846 
 1847     OptionsInfo["RotBondsSMARTSMode"] = Options["--rotBondsSMARTSMode"]
 1848     OptionsInfo["RotBondsSMARTSPatternSpecified"] = Options["--rotBondsSMARTSPattern"]
 1849     ProcessRotatableBondsSMARTSMode()
 1850 
 1851     OptionsInfo["TorsionLibraryFile"] = None
 1852     if not re.match("^auto$", Options["--torsionLibraryFile"], re.I):
 1853         OptionsInfo["TorsionLibraryFile"] = Options["--torsionLibraryFile"]
 1854 
 1855     # Setup delimiter for writing out torsion alert information to output files...
 1856     OptionsInfo["IntraSetValuesDelim"] = ","
 1857     OptionsInfo["InterSetValuesDelim"] = " "
 1858 
 1859 def RetrieveOptions():
 1860     """Retrieve command line arguments and options."""
 1861     
 1862     # Get options...
 1863     global Options
 1864     Options = docopt(_docoptUsage_)
 1865 
 1866     # Set current working directory to the specified directory...
 1867     WorkingDir = Options["--workingdir"]
 1868     if WorkingDir:
 1869         os.chdir(WorkingDir)
 1870     
 1871     # Handle examples option...
 1872     if "--examples" in Options and Options["--examples"]:
 1873         MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_))
 1874         sys.exit(0)
 1875     
 1876 def ProcessListTorsionLibraryOption():
 1877     """Process list torsion library information."""
 1878 
 1879     # Validate and process dataFile option for listing torsion library information...
 1880     OptionsInfo["TorsionLibraryFile"] = None
 1881     if not re.match("^auto$", Options["--torsionLibraryFile"], re.I):
 1882         MiscUtil.ValidateOptionFilePath("-t, --torsionLibraryFile", Options["--torsionLibraryFile"])
 1883         OptionsInfo["TorsionLibraryFile"] = Options["--torsionLibraryFile"]
 1884     
 1885     RetrieveTorsionLibraryInfo()
 1886     ListTorsionLibraryInfo()
 1887     
 1888 def ValidateOptions():
 1889     """Validate option values."""
 1890 
 1891     MiscUtil.ValidateOptionTextValue("-a, --alertsMode", Options["--alertsMode"], "Red RedAndOrange")
 1892     MiscUtil.ValidateOptionIntegerValue("--alertsMinCount", Options["--alertsMinCount"], {">=": 1})
 1893     
 1894     MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"])
 1895     MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "sdf sd mol")
 1896     
 1897     MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "sdf sd")
 1898     if re.match("^filter$", Options["--mode"], re.I):
 1899         MiscUtil.ValidateOptionsOutputFileOverwrite("-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"])
 1900         MiscUtil.ValidateOptionsDistinctFileNames("-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"])
 1901 
 1902     MiscUtil.ValidateOptionTextValue("--outfileFiltered", Options["--outfileFiltered"], "yes no")
 1903     
 1904     MiscUtil.ValidateOptionTextValue("--outfilesFilteredByRules", Options["--outfilesFilteredByRules"], "yes no")
 1905     if not re.match("^All$", Options["--outfilesFilteredByRulesMaxCount"], re.I):
 1906         MiscUtil.ValidateOptionIntegerValue("--outfilesFilteredByRulesMaxCount", Options["--outfilesFilteredByRulesMaxCount"], {">": 0})
 1907     
 1908     MiscUtil.ValidateOptionTextValue("--outfileSummary", Options["--outfileSummary"], "yes no")
 1909     MiscUtil.ValidateOptionTextValue("--outfileAlerts", Options["--outfileAlerts"], "yes no")
 1910     MiscUtil.ValidateOptionTextValue("--outfileAlertsMode", Options["--outfileAlertsMode"], "All AlertsOnly")
 1911     
 1912     MiscUtil.ValidateOptionTextValue("-m, --mode", Options["--mode"], "filter count")
 1913     if re.match("^filter$", Options["--mode"], re.I):
 1914         if not Options["--outfile"]:
 1915             MiscUtil.PrintError("The outfile must be specified using \"-o, --outfile\" during \"filter\" value of \"-m, --mode\" option")
 1916         
 1917     MiscUtil.ValidateOptionTextValue("--mp", Options["--mp"], "yes no")
 1918     
 1919     MiscUtil.ValidateOptionTextValue("-r, --rotBondsSMARTSMode", Options["--rotBondsSMARTSMode"], "NonStrict SemiStrict Strict Specify")
 1920     if re.match("^Specify$", Options["--rotBondsSMARTSMode"], re.I):
 1921         if not Options["--rotBondsSMARTSPattern"]:
 1922             MiscUtil.PrintError("The SMARTS pattern must be specified using \"--rotBondsSMARTSPattern\" during \"Specify\" value of \"-r, --rotBondsSMARTS\" option")
 1923     
 1924     if not re.match("^auto$", Options["--torsionLibraryFile"], re.I):
 1925         MiscUtil.ValidateOptionFilePath("-t, --torsionLibraryFile", Options["--torsionLibraryFile"])
 1926 
 1927 # Setup a usage string for docopt...
 1928 _docoptUsage_ = """
 1929 RDKitFilterTorsionLibraryAlerts.py - Filter torsion library alerts
 1930 
 1931 Usage:
 1932     RDKitFilterTorsionLibraryAlerts.py  [--alertsMode <Red, RedAndOrange>] [--alertsMinCount <Number>]
 1933                                         [--infileParams <Name,Value,...>] [--mode <filter or count>] [--mp <yes or no>] [--mpParams <Name,Value,...>]
 1934                                         [--nitrogenLonePairParams <Name,Value,...>] [--outfileAlerts <yes or no>]
 1935                                         [--outfileAlertsMode <All or AlertsOnly>] [--outfileFiltered <yes or no>]
 1936                                         [--outfilesFilteredByRules <yes or no>] [--outfilesFilteredByRulesMaxCount <All or number>]
 1937                                         [--outfileSummary <yes or no>] [--outfileSDFieldLabels <Type,Label,...>]
 1938                                         [--outfileParams <Name,Value,...>] [--overwrite] [ --rotBondsSMARTSMode <NonStrict, SemiStrict,...>]
 1939                                         [--rotBondsSMARTSPattern <SMARTS>] [--torsionLibraryFile <FileName or auto>] [-w <dir>] -i <infile> -o <outfile>
 1940     RDKitFilterTorsionLibraryAlerts.py [--torsionLibraryFile <FileName or auto>] -l | --list
 1941     RDKitFilterTorsionLibraryAlerts.py -h | --help | -e | --examples
 1942 
 1943 Description:
 1944     Filter strained molecules from an input file for torsion library [ Ref 146, 152, 159 ]
 1945     alerts by matching rotatable bonds against SMARTS patterns specified for torsion
 1946     rules in a torsion library file and write out appropriate molecules to output
 1947     files. The molecules must have 3D coordinates in input file. The default torsion
 1948     library file, TorsionLibrary.xml, is available under MAYACHEMTOOLS/lib/data
 1949     directory.
 1950     
 1951     The data in torsion library file is organized in a hierarchical manner. It consists
 1952     of one generic class and six specific classes at the highest level. Each class
 1953     contains multiple subclasses corresponding to named functional groups or
 1954     substructure patterns. The subclasses consist of torsion rules sorted from
 1955     specific to generic torsion patterns. The torsion rule, in turn, contains a list
 1956     of peak values for torsion angles and two tolerance values. A pair of tolerance
 1957     values define torsion bins around a torsion peak value. For example:
 1958          
 1959         <library>
 1960             <hierarchyClass name="GG" id1="G" id2="G">
 1961             ...
 1962             </hierarchyClass>
 1963             <hierarchyClass name="CO" id1="C" id2="O">
 1964                 <hierarchySubClass name="Ester bond I" smarts="O=[C:2][O:3]">
 1965                     <torsionRule smarts="[O:1]=[C:2]!@[O:3]~[CH0:4]">
 1966                         <angleList>
 1967                             <angle value="0.0" tolerance1="20.00"
 1968                              tolerance2="25.00" score="56.52"/>
 1969                         </angleList>
 1970                     </torsionRule>
 1971                     ...
 1972                 ...
 1973              ...
 1974             </hierarchyClass>
 1975             <hierarchyClass name="NC" id1="N" id2="C">
 1976              ...
 1977             </hierarchyClass>
 1978             <hierarchyClass name="SN" id1="S" id2="N">
 1979             ...
 1980             </hierarchyClass>
 1981             <hierarchyClass name="CS" id1="C" id2="S">
 1982             ...
 1983             </hierarchyClass>
 1984             <hierarchyClass name="CC" id1="C" id2="C">
 1985             ...
 1986             </hierarchyClass>
 1987             <hierarchyClass name="SS" id1="S" id2="S">
 1988              ...
 1989             </hierarchyClass>
 1990         </library>
 1991         
 1992     The rotatable bonds in a 3D molecule are identified using a default SMARTS pattern.
 1993     A custom SMARTS pattern may be optionally specified to detect rotatable bonds.
 1994     Each rotatable bond is matched to a torsion rule in the torsion library and
 1995     assigned one of the following three alert categories: Green, Orange or Red. The 
 1996     rotatable bond is marked Green or Orange for the measured angle of the torsion
 1997     pattern within the first or second tolerance bins around a torsion peak.
 1998     Otherwise, it's marked Red implying that the measured angle is not observed in
 1999     the structure databases employed to generate the torsion library.
 2000 
 2001     The following output files are generated after the filtering:
 2002         
 2003         <OutfileRoot>.sdf
 2004         <OutfileRoot>_Filtered.sdf
 2005         <OutfileRoot>_AlertsSummary.csv
 2006         <OutfileRoot>_Filtered_TopRule*.sdf
 2007         
 2008     The supported input file formats are: Mol (.mol), SD (.sdf, .sd)
 2009 
 2010     The supported output file formats are: SD (.sdf, .sd)
 2011 
 2012 Options:
 2013     -a, --alertsMode <Red, RedAndOrange>  [default: Red]
 2014         Torsion library alert types to use for filtering molecules containing
 2015         rotatable bonds marked with Green, Orange, or Red alerts. Possible
 2016         values: Red or RedAndOrange.
 2017     --alertsMinCount <Number>  [default: 1]
 2018         Minimum number of rotatable bond alerts in a molecule for filtering the
 2019         molecule.
 2020     -e, --examples
 2021         Print examples.
 2022     -h, --help
 2023         Print this help message.
 2024     -i, --infile <infile>
 2025         Input file name.
 2026     --infileParams <Name,Value,...>  [default: auto]
 2027         A comma delimited list of parameter name and value pairs for reading
 2028         molecules from files. The supported parameter names for different file
 2029         formats, along with their default values, are shown below:
 2030             
 2031             SD, MOL: removeHydrogens,no,sanitize,yes,strictParsing,yes
 2032             
 2033     -l, --list
 2034         List torsion library information without performing any filtering.
 2035     -m, --mode <filter or count>  [default: filter]
 2036         Specify whether to filter molecules for torsion library [ Ref 146, 152, 159 ] alerts
 2037         by matching rotatable bonds against SMARTS patterns specified for torsion
 2038         rules and write out the rest of the molecules to an outfile or simply count
 2039         the number of matched molecules marked for filtering.
 2040     --mp <yes or no>  [default: no]
 2041         Use multiprocessing.
 2042          
 2043         By default, input data is retrieved in a lazy manner via mp.Pool.imap()
 2044         function employing lazy RDKit data iterable. This allows processing of
 2045         arbitrary large data sets without any additional requirements memory.
 2046         
 2047         All input data may be optionally loaded into memory by mp.Pool.map()
 2048         before starting worker processes in a process pool by setting the value
 2049         of 'inputDataMode' to 'InMemory' in '--mpParams' option.
 2050         
 2051         A word to the wise: The default 'chunkSize' value of 1 during 'Lazy' input
 2052         data mode may adversely impact the performance. The '--mpParams' section
 2053         provides additional information to tune the value of 'chunkSize'.
 2054     --mpParams <Name,Value,...>  [default: auto]
 2055         A comma delimited list of parameter name and value pairs to configure
 2056         multiprocessing.
 2057         
 2058         The supported parameter names along with their default and possible
 2059         values are shown below:
 2060         
 2061             chunkSize, auto
 2062             inputDataMode, Lazy   [ Possible values: InMemory or Lazy ]
 2063             numProcesses, auto   [ Default: mp.cpu_count() ]
 2064         
 2065         These parameters are used by the following functions to configure and
 2066         control the behavior of multiprocessing: mp.Pool(), mp.Pool.map(), and
 2067         mp.Pool.imap().
 2068         
 2069         The chunkSize determines chunks of input data passed to each worker
 2070         process in a process pool by mp.Pool.map() and mp.Pool.imap() functions.
 2071         The default value of chunkSize is dependent on the value of 'inputDataMode'.
 2072         
 2073         The mp.Pool.map() function, invoked during 'InMemory' input data mode,
 2074         automatically converts RDKit data iterable into a list, loads all data into
 2075         memory, and calculates the default chunkSize using the following method
 2076         as shown in its code:
 2077         
 2078             chunkSize, extra = divmod(len(dataIterable), len(numProcesses) * 4)
 2079             if extra: chunkSize += 1
 2080         
 2081         For example, the default chunkSize will be 7 for a pool of 4 worker processes
 2082         and 100 data items.
 2083         
 2084         The mp.Pool.imap() function, invoked during 'Lazy' input data mode, employs
 2085         'lazy' RDKit data iterable to retrieve data as needed, without loading all the
 2086         data into memory. Consequently, the size of input data is not known a priori.
 2087         It's not possible to estimate an optimal value for the chunkSize. The default 
 2088         chunkSize is set to 1.
 2089         
 2090         The default value for the chunkSize during 'Lazy' data mode may adversely
 2091         impact the performance due to the overhead associated with exchanging
 2092         small chunks of data. It is generally a good idea to explicitly set chunkSize to
 2093         a larger value during 'Lazy' input data mode, based on the size of your input
 2094         data and number of processes in the process pool.
 2095         
 2096         The mp.Pool.map() function waits for all worker processes to process all
 2097         the data and return the results. The mp.Pool.imap() function, however,
 2098         returns the the results obtained from worker processes as soon as the
 2099         results become available for specified chunks of data.
 2100         
 2101         The order of data in the results returned by both mp.Pool.map() and 
 2102         mp.Pool.imap() functions always corresponds to the input data.
 2103     -n, --nitrogenLonePairParams <Name,Value,...>  [default: auto]
 2104         A comma delimited list of parameter name and value pairs to match
 2105         torsion SMARTS patterns containing non-standard construct 'N_lp'
 2106         corresponding to nitrogen lone pair.
 2107         
 2108         The supported parameter names along with their default and possible
 2109         values are shown below:
 2110         
 2111             allowHydrogenNbrs, yes   [ Possible values: yes or no ]
 2112             planarityTolerance, 1  [Possible values: >=0] 
 2113             
 2114         These parameters are used during the matching of torsion rules containing
 2115         'N_lp' in their SMARTS patterns. The 'allowHydrogensNbrs' allows the use
 2116         hydrogen neighbors attached to nitrogen during the determination of its
 2117         planarity. The 'planarityTolerance' in degrees represents the tolerance
 2118         allowed for nitrogen to be considered coplanar with its three neighbors.
 2119         
 2120         The torsion rules containing 'N_lp' in their SMARTS patterns are categorized
 2121         into the following two types of rules:
 2122          
 2123             TypeOne:  
 2124             
 2125             [CX4:1][CX4H2:2]!@[NX3;"N_lp":3][CX4:4]
 2126             [C:1][CX4H2:2]!@[NX3;"N_lp":3][C:4]
 2127             ... ... ...
 2128          
 2129             TypeTwo:  
 2130             
 2131             [!#1:1][CX4:2]!@[NX3;"N_lp":3]
 2132             [C:1][$(S(=O)=O):2]!@["N_lp":3]
 2133             ... ... ...
 2134             
 2135         The torsions are matched to torsion rules containing 'N_lp' using specified
 2136         SMARTS patterns without the 'N_lp' along with additional constraints using
 2137         the following methodology:
 2138             
 2139             TypeOne:  
 2140             
 2141             . SMARTS pattern must contain four mapped atoms and the third
 2142                 mapped atom must be a nitrogen matched with 'NX3:3'
 2143             . Nitrogen atom must have 3 neighbors. The 'allowHydrogens'
 2144                 parameter controls inclusion of hydrogens as its neighbors.
 2145             . Nitrogen atom and its 3 neighbors must be coplanar.
 2146                 'planarityTolerance' parameter provides tolerance in degrees
 2147                 for nitrogen to be considered coplanar with its 3 neighbors.
 2148             
 2149             TypeTwo:  
 2150             
 2151             . SMARTS pattern must contain three mapped atoms and the third
 2152                 mapped atom must be a nitrogen matched with 'NX3:3'. The 
 2153                 third mapped atom may contain only 'N_lp:3' The missing 'NX3'
 2154                 is automatically detected.
 2155             . Nitrogen atom must have 3 neighbors. 'allowHydrogens'
 2156                 parameter controls inclusion of hydrogens as neighbors.
 2157             . Nitrogen atom and its 3 neighbors must not be coplanar.
 2158                 'planarityTolerance' parameter provides tolerance in degrees
 2159                 for nitrogen to be considered coplanar with its 3 neighbors.
 2160             . Nitrogen lone pair position equivalent to VSEPR theory is
 2161                 determined based on the position of nitrogen and its neighbors.
 2162                 A vector normal to 3 nitrogen neighbors is calculated and added
 2163                 to the coordinates of nitrogen atom to determine the approximate
 2164                 position of the lone pair. It is used as the fourth position to
 2165                 calculate the torsion angle.
 2166             
 2167     -o, --outfile <outfile>
 2168         Output file name.
 2169     --outfileAlerts <yes or no>  [default: yes]
 2170         Write out alerts information to SD output files.
 2171     --outfileAlertsMode <All or AlertsOnly>  [default: AlertsOnly]
 2172         Write alerts information to SD output files for all alerts or only for alerts
 2173         specified by '--AlertsMode' option. Possible values: All or AlertsOnly
 2174         This option is only valid for 'Yes' value of '--outfileAlerts' option.
 2175         
 2176         The following alerts information is added to SD output files using
 2177         'TorsionAlerts' data field:
 2178             
 2179             RotBondIndices TorsionAlert TorsionIndices TorsionAngle
 2180             TorsionAngleViolation HierarchyClass HierarchySubClass
 2181             TorsionRule TorsionPeaks Tolerances1 Tolerances2
 2182             
 2183         The 'RotBondsCount' and 'TorsionAlertsCount' data fields are always added
 2184         to SD output files containing both remaining and filtered molecules.
 2185         
 2186         Format:
 2187             
 2188             > <RotBondsCount>
 2189             Number
 2190             
 2191             > <TorsionAlertsCount (Green Orange Red)>
 2192             Number Number Number
 2193             
 2194             > <TorsionAlerts (RotBondIndices TorsionAlert TorsionIndices
 2195                 TorsionAngle TorsionAngleViolation HierarchyClass
 2196                 HierarchySubClass TorsionPeaks Tolerances1 Tolerances2
 2197                 TorsionRule)>
 2198             AtomIndex2,AtomIndex3  AlertType AtomIndex1,AtomIndex2,AtomIndex3,
 2199             AtomIndex4 Angle AngleViolation ClassName SubClassName
 2200             CommaDelimPeakValues CommaDelimTol1Values CommDelimTol2Values
 2201             SMARTS ... ... ...
 2202              ... ... ...
 2203             
 2204         A set of 11 values is written out as value of 'TorsionAlerts' data field for
 2205         each torsion in a molecule. The space character is used as a delimiter
 2206         to separate values with in a set and across set. The comma character
 2207         is used to delimit multiple values for each value in a set.
 2208         
 2209         The 'RotBondIndices' and 'TorsionIndices' contain 2 and 4 comma delimited
 2210         values representing atom indices for a rotatable bond and matched torsion.
 2211         The 'TorsionPeaks',  'Tolerances1', and 'Tolerances2' contain same number
 2212         of comma delimited values corresponding to  torsion angle peaks and
 2213         tolerance intervals specified in torsion library. For example:
 2214             
 2215             ... ... ...
 2216             >  <RotBondsCount>  (1) 
 2217             7
 2218             
 2219             >  <TorsionAlertsCount (Green Orange Red)>  (1) 
 2220             3 2 2
 2221             
 2222             >  <TorsionAlerts (RotBondIndices TorsionAlert TorsionIndices
 2223                 TorsionAngle TorsionAngleViolation HierarchyClass
 2224                 HierarchySubClass TorsionPeaks Tolerances1 Tolerances2
 2225                 TorsionRule)>
 2226             1,2 Red 32,2,1,0 0.13 149.87 NC Anilines 180.0 10.0 30.0 [cH0:1][c:2]
 2227             ([cH,nX2H0])!@[NX3H1:3][CX4:4] 8,9 Red 10,9,8,28 -0.85 GG
 2228             None -90.0,90.0 30.0,30.0 60.0,60.0 [cH1:1][a:2]([cH1])!@[a:3]
 2229             ([cH0])[cH0:4]
 2230             ... ... ...
 2231             
 2232     --outfileFiltered <yes or no>  [default: yes]
 2233         Write out a file containing filtered molecules. Its name is automatically
 2234         generated from the specified output file. Default: <OutfileRoot>_
 2235         Filtered.<OutfileExt>.
 2236     --outfilesFilteredByRules <yes or no>  [default: yes]
 2237         Write out SD files containing filtered molecules for individual torsion
 2238         rules triggering alerts in molecules. The name of SD files are automatically
 2239         generated from the specified output file. Default file names: <OutfileRoot>_
 2240         Filtered_TopRule*.sdf
 2241                 
 2242         The following alerts information is added to SD output files:
 2243             
 2244             > <RotBondsCount>
 2245             Number
 2246             
 2247             >  <TorsionAlertsCount (Green Orange Red)> 
 2248             Number Number Number
 2249             
 2250             >  <TorsionRule (HierarchyClass HierarchySubClass TorsionPeaks
 2251                 Tolerances1 Tolerances2 TorsionRule)> 
 2252             ClassName SubClassName CommaDelimPeakValues CommaDelimTol1Values
 2253             CommDelimTol2Values SMARTS ... ... ...
 2254              ... ... ...
 2255             
 2256             > <TorsionRuleAlertsCount (Green Orange Red)>
 2257             Number Number Number
 2258             
 2259             >  <TorsionRuleAlerts (RotBondIndices TorsionAlert TorsionIndices
 2260                 TorsionAngle TorsionAngleViolation)>
 2261             AtomIndex2,AtomIndex3  AlertType AtomIndex1,AtomIndex2,AtomIndex3,
 2262             AtomIndex4 Angle AngleViolation ... ... ...
 2263             
 2264             >  <TorsionRuleMaxAngleViolation>
 2265             Number
 2266              ... ... ...
 2267             
 2268         For example:
 2269             
 2270             ... ... ...
 2271             >  <RotBondsCount>  (1) 
 2272             7
 2273              
 2274             >  <TorsionAlertsCount (Green Orange Red)>  (1) 
 2275             3 2 2
 2276             
 2277             >  <TorsionRule (HierarchyClass HierarchySubClass TorsionPeaks
 2278                 Tolerances1 Tolerances2 TorsionRule)>  (1) 
 2279             NC Anilines 180.0 10.0 30.0 [cH0:1][c:2]([cH,nX2H0])!@[NX3H1:3][CX4:4]
 2280             
 2281             >  <TorsionRuleAlertsCount (Green Orange Red)>  (1) 
 2282             0 0 1
 2283             
 2284             >  <TorsionRuleAlerts (RotBondIndices TorsionAlert TorsionIndices
 2285                 TorsionAngle TorsionAngleViolation)>  (1) 
 2286             1,2 Red 32,2,1,0 0.13 149.87
 2287             
 2288             >  <TorsionRuleMaxAngleViolation>  (1) 
 2289             149.87
 2290             ... ... ...
 2291             
 2292     --outfilesFilteredByRulesMaxCount <All or number>  [default: 10]
 2293         Write out SD files containing filtered molecules for specified number of
 2294         top N torsion rules triggering alerts for the largest number of molecules
 2295         or for all torsion rules triggering alerts across all molecules.
 2296     --outfileSummary <yes or no>  [default: yes] 
 2297         Write out a CVS text file containing summary of torsions rules responsible
 2298         for triggering torsion alerts. Its name is automatically generated from the
 2299         specified output file. Default: <OutfileRoot>_AlertsSummary.csv.
 2300         
 2301         The following alerts information is written to summary text file:
 2302             
 2303             TorsionRule, TorsionPeaks, Tolerances1, Tolerances2,
 2304             HierarchyClass, HierarchySubClass, TorsionAlertType,
 2305             TorsionAlertCount, TorsionAlertMolCount
 2306              
 2307         The double quotes characters are removed from SMART patterns before
 2308         before writing them to a CSV file. In addition, the torsion rules are sorted by
 2309         TorsionAlertMolCount. For example:
 2310             
 2311             "TorsionRule","TorsionPeaks","Tolerances1","Tolerances2",
 2312                 "HierarchyClass","HierarchySubClass","TorsionAlertTypes",
 2313                 "TorsionAlertCount","TorsionAlertMolCount"
 2314             "[!#1:1][CX4H2:2]!@[CX4H2:3][!#1:4]","-60.0,60.0,180.0",
 2315                 "20.0,20.0,20.0","30.0,30.0,30.0","CC","None/[CX4:2][CX4:3]",
 2316                 "Red","16","11"
 2317             ... ... ...
 2318             
 2319     --outfileSDFieldLabels <Type,Label,...>  [default: auto]
 2320         A comma delimited list of SD data field type and label value pairs for writing
 2321         torsion alerts information along with molecules to SD files.
 2322         
 2323         The supported SD data field label type along with their default values are
 2324         shown below:
 2325             
 2326             For all SD files:
 2327             
 2328             RotBondsCountLabel, RotBondsCount
 2329             TorsionAlertsCountLabel, TorsionAlertsCount (Green Orange Red)
 2330             TorsionAlertsLabel, TorsionAlerts (RotBondIndices TorsionAlert
 2331                 TorsionIndices TorsionAngle TorsionAngleViolation
 2332                 HierarchyClass HierarchySubClass TorsionPeaks Tolerances1
 2333                 Tolerances2 TorsionRule)
 2334             
 2335             For individual SD files filtered by torsion rules:
 2336             
 2337             TorsionRuleLabel, TorsionRule (HierarchyClass HierarchySubClass
 2338                 TorsionPeaks Tolerances1 Tolerances2 TorsionRule)
 2339             TorsionRuleAlertsCountLabel, TorsionRuleAlertsCount (Green Orange
 2340                 Red)
 2341             TorsionRuleAlertsLabel, TorsionRuleAlerts (RotBondIndices
 2342                 TorsionAlert TorsionIndices TorsionAngle TorsionAngleViolation)
 2343             TorsionRuleMaxAngleViolationLabel, TorsionRuleMaxAngleViolation
 2344             
 2345     --outfileParams <Name,Value,...>  [default: auto]
 2346         A comma delimited list of parameter name and value pairs for writing
 2347         molecules to files. The supported parameter names for different file
 2348         formats, along with their default values, are shown below:
 2349             
 2350             SD: kekulize,yes,forceV3000,no
 2351             
 2352     --overwrite
 2353         Overwrite existing files.
 2354     -r, --rotBondsSMARTSMode <NonStrict, SemiStrict,...>  [default: SemiStrict]
 2355         SMARTS pattern to use for identifying rotatable bonds in a molecule
 2356         for matching against torsion rules in the torsion library. Possible values:
 2357         NonStrict, SemiStrict, Strict or Specify. The rotatable bond SMARTS matches
 2358         are filtered to ensure that each atom in the rotatable bond is attached to
 2359         at least two heavy atoms.
 2360         
 2361         The following SMARTS patterns are used to identify rotatable bonds for
 2362         different modes:
 2363             
 2364             NonStrict: [!$(*#*)&!D1]-&!@[!$(*#*)&!D1]
 2365             
 2366             SemiStrict:
 2367             [!$(*#*)&!D1&!$(C(F)(F)F)&!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)
 2368             &!$(C([CH3])([CH3])[CH3])]-!@[!$(*#*)&!D1&!$(C(F)(F)F)
 2369             &!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)&!$(C([CH3])([CH3])[CH3])]
 2370             
 2371             Strict:
 2372             [!$(*#*)&!D1&!$(C(F)(F)F)&!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)
 2373             &!$(C([CH3])([CH3])[CH3])&!$([CD3](=[N,O,S])-!@[#7,O,S!D1])
 2374             &!$([#7,O,S!D1]-!@[CD3]=[N,O,S])&!$([CD3](=[N+])-!@[#7!D1])
 2375             &!$([#7!D1]-!@[CD3]=[N+])]-!@[!$(*#*)&!D1&!$(C(F)(F)F)
 2376             &!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)&!$(C([CH3])([CH3])[CH3])]
 2377             
 2378         The 'NonStrict' and 'Strict' SMARTS patterns are available in RDKit. The 
 2379         'NonStrict' SMARTS pattern corresponds to original Daylight SMARTS
 2380          specification for rotatable bonds. The 'SemiStrict' SMARTS pattern is 
 2381          derived from 'Strict' SMARTS patterns for its usage in this script.
 2382         
 2383         You may use any arbitrary SMARTS pattern to identify rotatable bonds by
 2384         choosing 'Specify' value for '-r, --rotBondsSMARTSMode' option and providing its
 2385         value via '--rotBondsSMARTSPattern' option.
 2386     --rotBondsSMARTSPattern <SMARTS>
 2387         SMARTS pattern for identifying rotatable bonds. This option is only valid
 2388         for 'Specify' value of '-r, --rotBondsSMARTSMode' option.
 2389     -t, --torsionLibraryFile <FileName or auto>  [default: auto]
 2390         Specify a XML file name containing data for torsion library hierarchy or use
 2391         default file, TorsionLibrary.xml, available in MAYACHEMTOOLS/lib/data
 2392         directory.
 2393         
 2394         The format of data in local XML file must match format of the data in Torsion
 2395         Library [ Ref 146, 152, 159 ] file available in MAYACHEMTOOLS data directory.
 2396     -w, --workingdir <dir>
 2397         Location of working directory which defaults to the current directory.
 2398 
 2399 Examples:
 2400     To filter molecules containing any rotatable bonds marked with Red alerts
 2401     based on torsion rules in the torsion library and write out SD files containing
 2402     remaining and filtered molecules, and individual SD files for torsion rules
 2403     triggering alerts along with appropriate torsion information for red alerts,
 2404     type:
 2405 
 2406         % RDKitFilterTorsionLibraryAlerts.py -i Sample3D.sdf -o Sample3DOut.sdf
 2407 
 2408     To run the first example for only counting number of alerts without writing
 2409     out any SD files, type:
 2410 
 2411         % RDKitFilterTorsionLibraryAlerts.py -m count -i Sample3D.sdf -o
 2412           Sample3DOut.sdf
 2413     
 2414     To run the first example for filtertering molecules marked with Orange or
 2415     Red alerts and write out SD files, tye:
 2416 
 2417         % RDKitFilterTorsionLibraryAlerts.py -m Filter --alertsMode RedAndOrange
 2418           -i Sample3D.sdf -o Sample3DOut.sdf
 2419     
 2420     To run the first example for filtering molecules and writing out torsion
 2421     information for all alert types to SD files, type:
 2422 
 2423         % RDKitFilterTorsionLibraryAlerts.py --outfileAlertsMode All
 2424           -i Sample3D.sdf -o Sample3DOut.sdf
 2425 
 2426     To run the first example for filtering molecules in multiprocessing mode on
 2427     all available CPUs without loading all data into memory and write out SD files,
 2428     type:
 2429 
 2430         % RDKitFilterTorsionLibraryAlerts.py --mp yes -i Sample3D.sdf
 2431          -o Sample3DOut.sdf
 2432 
 2433     To run the first example for filtering molecules in multiprocessing mode on
 2434     all available CPUs by loading all data into memory and write out a SD files,
 2435     type:
 2436 
 2437         % RDKitFilterTorsionLibraryAlerts.py  --mp yes --mpParams
 2438           "inputDataMode, InMemory" -i Sample3D.sdf  -o Sample3DOut.sdf
 2439 
 2440     To run the first example for filtering molecules in multiprocessing mode on
 2441     specific number of CPUs and chunksize without loading all data into memory
 2442     and write out SD files, type:
 2443 
 2444         % RDKitFilterTorsionLibraryAlerts.py --mp yes --mpParams
 2445           "inputDataMode,lazy,numProcesses,4,chunkSize,8"  -i Sample3D.sdf
 2446           -o Sample3DOut.sdf
 2447 
 2448     To list information about default torsion library file without performing any
 2449     filtering, type:
 2450 
 2451         % RDKitFilterTorsionLibraryAlerts.py -l
 2452 
 2453     To list information about a local torsion library XML file without performing
 2454     any, filtering, type:
 2455 
 2456         % RDKitFilterTorsionLibraryAlerts.py --torsionLibraryFile
 2457           TorsionLibrary.xml -l
 2458 
 2459 Author:
 2460     Manish Sud (msud@san.rr.com)
 2461 
 2462 Collaborator:
 2463     Pat Walters
 2464 
 2465 Acknowledgments:
 2466     Wolfgang Guba, Patrick Penner, Levi Pierce
 2467 
 2468 See also:
 2469     RDKitFilterChEMBLAlerts.py, RDKitFilterPAINS.py, RDKitFilterTorsionStrainEnergyAlerts.py,
 2470     RDKitConvertFileFormat.py, RDKitSearchSMARTS.py
 2471 
 2472 Copyright:
 2473     Copyright (C) 2023 Manish Sud. All rights reserved.
 2474 
 2475     This script uses the Torsion Library jointly developed by the University
 2476     of Hamburg, Center for Bioinformatics, Hamburg, Germany and
 2477     F. Hoffmann-La-Roche Ltd., Basel, Switzerland.
 2478 
 2479     The functionality available in this script is implemented using RDKit, an
 2480     open source toolkit for cheminformatics developed by Greg Landrum.
 2481 
 2482     This file is part of MayaChemTools.
 2483 
 2484     MayaChemTools is free software; you can redistribute it and/or modify it under
 2485     the terms of the GNU Lesser General Public License as published by the Free
 2486     Software Foundation; either version 3 of the License, or (at your option) any
 2487     later version.
 2488 
 2489 """
 2490 
 2491 if __name__ == "__main__":
 2492     main()