MayaChemTools

    1 #!/bin/env python
    2 #
    3 # File: RDKitFilterTorsionStrainEnergyAlerts.py
    4 # Author: Manish Sud <msud@san.rr.com>
    5 #
    6 # Collaborator: Pat Walters
    7 #
    8 # Copyright (C) 2023 Manish Sud. All rights reserved.
    9 #
   10 # This script uses the torsion strain energy library developed by Gu, S.;
   11 # Smith, M. S.; Yang, Y.; Irwin, J. J.; Shoichet, B. K. [ Ref 153 ].
   12 #
   13 # The torsion strain enegy library is based on the Torsion Library jointly
   14 # developed by the University of Hamburg, Center for Bioinformatics,
   15 # Hamburg, Germany and F. Hoffmann-La-Roche Ltd., Basel, Switzerland.
   16 #
   17 # The functionality available in this script is implemented using RDKit, an
   18 # open source toolkit for cheminformatics developed by Greg Landrum.
   19 #
   20 # This file is part of MayaChemTools.
   21 #
   22 # MayaChemTools is free software; you can redistribute it and/or modify it under
   23 # the terms of the GNU Lesser General Public License as published by the Free
   24 # Software Foundation; either version 3 of the License, or (at your option) any
   25 # later version.
   26 #
   27 # MayaChemTools is distributed in the hope that it will be useful, but without
   28 # any warranty; without even the implied warranty of merchantability of fitness
   29 # for a particular purpose.  See the GNU Lesser General Public License for more
   30 # details.
   31 #
   32 # You should have received a copy of the GNU Lesser General Public License
   33 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
   34 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
   35 # Boston, MA, 02111-1307, USA.
   36 #
   37 
   38 from __future__ import print_function
   39 
   40 # Add local python path to the global path and import standard library modules...
   41 import os
   42 import sys;  sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python"))
   43 import time
   44 import re
   45 import glob
   46 import multiprocessing as mp
   47 import math
   48 
   49 # RDKit imports...
   50 try:
   51     from rdkit import rdBase
   52     from rdkit import Chem
   53     from rdkit.Chem import rdMolTransforms
   54 except ImportError as ErrMsg:
   55     sys.stderr.write("\nFailed to import RDKit module/package: %s\n" % ErrMsg)
   56     sys.stderr.write("Check/update your RDKit environment and try again.\n\n")
   57     sys.exit(1)
   58 
   59 # MayaChemTools imports...
   60 try:
   61     from docopt import docopt
   62     import MiscUtil
   63     import RDKitUtil
   64     import TorsionLibraryUtil
   65 except ImportError as ErrMsg:
   66     sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg)
   67     sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n")
   68     sys.exit(1)
   69 
   70 ScriptName = os.path.basename(sys.argv[0])
   71 Options = {}
   72 OptionsInfo = {}
   73 
   74 TorsionLibraryInfo = {}
   75 
   76 def main():
   77     """Start execution of the script."""
   78     
   79     MiscUtil.PrintInfo("\n%s (RDKit v%s; MayaChemTools v%s; %s): Starting...\n" % (ScriptName, rdBase.rdkitVersion, MiscUtil.GetMayaChemToolsVersion(), time.asctime()))
   80     
   81     (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime()
   82     
   83     # Retrieve command line arguments and options...
   84     RetrieveOptions()
   85     
   86     if  Options["--list"]:
   87         # Handle listing of torsion library information...
   88         ProcessListTorsionLibraryOption()
   89     else:
   90         # Process and validate command line arguments and options...
   91         ProcessOptions()
   92         
   93         # Perform actions required by the script...
   94         PerformFiltering()
   95     
   96     MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName)
   97     MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime))
   98 
   99 def PerformFiltering():
  100     """Filter molecules using SMARTS torsion rules in the torsion strain energy
  101     library file."""
  102 
  103     # Process torsion library info...
  104     ProcessTorsionLibraryInfo()
  105     
  106     # Set up a pattern molecule for rotatable bonds...
  107     RotBondsPatternMol = Chem.MolFromSmarts(OptionsInfo["RotBondsSMARTSPattern"])
  108     
  109     # Setup a molecule reader...
  110     MiscUtil.PrintInfo("\nProcessing file %s..." % OptionsInfo["Infile"])
  111     Mols  = RDKitUtil.ReadMolecules(OptionsInfo["Infile"], **OptionsInfo["InfileParams"])
  112     
  113     MolCount, ValidMolCount, RemainingMolCount, WriteFailedCount = ProcessMolecules(Mols, RotBondsPatternMol)
  114 
  115     MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount)
  116     MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount)
  117     MiscUtil.PrintInfo("Number of molecules failed during writing: %d" % WriteFailedCount)
  118     MiscUtil.PrintInfo("Number of ignored molecules: %d" % (MolCount - ValidMolCount + WriteFailedCount))
  119 
  120     MiscUtil.PrintInfo("\nNumber of remaining molecules: %d" % RemainingMolCount)
  121     MiscUtil.PrintInfo("Number of filtered molecules: %d" % (ValidMolCount - RemainingMolCount))
  122 
  123 def ProcessMolecules(Mols, RotBondsPatternMol):
  124     """Process and filter molecules."""
  125     
  126     if OptionsInfo["MPMode"]:
  127         return ProcessMoleculesUsingMultipleProcesses(Mols, RotBondsPatternMol)
  128     else:
  129         return ProcessMoleculesUsingSingleProcess(Mols, RotBondsPatternMol)
  130 
  131 def ProcessMoleculesUsingSingleProcess(Mols, RotBondsPatternMol):
  132     """Process and filter molecules using a single process."""
  133     
  134     SetupTorsionLibraryInfo()
  135     
  136     MiscUtil.PrintInfo("\nFiltering molecules...")
  137     
  138     OutfileFilteredMode = OptionsInfo["OutfileFilteredMode"]
  139 
  140     # Set up writers...
  141     OutfilesWriters = SetupOutfilesWriters()
  142     
  143     WriterRemaining = OutfilesWriters["WriterRemaining"]
  144     WriterFiltered = OutfilesWriters["WriterFiltered"]
  145     WriterAlertSummary = OutfilesWriters["WriterAlertSummary"]
  146     
  147     # Initialize alerts summary info...
  148     TorsionAlertsSummaryInfo = InitializeTorsionAlertsSummaryInfo()
  149 
  150     (MolCount, ValidMolCount, RemainingMolCount, WriteFailedCount, FilteredMolWriteCount) = [0] * 5
  151     for Mol in Mols:
  152         MolCount += 1
  153         
  154         if Mol is None:
  155             continue
  156         
  157         if RDKitUtil.IsMolEmpty(Mol):
  158             MiscUtil.PrintWarning("Ignoring empty molecule: %s" % RDKitUtil.GetMolName(Mol, MolCount))
  159             continue
  160 
  161         # Check for 3D flag...
  162         if not Mol.GetConformer().Is3D():
  163             MiscUtil.PrintWarning("3D tag is not set. Ignoring molecule: %s\n" % RDKitUtil.GetMolName(Mol, MolCount))
  164             continue
  165         
  166         ValidMolCount += 1
  167         
  168         # Identify torsion library alerts for rotatable bonds..
  169         RotBondsAlertsStatus, RotBondsAlertsInfo = IdentifyTorsionLibraryAlertsForRotatableBonds(Mol, RotBondsPatternMol)
  170 
  171         TrackTorsionAlertsSummaryInfo(TorsionAlertsSummaryInfo, RotBondsAlertsInfo)
  172         
  173         # Write out filtered and remaining molecules...
  174         WriteStatus = True
  175         if RotBondsAlertsStatus:
  176             if OutfileFilteredMode:
  177                 WriteStatus = WriteMolecule(WriterFiltered, Mol, RotBondsAlertsInfo)
  178                 if WriteStatus:
  179                     FilteredMolWriteCount += 1
  180         else:
  181             RemainingMolCount += 1
  182             WriteStatus = WriteMolecule(WriterRemaining, Mol, RotBondsAlertsInfo)
  183         
  184         if not WriteStatus:
  185             WriteFailedCount += 1
  186 
  187     WriteTorsionAlertsSummaryInfo(WriterAlertSummary, TorsionAlertsSummaryInfo)
  188     CloseOutfilesWriters(OutfilesWriters)
  189 
  190     if FilteredMolWriteCount:
  191         WriteTorsionAlertsFilteredByRulesInfo(TorsionAlertsSummaryInfo)
  192     
  193     return (MolCount, ValidMolCount, RemainingMolCount, WriteFailedCount)
  194 
  195 def ProcessMoleculesUsingMultipleProcesses(Mols, RotBondsPatternMol):
  196     """Process and filter molecules using multiprocessing."""
  197 
  198     MiscUtil.PrintInfo("\nFiltering molecules using multiprocessing...")
  199     
  200     MPParams = OptionsInfo["MPParams"]
  201     OutfileFilteredMode = OptionsInfo["OutfileFilteredMode"]
  202 
  203     # Set up writers...
  204     OutfilesWriters = SetupOutfilesWriters()
  205     
  206     WriterRemaining = OutfilesWriters["WriterRemaining"]
  207     WriterFiltered = OutfilesWriters["WriterFiltered"]
  208     WriterAlertSummary = OutfilesWriters["WriterAlertSummary"]
  209     
  210     # Initialize alerts summary info...
  211     TorsionAlertsSummaryInfo = InitializeTorsionAlertsSummaryInfo()
  212 
  213     # Setup data for initializing a worker process...
  214     MiscUtil.PrintInfo("\nEncoding options info and rotatable bond pattern molecule...")
  215     OptionsInfo["EncodedRotBondsPatternMol"] = RDKitUtil.MolToBase64EncodedMolString(RotBondsPatternMol)
  216     InitializeWorkerProcessArgs = (MiscUtil.ObjectToBase64EncodedString(Options), MiscUtil.ObjectToBase64EncodedString(OptionsInfo))
  217 
  218     # Setup a encoded mols data iterable for a worker process...
  219     WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStrings(Mols)
  220     
  221     # Setup process pool along with data initialization for each process...
  222     MiscUtil.PrintInfo("\nConfiguring multiprocessing using %s method..." % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()"))
  223     MiscUtil.PrintInfo("NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n" % (MPParams["NumProcesses"], MPParams["InputDataMode"], ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"])))
  224     
  225     ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeWorkerProcess, InitializeWorkerProcessArgs)
  226     
  227     # Start processing...
  228     if re.match("^Lazy$", MPParams["InputDataMode"], re.I):
  229         Results = ProcessPool.imap(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
  230     elif re.match("^InMemory$", MPParams["InputDataMode"], re.I):
  231         Results = ProcessPool.map(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
  232     else:
  233         MiscUtil.PrintError("The value, %s, specified for \"--inputDataMode\" is not supported." % (MPParams["InputDataMode"]))
  234     
  235     (MolCount, ValidMolCount, RemainingMolCount, WriteFailedCount, FilteredMolWriteCount) = [0] * 5
  236     for Result in Results:
  237         MolCount += 1
  238         MolIndex, EncodedMol, RotBondsAlertsStatus, RotBondsAlertsInfo = Result
  239         
  240         if EncodedMol is None:
  241             continue
  242         ValidMolCount += 1
  243         
  244         Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
  245         
  246         TrackTorsionAlertsSummaryInfo(TorsionAlertsSummaryInfo, RotBondsAlertsInfo)
  247         
  248         # Write out filtered and remaining molecules...
  249         WriteStatus = True
  250         if RotBondsAlertsStatus:
  251             if OutfileFilteredMode:
  252                 WriteStatus = WriteMolecule(WriterFiltered, Mol, RotBondsAlertsInfo)
  253                 if WriteStatus:
  254                     FilteredMolWriteCount += 1
  255         else:
  256             RemainingMolCount += 1
  257             WriteStatus = WriteMolecule(WriterRemaining, Mol, RotBondsAlertsInfo)
  258 
  259         if not WriteStatus:
  260             WriteFailedCount += 1
  261     
  262     WriteTorsionAlertsSummaryInfo(WriterAlertSummary, TorsionAlertsSummaryInfo)
  263     CloseOutfilesWriters(OutfilesWriters)
  264 
  265     if FilteredMolWriteCount:
  266         WriteTorsionAlertsFilteredByRulesInfo(TorsionAlertsSummaryInfo)
  267     
  268     return (MolCount, ValidMolCount, RemainingMolCount, WriteFailedCount)
  269 
  270 def InitializeWorkerProcess(*EncodedArgs):
  271     """Initialize data for a worker process."""
  272     
  273     global Options, OptionsInfo
  274     
  275     MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid())
  276     
  277     # Decode Options and OptionInfo...
  278     Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0])
  279     OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1])
  280 
  281     # Decode RotBondsPatternMol...
  282     OptionsInfo["RotBondsPatternMol"] = RDKitUtil.MolFromBase64EncodedMolString(OptionsInfo["EncodedRotBondsPatternMol"])
  283     
  284     # Setup torsion library...
  285     RetrieveTorsionLibraryInfo(Quiet = True)
  286     SetupTorsionLibraryInfo(Quiet = True)
  287 
  288 def WorkerProcess(EncodedMolInfo):
  289     """Process data for a worker process."""
  290 
  291     MolIndex, EncodedMol = EncodedMolInfo
  292     
  293     if EncodedMol is None:
  294         return [MolIndex, None, False, None]
  295     
  296     Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
  297     if RDKitUtil.IsMolEmpty(Mol):
  298         MolName = RDKitUtil.GetMolName(Mol, (MolIndex + 1))
  299         MiscUtil.PrintWarning("Ignoring empty molecule: %s" % MolName)
  300         return [MolIndex, None, False, None]
  301         
  302     # Check for 3D flag...
  303     if not Mol.GetConformer().Is3D():
  304         MolName = RDKitUtil.GetMolName(Mol, (MolIndex + 1))
  305         MiscUtil.PrintWarning("3D tag is not set. Ignoring molecule: %s\n" % MolName)
  306         return [MolIndex, None, False, None]
  307     
  308     # Identify torsion library alerts for rotatable bonds..
  309     RotBondsAlertsStatus, RotBondsAlertsInfo = IdentifyTorsionLibraryAlertsForRotatableBonds(Mol, OptionsInfo["RotBondsPatternMol"])
  310 
  311     return [MolIndex, EncodedMol, RotBondsAlertsStatus, RotBondsAlertsInfo]
  312     
  313 def  IdentifyTorsionLibraryAlertsForRotatableBonds(Mol, RotBondsPatternMol):
  314     """Identify rotatable bonds for torsion library alerts."""
  315     
  316     # Identify rotatable bonds...
  317     RotBondsStatus, RotBondsInfo = TorsionLibraryUtil.IdentifyRotatableBondsForTorsionLibraryMatch(TorsionLibraryInfo, Mol, RotBondsPatternMol)
  318     
  319     if not RotBondsStatus:
  320         return (False, None)
  321 
  322     # Identify alerts for rotatable bonds...
  323     RotBondsAlertsStatus, RotBondsAlertsInfo = MatchRotatableBondsToTorsionLibrary(Mol, RotBondsInfo)
  324     
  325     return (RotBondsAlertsStatus, RotBondsAlertsInfo)
  326 
  327 def MatchRotatableBondsToTorsionLibrary(Mol, RotBondsInfo):
  328     """Match rotatable bond to torsion library."""
  329 
  330     # Initialize...
  331     RotBondsAlertsInfo = InitializeRotatableBondsAlertsInfo()
  332     
  333     # Match rotatable bonds to torsion library...
  334     for ID in RotBondsInfo["IDs"]:
  335         AtomIndices = RotBondsInfo["AtomIndices"][ID]
  336         HierarchyClass = RotBondsInfo["HierarchyClass"][ID]
  337 
  338         MatchStatus, MatchInfo = MatchRotatableBondToTorsionLibrary(Mol, AtomIndices, HierarchyClass)
  339         TrackRotatableBondsAlertsInfo(RotBondsAlertsInfo, ID, AtomIndices, MatchStatus, MatchInfo)
  340         
  341     RotBondsAlertsStatus = SetupRotatableBondsAlertStatusTotalStrainEnergiesInfo(RotBondsAlertsInfo)
  342 
  343     return (RotBondsAlertsStatus, RotBondsAlertsInfo)
  344 
  345 def InitializeRotatableBondsAlertsInfo():
  346     """Initialize alerts information for rotatable bonds."""
  347     
  348     RotBondsAlertsInfo = {}
  349     RotBondsAlertsInfo["IDs"] = []
  350 
  351     for DataLabel in ["RotBondsAlertsStatus", "TotalEnergy", "TotalEnergyLowerBound", "TotalEnergyUpperBound", "AnglesNotObservedCount", "MaxSingleEnergy", "MaxSingleEnergyAlertsCount"]:
  352         RotBondsAlertsInfo[DataLabel] = None
  353     
  354     for DataLabel in ["MatchStatus", "MaxSingleEnergyAlertStatus", "AtomIndices", "TorsionAtomIndices", "TorsionAngle", "HierarchyClassName", "HierarchySubClassName", "TorsionRuleNodeID", "TorsionRuleSMARTS", "EnergyMethod", "AngleNotObserved", "Energy", "EnergyLowerBound", "EnergyUpperBound"]:
  355         RotBondsAlertsInfo[DataLabel] = {}
  356         
  357     return RotBondsAlertsInfo
  358 
  359 def TrackRotatableBondsAlertsInfo(RotBondsAlertsInfo, ID, AtomIndices, MatchStatus, MatchInfo):
  360     """Track alerts information for rotatable bonds."""
  361     
  362     if MatchInfo is None or len(MatchInfo) == 0:
  363         TorsionAtomIndices, TorsionAngle, HierarchyClassName, HierarchySubClassName, TorsionRuleNodeID, TorsionRuleSMARTS, EnergyMethod, AngleNotObserved, Energy, EnergyLowerBound, EnergyUpperBound = [None] * 11
  364     else:
  365         TorsionAtomIndices, TorsionAngle, HierarchyClassName, HierarchySubClassName, TorsionRuleNodeID, TorsionRuleSMARTS, EnergyMethod, AngleNotObserved, Energy, EnergyLowerBound, EnergyUpperBound = MatchInfo
  366     
  367     # Track torsion match information...
  368     RotBondsAlertsInfo["IDs"].append(ID)
  369     RotBondsAlertsInfo["MatchStatus"][ID] = MatchStatus
  370     RotBondsAlertsInfo["AtomIndices"][ID] = AtomIndices
  371     RotBondsAlertsInfo["TorsionAtomIndices"][ID] = TorsionAtomIndices
  372     RotBondsAlertsInfo["TorsionAngle"][ID] = TorsionAngle
  373     RotBondsAlertsInfo["HierarchyClassName"][ID] = HierarchyClassName
  374     RotBondsAlertsInfo["HierarchySubClassName"][ID] = HierarchySubClassName
  375     RotBondsAlertsInfo["TorsionRuleNodeID"][ID] = TorsionRuleNodeID
  376     RotBondsAlertsInfo["TorsionRuleSMARTS"][ID] = TorsionRuleSMARTS
  377     RotBondsAlertsInfo["EnergyMethod"][ID] = EnergyMethod
  378     RotBondsAlertsInfo["AngleNotObserved"][ID] = AngleNotObserved
  379     RotBondsAlertsInfo["Energy"][ID] = Energy
  380     RotBondsAlertsInfo["EnergyLowerBound"][ID] = EnergyLowerBound
  381     RotBondsAlertsInfo["EnergyUpperBound"][ID] = EnergyUpperBound
  382 
  383 def SetupRotatableBondsAlertStatusTotalStrainEnergiesInfo(RotBondsAlertsInfo):
  384     """Setup rotatable bonds alert status along with total strain energies."""
  385 
  386     # Initialize...
  387     RotBondsAlertsStatus = False
  388     TotalEnergy, TotalEnergyLowerBound, TotalEnergyUpperBound, AnglesNotObservedCount  = [None, None, None, None]
  389     MaxSingleEnergy, MaxSingleEnergyAlertsCount  = [None, None]
  390 
  391     # Initialize max single energy alert status...
  392     for ID in RotBondsAlertsInfo["IDs"]:
  393         RotBondsAlertsInfo["MaxSingleEnergyAlertStatus"][ID] = None
  394     
  395     # Check for torsion angles not obervered in the strain library...
  396     AnglesNotObservedCount = 0
  397     for ID in RotBondsAlertsInfo["IDs"]:
  398         AngleNotObserved = RotBondsAlertsInfo["AngleNotObserved"][ID]
  399         if AngleNotObserved is not None and AngleNotObserved:
  400             AnglesNotObservedCount += 1
  401 
  402     # Setup alert status for rotable bonds...
  403     if AnglesNotObservedCount > 0:
  404         if OptionsInfo["FilterTorsionsNotObserved"]:
  405             RotBondsAlertsStatus = True
  406     else:
  407         TotalEnergy = 0.0
  408         for ID in RotBondsAlertsInfo["IDs"]:
  409             Energy = RotBondsAlertsInfo["Energy"][ID]
  410             TotalEnergy += Energy
  411             if OptionsInfo["TotalEnergyMode"]:
  412                 if TotalEnergy > OptionsInfo["TotalEnergyCutoff"]:
  413                     RotBondsAlertsStatus = True
  414                     break
  415             elif OptionsInfo["MaxSingleEnergyMode"]:
  416                 if Energy > OptionsInfo["MaxSingleEnergyCutoff"]:
  417                     RotBondsAlertsStatus = True
  418                     break
  419             elif OptionsInfo["TotalOrMaxSingleEnergyMode"]:
  420                 if TotalEnergy > OptionsInfo["TotalEnergyCutoff"] or Energy > OptionsInfo["MaxSingleEnergyCutoff"]:
  421                     RotBondsAlertsStatus = True
  422                     break
  423     
  424         # Setup energy infomation...
  425         TotalEnergy, TotalEnergyLowerBound, TotalEnergyUpperBound = [0.0, 0.0, 0.0]
  426         if OptionsInfo["MaxSingleEnergyMode"] or OptionsInfo["TotalOrMaxSingleEnergyMode"]:
  427             MaxSingleEnergy, MaxSingleEnergyAlertsCount = [0.0, 0]
  428         
  429         for ID in RotBondsAlertsInfo["IDs"]:
  430             Energy = RotBondsAlertsInfo["Energy"][ID]
  431             
  432             # Setup total energy along with the lower and upper bounds...
  433             TotalEnergy += Energy
  434             TotalEnergyLowerBound += RotBondsAlertsInfo["EnergyLowerBound"][ID]
  435             TotalEnergyUpperBound += RotBondsAlertsInfo["EnergyUpperBound"][ID]
  436         
  437             # Setup max single energy and max single energy alerts count...
  438             if OptionsInfo["MaxSingleEnergyMode"] or OptionsInfo["TotalOrMaxSingleEnergyMode"]:
  439                 MaxSingleEnergyAlertStatus = False
  440                 
  441                 if Energy > MaxSingleEnergy:
  442                     MaxSingleEnergy = Energy
  443                     if Energy > OptionsInfo["MaxSingleEnergyCutoff"]:
  444                         MaxSingleEnergyAlertStatus = True
  445                         MaxSingleEnergyAlertsCount += 1
  446                 
  447                 RotBondsAlertsInfo["MaxSingleEnergyAlertStatus"][ID] = MaxSingleEnergyAlertStatus
  448     
  449     RotBondsAlertsInfo["RotBondsAlertsStatus"] = RotBondsAlertsStatus
  450     
  451     RotBondsAlertsInfo["TotalEnergy"] = TotalEnergy
  452     RotBondsAlertsInfo["TotalEnergyLowerBound"] = TotalEnergyLowerBound
  453     RotBondsAlertsInfo["TotalEnergyUpperBound"] = TotalEnergyUpperBound
  454     
  455     RotBondsAlertsInfo["AnglesNotObservedCount"] = AnglesNotObservedCount
  456     
  457     RotBondsAlertsInfo["MaxSingleEnergy"] = MaxSingleEnergy
  458     RotBondsAlertsInfo["MaxSingleEnergyAlertsCount"] = MaxSingleEnergyAlertsCount
  459 
  460     return RotBondsAlertsStatus
  461     
  462 def MatchRotatableBondToTorsionLibrary(Mol, RotBondAtomIndices, RotBondHierarchyClass):
  463     """Match rotatable bond to torsion library."""
  464 
  465     if TorsionLibraryUtil.IsSpecificHierarchyClass(TorsionLibraryInfo, RotBondHierarchyClass):
  466         MatchStatus, MatchInfo = MatchRotatableBondAgainstSpecificHierarchyClass(Mol, RotBondAtomIndices, RotBondHierarchyClass)
  467         if not MatchStatus:
  468             MatchStatus, MatchInfo = MatchRotatableBondAgainstGenericHierarchyClass(Mol, RotBondAtomIndices, RotBondHierarchyClass)
  469     else:
  470         MatchStatus, MatchInfo = MatchRotatableBondAgainstGenericHierarchyClass(Mol, RotBondAtomIndices, RotBondHierarchyClass)
  471 
  472     return (MatchStatus, MatchInfo)
  473 
  474 def MatchRotatableBondAgainstSpecificHierarchyClass(Mol, RotBondAtomIndices, RotBondHierarchyClass):
  475     """Match rotatable bond against a specific hierarchy class."""
  476 
  477     HierarchyClassElementNode = None
  478     if RotBondHierarchyClass in TorsionLibraryInfo["SpecificClasses"]["ElementNode"]:
  479         HierarchyClassElementNode = TorsionLibraryInfo["SpecificClasses"]["ElementNode"][RotBondHierarchyClass]
  480     
  481     if HierarchyClassElementNode is None:
  482         return (False, None, None, None)
  483 
  484     TorsionLibraryUtil.TrackHierarchyClassElementNode(TorsionLibraryInfo, HierarchyClassElementNode)
  485     MatchStatus, MatchInfo = ProcessElementForRotatableBondMatch(Mol, RotBondAtomIndices, HierarchyClassElementNode)
  486     TorsionLibraryUtil.RemoveLastHierarchyClassElementNodeFromTracking(TorsionLibraryInfo)
  487     
  488     return (MatchStatus, MatchInfo)
  489 
  490 def MatchRotatableBondAgainstGenericHierarchyClass(Mol, RotBondAtomIndices, RotBondHierarchyClass):
  491     """Match rotatable bond against a generic hierarchy class."""
  492     
  493     HierarchyClassElementNode = TorsionLibraryUtil.GetGenericHierarchyClassElementNode(TorsionLibraryInfo)
  494     if HierarchyClassElementNode is None:
  495         return (False, None)
  496 
  497     TorsionLibraryUtil.TrackHierarchyClassElementNode(TorsionLibraryInfo, HierarchyClassElementNode)
  498     
  499     #  Match hierarchy subclasses before matching torsion rules...
  500     MatchStatus, MatchInfo = MatchRotatableBondAgainstGenericHierarchySubClasses(Mol, RotBondAtomIndices, HierarchyClassElementNode)
  501     
  502     if not MatchStatus:
  503         MatchStatus, MatchInfo = MatchRotatableBondAgainstGenericHierarchyTorsionRules(Mol, RotBondAtomIndices, HierarchyClassElementNode)
  504     
  505     TorsionLibraryUtil.RemoveLastHierarchyClassElementNodeFromTracking(TorsionLibraryInfo)
  506     
  507     return (MatchStatus, MatchInfo)
  508 
  509 def MatchRotatableBondAgainstGenericHierarchySubClasses(Mol, RotBondAtomIndices, HierarchyClassElementNode):
  510     """Match rotatable bond againat generic hierarchy subclasses."""
  511 
  512     for ElementChildNode in HierarchyClassElementNode:
  513         if ElementChildNode.tag != "hierarchySubClass":
  514             continue
  515         
  516         SubClassMatchStatus = ProcessHierarchySubClassElementForRotatableBondMatch(Mol, RotBondAtomIndices, ElementChildNode)
  517         
  518         if SubClassMatchStatus:
  519             MatchStatus, MatchInfo = ProcessElementForRotatableBondMatch(Mol, RotBondAtomIndices, ElementChildNode)
  520             
  521             if MatchStatus:
  522                 return (MatchStatus, MatchInfo)
  523         
  524     return(False, None)
  525 
  526 def MatchRotatableBondAgainstGenericHierarchyTorsionRules(Mol, RotBondAtomIndices, HierarchyClassElementNode):
  527     """Match rotatable bond againat torsion rules generic hierarchy class."""
  528     
  529     for ElementChildNode in HierarchyClassElementNode:
  530         if ElementChildNode.tag != "torsionRule":
  531             continue
  532         
  533         MatchStatus, MatchInfo = ProcessTorsionRuleElementForRotatableBondMatch(Mol, RotBondAtomIndices, ElementChildNode)
  534         
  535         if MatchStatus:
  536             return (MatchStatus, MatchInfo)
  537 
  538     return(False, None)
  539     
  540 def ProcessElementForRotatableBondMatch(Mol, RotBondAtomIndices, ElementNode):
  541     """Process element node to recursively match rotatable bond against hierarchy
  542     subclasses and torsion rules."""
  543     
  544     for ElementChildNode in ElementNode:
  545         if ElementChildNode.tag == "hierarchySubClass":
  546             SubClassMatchStatus = ProcessHierarchySubClassElementForRotatableBondMatch(Mol, RotBondAtomIndices, ElementChildNode)
  547             
  548             if SubClassMatchStatus:
  549                 TorsionLibraryUtil.TrackHierarchySubClassElementNode(TorsionLibraryInfo, ElementChildNode)
  550                 
  551                 MatchStatus, MatchInfo = ProcessElementForRotatableBondMatch(Mol, RotBondAtomIndices, ElementChildNode)
  552                 if MatchStatus:
  553                     TorsionLibraryUtil.RemoveLastHierarchySubClassElementNodeFromTracking(TorsionLibraryInfo)
  554                     return (MatchStatus, MatchInfo)
  555             
  556                 TorsionLibraryUtil.RemoveLastHierarchySubClassElementNodeFromTracking(TorsionLibraryInfo)
  557             
  558         elif ElementChildNode.tag == "torsionRule":
  559             MatchStatus, MatchInfo = ProcessTorsionRuleElementForRotatableBondMatch(Mol, RotBondAtomIndices, ElementChildNode)
  560             
  561             if MatchStatus:
  562                 return (MatchStatus, MatchInfo)
  563     
  564     return (False, None)
  565 
  566 def ProcessHierarchySubClassElementForRotatableBondMatch(Mol, RotBondAtomIndices, ElementNode):
  567     """Process hierarchy subclass element to match rotatable bond."""
  568     
  569     # Setup subclass SMARTS pattern mol...
  570     SubClassPatternMol = TorsionLibraryUtil.SetupHierarchySubClassElementPatternMol(TorsionLibraryInfo, ElementNode)
  571     if SubClassPatternMol is None:
  572         return False
  573 
  574     # Match SMARTS pattern...
  575     SubClassPatternMatches = RDKitUtil.FilterSubstructureMatchesByAtomMapNumbers(Mol, SubClassPatternMol, Mol.GetSubstructMatches(SubClassPatternMol, useChirality = False))
  576     if len(SubClassPatternMatches) == 0:
  577         return False
  578 
  579     # Match rotatable bond indices...
  580     RotBondAtomIndex1, RotBondAtomIndex2 = RotBondAtomIndices
  581     MatchStatus = False
  582     for SubClassPatternMatch in SubClassPatternMatches:
  583         if len(SubClassPatternMatch) == 2:
  584             # Matched to pattern containing map atom numbers ":2" and ":3"...
  585             CentralAtomsIndex1, CentralAtomsIndex2 = SubClassPatternMatch
  586         elif len(SubClassPatternMatch) == 4:
  587             # Matched to pattern containing map atom numbers ":1", ":2", ":3" and ":4"...
  588             CentralAtomsIndex1 = SubClassPatternMatch[1]
  589             CentralAtomsIndex2 = SubClassPatternMatch[2]
  590         elif len(SubClassPatternMatch) == 3:
  591             SubClassSMARTSPattern = ElementNode.get("smarts")
  592             if TorsionLibraryUtil.DoesSMARTSContainsMappedAtoms(SubClassSMARTSPattern, [":2", ":3", ":4"]):
  593                 # Matched to pattern containing map atom numbers ":2", ":3" and ":4"...
  594                 CentralAtomsIndex1 = SubClassPatternMatch[0]
  595                 CentralAtomsIndex2 = SubClassPatternMatch[1]
  596             else:
  597                 # Matched to pattern containing map atom numbers ":1", ":2" and ":3"...
  598                 CentralAtomsIndex1 = SubClassPatternMatch[1]
  599                 CentralAtomsIndex2 = SubClassPatternMatch[2]
  600         else:
  601             continue
  602 
  603         if CentralAtomsIndex1 != CentralAtomsIndex2:
  604             if ((CentralAtomsIndex1 == RotBondAtomIndex1 and CentralAtomsIndex2 == RotBondAtomIndex2) or (CentralAtomsIndex1 == RotBondAtomIndex2 and CentralAtomsIndex2 == RotBondAtomIndex1)):
  605                 MatchStatus = True
  606                 break
  607     
  608     return (MatchStatus)
  609 
  610 def ProcessTorsionRuleElementForRotatableBondMatch(Mol, RotBondAtomIndices, ElementNode):
  611     """Process torsion rule element to match rotatable bond."""
  612 
  613     #  Retrieve torsions matched to rotatable bond...
  614     TorsionAtomIndicesList, TorsionAnglesList = MatchTorsionRuleToRotatableBond(Mol, RotBondAtomIndices, ElementNode)
  615     if TorsionAtomIndicesList is None:
  616         return (False, None)
  617     
  618     # Setup torsion angles and enery bin information for matched torsion rule...
  619     TorsionRuleAnglesInfo = TorsionLibraryUtil.SetupTorsionRuleAnglesInfo(TorsionLibraryInfo, ElementNode)
  620     if TorsionRuleAnglesInfo is None:
  621         return (False, None)
  622 
  623     # Setup highest strain energy for matched torsions...
  624     TorsionAtomIndices, TorsionAngle, EnergyMethod, AngleNotObserved, Energy, EnergyLowerBound, EnergyUpperBound = SelectHighestStrainEnergyTorsionForRotatableBond(TorsionRuleAnglesInfo, TorsionAtomIndicesList, TorsionAnglesList)
  625     
  626     # Setup hierarchy class and subclass names...
  627     HierarchyClassName, HierarchySubClassName = TorsionLibraryUtil.SetupHierarchyClassAndSubClassNamesForRotatableBond(TorsionLibraryInfo)
  628     
  629     # Setup rule node ID...
  630     TorsionRuleNodeID = ElementNode.get("NodeID")
  631     
  632     # Setup SMARTS...
  633     TorsionRuleSMARTS = ElementNode.get("smarts")
  634     if " " in TorsionRuleSMARTS:
  635         TorsionRuleSMARTS = TorsionRuleSMARTS.replace(" ", "")
  636 
  637     # Setup match info...
  638     MatchInfo = [TorsionAtomIndices, TorsionAngle, HierarchyClassName, HierarchySubClassName, TorsionRuleNodeID, TorsionRuleSMARTS, EnergyMethod, AngleNotObserved, Energy, EnergyLowerBound, EnergyUpperBound]
  639     
  640     # Setup match status...
  641     MatchStatus = True
  642     
  643     return (MatchStatus, MatchInfo)
  644 
  645 def SelectHighestStrainEnergyTorsionForRotatableBond(TorsionRuleAnglesInfo, TorsionAtomIndicesList, TorsionAnglesList):
  646     """Select highest strain energy torsion matched to a rotatable bond."""
  647     
  648     TorsionAtomIndices, TorsionAngle, EnergyMethod, AngleNotObserved, Energy, EnergyLowerBound, EnergyUpperBound = [None] * 7
  649     ValidEnergyValue, ValidCurrentEnergyValue = [False] * 2
  650     
  651     FirstTorsion = True
  652     for Index in range(0, len(TorsionAtomIndicesList)):
  653         CurrentTorsionAtomIndices = TorsionAtomIndicesList[Index]
  654         CurrentTorsionAngle = TorsionAnglesList[Index]
  655 
  656         if FirstTorsion:
  657             FirstTorsion = False
  658             EnergyMethod, AngleNotObserved, Energy, EnergyLowerBound, EnergyUpperBound = SetupStrainEnergyForRotatableBond(TorsionRuleAnglesInfo, CurrentTorsionAngle)
  659             TorsionAtomIndices = CurrentTorsionAtomIndices
  660             TorsionAngle = CurrentTorsionAngle
  661             ValidEnergyValue = IsEnergyValueValid(Energy)
  662             continue
  663 
  664         # Select highest strain energy...
  665         CurrentEnergyMethod, CurrentAngleNotObserved, CurrentEnergy, CurrentEnergyLowerBound, CurrentEnergyUpperBound = SetupStrainEnergyForRotatableBond(TorsionRuleAnglesInfo, CurrentTorsionAngle)
  666         ValidCurrentEnergyValue = IsEnergyValueValid(CurrentEnergy)
  667         
  668         UpdateValues = False
  669         if ValidEnergyValue and ValidCurrentEnergyValue:
  670             if CurrentEnergy > Energy:
  671                 UpdateValues = True
  672         elif ValidCurrentEnergyValue:
  673             if not ValidEnergyValue:
  674                 UpdateValues = True
  675         
  676         if UpdateValues:
  677             EnergyMethod, AngleNotObserved, Energy, EnergyLowerBound, EnergyUpperBound = [CurrentEnergyMethod, CurrentAngleNotObserved, CurrentEnergy, CurrentEnergyLowerBound, CurrentEnergyUpperBound]
  678             TorsionAtomIndices = CurrentTorsionAtomIndices
  679             TorsionAngle = CurrentTorsionAngle
  680     
  681     return (TorsionAtomIndices, TorsionAngle, EnergyMethod, AngleNotObserved, Energy, EnergyLowerBound, EnergyUpperBound)
  682 
  683 def IsEnergyValueValid(Value):
  684     """Check for valid energy value."""
  685 
  686     return False if (Value is None or math.isnan(Value) or math.isinf(Value)) else True
  687 
  688 def SetupStrainEnergyForRotatableBond(TorsionRuleAnglesInfo, TorsionAngle):
  689     """Setup strain energy for rotatable bond."""
  690 
  691     if TorsionRuleAnglesInfo["EnergyMethodExact"]:
  692         return (SetupStrainEnergyForRotatableBondByExactMethod(TorsionRuleAnglesInfo, TorsionAngle))
  693     elif TorsionRuleAnglesInfo["EnergyMethodApproximate"]:
  694         return (SetupStrainEnergyForRotatableBondByApproximateMethod(TorsionRuleAnglesInfo, TorsionAngle))
  695     else:
  696         EnergyMethod, AngleNotObserved, Energy, EnergyLowerBound, EnergyUpperBound = [None, None, None, None, None]
  697         return (EnergyMethod, AngleNotObserved, Energy, EnergyLowerBound, EnergyUpperBound)
  698 
  699 def SetupStrainEnergyForRotatableBondByExactMethod(TorsionRuleAnglesInfo, TorsionAngle):
  700     """Setup strain energy for rotatable bond by exact method."""
  701     
  702     EnergyMethod, AngleNotObserved, Energy, EnergyLowerBound, EnergyUpperBound = ["Exact", None, None, None, None]
  703 
  704     # Map angle to energy bin numbers...
  705     BinNum = math.ceil(TorsionAngle / 10) + 17
  706     PreviousBinNum = (BinNum + 35) % 36
  707     
  708     # Bin angle from -170 to 180 by the right end points...
  709     BinAngleRightSide = (BinNum - 17) * 10
  710     
  711     # Angle offset towards the left of the bin from the right end point...
  712     AngleOffset = TorsionAngle - BinAngleRightSide
  713     
  714     BinEnergy = TorsionRuleAnglesInfo["HistogramEnergy"][BinNum]
  715     PreviousBinEnergy = TorsionRuleAnglesInfo["HistogramEnergy"][PreviousBinNum]
  716     Energy =  BinEnergy + (BinEnergy - PreviousBinEnergy)/10.0 * AngleOffset
  717     
  718     BinEnergyLowerBound = TorsionRuleAnglesInfo["HistogramEnergyLowerBound"][BinNum]
  719     PreviousBinEnergyLowerBound = TorsionRuleAnglesInfo["HistogramEnergyLowerBound"][PreviousBinNum]
  720     EnergyLowerBound =  BinEnergyLowerBound + (BinEnergyLowerBound - PreviousBinEnergyLowerBound)/10.0 * AngleOffset
  721     
  722     BinEnergyUpperBound = TorsionRuleAnglesInfo["HistogramEnergyUpperBound"][BinNum]
  723     PreviousBinEnergyUpperBound = TorsionRuleAnglesInfo["HistogramEnergyUpperBound"][PreviousBinNum]
  724     EnergyUpperBound =  BinEnergyUpperBound + (BinEnergyUpperBound - PreviousBinEnergyUpperBound)/10.0 * AngleOffset
  725     
  726     return (EnergyMethod, AngleNotObserved, Energy, EnergyLowerBound, EnergyUpperBound)
  727 
  728 def SetupStrainEnergyForRotatableBondByApproximateMethod(TorsionRuleAnglesInfo, TorsionAngle):
  729     """Setup strain energy for rotatable bond by approximate method."""
  730 
  731     EnergyMethod, AngleNotObserved, Energy, EnergyLowerBound, EnergyUpperBound = ["Approximate", True, None, None, None]
  732     
  733     for AngleID in TorsionRuleAnglesInfo["IDs"]:
  734         Tolerance2 = TorsionRuleAnglesInfo["Tolerance2"][AngleID]
  735         Theta0 = TorsionRuleAnglesInfo["Theta0"][AngleID]
  736         
  737         AngleDiff = TorsionLibraryUtil.CalculateTorsionAngleDifference(TorsionAngle, Theta0)
  738         if abs(AngleDiff) <= Tolerance2:
  739             Beta1 = TorsionRuleAnglesInfo["Beta1"][AngleID]
  740             Beta2 = TorsionRuleAnglesInfo["Beta2"][AngleID]
  741             
  742             Energy = Beta1*(AngleDiff ** 2) + Beta2*(AngleDiff** 4)
  743 
  744             # Estimates of lower and upper bound are not available for
  745             # approximate method...
  746             EnergyLowerBound = Energy
  747             EnergyUpperBound = Energy
  748             
  749             AngleNotObserved = False
  750             
  751             break
  752     
  753     return (EnergyMethod, AngleNotObserved, Energy, EnergyLowerBound, EnergyUpperBound)
  754 
  755 def MatchTorsionRuleToRotatableBond(Mol, RotBondAtomIndices, ElementNode):
  756     """Retrieve matched torsions for torsion rule matched to rotatable bond."""
  757 
  758     # Get torsion matches...
  759     TorsionMatches = GetMatchesForTorsionRule(Mol, ElementNode)
  760     if TorsionMatches is None or len(TorsionMatches) == 0:
  761         return (None, None)
  762 
  763     # Identify all torsion matches corresponding to central atoms in RotBondAtomIndices...
  764     RotBondAtomIndex1, RotBondAtomIndex2 = RotBondAtomIndices
  765     RotBondTorsionMatches, RotBondTorsionAngles = [None] * 2
  766     
  767     for TorsionMatch in TorsionMatches:
  768         CentralAtomIndex1 = TorsionMatch[1]
  769         CentralAtomIndex2 = TorsionMatch[2]
  770         
  771         if ((CentralAtomIndex1 == RotBondAtomIndex1 and CentralAtomIndex2 == RotBondAtomIndex2) or (CentralAtomIndex1 == RotBondAtomIndex2 and CentralAtomIndex2 == RotBondAtomIndex1)):
  772             TorsionAngle = CalculateTorsionAngle(Mol, TorsionMatch)
  773             if RotBondTorsionMatches is None:
  774                 RotBondTorsionMatches = []
  775                 RotBondTorsionAngles = []
  776             RotBondTorsionMatches.append(TorsionMatch)
  777             RotBondTorsionAngles.append(TorsionAngle)
  778             
  779     return (RotBondTorsionMatches, RotBondTorsionAngles)
  780 
  781 def CalculateTorsionAngle(Mol, TorsionMatch):
  782     """Calculate torsion angle."""
  783 
  784     # Calculate torsion angle using torsion atom indices..
  785     MolConf = Mol.GetConformer(0)
  786     TorsionAngle = rdMolTransforms.GetDihedralDeg(MolConf, TorsionMatch[0], TorsionMatch[1], TorsionMatch[2], TorsionMatch[3])
  787     TorsionAngle = round(TorsionAngle, 2)
  788     
  789     return TorsionAngle
  790 
  791 def GetMatchesForTorsionRule(Mol, ElementNode):
  792     """Get matches for torsion rule."""
  793 
  794     # Match torsions...
  795     TorsionMatches = GetSubstructureMatchesForTorsionRule(Mol, ElementNode)
  796     
  797     if TorsionMatches is None or len(TorsionMatches) == 0:
  798         return TorsionMatches
  799 
  800     # Filter torsion matches...
  801     FiltertedTorsionMatches = []
  802     for TorsionMatch in TorsionMatches:
  803         if len(TorsionMatch) != 4:
  804             continue
  805 
  806         # Ignore matches containing hydrogen atoms as first or last atom...
  807         if Mol.GetAtomWithIdx(TorsionMatch[0]).GetAtomicNum() == 1:
  808             continue
  809         if Mol.GetAtomWithIdx(TorsionMatch[3]).GetAtomicNum() == 1:
  810             continue
  811         
  812         FiltertedTorsionMatches.append(TorsionMatch)
  813     
  814     return FiltertedTorsionMatches
  815 
  816 def GetSubstructureMatchesForTorsionRule(Mol, ElementNode):
  817     """Get substructure matches for a torsion rule."""
  818     
  819     # Setup torsion rule SMARTS pattern mol....
  820     TorsionRuleNodeID = ElementNode.get("NodeID")
  821     TorsionSMARTSPattern = ElementNode.get("smarts")
  822     TorsionPatternMol = TorsionLibraryUtil.SetupTorsionRuleElementPatternMol(TorsionLibraryInfo, ElementNode, TorsionRuleNodeID, TorsionSMARTSPattern)
  823     if TorsionPatternMol is None:
  824         return None
  825     
  826     # Match torsions...
  827     TorsionMatches = RDKitUtil.FilterSubstructureMatchesByAtomMapNumbers(Mol, TorsionPatternMol, Mol.GetSubstructMatches(TorsionPatternMol, useChirality = False))
  828     
  829     return TorsionMatches
  830 
  831 def InitializeTorsionAlertsSummaryInfo():
  832     """Initialize torsion alerts summary."""
  833 
  834     if OptionsInfo["CountMode"]:
  835         return None
  836     
  837     if not OptionsInfo["TrackAlertsSummaryInfo"]:
  838         return None
  839     
  840     TorsionAlertsSummaryInfo = {}
  841     TorsionAlertsSummaryInfo["RuleIDs"] = []
  842 
  843     for DataLabel in ["SMARTSToRuleIDs", "RuleSMARTS", "HierarchyClassName", "HierarchySubClassName", "EnergyMethod", "MaxSingleEnergyAlertTypes", "MaxSingleEnergyAlertTypesMolCount"]:
  844         TorsionAlertsSummaryInfo[DataLabel] = {}
  845     
  846     return TorsionAlertsSummaryInfo
  847 
  848 def TrackTorsionAlertsSummaryInfo(TorsionAlertsSummaryInfo, RotBondsAlertsInfo):
  849     """Track torsion alerts summary information for matched torsion rules in a
  850     molecule."""
  851     
  852     if OptionsInfo["CountMode"]:
  853         return
  854     
  855     if not OptionsInfo["TrackAlertsSummaryInfo"]:
  856         return
  857 
  858     if RotBondsAlertsInfo is None:
  859         return
  860     
  861     MolAlertsInfo = {}
  862     MolAlertsInfo["RuleIDs"] = []
  863     MolAlertsInfo["MaxSingleEnergyAlertTypes"] = {}
  864 
  865     for ID in RotBondsAlertsInfo["IDs"]:
  866         if not RotBondsAlertsInfo["MatchStatus"][ID]:
  867             continue
  868         
  869         if SkipRotatableBondAlertInfo(ID, RotBondsAlertsInfo):
  870             continue
  871 
  872         MaxSingleEnergyAlertType = SetupMaxSingleEnergyAlertStatusValue(RotBondsAlertsInfo["MaxSingleEnergyAlertStatus"][ID])
  873         
  874         TorsionRuleNodeID = RotBondsAlertsInfo["TorsionRuleNodeID"][ID]
  875         TorsionRuleSMARTS = RotBondsAlertsInfo["TorsionRuleSMARTS"][ID]
  876         
  877         # Track data for torsion alert summary information across molecules...
  878         if TorsionRuleNodeID not in TorsionAlertsSummaryInfo["RuleSMARTS"]:
  879             TorsionAlertsSummaryInfo["RuleIDs"].append(TorsionRuleNodeID)
  880             TorsionAlertsSummaryInfo["SMARTSToRuleIDs"][TorsionRuleSMARTS] = TorsionRuleNodeID
  881             
  882             TorsionAlertsSummaryInfo["RuleSMARTS"][TorsionRuleNodeID] = TorsionRuleSMARTS
  883             TorsionAlertsSummaryInfo["HierarchyClassName"][TorsionRuleNodeID] = RotBondsAlertsInfo["HierarchyClassName"][ID]
  884             TorsionAlertsSummaryInfo["HierarchySubClassName"][TorsionRuleNodeID] = RotBondsAlertsInfo["HierarchySubClassName"][ID]
  885             
  886             TorsionAlertsSummaryInfo["EnergyMethod"][TorsionRuleNodeID] = RotBondsAlertsInfo["EnergyMethod"][ID]
  887             
  888             # Initialize number of alert types across all molecules...
  889             TorsionAlertsSummaryInfo["MaxSingleEnergyAlertTypes"][TorsionRuleNodeID] = {}
  890             
  891             # Initialize number of molecules flagged by each alert type...
  892             TorsionAlertsSummaryInfo["MaxSingleEnergyAlertTypesMolCount"][TorsionRuleNodeID] = {}
  893         
  894         if MaxSingleEnergyAlertType not in TorsionAlertsSummaryInfo["MaxSingleEnergyAlertTypes"][TorsionRuleNodeID]:
  895             TorsionAlertsSummaryInfo["MaxSingleEnergyAlertTypes"][TorsionRuleNodeID][MaxSingleEnergyAlertType] = 0
  896             TorsionAlertsSummaryInfo["MaxSingleEnergyAlertTypesMolCount"][TorsionRuleNodeID][MaxSingleEnergyAlertType] = 0
  897         
  898         TorsionAlertsSummaryInfo["MaxSingleEnergyAlertTypes"][TorsionRuleNodeID][MaxSingleEnergyAlertType] += 1
  899         
  900         # Track data for torsion alert information in a molecule...
  901         if TorsionRuleNodeID not in MolAlertsInfo["MaxSingleEnergyAlertTypes"]:
  902             MolAlertsInfo["RuleIDs"].append(TorsionRuleNodeID)
  903             MolAlertsInfo["MaxSingleEnergyAlertTypes"][TorsionRuleNodeID] = {}
  904         
  905         if MaxSingleEnergyAlertType not in MolAlertsInfo["MaxSingleEnergyAlertTypes"][TorsionRuleNodeID]:
  906             MolAlertsInfo["MaxSingleEnergyAlertTypes"][TorsionRuleNodeID][MaxSingleEnergyAlertType] = 0
  907         MolAlertsInfo["MaxSingleEnergyAlertTypes"][TorsionRuleNodeID][MaxSingleEnergyAlertType] += 1
  908     
  909     # Track number of molecules flagged by a specific torsion alert...
  910     for TorsionRuleNodeID in MolAlertsInfo["RuleIDs"]:
  911         for MaxSingleEnergyAlertType in MolAlertsInfo["MaxSingleEnergyAlertTypes"][TorsionRuleNodeID]:
  912             if MolAlertsInfo["MaxSingleEnergyAlertTypes"][TorsionRuleNodeID][MaxSingleEnergyAlertType]:
  913                 TorsionAlertsSummaryInfo["MaxSingleEnergyAlertTypesMolCount"][TorsionRuleNodeID][MaxSingleEnergyAlertType] += 1
  914 
  915 def WriteTorsionAlertsSummaryInfo(Writer, TorsionAlertsSummaryInfo):
  916     """Write out torsion alerts summary informatio to a CSV file."""
  917     
  918     if OptionsInfo["CountMode"]:
  919         return
  920     
  921     if not OptionsInfo["OutfileSummaryMode"]:
  922         return
  923 
  924     if len(TorsionAlertsSummaryInfo["RuleIDs"]) == 0:
  925         return
  926     
  927     # Write headers...
  928     QuoteValues = True
  929     Values = ["TorsionRule", "HierarchyClass", "HierarchySubClass", "EnergyMethod", "MaxSingleEnergyTorsionAlertTypes", "MaxSingleEnergyTorsionAlertCount", "MaxSingleEnergyTorsionAlertMolCount"]
  930     Writer.write("%s\n" % MiscUtil.JoinWords(Values, ",", QuoteValues))
  931     
  932     SortedRuleIDs = GetSortedTorsionAlertsSummaryInfoRuleIDs(TorsionAlertsSummaryInfo)
  933     
  934     # Write alerts information...
  935     for ID in SortedRuleIDs:
  936         # Remove any double quotes in SMARTS...
  937         RuleSMARTS = TorsionAlertsSummaryInfo["RuleSMARTS"][ID]
  938         RuleSMARTS = re.sub("\"", "", RuleSMARTS, re.I)
  939         
  940         HierarchyClassName = TorsionAlertsSummaryInfo["HierarchyClassName"][ID]
  941         HierarchySubClassName = TorsionAlertsSummaryInfo["HierarchySubClassName"][ID]
  942         
  943         EnergyMethod = TorsionAlertsSummaryInfo["EnergyMethod"][ID]
  944         
  945         MaxSingleEnergyAlertTypes = []
  946         MaxSingleEnergyAlertTypesCount = []
  947         MaxSingleEnergyAlertTypesMolCount = []
  948         for MaxSingleEnergyAlertType in sorted(TorsionAlertsSummaryInfo["MaxSingleEnergyAlertTypes"][ID]):
  949             MaxSingleEnergyAlertTypes.append(MaxSingleEnergyAlertType)
  950             MaxSingleEnergyAlertTypesCount.append("%s" % TorsionAlertsSummaryInfo["MaxSingleEnergyAlertTypes"][ID][MaxSingleEnergyAlertType])
  951             MaxSingleEnergyAlertTypesMolCount.append("%s" % TorsionAlertsSummaryInfo["MaxSingleEnergyAlertTypesMolCount"][ID][MaxSingleEnergyAlertType])
  952         
  953         Values = [RuleSMARTS, HierarchyClassName, HierarchySubClassName, EnergyMethod, "%s" % MiscUtil.JoinWords(MaxSingleEnergyAlertTypes, ","), "%s" % (MiscUtil.JoinWords(MaxSingleEnergyAlertTypesCount, ",")), "%s" % (MiscUtil.JoinWords(MaxSingleEnergyAlertTypesMolCount, ","))]
  954         Writer.write("%s\n" % MiscUtil.JoinWords(Values, ",", QuoteValues))
  955 
  956 def GetSortedTorsionAlertsSummaryInfoRuleIDs(TorsionAlertsSummaryInfo):
  957     """Sort torsion rule IDs by  alert types molecule count in descending order."""
  958 
  959     SortedRuleIDs = []
  960     
  961     RuleIDs = TorsionAlertsSummaryInfo["RuleIDs"]
  962     if len(RuleIDs) == 0:
  963         return SortedRuleIDs
  964     
  965     # Setup a map from AlertTypesMolCount to IDs for sorting alerts...
  966     RuleIDs = TorsionAlertsSummaryInfo["RuleIDs"]
  967     MolCountMap = {}
  968     for ID in RuleIDs:
  969         MolCount = 0
  970         for AlertType in sorted(TorsionAlertsSummaryInfo["MaxSingleEnergyAlertTypes"][ID]):
  971             MolCount += TorsionAlertsSummaryInfo["MaxSingleEnergyAlertTypesMolCount"][ID][AlertType]
  972         MolCountMap[ID] = MolCount
  973 
  974     SortedRuleIDs = sorted(RuleIDs, key = lambda ID: MolCountMap[ID], reverse = True)
  975     
  976     return SortedRuleIDs
  977 
  978 def WriteTorsionAlertsFilteredByRulesInfo(TorsionAlertsSummaryInfo):
  979     """Write out torsion alerts SD files for individual torsion rules."""
  980     
  981     if OptionsInfo["CountMode"]:
  982         return
  983     
  984     if not OptionsInfo["OutfilesFilteredByRulesMode"]:
  985         return
  986 
  987     if len(TorsionAlertsSummaryInfo["RuleIDs"]) == 0:
  988         return
  989 
  990     # Setup a molecule reader for filtered molecules...
  991     FilteredMols  = RDKitUtil.ReadMolecules(OptionsInfo["OutfileFiltered"], **OptionsInfo["InfileParams"])
  992 
  993     # Get torsion rule IDs for writing out filtered SD files for individual torsion alert rules... 
  994     TorsionRuleIDs = GetTorsionAlertsFilteredByRuleFilesRuleIDs(TorsionAlertsSummaryInfo)
  995 
  996     # Setup writers...
  997     ByRuleOutfilesWriters = SetupByRuleOutfilesWriters(TorsionRuleIDs)
  998     
  999     for Mol in FilteredMols:
 1000         # Retrieve torsion alerts info...
 1001         TorsionAlertsInfo = RetrieveTorsionAlertsInfo(Mol, TorsionAlertsSummaryInfo)
 1002         if TorsionAlertsInfo is None:
 1003             continue
 1004         
 1005         for TorsionRuleID in TorsionRuleIDs:
 1006             if TorsionRuleID not in TorsionAlertsInfo["RuleSMARTS"]:
 1007                 continue
 1008             
 1009             WriteMoleculeFilteredByRuleID(ByRuleOutfilesWriters[TorsionRuleID], Mol, TorsionRuleID, TorsionAlertsSummaryInfo, TorsionAlertsInfo)
 1010         
 1011     CloseByRuleOutfilesWriters(ByRuleOutfilesWriters)
 1012 
 1013 def GetTorsionAlertsFilteredByRuleFilesRuleIDs(TorsionAlertsSummaryInfo):
 1014     """Get torsion rule IDs for writing out individual SD files filtered by torsion alert rules."""
 1015     
 1016     # Get torsion rule IDs triggering torsion alerts sorted in the order from the most to
 1017     # the least number of unique molecules...
 1018     RuleIDs = GetSortedTorsionAlertsSummaryInfoRuleIDs(TorsionAlertsSummaryInfo)
 1019 
 1020     # Select torsion rule IDs for writing out SD files...
 1021     if not OptionsInfo["OutfilesFilteredByRulesAllMode"]:
 1022         MaxRuleIDs = OptionsInfo["OutfilesFilteredByRulesMaxCount"]
 1023         if MaxRuleIDs < len(RuleIDs):
 1024             RuleIDs = RuleIDs[0:MaxRuleIDs]
 1025     
 1026     return RuleIDs
 1027 
 1028 def RetrieveTorsionAlertsInfo(Mol, TorsionAlertsSummaryInfo):
 1029     """Parse torsion alerts data field value to retrieve alerts information for rotatable bonds."""
 1030     
 1031     TorsionAlertsLabel = OptionsInfo["SDFieldIDsToLabels"]["TorsionAlertsLabel"]
 1032     TorsionAlerts = Mol.GetProp(TorsionAlertsLabel) if Mol.HasProp(TorsionAlertsLabel) else None
 1033     
 1034     if TorsionAlerts is None or len(TorsionAlerts) == 0:
 1035         return None
 1036 
 1037     # Initialize for tracking by rule IDs...
 1038     TorsionAlertsInfo = {}
 1039     TorsionAlertsInfo["RuleIDs"] = []
 1040     
 1041     TorsionAlertsInfo["RuleSMARTS"] = {}
 1042     TorsionAlertsInfo["HierarchyClassName"] = {}
 1043     TorsionAlertsInfo["HierarchySubClassName"] = {}
 1044     TorsionAlertsInfo["EnergyMethod"] = {}
 1045     
 1046     TorsionAlertsInfo["AtomIndices"] = {}
 1047     TorsionAlertsInfo["TorsionAtomIndices"] = {}
 1048     TorsionAlertsInfo["TorsionAngle"] = {}
 1049     
 1050     TorsionAlertsInfo["Energy"] = {}
 1051     TorsionAlertsInfo["EnergyLowerBound"] = {}
 1052     TorsionAlertsInfo["EnergyUpperBound"] = {}
 1053 
 1054     TorsionAlertsInfo["AngleNotObserved"] = {}
 1055     TorsionAlertsInfo["MaxSingleEnergyAlertType"] = {}
 1056     
 1057     TorsionAlertsInfo["AnglesNotObservedCount"] = {}
 1058     TorsionAlertsInfo["MaxSingleEnergyAlertsCount"] = {}
 1059     
 1060     ValuesDelimiter = OptionsInfo["IntraSetValuesDelim"]
 1061     TorsionAlertsSetSize = 12
 1062     
 1063     TorsionAlertsWords = TorsionAlerts.split()
 1064     if len(TorsionAlertsWords) % TorsionAlertsSetSize:
 1065         MiscUtil.PrintError("The number of space delimited values, %s, for TorsionAlerts data field in filtered SD file must be a multiple of %s." % (len(TorsionAlertsWords), TorsionAlertsSetSize))
 1066 
 1067     ID = 0
 1068     for Index in range(0, len(TorsionAlertsWords), TorsionAlertsSetSize):
 1069         ID += 1
 1070         
 1071         RotBondIndices, TorsionIndices, TorsionAngle, Energy, EnergyLowerBound, EnergyUpperBound, HierarchyClass, HierarchySubClass, TorsionRule, EnergyMethod, AngleNotObserved, MaxSingleEnergyAlertType = TorsionAlertsWords[Index: Index + TorsionAlertsSetSize]
 1072         RotBondIndices = RotBondIndices.split(ValuesDelimiter)
 1073         TorsionIndices = TorsionIndices.split(ValuesDelimiter)
 1074 
 1075         if TorsionRule not in TorsionAlertsSummaryInfo["SMARTSToRuleIDs"]:
 1076             MiscUtil.PrintWarning("The SMARTS pattern, %s, for TorsionAlerts data field in filtered SD file doesn't map to any torsion rule..." % TorsionRule)
 1077             continue
 1078         TorsionRuleNodeID = TorsionAlertsSummaryInfo["SMARTSToRuleIDs"][TorsionRule]
 1079     
 1080         # Track data for torsion alerts in a molecule...
 1081         if TorsionRuleNodeID not in TorsionAlertsInfo["RuleSMARTS"]:
 1082             TorsionAlertsInfo["RuleIDs"].append(TorsionRuleNodeID)
 1083 
 1084             TorsionAlertsInfo["RuleSMARTS"][TorsionRuleNodeID] = TorsionRule
 1085             TorsionAlertsInfo["HierarchyClassName"][TorsionRuleNodeID] = HierarchyClass
 1086             TorsionAlertsInfo["HierarchySubClassName"][TorsionRuleNodeID] = HierarchySubClass
 1087             TorsionAlertsInfo["EnergyMethod"][TorsionRuleNodeID] = EnergyMethod
 1088 
 1089             TorsionAlertsInfo["AtomIndices"][TorsionRuleNodeID] = []
 1090             TorsionAlertsInfo["TorsionAtomIndices"][TorsionRuleNodeID] = []
 1091             TorsionAlertsInfo["TorsionAngle"][TorsionRuleNodeID] = []
 1092     
 1093             TorsionAlertsInfo["Energy"][TorsionRuleNodeID] = []
 1094             TorsionAlertsInfo["EnergyLowerBound"][TorsionRuleNodeID] = []
 1095             TorsionAlertsInfo["EnergyUpperBound"][TorsionRuleNodeID] = []
 1096             TorsionAlertsInfo["AngleNotObserved"][TorsionRuleNodeID] = []
 1097             TorsionAlertsInfo["MaxSingleEnergyAlertType"][TorsionRuleNodeID] = []
 1098             
 1099             TorsionAlertsInfo["AnglesNotObservedCount"][TorsionRuleNodeID] = 0
 1100             TorsionAlertsInfo["MaxSingleEnergyAlertsCount"][TorsionRuleNodeID] = 0
 1101             
 1102         # Track multiple values for a rule ID...
 1103         TorsionAlertsInfo["AtomIndices"][TorsionRuleNodeID].append(RotBondIndices)
 1104         TorsionAlertsInfo["TorsionAtomIndices"][TorsionRuleNodeID].append(TorsionIndices)
 1105         TorsionAlertsInfo["TorsionAngle"][TorsionRuleNodeID].append(TorsionAngle)
 1106         
 1107         TorsionAlertsInfo["Energy"][TorsionRuleNodeID].append(Energy)
 1108         TorsionAlertsInfo["EnergyLowerBound"][TorsionRuleNodeID].append(EnergyLowerBound)
 1109         TorsionAlertsInfo["EnergyUpperBound"][TorsionRuleNodeID].append(EnergyUpperBound)
 1110         TorsionAlertsInfo["AngleNotObserved"][TorsionRuleNodeID].append(AngleNotObserved)
 1111         
 1112         TorsionAlertsInfo["MaxSingleEnergyAlertType"][TorsionRuleNodeID].append(MaxSingleEnergyAlertType)
 1113         
 1114         # Count angles not observer for a rule ID...
 1115         if AngleNotObserved == 'Yes':
 1116             TorsionAlertsInfo["AnglesNotObservedCount"][TorsionRuleNodeID] += 1
 1117 
 1118         # Count max single energy alert for a rule ID...
 1119         if MaxSingleEnergyAlertType == 'Yes':
 1120             TorsionAlertsInfo["MaxSingleEnergyAlertsCount"][TorsionRuleNodeID] += 1
 1121     
 1122     return TorsionAlertsInfo
 1123 
 1124 def WriteMolecule(Writer, Mol, RotBondsAlertsInfo):
 1125     """Write out molecule."""
 1126     
 1127     if OptionsInfo["CountMode"]:
 1128         return True
 1129 
 1130     SetupMolPropertiesForAlertsInformation(Mol, RotBondsAlertsInfo)
 1131 
 1132     try:
 1133         Writer.write(Mol)
 1134     except Exception as ErrMsg:
 1135         MiscUtil.PrintWarning("Failed to write molecule %s:\n%s\n" % (RDKitUtil.GetMolName(Mol), ErrMsg))
 1136         return False
 1137     
 1138     return True
 1139 
 1140 def SetupMolPropertiesForAlertsInformation(Mol, RotBondsAlertsInfo):
 1141     """Setup molecule properties containing alerts information for rotatable bonds."""
 1142 
 1143     if not OptionsInfo["OutfileAlerts"]:
 1144         return
 1145 
 1146     SDFieldIDsToLabels = OptionsInfo["SDFieldIDsToLabels"]
 1147     Precision = OptionsInfo["Precision"]
 1148     
 1149     # Setup rotatable bonds count...
 1150     RotBondsCount = 0
 1151     if RotBondsAlertsInfo is not None:
 1152         RotBondsCount =  len(RotBondsAlertsInfo["IDs"])
 1153     Mol.SetProp(SDFieldIDsToLabels["RotBondsCountLabel"],  "%s" % RotBondsCount)
 1154 
 1155     if RotBondsAlertsInfo is not None:
 1156         # Setup total energy along with lower and upper bounds...
 1157         Mol.SetProp(SDFieldIDsToLabels["TotalEnergyLabel"],  "%s" % SetupEnergyValueForSDField(RotBondsAlertsInfo["TotalEnergy"], Precision))
 1158         Mol.SetProp(SDFieldIDsToLabels["TotalEnergyLowerBoundCILabel"],  "%s" % SetupEnergyValueForSDField(RotBondsAlertsInfo["TotalEnergyLowerBound"], Precision))
 1159         Mol.SetProp(SDFieldIDsToLabels["TotalEnergyUpperBoundCILabel"],  "%s" % SetupEnergyValueForSDField(RotBondsAlertsInfo["TotalEnergyUpperBound"], Precision))
 1160         
 1161         # Setup max single energy and alert count...
 1162         if OptionsInfo["MaxSingleEnergyMode"] or OptionsInfo["TotalOrMaxSingleEnergyMode"]:
 1163             Mol.SetProp(SDFieldIDsToLabels["MaxSingleEnergyLabel"],  "%s" % SetupEnergyValueForSDField(RotBondsAlertsInfo["MaxSingleEnergy"], Precision))
 1164             Mol.SetProp(SDFieldIDsToLabels["MaxSingleEnergyAlertsCountLabel"],  "%s" % ("NA" if RotBondsAlertsInfo["MaxSingleEnergyAlertsCount"] is None else RotBondsAlertsInfo["MaxSingleEnergyAlertsCount"]))
 1165             
 1166         Mol.SetProp(SDFieldIDsToLabels["AnglesNotObservedCountLabel"],  "%s" % ("NA" if RotBondsAlertsInfo["AnglesNotObservedCount"] is None else RotBondsAlertsInfo["AnglesNotObservedCount"]))
 1167     
 1168     
 1169     # Setup alert information for rotatable bonds...
 1170     AlertsInfoValues = []
 1171 
 1172     # Delimiter for multiple values corresponding to specific set of information for
 1173     # a rotatable bond. For example: TorsionAtomIndices
 1174     ValuesDelim = OptionsInfo["IntraSetValuesDelim"]
 1175 
 1176     # Delimiter for various values for a rotatable bond...
 1177     RotBondValuesDelim = OptionsInfo["InterSetValuesDelim"]
 1178     
 1179     # Delimiter for values corresponding to multiple rotatable bonds...
 1180     AlertsInfoValuesDelim = OptionsInfo["InterSetValuesDelim"]
 1181     
 1182     if RotBondsAlertsInfo is not None:
 1183         for ID in RotBondsAlertsInfo["IDs"]:
 1184             if not RotBondsAlertsInfo["MatchStatus"][ID]:
 1185                 continue
 1186 
 1187             if SkipRotatableBondAlertInfo(ID, RotBondsAlertsInfo):
 1188                 continue
 1189                 
 1190             RotBondValues = []
 1191             
 1192             # Bond atom indices...
 1193             Values = ["%s" % Value for Value in RotBondsAlertsInfo["AtomIndices"][ID]]
 1194             RotBondValues.append(ValuesDelim.join(Values))
 1195     
 1196             # Torsion atom indices...
 1197             TorsionAtomIndices = SetupTorsionAtomIndicesValues(RotBondsAlertsInfo["TorsionAtomIndices"][ID], ValuesDelim)
 1198             RotBondValues.append(TorsionAtomIndices)
 1199 
 1200             # Torsion angle...
 1201             RotBondValues.append("%.2f" % RotBondsAlertsInfo["TorsionAngle"][ID])
 1202 
 1203             # Energy along with its lower and upper bound confidence interval...
 1204             RotBondValues.append(SetupEnergyValueForSDField(RotBondsAlertsInfo["Energy"][ID], Precision))
 1205             RotBondValues.append(SetupEnergyValueForSDField(RotBondsAlertsInfo["EnergyLowerBound"][ID], Precision))
 1206             RotBondValues.append(SetupEnergyValueForSDField(RotBondsAlertsInfo["EnergyUpperBound"][ID], Precision))
 1207             
 1208             # Hierarchy class and subclass names...
 1209             RotBondValues.append("%s" % RotBondsAlertsInfo["HierarchyClassName"][ID])
 1210             RotBondValues.append("%s" % RotBondsAlertsInfo["HierarchySubClassName"][ID])
 1211             
 1212             # Torsion rule SMARTS...
 1213             RotBondValues.append("%s" % RotBondsAlertsInfo["TorsionRuleSMARTS"][ID])
 1214             
 1215             # Energy method...
 1216             RotBondValues.append("%s" % RotBondsAlertsInfo["EnergyMethod"][ID])
 1217 
 1218             # Angle not observed...
 1219             RotBondValues.append("%s" % SetupAngleNotObservedValue(RotBondsAlertsInfo["AngleNotObserved"][ID]))
 1220             
 1221             # Max single energy alert status...
 1222             RotBondValues.append("%s" % SetupMaxSingleEnergyAlertStatusValue(RotBondsAlertsInfo["MaxSingleEnergyAlertStatus"][ID]))
 1223                 
 1224             # Track joined values for a rotatable bond...
 1225             AlertsInfoValues.append("%s" % RotBondValuesDelim.join(RotBondValues))
 1226     
 1227     if len(AlertsInfoValues):
 1228         Mol.SetProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionAlertsLabel"], "%s" % ("%s" % AlertsInfoValuesDelim.join(AlertsInfoValues)))
 1229 
 1230 def WriteMoleculeFilteredByRuleID(Writer, Mol, TorsionRuleID, TorsionAlertsSummaryInfo, TorsionAlertsInfo):
 1231     """Write out molecule."""
 1232     
 1233     if OptionsInfo["CountMode"]:
 1234         return
 1235 
 1236     SetupMolPropertiesForFilteredByRuleIDAlertsInformation(Mol, TorsionRuleID, TorsionAlertsSummaryInfo, TorsionAlertsInfo)
 1237         
 1238     Writer.write(Mol)
 1239 
 1240 def SetupMolPropertiesForFilteredByRuleIDAlertsInformation(Mol, TorsionRuleID, TorsionAlertsSummaryInfo, TorsionAlertsInfo):
 1241     """Setup molecule properties containing alerts information for torsion alerts
 1242     fileted by Rule IDs."""
 1243 
 1244     # Delete torsion alerts information for rotatable bonds...
 1245     if Mol.HasProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionAlertsLabel"]):
 1246         Mol.ClearProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionAlertsLabel"])
 1247 
 1248     # Delimiter for values...
 1249     IntraSetValuesDelim = OptionsInfo["IntraSetValuesDelim"]
 1250     InterSetValuesDelim = OptionsInfo["InterSetValuesDelim"]
 1251 
 1252     # Setup alert rule information...
 1253     AlertRuleInfoValues = []
 1254     
 1255     AlertRuleInfoValues.append("%s" % TorsionAlertsInfo["HierarchyClassName"][TorsionRuleID])
 1256     AlertRuleInfoValues.append("%s" % TorsionAlertsInfo["HierarchySubClassName"][TorsionRuleID])
 1257     
 1258     AlertRuleInfoValues.append("%s" % TorsionAlertsInfo["RuleSMARTS"][TorsionRuleID])
 1259     AlertRuleInfoValues.append("%s" % TorsionAlertsInfo["EnergyMethod"][TorsionRuleID])
 1260     
 1261     Mol.SetProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionRuleLabel"], "%s" % ("%s" % InterSetValuesDelim.join(AlertRuleInfoValues)))
 1262 
 1263     # Setup max single energy alert count for torsion rule...
 1264     Mol.SetProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionRuleMaxSingleEnergyAlertsCountLabel"],  "%s" % TorsionAlertsInfo["MaxSingleEnergyAlertsCount"][TorsionRuleID])
 1265     
 1266     # Setup angle not observed count for torsion rule...
 1267     Mol.SetProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionRuleAnglesNotObservedCountLabel"],  "%s" % TorsionAlertsInfo["AnglesNotObservedCount"][TorsionRuleID])
 1268     
 1269     # Setup torsion rule alerts...
 1270     # "TorsionRuleAlertsLabel": "TorsionRuleAlerts (RotBondIndices TorsionIndices TorsionAngle Energy EnergyLowerBoundCI EnergyUpperBoundCI EnergyMethod AngleNotObserved MaxSingleEnergyAlert)
 1271     AlertsInfoValues = []
 1272     for Index in range(0, len(TorsionAlertsInfo["AtomIndices"][TorsionRuleID])):
 1273         RotBondInfoValues = []
 1274         
 1275         # Bond atom indices...
 1276         Values = ["%s" % Value for Value in TorsionAlertsInfo["AtomIndices"][TorsionRuleID][Index]]
 1277         RotBondInfoValues.append(IntraSetValuesDelim.join(Values))
 1278         
 1279         # Torsion atom indices retrieved from the filtered SD file and stored as strings...
 1280         Values = ["%s" % Value for Value in TorsionAlertsInfo["TorsionAtomIndices"][TorsionRuleID][Index]]
 1281         RotBondInfoValues.append(IntraSetValuesDelim.join(Values))
 1282         
 1283         # Torsion angle...
 1284         RotBondInfoValues.append(TorsionAlertsInfo["TorsionAngle"][TorsionRuleID][Index])
 1285 
 1286         # Energy and its bounds...
 1287         RotBondInfoValues.append(TorsionAlertsInfo["Energy"][TorsionRuleID][Index])
 1288         RotBondInfoValues.append(TorsionAlertsInfo["EnergyLowerBound"][TorsionRuleID][Index])
 1289         RotBondInfoValues.append(TorsionAlertsInfo["EnergyUpperBound"][TorsionRuleID][Index])
 1290         
 1291         # Angle not observed......
 1292         RotBondInfoValues.append(TorsionAlertsInfo["AngleNotObserved"][TorsionRuleID][Index])
 1293         
 1294         # Max single energy alert type...
 1295         RotBondInfoValues.append(TorsionAlertsInfo["MaxSingleEnergyAlertType"][TorsionRuleID][Index])
 1296         
 1297         # Track alerts informaiton...
 1298         AlertsInfoValues.append("%s" % InterSetValuesDelim.join(RotBondInfoValues))
 1299         
 1300     Mol.SetProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionRuleAlertsLabel"],  "%s" % (InterSetValuesDelim.join(AlertsInfoValues)))
 1301     
 1302 def SkipRotatableBondAlertInfo(ID, RotBondsAlertsInfo):
 1303     """Skip rotatble bond alert info for a specific bond during writing to output files."""
 1304 
 1305     if not OptionsInfo["OutfileAlertsOnly"]:
 1306         return False
 1307     
 1308     if RotBondsAlertsInfo["RotBondsAlertsStatus"] is None:
 1309         return True
 1310 
 1311     Status = False
 1312     if OptionsInfo["TotalEnergyMode"]:
 1313         if not RotBondsAlertsInfo["RotBondsAlertsStatus"]:
 1314             Status = True
 1315     elif OptionsInfo["MaxSingleEnergyMode"]:
 1316         if RotBondsAlertsInfo["MaxSingleEnergyAlertStatus"][ID] is None or not RotBondsAlertsInfo["MaxSingleEnergyAlertStatus"][ID]:
 1317             Status = True
 1318     elif OptionsInfo["TotalOrMaxSingleEnergyMode"]:
 1319         if not RotBondsAlertsInfo["RotBondsAlertsStatus"]:
 1320             Status = True
 1321     
 1322     return Status
 1323 
 1324 def SetupEnergyValueForSDField(Value, Precision):
 1325     """Setup energy value for SD field."""
 1326     
 1327     if Value is None or math.isnan(Value) or math.isinf(Value):
 1328         return "NA"
 1329 
 1330     return "%.*f" % (Precision, Value)
 1331     
 1332 def SetupAngleNotObservedValue(Value):
 1333     """Setup angle not observed value."""
 1334     
 1335     if Value is None:
 1336         return "NA"
 1337 
 1338     return "Yes" if Value else "No"
 1339 
 1340 def SetupMaxSingleEnergyAlertStatusValue(Value):
 1341     """Setup max single energy alert status value."""
 1342     
 1343     if Value is None:
 1344         return "NA"
 1345 
 1346     return "Yes" if Value else "No"
 1347 
 1348 def SetupTorsionAtomIndicesValues(TorsionAtomIndicesList, ValuesDelim):
 1349     """Setup torsion atom indices value for output files."""
 1350 
 1351     Values = ["%s" % Value for Value in TorsionAtomIndicesList]
 1352     
 1353     return ValuesDelim.join(Values)
 1354     
 1355 def SetupOutfilesWriters():
 1356     """Setup molecule and summary writers."""
 1357 
 1358     OutfilesWriters = {"WriterRemaining": None, "WriterFiltered": None, "WriterAlertSummary": None}
 1359 
 1360     # Writers for SD files...
 1361     WriterRemaining, WriterFiltered = SetupMoleculeWriters()
 1362     OutfilesWriters["WriterRemaining"] = WriterRemaining
 1363     OutfilesWriters["WriterFiltered"] = WriterFiltered
 1364     
 1365     # Writer for alert summary CSV file...
 1366     WriterAlertSummary = SetupAlertSummaryWriter()
 1367     OutfilesWriters["WriterAlertSummary"] = WriterAlertSummary
 1368 
 1369     return OutfilesWriters
 1370 
 1371 def SetupMoleculeWriters():
 1372     """Setup molecule writers."""
 1373     
 1374     Writer = None
 1375     WriterFiltered = None
 1376 
 1377     if OptionsInfo["CountMode"]:
 1378         return (Writer, WriterFiltered)
 1379 
 1380     Writer = RDKitUtil.MoleculesWriter(OptionsInfo["Outfile"], **OptionsInfo["OutfileParams"])
 1381     if Writer is None:
 1382         MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OptionsInfo["Outfile"])
 1383     MiscUtil.PrintInfo("\nGenerating file %s..." % OptionsInfo["Outfile"])
 1384     
 1385     if OptionsInfo["OutfileFilteredMode"]:
 1386         WriterFiltered = RDKitUtil.MoleculesWriter(OptionsInfo["OutfileFiltered"], **OptionsInfo["OutfileParams"])
 1387         if WriterFiltered is None:
 1388             MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OptionsInfo["OutfileFiltered"])
 1389         MiscUtil.PrintInfo("Generating file %s..." % OptionsInfo["OutfileFiltered"])
 1390     
 1391     return (Writer, WriterFiltered)
 1392 
 1393 def SetupAlertSummaryWriter():
 1394     """Setup a alert summary writer."""
 1395     
 1396     Writer = None
 1397     
 1398     if OptionsInfo["CountMode"]:
 1399         return Writer
 1400         
 1401     if not OptionsInfo["OutfileSummaryMode"]:
 1402         return Writer
 1403     
 1404     Outfile = OptionsInfo["OutfileSummary"]
 1405     Writer = open(Outfile, "w")
 1406     if Writer is None:
 1407         MiscUtil.PrintError("Failed to setup a writer for output fie %s " % Outfile)
 1408     
 1409     MiscUtil.PrintInfo("Generating file %s..." % Outfile)
 1410     
 1411     return Writer
 1412     
 1413 def CloseOutfilesWriters(OutfilesWriters):
 1414     """Close outfile writers."""
 1415 
 1416     for WriterType, Writer in OutfilesWriters.items():
 1417         if Writer is not None:
 1418             Writer.close()
 1419 
 1420 def SetupByRuleOutfilesWriters(RuleIDs):
 1421     """Setup by rule outfiles writers."""
 1422 
 1423     # Initialize...
 1424     OutfilesWriters = {}
 1425     for RuleID in RuleIDs:
 1426         OutfilesWriters[RuleID] = None
 1427     
 1428     if OptionsInfo["CountMode"]:
 1429         return OutfilesWriters
 1430         
 1431     if not OptionsInfo["OutfilesFilteredByRulesMode"]:
 1432         return OutfilesWriters
 1433     
 1434     FileDir, FileName, FileExt = MiscUtil.ParseFileName(Options["--outfile"])
 1435     OutfilesRoot = "%s_Filtered_TopRule" % FileName
 1436     OutfilesExt = "sdf"
 1437 
 1438     MsgTxt = "all" if OptionsInfo["OutfilesFilteredByRulesAllMode"] else "top %s" % OptionsInfo["OutfilesFilteredByRulesMaxCount"]
 1439     MiscUtil.PrintInfo("\nGenerating output files %s*.%s for %s torsion rules triggering alerts..." % (OutfilesRoot, OutfilesExt, MsgTxt))
 1440     
 1441     # Delete any existing output files...
 1442     Outfiles = glob.glob("%s*.%s" % (OutfilesRoot, OutfilesExt))
 1443     if len(Outfiles):
 1444         MiscUtil.PrintInfo("Deleting existing output files %s*.%s..." % (OutfilesRoot, OutfilesExt))
 1445         for Outfile in Outfiles:
 1446             try:
 1447                 os.remove(Outfile)
 1448             except Exception as ErrMsg:
 1449                 MiscUtil.PrintWarning("Failed to delete file: %s" % ErrMsg)
 1450     
 1451     RuleIndex = 0
 1452     for RuleID in RuleIDs:
 1453         RuleIndex += 1
 1454         Outfile = "%s%s.%s" % (OutfilesRoot, RuleIndex, OutfilesExt)
 1455         Writer = RDKitUtil.MoleculesWriter(Outfile, **OptionsInfo["OutfileParams"])
 1456         if Writer is None:
 1457             MiscUtil.PrintError("Failed to setup a writer for output fie %s " % Outfile)
 1458             
 1459         OutfilesWriters[RuleID] = Writer
 1460 
 1461     return OutfilesWriters
 1462 
 1463 def CloseByRuleOutfilesWriters(OutfilesWriters):
 1464     """Close by rule outfile writers."""
 1465 
 1466     for RuleID, Writer in OutfilesWriters.items():
 1467         if Writer is not None:
 1468             Writer.close()
 1469 
 1470 def ProcessTorsionLibraryInfo():
 1471     """Process torsion library information."""
 1472 
 1473     RetrieveTorsionLibraryInfo()
 1474     ListTorsionLibraryInfo()
 1475 
 1476 def RetrieveTorsionLibraryInfo(Quiet = False):
 1477     """Retrieve torsion library information."""
 1478     
 1479     TorsionLibraryFilePath = OptionsInfo["TorsionEnergyLibraryFile"]
 1480     if TorsionLibraryFilePath is None:
 1481         TorsionLibraryFile = "TorsionStrainEnergyLibrary.xml"
 1482         MayaChemToolsDataDir = MiscUtil.GetMayaChemToolsLibDataPath()
 1483         TorsionLibraryFilePath = os.path.join(MayaChemToolsDataDir, TorsionLibraryFile)
 1484         if not Quiet:
 1485             MiscUtil.PrintInfo("\nRetrieving data from default torsion strain energy library file %s..." % TorsionLibraryFile)
 1486     else:
 1487         TorsionLibraryFilePath = OptionsInfo["TorsionEnergyLibraryFile"]
 1488         if not Quiet:
 1489             MiscUtil.PrintInfo("\nRetrieving data from torsion strain energy library file %s..." % TorsionLibraryFilePath)
 1490         
 1491     TorsionLibraryInfo["TorsionLibElementTree"] = TorsionLibraryUtil.RetrieveTorsionLibraryInfo(TorsionLibraryFilePath)
 1492 
 1493 def ListTorsionLibraryInfo():
 1494     """List torsion library information."""
 1495     
 1496     TorsionLibraryUtil.ListTorsionLibraryInfo(TorsionLibraryInfo["TorsionLibElementTree"])
 1497 
 1498 def SetupTorsionLibraryInfo(Quiet = False):
 1499     """Setup torsion library information for matching rotatable bonds."""
 1500 
 1501     if not Quiet:
 1502         MiscUtil.PrintInfo("\nSetting up torsion library information for matching rotatable bonds...")
 1503 
 1504     TorsionLibraryUtil.SetupTorsionLibraryInfoForMatchingRotatableBonds(TorsionLibraryInfo)
 1505     
 1506 def ProcessRotatableBondsSMARTSMode():
 1507     """"Process SMARTS pattern for rotatable bonds."""
 1508 
 1509     RotBondsMode = OptionsInfo["RotBondsSMARTSMode"]
 1510     RotBondsSMARTSPattern = None
 1511     if re.match("NonStrict", RotBondsMode, re.I):
 1512         RotBondsSMARTSPattern = "[!$(*#*)&!D1]-&!@[!$(*#*)&!D1]"
 1513     elif re.match("SemiStrict", RotBondsMode, re.I):
 1514         RotBondsSMARTSPattern = "[!$(*#*)&!D1&!$(C(F)(F)F)&!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)&!$(C([CH3])([CH3])[CH3])]-!@[!$(*#*)&!D1&!$(C(F)(F)F)&!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)&!$(C([CH3])([CH3])[CH3])]"
 1515     elif re.match("Strict", RotBondsMode, re.I):
 1516         RotBondsSMARTSPattern = "[!$(*#*)&!D1&!$(C(F)(F)F)&!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)&!$(C([CH3])([CH3])[CH3])&!$([CD3](=[N,O,S])-!@[#7,O,S!D1])&!$([#7,O,S!D1]-!@[CD3]=[N,O,S])&!$([CD3](=[N+])-!@[#7!D1])&!$([#7!D1]-!@[CD3]=[N+])]-!@[!$(*#*)&!D1&!$(C(F)(F)F)&!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)&!$(C([CH3])([CH3])[CH3])]"
 1517     elif re.match("Specify", RotBondsMode, re.I):
 1518         RotBondsSMARTSPattern = OptionsInfo["RotBondsSMARTSPatternSpecified"]
 1519         RotBondsSMARTSPattern = RotBondsSMARTSPattern.strip()
 1520         if not len(RotBondsSMARTSPattern):
 1521             MiscUtil.PrintError("Empty value specified for SMILES/SMARTS pattern in  \"--rotBondsSMARTSPattern\" option, %s." % RotBondsMode)
 1522     else:
 1523         MiscUtil.PrintError("The value, %s, specified for option \"-r, --rotBondsSMARTSMode\" is not valid. " % RotBondsMode)
 1524         
 1525     RotBondsPatternMol = Chem.MolFromSmarts(RotBondsSMARTSPattern)
 1526     if RotBondsPatternMol is None:
 1527         if re.match("Specify", RotBondsMode, re.I):
 1528             MiscUtil.PrintError("Failed to create rotatable bonds pattern molecule. The rotatable bonds SMARTS pattern, \"%s\", specified using \"--rotBondsSMARTSPattern\" option is not valid." % (RotBondsSMARTSPattern))
 1529         else:
 1530             MiscUtil.PrintError("Failed to create rotatable bonds pattern molecule. The default rotatable bonds SMARTS pattern, \"%s\", used for, \"%s\", value of  \"-r, --rotBondsSMARTSMode\" option is not valid." % (RotBondsSMARTSPattern, RotBondsMode))
 1531     
 1532     OptionsInfo["RotBondsSMARTSPattern"] = RotBondsSMARTSPattern
 1533 
 1534 def ProcessSDFieldLabelsOption():
 1535     """Process SD data field label option."""
 1536 
 1537     ParamsOptionName = "--outfileSDFieldLabels"
 1538     ParamsOptionValue = Options["--outfileSDFieldLabels"]
 1539 
 1540     ParamsIDsToLabels = {"RotBondsCountLabel": "RotBondsCount", "TotalEnergyLabel": "TotalEnergy", "TotalEnergyLowerBoundCILabel": "TotalEnergyLowerBoundCI", "TotalEnergyUpperBoundCILabel": "TotalEnergyUpperBoundCI", "MaxSingleEnergyLabel": "MaxSingleEnergy", "MaxSingleEnergyAlertsCountLabel": "MaxSingleEnergyAlertsCount", "AnglesNotObservedCountLabel": "AnglesNotObservedCount", "TorsionAlertsLabel": "TorsionAlerts(RotBondIndices TorsionIndices TorsionAngle Energy EnergyLowerBoundCI EnergyUpperBoundCI HierarchyClass HierarchySubClass TorsionRule EnergyMethod AngleNotObserved MaxSingleEnergyAlert)", "TorsionRuleLabel": "TorsionRule (HierarchyClass HierarchySubClass TorsionRule EnergyMethod)", "TorsionRuleMaxSingleEnergyAlertsCountLabel": "TorsionRuleMaxSingleEnergyAlertsCount", "TorsionRuleAnglesNotObservedCountLabel": "TorsionRuleAnglesNotObservedCount", "TorsionRuleAlertsLabel": "TorsionRuleAlerts (RotBondIndices TorsionIndices TorsionAngle Energy EnergyLowerBoundCI EnergyUpperBoundCI AngleNotObserved MaxSingleEnergyAlert)"}
 1541 
 1542     if re.match("^auto$", ParamsOptionValue, re.I):
 1543         OptionsInfo["SDFieldIDsToLabels"] = ParamsIDsToLabels
 1544         return
 1545     
 1546     # Setup a canonical paramater names...
 1547     ValidParamNames = []
 1548     CanonicalParamNamesMap = {}
 1549     for ParamName in sorted(ParamsIDsToLabels):
 1550         ValidParamNames.append(ParamName)
 1551         CanonicalParamNamesMap[ParamName.lower()] = ParamName
 1552     
 1553     ParamsOptionValue = ParamsOptionValue.strip()
 1554     if not ParamsOptionValue:
 1555         PrintError("No valid parameter name and value pairs specified using \"%s\" option" % ParamsOptionName)
 1556     
 1557     ParamsOptionValueWords = ParamsOptionValue.split(",")
 1558     if len(ParamsOptionValueWords) % 2:
 1559         MiscUtil.PrintError("The number of comma delimited paramater names and values, %d, specified using \"%s\" option must be an even number." % (len(ParamsOptionValueWords), ParamsOptionName))
 1560     
 1561     # Validate paramater name and value pairs...
 1562     for Index in range(0, len(ParamsOptionValueWords), 2):
 1563         Name = ParamsOptionValueWords[Index].strip()
 1564         Value = ParamsOptionValueWords[Index + 1].strip()
 1565 
 1566         CanonicalName = Name.lower()
 1567         if  not CanonicalName in CanonicalParamNamesMap:
 1568             MiscUtil.PrintError("The parameter name, %s, specified using \"%s\" is not a valid name. Supported parameter names: %s" % (Name, ParamsOptionName, " ".join(ValidParamNames)))
 1569 
 1570         ParamName = CanonicalParamNamesMap[CanonicalName]
 1571         ParamValue = Value
 1572         
 1573         # Set value...
 1574         ParamsIDsToLabels[ParamName] = ParamValue
 1575     
 1576     OptionsInfo["SDFieldIDsToLabels"] = ParamsIDsToLabels
 1577 
 1578 def ProcessOptions():
 1579     """Process and validate command line arguments and options."""
 1580     
 1581     MiscUtil.PrintInfo("Processing options...")
 1582 
 1583     # Validate options...
 1584     ValidateOptions()
 1585     
 1586     TotalEnergyMode, MaxSingleEnergyMode, TotalOrMaxSingleEnergyMode = [False] * 3
 1587     AlertsMode = Options["--alertsMode"]
 1588     if re.match("^TotalEnergy$", AlertsMode, re.I):
 1589         TotalEnergyMode = True
 1590     elif re.match("^MaxSingleEnergy$", AlertsMode, re.I):
 1591         MaxSingleEnergyMode = True
 1592     elif re.match("^TotalOrMaxSingleEnergy$", AlertsMode, re.I):
 1593         TotalOrMaxSingleEnergyMode = True
 1594     OptionsInfo["AlertsMode"] = AlertsMode
 1595     OptionsInfo["TotalEnergyMode"] = TotalEnergyMode
 1596     OptionsInfo["MaxSingleEnergyMode"] = MaxSingleEnergyMode
 1597     OptionsInfo["TotalOrMaxSingleEnergyMode"] = TotalOrMaxSingleEnergyMode
 1598     
 1599     OptionsInfo["FilterTorsionsNotObserved"] = True if re.match("^yes$", Options["--filterTorsionsNotObserved"], re.I) else False
 1600     
 1601     OptionsInfo["MaxSingleEnergyCutoff"] = float(Options["--alertsMaxSingleEnergyCutoff"])
 1602     OptionsInfo["TotalEnergyCutoff"] = float(Options["--alertsTotalEnergyCutoff"])
 1603     
 1604     OptionsInfo["Infile"] = Options["--infile"]
 1605     ParamsDefaultInfoOverride = {"RemoveHydrogens": False}
 1606     OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters("--infileParams", Options["--infileParams"], InfileName = Options["--infile"], ParamsDefaultInfo = ParamsDefaultInfoOverride)
 1607     
 1608     OptionsInfo["Outfile"] = Options["--outfile"]
 1609     OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters("--outfileParams", Options["--outfileParams"], Options["--infile"], Options["--outfile"])
 1610     
 1611     FileDir, FileName, FileExt = MiscUtil.ParseFileName(Options["--outfile"])
 1612     OutfileFiltered = "%s_Filtered.%s" % (FileName, FileExt)
 1613     OptionsInfo["OutfileFiltered"] = OutfileFiltered
 1614     OptionsInfo["OutfileFilteredMode"] = True if re.match("^yes$", Options["--outfileFiltered"], re.I) else False
 1615 
 1616     OptionsInfo["OutfileSummary"] = "%s_AlertsSummary.csv" % (FileName)
 1617 
 1618     OutfileSummaryMode = Options["--outfileSummary"]
 1619     if re.match("^auto$", OutfileSummaryMode, re.I):
 1620         OutfileSummaryMode = 'yes' if re.match("^MaxSingleEnergy$", Options["--alertsMode"], re.I) else 'no'
 1621     OptionsInfo["OutfileSummaryMode"] = True if re.match("^yes$", OutfileSummaryMode, re.I) else False
 1622     
 1623     if re.match("^yes$", Options["--outfileSummary"], re.I):
 1624         if not re.match("^MaxSingleEnergy$", Options["--alertsMode"], re.I):
 1625             MiscUtil.PrintError("The value \"%s\" specified for \"--outfileSummary\" option is not valid. The specified value is only allowed during \"MaxSingleEnergy\" value of \"-a, --alertsMode\" option." % (Options["--outfileSummary"]))
 1626     
 1627     OutfilesFilteredByRulesMode = Options["--outfilesFilteredByRules"]
 1628     if re.match("^auto$", OutfilesFilteredByRulesMode, re.I):
 1629         OutfilesFilteredByRulesMode = 'yes' if re.match("^MaxSingleEnergy$", Options["--alertsMode"], re.I) else 'no'
 1630     OptionsInfo["OutfilesFilteredByRulesMode"] = True if re.match("^yes$", OutfilesFilteredByRulesMode, re.I) else False
 1631     
 1632     if re.match("^yes$", Options["--outfilesFilteredByRules"], re.I):
 1633         if not re.match("^MaxSingleEnergy$", Options["--alertsMode"], re.I):
 1634             MiscUtil.PrintError("The value \"%s\" specified for \"--outfilesFilteredByRules\" option is not valid. The specified value is only allowed during \"MaxSingleEnergy\" value of \"-a, --alertsMode\" option." % (Options["--outfileSummary"]))
 1635     
 1636     OptionsInfo["TrackAlertsSummaryInfo"] = True if (OptionsInfo["OutfileSummaryMode"] or OptionsInfo["OutfilesFilteredByRulesMode"]) else False
 1637     
 1638     OutfilesFilteredByRulesMaxCount = Options["--outfilesFilteredByRulesMaxCount"]
 1639     if not re.match("^All$", OutfilesFilteredByRulesMaxCount, re.I):
 1640         OutfilesFilteredByRulesMaxCount = int(OutfilesFilteredByRulesMaxCount)
 1641     OptionsInfo["OutfilesFilteredByRulesMaxCount"] = OutfilesFilteredByRulesMaxCount
 1642     OptionsInfo["OutfilesFilteredByRulesAllMode"] = True if re.match("^All$", Options["--outfilesFilteredByRulesMaxCount"], re.I) else False
 1643     
 1644     OptionsInfo["OutfileAlerts"] = True if re.match("^yes$", Options["--outfileAlerts"], re.I) else False
 1645     
 1646     if re.match("^yes$", Options["--outfilesFilteredByRules"], re.I):
 1647         if not re.match("^yes$", Options["--outfileAlerts"], re.I):
 1648             MiscUtil.PrintError("The value \"%s\" specified for \"--outfilesFilteredByRules\" option is not valid. The specified value is only allowed during \"yes\" value of \"--outfileAlerts\" option." % (Options["--outfilesFilteredByRules"]))
 1649     
 1650     OptionsInfo["OutfileAlertsMode"] = Options["--outfileAlertsMode"]
 1651     OptionsInfo["OutfileAlertsOnly"] = True if re.match("^AlertsOnly$", Options["--outfileAlertsMode"], re.I) else False
 1652     
 1653     ProcessSDFieldLabelsOption()
 1654     
 1655     OptionsInfo["Overwrite"] = Options["--overwrite"]
 1656     OptionsInfo["CountMode"] = True if re.match("^count$", Options["--mode"], re.I) else False
 1657 
 1658     OptionsInfo["Precision"] = int(Options["--precision"])
 1659     
 1660     OptionsInfo["MPMode"] = True if re.match("^yes$", Options["--mp"], re.I) else False
 1661     OptionsInfo["MPParams"] = MiscUtil.ProcessOptionMultiprocessingParameters("--mpParams", Options["--mpParams"])
 1662     
 1663     OptionsInfo["RotBondsSMARTSMode"] = Options["--rotBondsSMARTSMode"]
 1664     OptionsInfo["RotBondsSMARTSPatternSpecified"] = Options["--rotBondsSMARTSPattern"]
 1665     ProcessRotatableBondsSMARTSMode()
 1666     
 1667     OptionsInfo["TorsionEnergyLibraryFile"] = None
 1668     if not re.match("^auto$", Options["--torsionEnergyLibraryFile"], re.I):
 1669         OptionsInfo["TorsionEnergyLibraryFile"] = Options["--torsionEnergyLibraryFile"]
 1670     
 1671     # Setup delimiter for writing out torsion alert information to output files...
 1672     OptionsInfo["IntraSetValuesDelim"] = ","
 1673     OptionsInfo["InterSetValuesDelim"] = " "
 1674     
 1675 def RetrieveOptions():
 1676     """Retrieve command line arguments and options."""
 1677     
 1678     # Get options...
 1679     global Options
 1680     Options = docopt(_docoptUsage_)
 1681     
 1682     # Set current working directory to the specified directory...
 1683     WorkingDir = Options["--workingdir"]
 1684     if WorkingDir:
 1685         os.chdir(WorkingDir)
 1686     
 1687     # Handle examples option...
 1688     if "--examples" in Options and Options["--examples"]:
 1689         MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_))
 1690         sys.exit(0)
 1691     
 1692 def ProcessListTorsionLibraryOption():
 1693     """Process list torsion library information."""
 1694 
 1695     # Validate and process dataFile option for listing torsion library information...
 1696     OptionsInfo["TorsionEnergyLibraryFile"] = None
 1697     if not re.match("^auto$", Options["--torsionEnergyLibraryFile"], re.I):
 1698         MiscUtil.ValidateOptionFilePath("-t, --torsionEnergyLibraryFile", Options["--torsionEnergyLibraryFile"])
 1699         OptionsInfo["TorsionEnergyLibraryFile"] = Options["--torsionEnergyLibraryFile"]
 1700     
 1701     RetrieveTorsionLibraryInfo()
 1702     ListTorsionLibraryInfo()
 1703     
 1704 def ValidateOptions():
 1705     """Validate option values."""
 1706 
 1707     MiscUtil.ValidateOptionTextValue("-a, --alertsMode", Options["--alertsMode"], "TotalEnergy MaxSingleEnergy TotalOrMaxSingleEnergy")
 1708     
 1709     MiscUtil.ValidateOptionFloatValue("--alertsMaxSingleEnergyCutoff", Options["--alertsMaxSingleEnergyCutoff"], {">": 0.0})
 1710     MiscUtil.ValidateOptionFloatValue("--alertsTotalEnergyCutoff", Options["--alertsTotalEnergyCutoff"], {">": 0.0})
 1711     
 1712     MiscUtil.ValidateOptionTextValue("--filterTorsionsNotObserved", Options["--filterTorsionsNotObserved"], "yes no")
 1713     
 1714     MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"])
 1715     MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "sdf sd mol")
 1716     
 1717     MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "sdf sd")
 1718     if re.match("^filter$", Options["--mode"], re.I):
 1719         MiscUtil.ValidateOptionsOutputFileOverwrite("-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"])
 1720         MiscUtil.ValidateOptionsDistinctFileNames("-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"])
 1721 
 1722     MiscUtil.ValidateOptionTextValue("--outfileFiltered", Options["--outfileFiltered"], "yes no")
 1723     
 1724     MiscUtil.ValidateOptionTextValue("--outfilesFilteredByRules", Options["--outfilesFilteredByRules"], "yes no auto")
 1725     if not re.match("^All$", Options["--outfilesFilteredByRulesMaxCount"], re.I):
 1726         MiscUtil.ValidateOptionIntegerValue("--outfilesFilteredByRulesMaxCount", Options["--outfilesFilteredByRulesMaxCount"], {">": 0})
 1727     
 1728     MiscUtil.ValidateOptionTextValue("--outfileSummary", Options["--outfileSummary"], "yes no auto")
 1729     MiscUtil.ValidateOptionTextValue("--outfileAlerts", Options["--outfileAlerts"], "yes no")
 1730     MiscUtil.ValidateOptionTextValue("--outfileAlertsMode", Options["--outfileAlertsMode"], "All AlertsOnly")
 1731     
 1732     MiscUtil.ValidateOptionTextValue("-m, --mode", Options["--mode"], "filter count")
 1733     if re.match("^filter$", Options["--mode"], re.I):
 1734         if not Options["--outfile"]:
 1735             MiscUtil.PrintError("The outfile must be specified using \"-o, --outfile\" during \"filter\" value of \"-m, --mode\" option")
 1736         
 1737     MiscUtil.ValidateOptionTextValue("--mp", Options["--mp"], "yes no")
 1738     
 1739     MiscUtil.ValidateOptionIntegerValue("-p, --precision", Options["--precision"], {">": 0})
 1740     
 1741     MiscUtil.ValidateOptionTextValue("-r, --rotBondsSMARTSMode", Options["--rotBondsSMARTSMode"], "NonStrict SemiStrict Strict Specify")
 1742     if re.match("^Specify$", Options["--rotBondsSMARTSMode"], re.I):
 1743         if not Options["--rotBondsSMARTSPattern"]:
 1744             MiscUtil.PrintError("The SMARTS pattern must be specified using \"--rotBondsSMARTSPattern\" during \"Specify\" value of \"-r, --rotBondsSMARTS\" option")
 1745     
 1746     if not re.match("^auto$", Options["--torsionEnergyLibraryFile"], re.I):
 1747         MiscUtil.ValidateOptionFilePath("-t, --torsionEnergyLibraryFile", Options["--torsionEnergyLibraryFile"])
 1748 
 1749 # Setup a usage string for docopt...
 1750 _docoptUsage_ = """
 1751 RDKitFilterTorsionStrainEnergyAlerts.py - Filter torsion strain energy library alerts
 1752 
 1753 Usage:
 1754     RDKitFilterTorsionStrainEnergyAlerts.py [--alertsMode <TotalEnergy, MaxSingleEnergy, or TotalOrMaxSingleEnergy>]
 1755                                             [--alertsMaxSingleEnergyCutoff <Number>] [--alertsTotalEnergyCutoff <Number>]
 1756                                             [--filterTorsionsNotObserved <yes or no>] [--infileParams <Name,Value,...>] [--mode <filter or count>]
 1757                                             [--mp <yes or no>] [--mpParams <Name,Value,...>] [--outfileAlerts <yes or no>]
 1758                                             [--outfileAlertsMode <All or AlertsOnly>] [--outfileFiltered <yes or no>]
 1759                                             [--outfilesFilteredByRules <yes or no>] [--outfilesFilteredByRulesMaxCount <All or number>]
 1760                                             [--outfileSummary <yes or no>] [--outfileSDFieldLabels <Type,Label,...>] [--outfileParams <Name,Value,...>]
 1761                                             [--overwrite] [--precision <number>] [ --rotBondsSMARTSMode <NonStrict, SemiStrict,...>]
 1762                                             [--rotBondsSMARTSPattern <SMARTS>] [--torsionEnergyLibraryFile <FileName or auto>]
 1763                                             [-w <dir>] -i <infile> -o <outfile>
 1764     RDKitFilterTorsionStrainEnergyAlerts.py [--torsionEnergyLibraryFile <FileName or auto>] -l | --list
 1765     RDKitFilterTorsionStrainEnergyAlerts.py -h | --help | -e | --examples
 1766 
 1767 Description:
 1768     Filter strained molecules from an input file for torsion strain energy library
 1769     [ Ref 153 ] alerts by matching rotatable bonds against SMARTS patterns specified
 1770     for torsion rules in a torsion energy library file and write out appropriate
 1771     molecules to output files. The molecules must have 3D coordinates in input file.
 1772     The default torsion strain energy library file, TorsionStrainEnergyLibrary.xml,
 1773     is available under MAYACHEMTOOLS/lib/data directory.
 1774     
 1775     The data in torsion strain energy library file is organized in a hierarchical
 1776     manner. It consists of one generic class and six specific classes at the highest
 1777     level. Each class contains multiple subclasses corresponding to named functional
 1778     groups or substructure patterns. The subclasses consist of torsion rules sorted
 1779     from specific to generic torsion patterns. The torsion rule, in turn, contains a
 1780     list of peak values for torsion angles and two tolerance values. A pair of tolerance
 1781     values define torsion bins around a torsion peak value.
 1782     
 1783     A strain energy calculation method, 'exact' or 'approximate' [ Ref 153 ], is 
 1784     associated with each torsion rule for calculating torsion strain energy. The 'exact'
 1785     stain energy calculation relies on the energy bins available under the energy histogram
 1786     consisting of 36 bins covering angles from -180 to 180. The width of each bin is 10
 1787     degree. The energy bins are are defined at the right end points. The first and the
 1788     last energy bins correspond to -170 and 180 respectively. The torsion angle is mapped
 1789     to a energy bin. An angle offset is calculated for the torsion angle from the the right
 1790     end point angle of the bin. The strain energy is estimated for the angle offset based
 1791     on the energy difference between the current and previous bins. The torsion strain
 1792     energy, in terms of torsion energy units (TEUs), corresponds to the sum of bin strain
 1793     energy and the angle offset strain energy.
 1794         
 1795         Energy = BinEnergyDiff/10.0 * BinAngleOffset + BinEnergy[BinNum]
 1796         
 1797         Where:
 1798         
 1799         BinEnergyDiff = BinEnergy[BinNum] - BinEnergy[PreviousBinNum]
 1800         BinAngleOffset = TorsionAngle - BinAngleRightSide
 1801         
 1802     The 'approximate' strain energy calculation relies on the angle difference between a
 1803     torsion angle and the torsion peaks observed for the torsion rules in the torsion
 1804     energy library. The torsion angle is matched to a torsion peak based on the value of
 1805     torsion angle difference. It must be less than or equal to the value for the second
 1806     tolerance 'tolerance2'. Otherwise, the torsion angle is not observed in the torsion
 1807     energy library and a value of 'NA' is assigned for torsion energy along with the lower
 1808     and upper bounds on energy at 95% confidence interval. The 'approximate' torsion
 1809     energy (TEUs) for observed torsion angle is calculated using the following formula:
 1810         
 1811         Energy = beta_1 * (AngleDiff ** 2) + beta_2 * (AngleDiff ** 4)
 1812         
 1813     The coefficients 'beta_1' and 'beta_2' are available for the observed angles in 
 1814     the torsion strain energy library. The 'AngleDiff' is the difference between the
 1815     torsion angle and the matched torsion peak.
 1816     
 1817     For example:
 1818          
 1819         <library>
 1820             <hierarchyClass id1="G" id2="G" name="GG">
 1821             ...
 1822             </hierarchyClass>
 1823             <hierarchyClass id1="C" id2="O" name="CO">
 1824                 <hierarchySubClass name="Ester bond I" smarts="O=[C:2][O:3]">
 1825                     <torsionRule method="exact" smarts=
 1826                         "[O:1]=[C:2]!@[O:3]~[CH0:4]">
 1827                         <angleList>
 1828                             <angle score="56.52" tolerance1="20.00"
 1829                             tolerance2="25.00" value="0.0"/>
 1830                         </angleList>
 1831                         <histogram>
 1832                             <bin count="1"/>
 1833                             ...
 1834                         </histogram>
 1835                         <histogram_shifted>
 1836                             <bin count="0"/>
 1837                             ...
 1838                         </histogram_shifted>
 1839                         <histogram_converted>
 1840                             <bin energy="4.67... lower="2.14..." upper="Inf"/>
 1841                             ...
 1842                             <bin energy="1.86..." lower="1.58..." upper="2.40..."/>
 1843                             ...
 1844                            </histogram_converted>
 1845                     </torsionRule>
 1846                     <torsionRule method="approximate" smarts=
 1847                         "[cH0:1][c:2]([cH0])!@[O:3][p:4]">
 1848                         <angleList>
 1849                         <angle beta_1="0.002..." beta_2="-7.843...e-07"
 1850                             score="27.14" theta_0="-90.0" tolerance1="30.00"
 1851                             tolerance2="45.00" value="-90.0"/>
 1852                         ...
 1853                         </angleList>
 1854                         <histogram>
 1855                             <bin count="0"/>
 1856                              ...
 1857                         </histogram>
 1858                         <histogram_shifted>
 1859                             <bin count="0"/>
 1860                             ...
 1861                         </histogram_shifted>
 1862                     </torsionRule>
 1863                 ...
 1864              ...
 1865             </hierarchyClass>
 1866              <hierarchyClass id1="N" id2="C" name="NC">
 1867              ...
 1868             </hierarchyClass>
 1869             <hierarchyClass id1="S" id2="N" name="SN">
 1870             ...
 1871             </hierarchyClass>
 1872             <hierarchyClass id1="C" id2="S" name="CS">
 1873             ...
 1874             </hierarchyClass>
 1875             <hierarchyClass id1="C" id2="C" name="CC">
 1876             ...
 1877             </hierarchyClass>
 1878             <hierarchyClass id1="S" id2="S" name="SS">
 1879              ...
 1880             </hierarchyClass>
 1881         </library>
 1882         
 1883     The rotatable bonds in a 3D molecule are identified using a default SMARTS pattern.
 1884     A custom SMARTS pattern may be optionally specified to detect rotatable bonds.
 1885     Each rotatable bond is matched to a torsion rule in the torsion strain energy library.
 1886     The strain energy is calculated for each rotatable bond using the calculation
 1887     method, 'exact' or 'approximate', associated with the matched torsion rule.
 1888     
 1889     The total strain energy (TEUs) of a molecule corresponds to the sum of  'exact' and
 1890     'approximate' strain energies calculated for all matched rotatable bonds in the
 1891     molecule. The total strain energy is set to 'NA' for molecules containing a 'approximate'
 1892     energy estimate for a torsion angle not observed in the torsion energy library. In
 1893     addition, the lower and upper bounds on energy at 95% confidence interval are
 1894     set to 'NA'.
 1895     
 1896     The following output files are generated after the filtering:
 1897         
 1898         <OutfileRoot>.sdf
 1899         <OutfileRoot>_Filtered.sdf
 1900         <OutfileRoot>_AlertsSummary.csv
 1901         <OutfileRoot>_Filtered_TopRule*.sdf
 1902         
 1903     The last two set of outfile files, <OutfileRoot>_AlertsSummary.csv and
 1904     <OutfileRoot>_<OutfileRoot>_AlertsSummary.csv, are only generated during filtering
 1905     by 'MaxSingleEnergy'.
 1906     
 1907     The supported input file formats are: Mol (.mol), SD (.sdf, .sd)
 1908     
 1909     The supported output file formats are: SD (.sdf, .sd)
 1910 
 1911 Options:
 1912     -a, --alertsMode <TotalEnergy,...>  [default: TotalEnergy]
 1913         Torsion strain energy library alert types to use for filtering molecules
 1914         containing rotatable bonds based on the calculated values for the total
 1915         torsion strain energy of a molecule and  the maximum single strain
 1916         energy of a rotatable bond in a molecule.
 1917         
 1918         Possible values: TotalEnergy, MaxSingleEnergy, or TotalOrMaxSingleEnergy
 1919         
 1920         The strain energy cutoff values in terms of torsion energy units (TEUs) are
 1921         used to filter molecules as shown below:
 1922             
 1923             AlertsMode                AlertsEnergyCutoffs (TEUs)
 1924             
 1925             TotalEnergy               >= TotalEnergyCutoff
 1926             
 1927             MaxSingleEnergy           >= MaxSingleEnergyCutoff
 1928             
 1929             TotalOrMaxSingleEnergy    >= TotalEnergyCutoff
 1930                                       or >= MaxSingleEnergyCutoff
 1931         
 1932     --alertsMaxSingleEnergyCutoff <Number>  [default: 1.8]
 1933         Maximum single strain energy (TEUs) cutoff [ Ref 153 ] for filtering molecules
 1934         based on the maximum value of a single strain energy of a rotatable bond
 1935         in  a molecule. This option is used during 'MaxSingleEnergy' or
 1936         'TotalOrMaxSingleEnergy' values of '-a, --alertsMode' option.
 1937         
 1938         The maximum single strain energy must be greater than or equal to the
 1939         specified cutoff value for filtering molecules.
 1940     --alertsTotalEnergyCutoff <Number>  [default: 6.0]
 1941         Total strain strain energy (TEUs) cutoff [ Ref 153 ] for filtering molecules
 1942         based on total strain energy for all rotatable bonds in a molecule. This
 1943         option is used during 'TotalEnergy' or 'TotalOrMaxSingleEnergy'
 1944         values of '-a, --alertsMode' option.
 1945         
 1946         The total strain energy must be greater than or equal to the specified
 1947         cutoff value for filtering molecules.
 1948     --filterTorsionsNotObserved <yes or no>  [default: no]
 1949         Filter molecules containing torsion angles not observed in torsion strain
 1950         energy library. It's not possible to calculate torsion strain energies for
 1951         these torsions during 'approximate' match to a specified torsion in the
 1952         library.
 1953         
 1954         The 'approximate' strain energy calculation relies on the angle difference
 1955         between a torsion angle and the torsion peaks observed for the torsion
 1956         rules in the torsion energy library. The torsion angle is matched to a
 1957         torsion peak based on the value of torsion angle difference. It must be
 1958         less than or equal to the value for the second tolerance 'tolerance2'.
 1959         Otherwise, the torsion angle is not observed in the torsion energy library
 1960         and a value of 'NA' is assigned for torsion energy along with the lower and
 1961         upper bounds on energy at 95% confidence interval.
 1962     -e, --examples
 1963         Print examples.
 1964     -h, --help
 1965         Print this help message.
 1966     -i, --infile <infile>
 1967         Input file name.
 1968     --infileParams <Name,Value,...>  [default: auto]
 1969         A comma delimited list of parameter name and value pairs for reading
 1970         molecules from files. The supported parameter names for different file
 1971         formats, along with their default values, are shown below:
 1972             
 1973             SD, MOL: removeHydrogens,no,sanitize,yes,strictParsing,yes
 1974             
 1975     -l, --list
 1976         List torsion library information without performing any filtering.
 1977     -m, --mode <filter or count>  [default: filter]
 1978         Specify whether to filter molecules for torsion strain energy library [ Ref 153 ]
 1979         alerts by matching rotatable bonds against SMARTS patterns specified for
 1980         torsion rules to calculate torsion strain energies and write out the rest
 1981         of the molecules to an outfile or simply count the number of matched
 1982         molecules marked for filtering.
 1983     --mp <yes or no>  [default: no]
 1984         Use multiprocessing.
 1985          
 1986         By default, input data is retrieved in a lazy manner via mp.Pool.imap()
 1987         function employing lazy RDKit data iterable. This allows processing of
 1988         arbitrary large data sets without any additional requirements memory.
 1989         
 1990         All input data may be optionally loaded into memory by mp.Pool.map()
 1991         before starting worker processes in a process pool by setting the value
 1992         of 'inputDataMode' to 'InMemory' in '--mpParams' option.
 1993         
 1994         A word to the wise: The default 'chunkSize' value of 1 during 'Lazy' input
 1995         data mode may adversely impact the performance. The '--mpParams' section
 1996         provides additional information to tune the value of 'chunkSize'.
 1997     --mpParams <Name,Value,...>  [default: auto]
 1998         A comma delimited list of parameter name and value pairs to configure
 1999         multiprocessing.
 2000         
 2001         The supported parameter names along with their default and possible
 2002         values are shown below:
 2003         
 2004             chunkSize, auto
 2005             inputDataMode, Lazy   [ Possible values: InMemory or Lazy ]
 2006             numProcesses, auto   [ Default: mp.cpu_count() ]
 2007         
 2008         These parameters are used by the following functions to configure and
 2009         control the behavior of multiprocessing: mp.Pool(), mp.Pool.map(), and
 2010         mp.Pool.imap().
 2011         
 2012         The chunkSize determines chunks of input data passed to each worker
 2013         process in a process pool by mp.Pool.map() and mp.Pool.imap() functions.
 2014         The default value of chunkSize is dependent on the value of 'inputDataMode'.
 2015         
 2016         The mp.Pool.map() function, invoked during 'InMemory' input data mode,
 2017         automatically converts RDKit data iterable into a list, loads all data into
 2018         memory, and calculates the default chunkSize using the following method
 2019         as shown in its code:
 2020         
 2021             chunkSize, extra = divmod(len(dataIterable), len(numProcesses) * 4)
 2022             if extra: chunkSize += 1
 2023         
 2024         For example, the default chunkSize will be 7 for a pool of 4 worker processes
 2025         and 100 data items.
 2026         
 2027         The mp.Pool.imap() function, invoked during 'Lazy' input data mode, employs
 2028         'lazy' RDKit data iterable to retrieve data as needed, without loading all the
 2029         data into memory. Consequently, the size of input data is not known a priori.
 2030         It's not possible to estimate an optimal value for the chunkSize. The default 
 2031         chunkSize is set to 1.
 2032         
 2033         The default value for the chunkSize during 'Lazy' data mode may adversely
 2034         impact the performance due to the overhead associated with exchanging
 2035         small chunks of data. It is generally a good idea to explicitly set chunkSize to
 2036         a larger value during 'Lazy' input data mode, based on the size of your input
 2037         data and number of processes in the process pool.
 2038         
 2039         The mp.Pool.map() function waits for all worker processes to process all
 2040         the data and return the results. The mp.Pool.imap() function, however,
 2041         returns the the results obtained from worker processes as soon as the
 2042         results become available for specified chunks of data.
 2043         
 2044         The order of data in the results returned by both mp.Pool.map() and 
 2045         mp.Pool.imap() functions always corresponds to the input data.
 2046     -o, --outfile <outfile>
 2047         Output file name.
 2048     --outfileAlerts <yes or no>  [default: yes]
 2049         Write out alerts information to SD output files.
 2050     --outfileAlertsMode <All or AlertsOnly>  [default: AlertsOnly]
 2051         Write alerts information to SD output files for all alerts or only for alerts
 2052         specified by '--AlertsMode' option. Possible values: All or AlertsOnly
 2053         This option is only valid for 'Yes' value of '--outfileAlerts' option.
 2054         
 2055         The following alerts information is added to SD output files using
 2056         'TorsionAlerts' data field:
 2057             
 2058             RotBondIndices TorsionIndices TorsionAngle
 2059             Energy EnergyLowerBoundCI EnergyUpperBoundCI
 2060             HierarchyClass HierarchySubClass TorsionRule
 2061             EnergyMethod AngleNotObserved MaxSingleEnergyAlert
 2062             
 2063         The following data filelds are added to SD output files based on the value of
 2064         '--AlertsMode' option:
 2065             
 2066             TotalEnergy
 2067             TotalEnergyLowerBoundCI
 2068             TotalEnergyUpperBoundCI
 2069             
 2070             MaxSingleEnergy
 2071             MaxSingleEnergyAlertsCount
 2072             
 2073             AnglesNotObservedCount
 2074             
 2075         The 'RotBondsCount' is always added to SD output files containing both
 2076         remaining and filtered molecules.
 2077         
 2078         Format:
 2079             
 2080             > <RotBondsCount>
 2081             Number
 2082             
 2083             > <TotalEnergy>
 2084             Number
 2085             
 2086             > <TotalEnergyLowerBoundCI>
 2087             Number
 2088             
 2089             > <TotalEnergyUpperBoundCI>
 2090             Number
 2091             
 2092             > <MaxSingleEnergy>
 2093             Number
 2094             
 2095             > <MaxSingleEnergyAlertsCount>
 2096             Number
 2097             
 2098             > <AnglesNotObservedCount>
 2099             Number
 2100             
 2101             > <TorsionAlerts (RotBondIndices TorsionIndices TorsionAngle
 2102                 Energy EnergyLowerBoundCI EnergyUpperBoundCI
 2103                 HierarchyClass HierarchySubClass TorsionRule
 2104                 EnergyMethod AngleNotObserved MaxSingleEnergyAlert)>
 2105             AtomIndex2,AtomIndex3  AtomIndex1,AtomIndex2,AtomIndex3,AtomIndex4
 2106             Angle  Energy EnergyLowerBoundCI EnergyUpperBoundCI
 2107             ClassName SubClassName SMARTS EnergyMethod Yes|No|NA Yes|No|NA
 2108             ... ... ...
 2109             ... ... ...
 2110             
 2111         A set of 12 values is written out as value of 'TorsionAlerts' data field for
 2112         each torsion in a molecule. The space character is used as a delimiter
 2113         to separate values with in a set and across set. The comma character
 2114         is used to delimit multiple values for each value in a set.
 2115         
 2116         The 'RotBondIndices' and 'TorsionIndices' contain 2 and 4 comma delimited
 2117         values representing atom indices for a rotatable bond and the matched
 2118         torsion.
 2119         
 2120         The 'Energy' value is the estimated strain energy for the matched torsion.
 2121         The 'EnergyLowerBoundCI' and 'EnergyUpperBoundCI' represent lower and
 2122         bound energy estimates at 95% confidence interval. The 'EnergyMethod',
 2123         exact or approximate, corresponds to the method employed to estimate
 2124         torsion strain energy.
 2125         
 2126         The 'AngleNotObserved' is only valid for 'approximate' value of 'EnergyMethod'.
 2127         It has three possible values: Yes, No, or NA. The 'Yes' value indicates that
 2128         the 'TorsionAngle' is outside the 'tolerance2' of all peaks for the matched
 2129         torsion rule in the torsion library.
 2130         
 2131         The 'MaxSingleEnergyAlert' is valid for the following values of '-a, --alertsMode'
 2132         option: 'MaxSingleEnergy' or 'TotalOrMaxSingleEnergy'. It has three possible
 2133         values: Yes, No, or NA. It's set to 'NA' for 'Yes' or 'NA' values of
 2134         'AngleNotObserved'. The 'Yes' value indicates that the estimated torsion
 2135         energy is greater than the specified value for '--alertsMaxSingleEnergyCutoff'
 2136         option.
 2137         
 2138         For example:
 2139             
 2140             >  <RotBondsCount>  (1) 
 2141             14
 2142             
 2143             >  <TotalEnergy>  (1) 
 2144             6.8065
 2145             
 2146             >  <TotalEnergyLowerBoundCI>  (1) 
 2147             5.9340
 2148             
 2149             >  <TotalEnergyUpperBoundCI>  (1) 
 2150             NA
 2151             
 2152             >  <MaxSingleEnergy>  (1) 
 2153             1.7108
 2154             
 2155             >  <MaxSingleEnergyAlertsCount>  (1) 
 2156             0
 2157             
 2158             >  <AnglesNotObservedCount>  (1) 
 2159             0
 2160              
 2161             >  <TorsionAlerts(RotBondIndices TorsionIndices TorsionAngle Energy
 2162                 EnergyLowerBoundCI EnergyUpperBoundCI HierarchyClass
 2163                 HierarchySubClass TorsionRule EnergyMethod AngleNotObserved
 2164                 MaxSingleEnergyAlert)>  (1) 
 2165             2,1 48,2,1,0 61.90 0.0159 -0.0320 0.0674 CO Ether [O:1][CX4:2]!
 2166             @[O:3][CX4:4] Exact NA No 2,3 1,2,3,4 109.12 1.5640 1.1175 NA CC
 2167             None/[CX4][CX3] [O:1][CX4:2]!@[CX3:3]=[O:4] Exact NA No
 2168             ... ... ...
 2169             
 2170     --outfileFiltered <yes or no>  [default: yes]
 2171         Write out a file containing filtered molecules. Its name is automatically
 2172         generated from the specified output file. Default: <OutfileRoot>_
 2173         Filtered.<OutfileExt>.
 2174     --outfilesFilteredByRules <yes or no>  [default: auto]
 2175         Write out SD files containing filtered molecules for individual torsion
 2176         rules triggering alerts in molecules. The name of SD files are automatically
 2177         generated from the specified output file. Default file names: <OutfileRoot>_
 2178         Filtered_TopRule*.sdf.
 2179         
 2180         Default value: 'yes' for 'MaxSingleEnergy' of '-a, --alertsMode' option';
 2181         otherwise, 'no'.
 2182         
 2183         The output files are only generated for 'MaxSingleEnergy' of
 2184         '-a, --alertsMode' option.
 2185         
 2186         The following alerts information is added to SD output files:
 2187             
 2188             > <RotBondsCount>
 2189             Number
 2190             
 2191             > <TotalEnergy>
 2192             Number
 2193             
 2194             > <TotalEnergyLowerBoundCI>
 2195             Number
 2196             
 2197             > <TotalEnergyUpperBoundCI>
 2198             Number
 2199             
 2200             > <MaxSingleEnergy>
 2201             Number
 2202             
 2203             > <MaxSingleEnergyAlertsCount>
 2204             Number
 2205             
 2206             > <AnglesNotObservedCount>
 2207             Number
 2208             
 2209             >  <TorsionRule (HierarchyClass HierarchySubClass TorsionRule
 2210                 EnergyMethod)> 
 2211             ClassName SubClassName EnergyMethod SMARTS
 2212              ... ... ...
 2213             
 2214             > <TorsionRuleMaxSingleEnergyAlertsCount>
 2215             Number
 2216             
 2217             > <TorsionRuleAnglesNotObservedCount>
 2218             Number
 2219             
 2220             >  <TorsionRuleAlerts (RotBondIndices TorsionIndices TorsionAngle
 2221                 Energy EnergyLowerBoundCI EnergyUpperBoundCI
 2222                 AngleNotObserved MaxSingleEnergyAlert)>
 2223             AtomIndex2,AtomIndex3  AtomIndex1,AtomIndex2,AtomIndex3,AtomIndex4
 2224             Angle Energy EnergyLowerBoundCI EnergyUpperBoundCI EnergyMethod
 2225             Yes|No|NA Yes|No|NA
 2226              ... ... ...
 2227             
 2228         For example:
 2229             
 2230             >  <RotBondsCount>  (1) 
 2231             8
 2232             
 2233             >  <TotalEnergy>  (1) 
 2234             6.1889
 2235             
 2236             >  <TotalEnergyLowerBoundCI>  (1) 
 2237             5.1940
 2238             
 2239             >  <TotalEnergyUpperBoundCI>  (1) 
 2240             NA
 2241             
 2242             >  <MaxSingleEnergy>  (1) 
 2243             1.9576
 2244             
 2245             >  <MaxSingleEnergyAlertsCount>  (1) 
 2246             1
 2247             
 2248             >  <AnglesNotObservedCount>  (1) 
 2249             0
 2250             
 2251             >  <TorsionRule (HierarchyClass HierarchySubClass TorsionRule
 2252                 EnergyMethod)>  (1) 
 2253             CC None/[CX4:2][CX4:3] [!#1:1][CX4:2]!@[CX4:3][!#1:4] Exact
 2254             
 2255             >  <TorsionRuleMaxSingleEnergyAlertsCount>  (1) 
 2256             0
 2257             
 2258             >  <TorsionRuleAnglesNotObservedCount>  (1) 
 2259             0
 2260             
 2261             >  <TorsionRuleAlerts (RotBondIndices TorsionIndices TorsionAngle
 2262                 Energy EnergyLowerBoundCI EnergyUpperBoundCI AngleNotObserved
 2263                MaxSingleEnergyAlert)>  (1) 
 2264             1,3 0,1,3,4 72.63 0.8946 0.8756 0.9145 NA No
 2265             
 2266     --outfilesFilteredByRulesMaxCount <All or number>  [default: 10]
 2267         Write out SD files containing filtered molecules for specified number of
 2268         top N torsion rules triggering alerts for the largest number of molecules
 2269         or for all torsion rules triggering alerts across all molecules.
 2270         
 2271         These output files are only generated for 'MaxSingleEnergy' value of
 2272         '-a, --alertsMode' option.
 2273     --outfileSummary <yes or no>  [default: auto]
 2274         Write out a CVS text file containing summary of torsions rules responsible
 2275         for triggering torsion alerts. Its name is automatically generated from the
 2276         specified output file. Default: <OutfileRoot>_AlertsSummary.csv.
 2277         
 2278         Default value: 'yes' for 'MaxSingleEnergy' of '-a, --alertsMode' option';
 2279         otherwise, 'no'.
 2280         
 2281         The summary output file is only generated for 'MaxSingleEnergy' of
 2282         '-a, --alertsMode' option.
 2283         
 2284         The following alerts information is written to summary text file:
 2285             
 2286             TorsionRule, HierarchyClass, HierarchySubClass, EnergyMethod,
 2287             MaxSingleEnergyTorsionAlertTypes, MaxSingleEnergyTorsionAlertCount,
 2288             MaxSingleEnergyTorsionAlertMolCount
 2289              
 2290         The double quotes characters are removed from SMART patterns before
 2291         before writing them to a CSV file. In addition, the torsion rules are sorted by
 2292         TorsionAlertMolCount.
 2293     --outfileSDFieldLabels <Type,Label,...>  [default: auto]
 2294         A comma delimited list of SD data field type and label value pairs for writing
 2295         torsion alerts information along with molecules to SD files.
 2296         
 2297         The supported SD data field label type along with their default values are
 2298         shown below:
 2299             
 2300             For all SD files:
 2301             
 2302             RotBondsCountLabel, RotBondsCount,
 2303             
 2304             TotalEnergyLabel, TotalEnergy,
 2305             TotalEnergyLowerBoundCILabel, TotalEnergyLowerBoundCI,
 2306             TotalEnergyUpperBoundCILabel, TotalEnergyUpperBoundCI,
 2307             
 2308             MaxSingleEnergyLabel, MaxSingleEnergy,
 2309             MaxSingleEnergyAlertsCountLabel,
 2310                 MaxSingleEnergyAlertsCount
 2311             
 2312             AnglesNotObservedCountLabel,
 2313                 AnglesNotObservedCount
 2314             
 2315             TorsionAlertsLabel, TorsionAlerts(RotBondIndices TorsionIndices
 2316                 TorsionAngle Energy EnergyLowerBoundCI EnergyUpperBoundCI
 2317                 HierarchyClass HierarchySubClass TorsionRule
 2318                 EnergyMethod AngleNotObserved)
 2319             
 2320             For individual SD files filtered by torsion rules:
 2321             
 2322             TorsionRuleLabel, TorsionRule (HierarchyClass HierarchySubClass
 2323                 EnergyMethod TorsionRule)
 2324             TorsionRuleMaxSingleEnergyAlertsCountLabel,
 2325                 TorsionRuleMaxSingleEnergyAlertsCount,
 2326             TorsionRuleAnglesNotObservedCountLabel,
 2327                 TorsionRuleAnglesNotObservedCount,
 2328             TorsionRuleAlertsLabel, TorsionRuleAlerts (RotBondIndices
 2329                 TorsionIndices TorsionAngle Energy EnergyLowerBoundCI
 2330                 EnergyUpperBoundCI EnergyMethod AngleObserved)
 2331             
 2332     --outfileParams <Name,Value,...>  [default: auto]
 2333         A comma delimited list of parameter name and value pairs for writing
 2334         molecules to files. The supported parameter names for different file
 2335         formats, along with their default values, are shown below:
 2336             
 2337             SD: kekulize,yes,forceV3000,no
 2338             
 2339     --overwrite
 2340         Overwrite existing files.
 2341     --precision <number>  [default: 4]
 2342         Floating point precision for writing torsion strain energy values.
 2343     -r, --rotBondsSMARTSMode <NonStrict, SemiStrict,...>  [default: SemiStrict]
 2344         SMARTS pattern to use for identifying rotatable bonds in a molecule
 2345         for matching against torsion rules in the torsion library. Possible values:
 2346         NonStrict, SemiStrict, Strict or Specify. The rotatable bond SMARTS matches
 2347         are filtered to ensure that each atom in the rotatable bond is attached to
 2348         at least two heavy atoms.
 2349         
 2350         The following SMARTS patterns are used to identify rotatable bonds for
 2351         different modes:
 2352             
 2353             NonStrict: [!$(*#*)&!D1]-&!@[!$(*#*)&!D1]
 2354             
 2355             SemiStrict:
 2356             [!$(*#*)&!D1&!$(C(F)(F)F)&!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)
 2357             &!$(C([CH3])([CH3])[CH3])]-!@[!$(*#*)&!D1&!$(C(F)(F)F)
 2358             &!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)&!$(C([CH3])([CH3])[CH3])]
 2359             
 2360             Strict:
 2361             [!$(*#*)&!D1&!$(C(F)(F)F)&!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)
 2362             &!$(C([CH3])([CH3])[CH3])&!$([CD3](=[N,O,S])-!@[#7,O,S!D1])
 2363             &!$([#7,O,S!D1]-!@[CD3]=[N,O,S])&!$([CD3](=[N+])-!@[#7!D1])
 2364             &!$([#7!D1]-!@[CD3]=[N+])]-!@[!$(*#*)&!D1&!$(C(F)(F)F)
 2365             &!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)&!$(C([CH3])([CH3])[CH3])]
 2366             
 2367         The 'NonStrict' and 'Strict' SMARTS patterns are available in RDKit. The 
 2368         'NonStrict' SMARTS pattern corresponds to original Daylight SMARTS
 2369         specification for rotatable bonds. The 'SemiStrict' SMARTS pattern is 
 2370         derived from 'Strict' SMARTS patterns for its usage in this script.
 2371         
 2372         You may use any arbitrary SMARTS pattern to identify rotatable bonds by
 2373         choosing 'Specify' value for '-r, --rotBondsSMARTSMode' option and providing its
 2374         value via '--rotBondsSMARTSPattern' option.
 2375     --rotBondsSMARTSPattern <SMARTS>
 2376         SMARTS pattern for identifying rotatable bonds. This option is only valid
 2377         for 'Specify' value of '-r, --rotBondsSMARTSMode' option.
 2378     -t, --torsionEnergyLibraryFile <FileName or auto>  [default: auto]
 2379         Specify a XML file name containing data for torsion starin energy library
 2380         hierarchy or use default file, TorsionEnergyLibrary.xml, available in
 2381         MAYACHEMTOOLS/lib/data directory.
 2382         
 2383         The format of data in local XML file must match format of the data in Torsion
 2384         Library [ Ref 153 ] file available in MAYACHEMTOOLS data directory.
 2385     -w, --workingdir <dir>
 2386         Location of working directory which defaults to the current directory.
 2387 
 2388 Examples:
 2389     To filter molecules containing rotatable bonds with total strain energy value
 2390     of >= 6.0 (TEUs) based on torsion rules in the torsion energy library and write
 2391     write out SD files containing remaining and filtered molecules, type:
 2392 
 2393         % RDKitFilterTorsionStrainEnergyAlerts.py -i Sample3D.sdf
 2394           -o Sample3DOut.sdf
 2395 
 2396     To filter molecules containing any rotatable bonds with strain energy value of
 2397     >= 1.8 (TEUs) based on torsion rules in the torsion energy library and write out
 2398     SD files containing remaining and filtered molecules, and individual SD files for
 2399     torsion rules triggering alerts along with appropriate torsion information for
 2400     red alerts, type:
 2401 
 2402         % RDKitFilterTorsionStrainEnergyAlerts.py -a MaxSingleEnergy
 2403           -i Sample3D.sdf -o Sample3DOut.sdf
 2404 
 2405     To filter molecules containing rotatable bonds with total strain energy value
 2406     of >= 6.0 (TEUs) or any single strain energy value of >= 1.8 (TEUs) and write out
 2407     SD files containing remaining and filtered molecules, type:
 2408 
 2409         % RDKitFilterTorsionStrainEnergyAlerts.py -a TotalOrMaxSingleEnergy
 2410           -i Sample3D.sdf -o Sample3DOut.sdf
 2411 
 2412     To filter molecules containing rotatable bonds with specific cutoff values for
 2413     total or single torsion strain energy and write out SD files containing
 2414     remaining and filtered molecules, type:
 2415 
 2416         % RDKitFilterTorsionStrainEnergyAlerts.py -a TotalOrMaxSingleEnergy
 2417           -i Sample3D.sdf -o Sample3DOut.sdf --alertsTotalEnergyCutoff 6.0
 2418           --alertsMaxSingleEnergyCutoff 1.8
 2419 
 2420     To run the first example for filtering molecules and writing out torsion
 2421     information for all alert types to SD files, type:
 2422 
 2423         % RDKitFilterTorsionStrainEnergyAlerts.py -i Sample3D.sdf
 2424           -o Sample3DOut.sdf --outfileAlertsMode All
 2425 
 2426     To run the first example for filtering molecules in multiprocessing mode on
 2427     all available CPUs without loading all data into memory and write out SD files,
 2428     type:
 2429 
 2430         % RDKitFilterTorsionStrainEnergyAlerts.py --mp yes -i Sample3D.sdf
 2431          -o Sample3DOut.sdf
 2432 
 2433     To run the first example for filtering molecules in multiprocessing mode on
 2434     all available CPUs by loading all data into memory and write out a SD files,
 2435     type:
 2436 
 2437         % RDKitFilterTorsionStrainEnergyAlerts.py  --mp yes --mpParams
 2438           "inputDataMode, InMemory" -i Sample3D.sdf  -o Sample3DOut.sdf
 2439 
 2440     To run the first example for filtering molecules in multiprocessing mode on
 2441     specific number of CPUs and chunksize without loading all data into memory
 2442     and write out SD files, type:
 2443 
 2444         % RDKitFilterTorsionStrainEnergyAlerts.py --mp yes --mpParams
 2445           "inputDataMode,lazy,numProcesses,4,chunkSize,8"  -i Sample3D.sdf
 2446           -o Sample3DOut.sdf
 2447 
 2448     To list information about default torsion library file without performing any
 2449     filtering, type:
 2450 
 2451         % RDKitFilterTorsionStrainEnergyAlerts.py -l
 2452 
 2453     To list information about a local torsion library XML file without performing
 2454     any, filtering, type:
 2455 
 2456         % RDKitFilterTorsionStrainEnergyAlerts.py --torsionEnergyLibraryFile
 2457           TorsionStrainEnergyLibrary.xml -l
 2458 
 2459 Author:
 2460     Manish Sud (msud@san.rr.com)
 2461 
 2462 Collaborator:
 2463     Pat Walters
 2464 
 2465 See also:
 2466     RDKitFilterChEMBLAlerts.py, RDKitFilterPAINS.py, RDKitFilterTorsionLibraryAlerts.py,
 2467     RDKitConvertFileFormat.py, RDKitSearchSMARTS.py
 2468 
 2469 Copyright:
 2470     Copyright (C) 2023 Manish Sud. All rights reserved.
 2471 
 2472     This script uses the torsion strain energy library developed by Gu, S.;
 2473     Smith, M. S.; Yang, Y.; Irwin, J. J.; Shoichet, B. K. [ Ref 153 ].
 2474 
 2475     The torsion strain enegy library is based on the Torsion Library jointly
 2476     developed by the University of Hamburg, Center for Bioinformatics,
 2477     Hamburg, Germany and F. Hoffmann-La-Roche Ltd., Basel, Switzerland.
 2478 
 2479     The functionality available in this script is implemented using RDKit, an
 2480     open source toolkit for cheminformatics developed by Greg Landrum.
 2481 
 2482     This file is part of MayaChemTools.
 2483 
 2484     MayaChemTools is free software; you can redistribute it and/or modify it under
 2485     the terms of the GNU Lesser General Public License as published by the Free
 2486     Software Foundation; either version 3 of the License, or (at your option) any
 2487     later version.
 2488 
 2489 """
 2490 
 2491 if __name__ == "__main__":
 2492     main()