MayaChemTools

    1 #!/bin/env python
    2 #
    3 # File: Psi4PerformConstrainedMinimization.py
    4 # Author: Manish Sud <msud@san.rr.com>
    5 #
    6 # Copyright (C) 2021 Manish Sud. All rights reserved.
    7 #
    8 # The functionality available in this script is implemented using Psi4, an
    9 # open source quantum chemistry software package, and RDKit, an open
   10 # source toolkit for cheminformatics developed by Greg Landrum.
   11 #
   12 # This file is part of MayaChemTools.
   13 #
   14 # MayaChemTools is free software; you can redistribute it and/or modify it under
   15 # the terms of the GNU Lesser General Public License as published by the Free
   16 # Software Foundation; either version 3 of the License, or (at your option) any
   17 # later version.
   18 #
   19 # MayaChemTools is distributed in the hope that it will be useful, but without
   20 # any warranty; without even the implied warranty of merchantability of fitness
   21 # for a particular purpose.  See the GNU Lesser General Public License for more
   22 # details.
   23 #
   24 # You should have received a copy of the GNU Lesser General Public License
   25 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
   26 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
   27 # Boston, MA, 02111-1307, USA.
   28 #
   29 
   30 from __future__ import print_function
   31 
   32 # Add local python path to the global path and import standard library modules...
   33 import os
   34 import sys;  sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python"))
   35 import time
   36 import re
   37 import shutil
   38 import multiprocessing as mp
   39 
   40 # Psi4 imports...
   41 if (hasattr(shutil, 'which') and shutil.which("psi4") is None):
   42     sys.stderr.write("\nWarning: Failed to find 'psi4' in your PATH indicating potential issues with your\n")
   43     sys.stderr.write("Psi4 environment. The 'import psi4' directive in the global scope of the script\n")
   44     sys.stderr.write("interferes with the multiprocessing functionality. It is imported later in the\n")
   45     sys.stderr.write("local scope during the execution of the script and may fail. Check/update your\n")
   46     sys.stderr.write("Psi4 environment and try again.\n\n")
   47 
   48 # RDKit imports...
   49 try:
   50     from rdkit import rdBase
   51     from rdkit import Chem
   52     from rdkit.Chem import AllChem, rdMolAlign
   53     from rdkit.Chem import rdFMCS
   54 except ImportError as ErrMsg:
   55     sys.stderr.write("\nFailed to import RDKit module/package: %s\n" % ErrMsg)
   56     sys.stderr.write("Check/update your RDKit environment and try again.\n\n")
   57     sys.exit(1)
   58 
   59 # MayaChemTools imports...
   60 try:
   61     from docopt import docopt
   62     import MiscUtil
   63     import Psi4Util
   64     import RDKitUtil
   65 except ImportError as ErrMsg:
   66     sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg)
   67     sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n")
   68     sys.exit(1)
   69 
   70 ScriptName = os.path.basename(sys.argv[0])
   71 Options = {}
   72 OptionsInfo = {}
   73 
   74 def main():
   75     """Start execution of the script"""
   76     
   77     MiscUtil.PrintInfo("\n%s (Psi4: Imported later; RDK v%s; %s): Starting...\n" % (ScriptName, rdBase.rdkitVersion, time.asctime()))
   78     
   79     (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime()
   80     
   81     # Retrieve command line arguments and options...
   82     RetrieveOptions()
   83     
   84     # Process and validate command line arguments and options...
   85     ProcessOptions()
   86     
   87     # Perform actions required by the script...
   88     PerformConstrainedMinimization()
   89     
   90     MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName)
   91     MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime))
   92 
   93 def PerformConstrainedMinimization():
   94     """Perform constrained minimization."""
   95     
   96     # Read and validate reference molecule...
   97     RefMol = RetrieveReferenceMolecule()
   98     
   99     # Setup a molecule reader for input file...
  100     MiscUtil.PrintInfo("\nProcessing file %s..." % OptionsInfo["Infile"])
  101     Mols  = RDKitUtil.ReadMolecules(OptionsInfo["Infile"], **OptionsInfo["InfileParams"])
  102     
  103     # Set up a molecule writer...
  104     Writer = RDKitUtil.MoleculesWriter(OptionsInfo["Outfile"], **OptionsInfo["OutfileParams"])
  105     if Writer is None:
  106         MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OptionsInfo["Outfile"])
  107     MiscUtil.PrintInfo("Generating file %s..." % OptionsInfo["Outfile"])
  108 
  109     MolCount, ValidMolCount, CoreScaffoldMissingCount, MinimizationFailedCount, WriteFailedCount = ProcessMolecules(RefMol, Mols, Writer)
  110 
  111     if Writer is not None:
  112         Writer.close()
  113     
  114     MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount)
  115     MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount)
  116     MiscUtil.PrintInfo("Number of molecules with missing core scaffold: %d" % CoreScaffoldMissingCount)
  117     MiscUtil.PrintInfo("Number of molecules failed during conformation generation or minimization: %d" % MinimizationFailedCount)
  118     MiscUtil.PrintInfo("Number of molecules failed during writing: %d" % WriteFailedCount)
  119     MiscUtil.PrintInfo("Number of ignored molecules: %d" % (MolCount - ValidMolCount + CoreScaffoldMissingCount + MinimizationFailedCount + WriteFailedCount))
  120 
  121 def ProcessMolecules(RefMol, Mols, Writer):
  122     """Process and minimize molecules. """
  123     
  124     if OptionsInfo["MPMode"]:
  125         return ProcessMoleculesUsingMultipleProcesses(RefMol, Mols, Writer)
  126     else:
  127         return ProcessMoleculesUsingSingleProcess(RefMol, Mols, Writer)
  128 
  129 def ProcessMoleculesUsingSingleProcess(RefMol, Mols, Writer):
  130     """Process and minimize molecules using a single process."""
  131 
  132     # Intialize Psi4...
  133     MiscUtil.PrintInfo("\nInitializing Psi4...")
  134     Psi4Handle = Psi4Util.InitializePsi4(Psi4RunParams = OptionsInfo["Psi4RunParams"], Psi4OptionsParams = OptionsInfo["Psi4OptionsParams"], PrintVersion = True, PrintHeader = True)
  135     OptionsInfo["psi4"] = Psi4Handle
  136 
  137     # Setup max iterations global variable...
  138     Psi4Util.UpdatePsi4OptionsParameters(Psi4Handle, {'GEOM_MAXITER': OptionsInfo["MaxIters"]})
  139     
  140     # Setup conversion factor for energy units...
  141     SetupEnergyConversionFactor(Psi4Handle)
  142     
  143     (MolCount, ValidMolCount, CoreScaffoldMissingCount, MinimizationFailedCount, WriteFailedCount) = [0] * 5
  144     
  145     for Mol in Mols:
  146         MolCount += 1
  147 
  148         if not CheckAndValidateMolecule(Mol, MolCount):
  149             continue
  150 
  151         # Setup 2D coordinates for SMILES input file...
  152         if OptionsInfo["SMILESInfileStatus"]:
  153             AllChem.Compute2DCoords(Mol)
  154         
  155         ValidMolCount += 1
  156         
  157         # Setup a reference molecule core containing common scaffold atoms...
  158         RefMolCore = SetupCoreScaffold(RefMol, Mol, MolCount)
  159         if RefMolCore is None:
  160             CoreScaffoldMissingCount += 1
  161             continue
  162         
  163         Mol, CalcStatus, Energy, ScaffoldEmbedRMSD = ConstrainAndMinimizeMolecule(Psi4Handle, Mol, RefMolCore, MolCount)
  164         
  165         if not CalcStatus:
  166             MinimizationFailedCount += 1
  167             continue
  168         
  169         WriteStatus = WriteMolecule(Writer, Mol, MolCount, Energy, ScaffoldEmbedRMSD)
  170         
  171         if not WriteStatus:
  172             WriteFailedCount += 1
  173         
  174     return (MolCount, ValidMolCount, CoreScaffoldMissingCount, MinimizationFailedCount, WriteFailedCount)
  175 
  176 def ProcessMoleculesUsingMultipleProcesses(RefMol, Mols, Writer):
  177     """Process and minimize molecules using multiprocessing."""
  178     
  179     if OptionsInfo["MPLevelConformersMode"]:
  180         return ProcessMoleculesUsingMultipleProcessesAtConformersLevel(RefMol, Mols, Writer)
  181     elif OptionsInfo["MPLevelMoleculesMode"]:
  182         return ProcessMoleculesUsingMultipleProcessesAtMoleculesLevel(RefMol, Mols, Writer)
  183     else:
  184         MiscUtil.PrintError("The value, %s,  option \"--mpLevel\" is not supported." % (OptionsInfo["MPLevel"]))
  185         
  186 def ProcessMoleculesUsingMultipleProcessesAtMoleculesLevel(RefMol, Mols, Writer):
  187     """Process and minimize molecules using multiprocessing at molecules level."""
  188 
  189     MiscUtil.PrintInfo("\nPerforming constrained minimization with generation of conformers using multiprocessing at molecules level...")
  190     
  191     MPParams = OptionsInfo["MPParams"]
  192 
  193     # Setup data for initializing a worker process...
  194     OptionsInfo["EncodedRefMol"] = RDKitUtil.MolToBase64EncodedMolString(RefMol)
  195     InitializeWorkerProcessArgs = (MiscUtil.ObjectToBase64EncodedString(Options), MiscUtil.ObjectToBase64EncodedString(OptionsInfo))
  196     
  197     # Setup a encoded mols data iterable for a worker process...
  198     WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStrings(Mols)
  199     
  200     # Setup process pool along with data initialization for each process...
  201     if not OptionsInfo["QuietMode"]:
  202         MiscUtil.PrintInfo("\nConfiguring multiprocessing using %s method..." % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()"))
  203         MiscUtil.PrintInfo("NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n" % (MPParams["NumProcesses"], MPParams["InputDataMode"], ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"])))
  204     
  205     ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeWorkerProcess, InitializeWorkerProcessArgs)
  206     
  207     # Start processing...
  208     if re.match("^Lazy$", MPParams["InputDataMode"], re.I):
  209         Results = ProcessPool.imap(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
  210     elif re.match("^InMemory$", MPParams["InputDataMode"], re.I):
  211         Results = ProcessPool.map(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
  212     else:
  213         MiscUtil.PrintError("The value, %s, specified for \"--inputDataMode\" is not supported." % (MPParams["InputDataMode"]))
  214     
  215     # Print out Psi4 version in the main process...
  216     MiscUtil.PrintInfo("\nInitializing Psi4...\n")
  217     Psi4Handle  = Psi4Util.InitializePsi4(PrintVersion = True, PrintHeader = False)
  218     OptionsInfo["psi4"] = Psi4Handle
  219 
  220     (MolCount, ValidMolCount, CoreScaffoldMissingCount, MinimizationFailedCount, WriteFailedCount) = [0] * 5
  221 
  222     for Result in Results:
  223         MolCount += 1
  224         MolIndex, EncodedMol, CoreScaffoldMissingStatus, CalcStatus, Energy, ScaffoldEmbedRMSD  = Result
  225         
  226         if EncodedMol is None:
  227             continue
  228         ValidMolCount += 1
  229 
  230         if CoreScaffoldMissingStatus:
  231             CoreScaffoldMissingStatus += 1
  232             continue
  233         
  234         if not CalcStatus:
  235             MinimizationFailedCount += 1
  236             continue
  237             
  238         Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
  239         WriteStatus = WriteMolecule(Writer, Mol, MolCount, Energy, ScaffoldEmbedRMSD)
  240         
  241         if not WriteStatus:
  242             WriteFailedCount += 1
  243     
  244     return (MolCount, ValidMolCount, CoreScaffoldMissingCount, MinimizationFailedCount, WriteFailedCount)
  245     
  246 def InitializeWorkerProcess(*EncodedArgs):
  247     """Initialize data for a worker process."""
  248     
  249     global Options, OptionsInfo
  250 
  251     if not OptionsInfo["QuietMode"]:
  252         MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid())
  253     
  254     # Decode Options and OptionInfo...
  255     Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0])
  256     OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1])
  257     
  258     # Decode RefMol...
  259     OptionsInfo["RefMol"] = RDKitUtil.MolFromBase64EncodedMolString(OptionsInfo["EncodedRefMol"])
  260     
  261     # Psi4 is initialized in the worker process to avoid creation of redundant Psi4
  262     # output files for each process...
  263     OptionsInfo["Psi4Initialized"]  = False
  264 
  265 def WorkerProcess(EncodedMolInfo):
  266     """Process data for a worker process."""
  267 
  268     if not OptionsInfo["Psi4Initialized"]:
  269         InitializePsi4ForWorkerProcess()
  270     
  271     MolIndex, EncodedMol = EncodedMolInfo
  272 
  273     MolNum = MolIndex + 1
  274     CoreScaffoldMissingStatus = False
  275     CalcStatus = False
  276     Energy = None
  277     ScaffoldEmbedRMSD = None
  278 
  279     if EncodedMol is None:
  280         return [MolIndex, None, CoreScaffoldMissingStatus, CalcStatus, Energy, ScaffoldEmbedRMSD]
  281 
  282     Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
  283     if not CheckAndValidateMolecule(Mol, MolNum):
  284         return [MolIndex, None, CoreScaffoldMissingStatus, CalcStatus, Energy, ScaffoldEmbedRMSD]
  285     
  286     # Setup 2D coordinates for SMILES input file...
  287     if OptionsInfo["SMILESInfileStatus"]:
  288         AllChem.Compute2DCoords(Mol)
  289         
  290     # Setup a reference molecule core containing common scaffold atoms...
  291     RefMolCore = SetupCoreScaffold(OptionsInfo["RefMol"], Mol, MolNum)
  292     if RefMolCore is None:
  293         CoreScaffoldMissingStatus = True
  294         return [MolIndex, EncodedMol, CalcStatus, CoreScaffoldMissingStatus, Energy, ScaffoldEmbedRMSD]
  295     
  296     Mol, CalcStatus, Energy, ScaffoldEmbedRMSD = ConstrainAndMinimizeMolecule(OptionsInfo["psi4"], Mol, RefMolCore, MolNum)
  297     
  298     return [MolIndex, RDKitUtil.MolToBase64EncodedMolString(Mol, PropertyPickleFlags = Chem.PropertyPickleOptions.MolProps | Chem.PropertyPickleOptions.PrivateProps), CoreScaffoldMissingStatus, CalcStatus, Energy, ScaffoldEmbedRMSD]
  299 
  300 def ProcessMoleculesUsingMultipleProcessesAtConformersLevel(RefMol, Mols, Writer):
  301     """Process and minimize molecules using multiprocessing at conformers level."""
  302 
  303     MiscUtil.PrintInfo("\nPerforming constrained minimization with generation of conformers using multiprocessing at conformers level...")
  304     
  305     (MolCount, ValidMolCount, CoreScaffoldMissingCount, MinimizationFailedCount, WriteFailedCount) = [0] * 5
  306     
  307     for Mol in Mols:
  308         MolCount += 1
  309 
  310         if not CheckAndValidateMolecule(Mol, MolCount):
  311             continue
  312 
  313         # Setup 2D coordinates for SMILES input file...
  314         if OptionsInfo["SMILESInfileStatus"]:
  315             AllChem.Compute2DCoords(Mol)
  316         
  317         ValidMolCount += 1
  318         
  319         # Setup a reference molecule core containing common scaffold atoms...
  320         RefMolCore = SetupCoreScaffold(RefMol, Mol, MolCount)
  321         if RefMolCore is None:
  322             CoreScaffoldMissingCount += 1
  323             continue
  324 
  325         Mol, CalcStatus, Energy, ScaffoldEmbedRMSD = ProcessConformersUsingMultipleProcesses(Mol, RefMolCore, MolCount)
  326         
  327         if not CalcStatus:
  328             MinimizationFailedCount += 1
  329             continue
  330         
  331         WriteStatus = WriteMolecule(Writer, Mol, MolCount, Energy, ScaffoldEmbedRMSD)
  332         
  333         if not WriteStatus:
  334             WriteFailedCount += 1
  335         
  336     return (MolCount, ValidMolCount, CoreScaffoldMissingCount, MinimizationFailedCount, WriteFailedCount)
  337 
  338 def ProcessConformersUsingMultipleProcesses(Mol, RefMolCore, MolNum):
  339     """Generate conformers and minimize them using multiple processes. """
  340     
  341     # Add hydrogens...
  342     Mol = Chem.AddHs(Mol, addCoords = True)
  343 
  344     MolName = RDKitUtil.GetMolName(Mol, MolNum)
  345     
  346     # Setup constrained conformers...
  347     MolConfs, MolConfsStatus = ConstrainEmbedAndMinimizeMoleculeUsingRDKit(Mol, RefMolCore, MolNum)
  348     if not MolConfsStatus:
  349         if not OptionsInfo["QuietMode"]:
  350             MiscUtil.PrintWarning("Conformation generation couldn't be performed for molecule %s: Constrained embedding failed...\n" % MolName)
  351         return (Mol, False, None, None)
  352     
  353     MPParams = OptionsInfo["MPParams"]
  354 
  355     # Setup data for initializing a worker process...
  356     OptionsInfo["EncodedRefMolCore"] = RDKitUtil.MolToBase64EncodedMolString(RefMolCore)
  357     InitializeWorkerProcessArgs = (MiscUtil.ObjectToBase64EncodedString(Options), MiscUtil.ObjectToBase64EncodedString(OptionsInfo))
  358 
  359     # Setup a encoded mols data iterable for a worker process...
  360     WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStrings(MolConfs)
  361     
  362     # Setup process pool along with data initialization for each process...
  363     if not OptionsInfo["QuietMode"]:
  364         MiscUtil.PrintInfo("\nConfiguring multiprocessing using %s method..." % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()"))
  365         MiscUtil.PrintInfo("NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n" % (MPParams["NumProcesses"], MPParams["InputDataMode"], ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"])))
  366     
  367     ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeConformerWorkerProcess, InitializeWorkerProcessArgs)
  368     
  369     # Start processing...
  370     if re.match("^Lazy$", MPParams["InputDataMode"], re.I):
  371         Results = ProcessPool.imap(ConformerWorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
  372     elif re.match("^InMemory$", MPParams["InputDataMode"], re.I):
  373         Results = ProcessPool.map(ConformerWorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
  374     else:
  375         MiscUtil.PrintError("The value, %s, specified for \"--inputDataMode\" is not supported." % (MPParams["InputDataMode"]))
  376 
  377     ConfNums = []
  378     MolConfsMap = {}
  379     CalcEnergyMap = {}
  380     CalcFailedCount = 0
  381     
  382     for Result in Results:
  383         ConfNum, EncodedMolConf, CalcStatus, Energy = Result
  384 
  385         if EncodedMolConf is None:
  386             CalcFailedCount += 1
  387             continue
  388         
  389         if not CalcStatus:
  390             CalcFailedCount += 1
  391             continue
  392         
  393         MolConf = RDKitUtil.MolFromBase64EncodedMolString(EncodedMolConf)
  394         
  395         ConfNums.append(ConfNum)
  396         CalcEnergyMap[ConfNum] = Energy
  397         MolConfsMap[ConfNum] = MolConf
  398     
  399     if CalcFailedCount:
  400         return (Mol, False, None, None)
  401     
  402     SortedConfNums = sorted(ConfNums, key = lambda ConfNum: CalcEnergyMap[ConfNum])
  403     MinEnergyConfNum = SortedConfNums[0]
  404     
  405     MinEnergy = "%.*f" % (OptionsInfo["Precision"], CalcEnergyMap[MinEnergyConfNum])  if OptionsInfo["EnergyOut"] else None
  406     MinEnergyMolConf = MolConfsMap[MinEnergyConfNum]
  407     
  408     ScaffoldEmbedRMSD = "%.4f" % float(MinEnergyMolConf.GetProp('EmbedRMS')) if OptionsInfo["ScaffoldRMSDOut"] else None
  409     MinEnergyMolConf.ClearProp('EmbedRMS')
  410 
  411     return (MinEnergyMolConf, True, MinEnergy, ScaffoldEmbedRMSD)
  412 
  413 def InitializeConformerWorkerProcess(*EncodedArgs):
  414     """Initialize data for a conformer worker process."""
  415     
  416     global Options, OptionsInfo
  417 
  418     if not OptionsInfo["QuietMode"]:
  419         MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid())
  420     
  421     # Decode Options and OptionInfo...
  422     Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0])
  423     OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1])
  424     
  425     # Decode RefMol...
  426     OptionsInfo["RefMolCore"] = RDKitUtil.MolFromBase64EncodedMolString(OptionsInfo["EncodedRefMolCore"])
  427     
  428     # Psi4 is initialized in the worker process to avoid creation of redundant Psi4
  429     # output files for each process...
  430     OptionsInfo["Psi4Initialized"]  = False
  431 
  432 def ConformerWorkerProcess(EncodedMolInfo):
  433     """Process data for a conformer worker process."""
  434 
  435     if not OptionsInfo["Psi4Initialized"]:
  436         InitializePsi4ForWorkerProcess()
  437     
  438     MolConfIndex, EncodedMolConf = EncodedMolInfo
  439 
  440     MolConfNum = MolConfIndex
  441     CalcStatus = False
  442     Energy = None
  443     
  444     if EncodedMolConf is None:
  445         return [MolConfIndex, None, CalcStatus, Energy]
  446     
  447     MolConf = RDKitUtil.MolFromBase64EncodedMolString(EncodedMolConf)
  448     MolConfName = RDKitUtil.GetMolName(MolConf, MolConfNum)
  449 
  450     if not OptionsInfo["QuietMode"]:
  451         MiscUtil.PrintInfo("Processing molecule %s conformer number %s..." % (MolConfName, MolConfNum))
  452     
  453     # Perform Psi4 constrained minimization...
  454     CalcStatus, Energy = ConstrainAndMinimizeMoleculeUsingPsi4(OptionsInfo["psi4"], MolConf, OptionsInfo["RefMolCore"], MolConfNum)
  455     if not CalcStatus:
  456         if not OptionsInfo["QuietMode"]:
  457             MiscUtil.PrintWarning("Minimization couldn't be performed for molecule %s\n" % (MolName))
  458         return [MolConfIndex, EncodedMolConf, CalcStatus, Energy]
  459 
  460     return [MolConfIndex, RDKitUtil.MolToBase64EncodedMolString(MolConf, PropertyPickleFlags = Chem.PropertyPickleOptions.MolProps | Chem.PropertyPickleOptions.PrivateProps), CalcStatus, Energy]
  461     
  462 def InitializePsi4ForWorkerProcess():
  463     """Initialize Psi4 for a worker process."""
  464     
  465     if OptionsInfo["Psi4Initialized"]:
  466         return
  467 
  468     OptionsInfo["Psi4Initialized"] = True
  469 
  470     if OptionsInfo["MPLevelConformersMode"] and re.match("auto", OptionsInfo["Psi4RunParams"]["OutputFileSpecified"], re.I):
  471         # Run Psi4 in quiet mode during multiprocessing at Conformers level for 'auto' OutputFile...
  472         OptionsInfo["Psi4RunParams"]["OutputFile"] = "quiet"
  473     else:
  474         # Update output file...
  475         OptionsInfo["Psi4RunParams"]["OutputFile"] = Psi4Util.UpdatePsi4OutputFileUsingPID(OptionsInfo["Psi4RunParams"]["OutputFile"], os.getpid())
  476             
  477     # Intialize Psi4...
  478     OptionsInfo["psi4"] = Psi4Util.InitializePsi4(Psi4RunParams = OptionsInfo["Psi4RunParams"], Psi4OptionsParams = OptionsInfo["Psi4OptionsParams"], PrintVersion = False, PrintHeader = True)
  479     
  480     # Setup max iterations global variable...
  481     Psi4Util.UpdatePsi4OptionsParameters(OptionsInfo["psi4"], {'GEOM_MAXITER': OptionsInfo["MaxIters"]})
  482     
  483     # Setup conversion factor for energy units...
  484     SetupEnergyConversionFactor(OptionsInfo["psi4"])
  485 
  486 def RetrieveReferenceMolecule():
  487     """Retrieve and validate reference molecule """
  488     
  489     RefFile = OptionsInfo["RefFile"]
  490     
  491     MiscUtil.PrintInfo("\nProcessing file %s..." % (RefFile))
  492     OptionsInfo["InfileParams"]["AllowEmptyMols"] = False
  493     ValidRefMols, RefMolCount, ValidRefMolCount  = RDKitUtil.ReadAndValidateMolecules(RefFile, **OptionsInfo["InfileParams"])
  494     
  495     if ValidRefMolCount == 0:
  496         MiscUtil.PrintError("The reference file, %s, contains no valid molecules." % RefFile)
  497     elif ValidRefMolCount > 1:
  498         MiscUtil.PrintWarning("The reference file, %s, contains, %d, valid molecules. Using first molecule as the reference molecule..." % (RefFile, ValidRefMolCount))
  499     
  500     RefMol = ValidRefMols[0]
  501 
  502     if OptionsInfo["UseScaffoldSMARTS"]:
  503         ScaffoldPatternMol = Chem.MolFromSmarts(OptionsInfo["ScaffoldSMARTS"])
  504         if ScaffoldPatternMol is None:
  505             MiscUtil.PrintError("Failed to create scaffold pattern molecule. The scaffold SMARTS pattern, %s, specified using \"-s, --scaffold\" option is not valid." % (OptionsInfo["ScaffoldSMARTS"]))
  506         
  507         if not RefMol.HasSubstructMatch(ScaffoldPatternMol):
  508             MiscUtil.PrintError("The scaffold SMARTS pattern, %s, specified using \"-s, --scaffold\" option, is missing in the first valid reference molecule." % (OptionsInfo["ScaffoldSMARTS"]))
  509             
  510     return RefMol
  511 
  512 def SetupCoreScaffold(RefMol, Mol, MolCount):
  513     """Setup a reference molecule core containing common scaffold atoms between
  514     a pair of molecules."""
  515 
  516     if OptionsInfo["UseScaffoldMCS"]:
  517         return SetupCoreScaffoldByMCS(RefMol, Mol, MolCount)
  518     elif OptionsInfo["UseScaffoldSMARTS"]:
  519         return SetupCoreScaffoldBySMARTS(RefMol, Mol, MolCount)
  520     else:
  521         MiscUtil.PrintError("The  value, %s, specified for  \"-s, --scaffold\" option is not supported." % (OptionsInfo["Scaffold"]))
  522         
  523 def SetupCoreScaffoldByMCS(RefMol, Mol, MolCount):
  524     """Setup a reference molecule core containing common scaffold atoms between
  525     a pair of molecules using MCS."""
  526     
  527     MCSParams = OptionsInfo["MCSParams"]
  528     Mols = [RefMol, Mol]
  529 
  530     MCSResultObject = rdFMCS.FindMCS(Mols, maximizeBonds = MCSParams["MaximizeBonds"], threshold = MCSParams["Threshold"], timeout = MCSParams["TimeOut"], verbose = MCSParams["Verbose"], matchValences = MCSParams["MatchValences"], ringMatchesRingOnly = MCSParams["RingMatchesRingOnly"], completeRingsOnly = MCSParams["CompleteRingsOnly"], matchChiralTag = MCSParams["MatchChiralTag"], atomCompare = MCSParams["AtomCompare"], bondCompare = MCSParams["BondCompare"], seedSmarts = MCSParams["SeedSMARTS"]) 
  531     
  532     if MCSResultObject.canceled:
  533         if not OptionsInfo["QuietMode"]:
  534             MiscUtil.PrintWarning("MCS failed to identify a common core scaffold between reference moecule and input molecule %s. Specify a different set of parameters using \"-m, --mcsParams\" option and try again." % (RDKitUtil.GetMolName(Mol, MolCount)))
  535         return None
  536     
  537     CoreNumAtoms = MCSResultObject.numAtoms
  538     CoreNumBonds = MCSResultObject.numBonds
  539     
  540     SMARTSCore = MCSResultObject.smartsString
  541     
  542     if not len(SMARTSCore):
  543         if not OptionsInfo["QuietMode"]:
  544             MiscUtil.PrintWarning("MCS failed to identify a common core scaffold between reference moecule and input molecule %s. Specify a different set of parameters using \"-m, --mcsParams\" option and try again." % (RDKitUtil.GetMolName(Mol, MolCount)))
  545         return None
  546         
  547     if CoreNumAtoms < MCSParams["MinNumAtoms"]:
  548         if not OptionsInfo["QuietMode"]:
  549             MiscUtil.PrintWarning("Number of atoms, %d, in core scaffold identified by MCS is less than, %d, as specified by \"minNumAtoms\" parameter in  \"-m, --mcsParams\" option." % (CoreNumAtoms, MCSParams["MinNumAtoms"]))
  550         return None
  551     
  552     if CoreNumBonds < MCSParams["MinNumBonds"]:
  553         if not OptionsInfo["QuietMode"]:
  554             MiscUtil.PrintWarning("Number of bonds, %d, in core scaffold identified by MCS is less than, %d, as specified by \"minNumBonds\" parameter in  \"-m, --mcsParams\" option." % (CoreNumBonds, MCSParams["MinNumBonds"]))
  555         return None
  556 
  557     return GenerateCoreMol(RefMol, SMARTSCore)
  558     
  559 def SetupCoreScaffoldBySMARTS(RefMol, Mol, MolCount):
  560     """Setup a reference molecule core containing common scaffold atoms between
  561     a pair of molecules using specified SMARTS."""
  562     
  563     if OptionsInfo["ScaffoldPatternMol"] is None:
  564         OptionsInfo["ScaffoldPatternMol"] = Chem.MolFromSmarts(OptionsInfo["ScaffoldSMARTS"])
  565         
  566     if not Mol.HasSubstructMatch(OptionsInfo["ScaffoldPatternMol"]):
  567         if not OptionsInfo["QuietMode"]:
  568             MiscUtil.PrintWarning("The scaffold SMARTS pattern, %s, specified using \"-s, --scaffold\" option is missing in input molecule,  %s." % (OptionsInfo["ScaffoldSMARTS"], RDKitUtil.GetMolName(Mol, MolCount)))
  569         return None
  570 
  571     return GenerateCoreMol(RefMol, OptionsInfo["ScaffoldSMARTS"])
  572 
  573 def GenerateCoreMol(RefMol, SMARTSCore):
  574     """Generate core molecule for embedding. """
  575 
  576     # Create a molecule corresponding to core atoms...
  577     SMARTSCoreMol = Chem.MolFromSmarts(SMARTSCore)
  578 
  579     # Setup a ref molecule containing core atoms with dummy atoms as
  580     # attachment points for atoms around the core atoms...
  581     Core = AllChem.ReplaceSidechains(Chem.RemoveHs(RefMol), SMARTSCoreMol)
  582 
  583     # Delete any substructures containing dummy atoms..
  584     RefMolCore = AllChem.DeleteSubstructs(Core, Chem.MolFromSmiles('*'))
  585     RefMolCore.UpdatePropertyCache()
  586     
  587     return RefMolCore
  588 
  589 def ConstrainAndMinimizeMolecule(Psi4Handle, Mol, RefMolCore, MolNum = None):
  590     "Constrain and minimize molecule."
  591 
  592     # Add hydrogens...
  593     Mol = Chem.AddHs(Mol, addCoords = True)
  594 
  595     MolName = RDKitUtil.GetMolName(Mol, MolNum)
  596     
  597     # Setup constrained conformers...
  598     MolConfs, MolConfsStatus = ConstrainEmbedAndMinimizeMoleculeUsingRDKit(Mol, RefMolCore, MolNum)
  599     if not MolConfsStatus:
  600         if not OptionsInfo["QuietMode"]:
  601             MiscUtil.PrintWarning("Conformation generation couldn't be performed for molecule %s: Constrained embedding failed...\n" % MolName)
  602         return (Mol, False, None, None)
  603 
  604     # Minimize conformers...
  605     ConfNums = []
  606     CalcEnergyMap = {}
  607     MolConfsMap = {}
  608     
  609     for ConfNum, MolConf in enumerate(MolConfs):
  610         if not OptionsInfo["QuietMode"]:
  611             MiscUtil.PrintInfo("\nProcessing molecule %s conformer number %s..." % (MolName, ConfNum))
  612             
  613         CalcStatus, Energy = ConstrainAndMinimizeMoleculeUsingPsi4(Psi4Handle, MolConf, RefMolCore, MolNum)
  614         if not CalcStatus:
  615             if not OptionsInfo["QuietMode"]:
  616                 MiscUtil.PrintWarning("Minimization couldn't be performed for molecule %s\n" % (MolName))
  617             return (Mol, False, None, None)
  618         
  619         ConfNums.append(ConfNum)
  620         CalcEnergyMap[ConfNum] = Energy
  621         MolConfsMap[ConfNum] = MolConf
  622     
  623     SortedConfNums = sorted(ConfNums, key = lambda ConfNum: CalcEnergyMap[ConfNum])
  624     MinEnergyConfNum = SortedConfNums[0]
  625     
  626     MinEnergy = "%.*f" % (OptionsInfo["Precision"], CalcEnergyMap[MinEnergyConfNum])  if OptionsInfo["EnergyOut"] else None
  627     MinEnergyMolConf = MolConfsMap[MinEnergyConfNum]
  628     
  629     ScaffoldEmbedRMSD = "%.4f" % float(MinEnergyMolConf.GetProp('EmbedRMS')) if OptionsInfo["ScaffoldRMSDOut"] else None
  630     MinEnergyMolConf.ClearProp('EmbedRMS')
  631     
  632     return (MinEnergyMolConf, True, MinEnergy, ScaffoldEmbedRMSD)
  633 
  634 def ConstrainAndMinimizeMoleculeUsingPsi4(Psi4Handle, Mol, RefMolCore, MolNum, ConfID = -1):
  635     "Minimize molecule using Psi4."
  636 
  637     # Setup a list for constrained atoms...
  638     ConstrainedAtomIndices = SetupConstrainedAtomIndicesForPsi4(Mol, RefMolCore)
  639     if len(ConstrainedAtomIndices) == 0:
  640         return (False, None)
  641 
  642     # Setup a Psi4Mol...
  643     Psi4Mol = SetupPsi4Mol(Psi4Handle, Mol, MolNum, ConfID)
  644     if Psi4Mol is None:
  645         return (False, None)
  646         
  647     #  Setup reference wave function...
  648     Reference = SetupReferenceWavefunction(Mol)
  649     Psi4Handle.set_options({'Reference': Reference})
  650     
  651     # Setup method name and basis set...
  652     MethodName, BasisSet = SetupMethodNameAndBasisSet(Mol)
  653 
  654     # Setup freeze list for constrained atoms...
  655     FreezeXYZList = [("%s xyz" % AtomIdex) for AtomIdex in ConstrainedAtomIndices]
  656     FreezeXYZString = " %s " % " ".join(FreezeXYZList)
  657     Psi4Handle.set_module_options("OPTKING", {"frozen_cartesian": FreezeXYZString})
  658     
  659     # Optimize geometry...
  660     Status, Energy, WaveFunction = Psi4Util.PerformGeometryOptimization(Psi4Handle, Psi4Mol, MethodName, BasisSet, ReturnWaveFunction = True, Quiet = OptionsInfo["QuietMode"])
  661     
  662     if not Status:
  663         PerformPsi4Cleanup(Psi4Handle)
  664         return (False, None)
  665 
  666     # Update atom positions...
  667     AtomPositions = Psi4Util.GetAtomPositions(Psi4Handle, WaveFunction, InAngstroms = True)
  668     RDKitUtil.SetAtomPositions(Mol, AtomPositions, ConfID = ConfID)
  669 
  670     # Convert energy units...
  671     if OptionsInfo["ApplyEnergyConversionFactor"]:
  672         Energy = Energy * OptionsInfo["EnergyConversionFactor"]
  673     
  674     # Clean up
  675     PerformPsi4Cleanup(Psi4Handle)
  676     
  677     return (True, Energy)
  678 
  679 def ConstrainEmbedAndMinimizeMoleculeUsingRDKit(Mol, RefMolCore, MolNum = None):
  680     "Constrain, embed, and minimize molecule."
  681 
  682     # Setup forcefield function to use for constrained minimization...
  683     ForceFieldFunction = None
  684     ForceFieldName = None
  685     if OptionsInfo["ConfGenerationParams"]["UseUFF"]:
  686         ForceFieldFunction = lambda mol, confId = -1 : AllChem.UFFGetMoleculeForceField(mol, confId = confId)
  687         ForeceFieldName = "UFF"
  688     else:
  689         ForceFieldFunction = lambda mol, confId = -1 : AllChem.MMFFGetMoleculeForceField(mol, AllChem.MMFFGetMoleculeProperties(mol, mmffVariant = OptionsInfo["ConfGenerationParams"]["MMFFVariant"]) , confId = confId)
  690         ForeceFieldName = "MMFF"
  691 
  692     if ForceFieldFunction is None:
  693         if not OptionsInfo["QuietMode"]:
  694             MiscUtil.PrintWarning("Failed to setup forcefield %s for molecule: %s\n" % (ForceFieldName, RDKitUtil.GetMolName(Mol, MolNum)))
  695         return (None, False)
  696     
  697     MaxConfs = OptionsInfo["ConfGenerationParams"]["MaxConfs"]
  698     EnforceChirality = OptionsInfo["ConfGenerationParams"]["EnforceChirality"]
  699     UseExpTorsionAnglePrefs = OptionsInfo["ConfGenerationParams"]["UseExpTorsionAnglePrefs"]
  700     UseBasicKnowledge = OptionsInfo["ConfGenerationParams"]["UseBasicKnowledge"]
  701     UseTethers = OptionsInfo["ConfGenerationParams"]["UseTethers"]
  702 
  703     MolConfs = []
  704     ConfIDs = [ConfID for ConfID in range(0, MaxConfs)]
  705     
  706     for ConfID in ConfIDs:
  707         try:
  708             MolConf = Chem.Mol(Mol)
  709             AllChem.ConstrainedEmbed(MolConf, RefMolCore, useTethers = UseTethers, coreConfId = -1, randomseed = ConfID, getForceField = ForceFieldFunction, enforceChirality = EnforceChirality, useExpTorsionAnglePrefs = UseExpTorsionAnglePrefs, useBasicKnowledge = UseBasicKnowledge)
  710         except (ValueError, RuntimeError, Chem.rdchem.KekulizeException)  as ErrMsg:
  711             if not OptionsInfo["QuietMode"]:
  712                 MolName = RDKitUtil.GetMolName(Mol, MolNum)
  713                 MiscUtil.PrintWarning("Constrained embedding couldn't  be performed for molecule %s:\n%s\n" % (RDKitUtil.GetMolName(Mol, MolNum), ErrMsg))
  714             return (None, False)
  715         
  716         MolConfs.append(MolConf)
  717 
  718     return FilterConstrainedConformationsByRMSD(Mol, MolConfs, MolNum)
  719 
  720 def FilterConstrainedConformationsByRMSD(Mol, MolConfs, MolNum = None):
  721     """Filter contarained conformations by RMSD."""
  722     
  723     EmbedRMSDCutoff = OptionsInfo["ConfGenerationParams"]["EmbedRMSDCutoff"]
  724     if EmbedRMSDCutoff is None or EmbedRMSDCutoff <= 0:
  725         if not OptionsInfo["QuietMode"]:
  726             MiscUtil.PrintInfo("\nGenerating initial ensemble of  constrained conformations by distance geometry  and forcefield for %s - EmbedRMSDCutoff: None; Size: %s" % (RDKitUtil.GetMolName(Mol, MolNum), len(MolConfs)))
  727         return (MolConfs, True)
  728 
  729     FirstMolConf = True
  730     SelectedMolConfs = []
  731     for MolConfIndex, MolConf in enumerate(MolConfs):
  732         if FirstMolConf:
  733             FirstMolConf = False
  734             SelectedMolConfs.append(MolConf)
  735             continue
  736         
  737         # Compare RMSD against all selected conformers...
  738         ProbeMolConf = Chem.RemoveHs(Chem.Mol(MolConf))
  739         IgnoreConf = False
  740         for SelectedMolConfIndex, SelectedMolConf in enumerate(SelectedMolConfs):
  741             RefMolConf = Chem.RemoveHs(Chem.Mol(SelectedMolConf))
  742             CalcRMSD = rdMolAlign.AlignMol(ProbeMolConf, RefMolConf)
  743             
  744             if CalcRMSD < EmbedRMSDCutoff:
  745                 IgnoreConf = True
  746                 break
  747         
  748         if IgnoreConf:
  749             continue
  750         
  751         SelectedMolConfs.append(MolConf)
  752         
  753     if not OptionsInfo["QuietMode"]:
  754         MiscUtil.PrintInfo("\nGenerating initial ensemble of constrained conformations by distance geometry and forcefield for %s - EmbedRMSDCutoff: %s; Size: %s; Size after RMSD filtering: %s" % (RDKitUtil.GetMolName(Mol, MolNum), EmbedRMSDCutoff, len(MolConfs), len(SelectedMolConfs)))
  755     
  756     return (SelectedMolConfs, True)
  757 
  758 def SetupConstrainedAtomIndicesForPsi4(Mol, RefMolCore, ConstrainHydrogens = False):
  759     """Setup a list of atom indices to be constrained during Psi4 minimizaiton."""
  760 
  761     AtomIndices = []
  762     
  763     # Collect matched heavy atoms along with attached hydrogens...
  764     for AtomIndex in Mol.GetSubstructMatch(RefMolCore):
  765         Atom = Mol.GetAtomWithIdx(AtomIndex)
  766         if Atom.GetAtomicNum() == 1:
  767             continue
  768         
  769         AtomIndices.append(AtomIndex)
  770         for AtomNbr in Atom.GetNeighbors():
  771             if AtomNbr.GetAtomicNum() == 1:
  772                 if ConstrainHydrogens:
  773                     AtomNbrIndex = AtomNbr.GetIdx()
  774                     AtomIndices.append(AtomNbrIndex)
  775     
  776     AtomIndices = sorted(AtomIndices)
  777 
  778     # Atom indices start from 1 for Psi4 instead 0 for RDKit...
  779     AtomIndices = [ AtomIndex + 1 for AtomIndex in AtomIndices]
  780     
  781     return AtomIndices
  782     
  783 def SetupPsi4Mol(Psi4Handle, Mol, MolNum, ConfID = -1):
  784     """Setup a Psi4 molecule object."""
  785 
  786     # Turn off recentering and reorientation to perform optimization in the
  787     # constrained coordinate space...
  788     MolGeometry = RDKitUtil.GetPsi4XYZFormatString(Mol, ConfID = ConfID, NoCom = True, NoReorient = True)
  789 
  790     try:
  791         Psi4Mol = Psi4Handle.geometry(MolGeometry)
  792     except Exception as ErrMsg:
  793         Psi4Mol = None
  794         if not OptionsInfo["QuietMode"]:
  795             MiscUtil.PrintWarning("Failed to create Psi4 molecule from geometry string: %s\n" % ErrMsg)
  796             MolName = RDKitUtil.GetMolName(Mol, MolNum)
  797             MiscUtil.PrintWarning("Ignoring molecule: %s" % MolName)
  798 
  799     return Psi4Mol
  800 
  801 def PerformPsi4Cleanup(Psi4Handle):
  802     """Perform clean up. """
  803 
  804     # No need to perform any explicit clean by calling
  805     # Psi4Handle.core.opt_clean(). It's already done by Psi4 after
  806     # optimization...
  807     
  808     # Clean up any leftover scratch files...
  809     if OptionsInfo["MPMode"]:
  810         Psi4Util.RemoveScratchFiles(Psi4Handle, OptionsInfo["Psi4RunParams"]["OutputFile"])
  811 
  812 def CheckAndValidateMolecule(Mol, MolCount = None):
  813     """Validate molecule for Psi4 calculations."""
  814     
  815     if Mol is None:
  816         if not OptionsInfo["QuietMode"]:
  817             MiscUtil.PrintInfo("\nProcessing molecule number %s..." % MolCount)
  818         return False
  819     
  820     MolName = RDKitUtil.GetMolName(Mol, MolCount)
  821     if not OptionsInfo["QuietMode"]:
  822         MiscUtil.PrintInfo("\nProcessing molecule %s..." % MolName)
  823     
  824     if RDKitUtil.IsMolEmpty(Mol):
  825         if not OptionsInfo["QuietMode"]:
  826             MiscUtil.PrintWarning("Ignoring empty molecule: %s\n" % MolName)
  827         return False
  828     
  829     if not RDKitUtil.ValidateElementSymbols(RDKitUtil.GetAtomSymbols(Mol)):
  830         if not OptionsInfo["QuietMode"]:
  831             MiscUtil.PrintWarning("Ignoring molecule containing invalid element symbols: %s\n" % MolName)
  832         return False
  833     
  834     return True
  835 
  836 def SetupMethodNameAndBasisSet(Mol):
  837     """Setup method name and basis set. """
  838     
  839     MethodName = OptionsInfo["MethodName"]
  840     if OptionsInfo["MethodNameAuto"]:
  841         MethodName = "B3LYP"
  842     
  843     BasisSet = OptionsInfo["BasisSet"]
  844     if OptionsInfo["BasisSetAuto"]:
  845         BasisSet = "6-31+G**" if RDKitUtil.IsAtomSymbolPresentInMol(Mol, "S") else "6-31G**"
  846     
  847     return (MethodName, BasisSet)
  848 
  849 def SetupReferenceWavefunction(Mol):
  850     """Setup method name and basis set. """
  851     
  852     Reference = OptionsInfo["Reference"]
  853     if OptionsInfo["ReferenceAuto"]:
  854         Reference = 'UHF' if (RDKitUtil.GetSpinMultiplicity(Mol) > 1) else 'RHF'
  855     
  856     return Reference
  857 
  858 def SetupEnergyConversionFactor(Psi4Handle):
  859     """Setup converstion factor for energt units. The Psi4 energy units are Hartrees."""
  860     
  861     EnergyUnits = OptionsInfo["EnergyUnits"]
  862     
  863     ApplyConversionFactor = True
  864     if re.match("^kcal\/mol$", EnergyUnits, re.I):
  865         ConversionFactor = Psi4Handle.constants.hartree2kcalmol
  866     elif re.match("^kJ\/mol$", EnergyUnits, re.I):
  867         ConversionFactor = Psi4Handle.constants.hartree2kJmol
  868     elif re.match("^eV$", EnergyUnits, re.I):
  869         ConversionFactor = Psi4Handle.constants.hartree2ev
  870     else:
  871         ApplyConversionFactor = False
  872         ConversionFactor = 1.0
  873     
  874     OptionsInfo["ApplyEnergyConversionFactor"] = ApplyConversionFactor
  875     OptionsInfo["EnergyConversionFactor"] = ConversionFactor
  876 
  877 def WriteMolecule(Writer, Mol, MolNum = None, Energy = None, ScaffoldEmbedRMSD = None, ConfID = None,):
  878     """Write molecule. """
  879 
  880     if ScaffoldEmbedRMSD is not None:
  881         Mol.SetProp("CoreScaffoldEmbedRMSD", ScaffoldEmbedRMSD)
  882             
  883     if OptionsInfo["EnergyOut"]  and Energy is not None:
  884         Mol.SetProp(OptionsInfo["EnergyDataFieldLabel"], Energy)
  885             
  886     try:
  887         if ConfID is None:
  888             Writer.write(Mol)
  889         else:
  890             Writer.write(Mol, confId = ConfID)
  891     except (ValueError, RuntimeError) as ErrMsg:
  892         if not OptionsInfo["QuietMode"]:
  893             MolName = RDKitUtil.GetMolName(Mol, MolNum)
  894             MiscUtil.PrintWarning("Failed to write molecule %s:\n%s\n" % (MolName, ErrMsg))
  895         return False
  896 
  897     return True
  898 
  899 def ProcessMCSParameters():
  900     """Set up and process MCS parameters."""
  901 
  902     SetupMCSParameters()
  903     ProcessSpecifiedMCSParameters()
  904 
  905 def SetupMCSParameters():
  906     """Set up default MCS parameters."""
  907     
  908     OptionsInfo["MCSParams"] = {"MaximizeBonds": True, "Threshold": 0.9, "TimeOut": 3600, "Verbose": False, "MatchValences": True, "MatchChiralTag": False, "RingMatchesRingOnly": True, "CompleteRingsOnly": True, "AtomCompare": rdFMCS.AtomCompare.CompareElements, "BondCompare": rdFMCS.BondCompare.CompareOrder, "SeedSMARTS": "", "MinNumAtoms": 1, "MinNumBonds": 0}
  909     
  910 def ProcessSpecifiedMCSParameters():
  911     """Process specified MCS parameters."""
  912 
  913     if re.match("^auto$", OptionsInfo["SpecifiedMCSParams"], re.I):
  914         # Nothing to process...
  915         return
  916     
  917     # Parse specified parameters...
  918     MCSParams = re.sub(" ", "", OptionsInfo["SpecifiedMCSParams"])
  919     if not MCSParams:
  920         MiscUtil.PrintError("No valid parameter name and value pairs specified using \"-m, --mcsParams\" option.")
  921 
  922     MCSParamsWords = MCSParams.split(",")
  923     if len(MCSParamsWords) % 2:
  924         MiscUtil.PrintError("The number of comma delimited paramater names and values, %d, specified using \"-m, --mcsParams\" option must be an even number." % (len(MCSParamsWords)))
  925     
  926     # Setup  canonical parameter names...
  927     ValidParamNames = []
  928     CanonicalParamNamesMap = {}
  929     for ParamName in sorted(OptionsInfo["MCSParams"]):
  930         ValidParamNames.append(ParamName)
  931         CanonicalParamNamesMap[ParamName.lower()] = ParamName
  932 
  933     # Validate and set paramater names and value...
  934     for Index in range(0, len(MCSParamsWords), 2):
  935         Name = MCSParamsWords[Index]
  936         Value = MCSParamsWords[Index + 1]
  937 
  938         CanonicalName = Name.lower()
  939         if  not CanonicalName in CanonicalParamNamesMap:
  940             MiscUtil.PrintError("The parameter name, %s, specified using \"-m, --mcsParams\" option is not a valid name. Supported parameter names: %s" % (Name,  " ".join(ValidParamNames)))
  941 
  942         ParamName = CanonicalParamNamesMap[CanonicalName]
  943         if re.match("^Threshold$", ParamName, re.I):
  944             Value = float(Value)
  945             if Value <= 0.0 or Value > 1.0 :
  946                 MiscUtil.PrintError("The parameter value, %s, specified using \"-m, --mcsParams\" option  for parameter, %s, is not a valid value. Supported values: > 0 and <= 1.0" % (Value, Name))
  947             ParamValue = Value
  948         elif re.match("^Timeout$", ParamName, re.I):
  949             Value = int(Value)
  950             if Value <= 0:
  951                 MiscUtil.PrintError("The parameter value, %s, specified using \"-m, --mcsParams\" option  for parameter, %s, is not a valid value. Supported values: > 0" % (Value, Name))
  952             ParamValue = Value
  953         elif re.match("^MinNumAtoms$", ParamName, re.I):
  954             Value = int(Value)
  955             if Value < 1:
  956                 MiscUtil.PrintError("The parameter value, %s, specified using \"-m, --mcsParams\" option  for parameter, %s, is not a valid value. Supported values: >= 1" % (Value, Name))
  957             ParamValue = Value
  958         elif re.match("^MinNumBonds$", ParamName, re.I):
  959             Value = int(Value)
  960             if Value < 0:
  961                 MiscUtil.PrintError("The parameter value, %s, specified using \"-m, --mcsParams\" option  for parameter, %s, is not a valid value. Supported values: >=0 " % (Value, Name))
  962             ParamValue = Value
  963         elif re.match("^AtomCompare$", ParamName, re.I):
  964             if re.match("^CompareAny$", Value, re.I):
  965                 ParamValue = rdFMCS.AtomCompare.CompareAny
  966             elif re.match("^CompareElements$", Value, re.I):
  967                 ParamValue = Chem.rdFMCS.AtomCompare.CompareElements
  968             elif re.match("^CompareIsotopes$", Value, re.I):
  969                 ParamValue = Chem.rdFMCS.AtomCompare.CompareIsotopes
  970             else:
  971                 MiscUtil.PrintError("The parameter value, %s, specified using \"-m, --mcsParams\" option  for parameter, %s, is not a valid value. Supported values: CompareAny CompareElements CompareIsotopes" % (Value, Name))
  972         elif re.match("^BondCompare$", ParamName, re.I):
  973             if re.match("^CompareAny$", Value, re.I):
  974                 ParamValue = Chem.rdFMCS.BondCompare.CompareAny
  975             elif re.match("^CompareOrder$", Value, re.I):
  976                 ParamValue = rdFMCS.BondCompare.CompareOrder
  977             elif re.match("^CompareOrderExact$", Value, re.I):
  978                 ParamValue = rdFMCS.BondCompare.CompareOrderExact
  979             else:
  980                 MiscUtil.PrintError("The parameter value, %s, specified using \"-m, --mcsParams\" option  for parameter, %s, is not a valid value. Supported values: CompareAny CompareOrder CompareOrderExact" % (Value, Name))
  981         elif re.match("^SeedSMARTS$", ParamName, re.I):
  982             if not len(Value):
  983                 MiscUtil.PrintError("The parameter value specified using \"-m, --mcsParams\" option  for parameter, %s, is empty. " % (Name))
  984             ParamValue = Value
  985         else:
  986             if not re.match("^(Yes|No|True|False)$", Value, re.I):
  987                 MiscUtil.PrintError("The parameter value, %s, specified using \"-m, --mcsParams\" option  for parameter, %s, is not a valid value. Supported values: Yes No True False" % (Value, Name))
  988             ParamValue = False
  989             if re.match("^(Yes|True)$", Value, re.I):
  990                 ParamValue = True
  991         
  992         # Set value...
  993         OptionsInfo["MCSParams"][ParamName] = ParamValue
  994 
  995 def ProcessOptions():
  996     """Process and validate command line arguments and options"""
  997     
  998     MiscUtil.PrintInfo("Processing options...")
  999     
 1000     # Validate options...
 1001     ValidateOptions()
 1002 
 1003     OptionsInfo["Infile"] = Options["--infile"]
 1004     OptionsInfo["SMILESInfileStatus"] = True if  MiscUtil.CheckFileExt(Options["--infile"], "smi csv tsv txt") else False
 1005     ParamsDefaultInfoOverride = {"RemoveHydrogens": False}
 1006     OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters("--infileParams", Options["--infileParams"], InfileName = Options["--infile"], ParamsDefaultInfo = ParamsDefaultInfoOverride)
 1007 
 1008     OptionsInfo["RefFile"] = Options["--reffile"]
 1009 
 1010     OptionsInfo["Outfile"] = Options["--outfile"]
 1011     OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters("--outfileParams", Options["--outfileParams"])
 1012     
 1013     OptionsInfo["Overwrite"] = Options["--overwrite"]
 1014     
 1015     OptionsInfo["Scaffold"] = Options["--scaffold"]
 1016     if re.match("^auto$", Options["--scaffold"], re.I):
 1017         UseScaffoldMCS = True
 1018         UseScaffoldSMARTS = False
 1019         ScaffoldSMARTS = None
 1020     else:
 1021         UseScaffoldMCS = False
 1022         UseScaffoldSMARTS = True
 1023         ScaffoldSMARTS = OptionsInfo["Scaffold"]
 1024     
 1025     OptionsInfo["UseScaffoldMCS"] = UseScaffoldMCS
 1026     OptionsInfo["UseScaffoldSMARTS"] = UseScaffoldSMARTS
 1027     OptionsInfo["ScaffoldSMARTS"] = ScaffoldSMARTS
 1028     OptionsInfo["ScaffoldPatternMol"] = None
 1029 
 1030     OptionsInfo["SpecifiedMCSParams"] = Options["--mcsParams"]
 1031     ProcessMCSParameters()
 1032     
 1033     OptionsInfo["ScaffoldRMSDOut"] = True if re.match("^yes$", Options["--scaffoldRMSDOut"], re.I) else False
 1034     
 1035     # Method, basis set, and reference wavefunction...
 1036     OptionsInfo["BasisSet"] = Options["--basisSet"]
 1037     OptionsInfo["BasisSetAuto"] = True if re.match("^auto$", Options["--basisSet"], re.I) else False
 1038     
 1039     OptionsInfo["MethodName"] = Options["--methodName"]
 1040     OptionsInfo["MethodNameAuto"] = True if re.match("^auto$", Options["--methodName"], re.I) else False
 1041     
 1042     OptionsInfo["Reference"] = Options["--reference"]
 1043     OptionsInfo["ReferenceAuto"] = True if re.match("^auto$", Options["--reference"], re.I) else False
 1044     
 1045     # Run and options parameters...
 1046     OptionsInfo["Psi4OptionsParams"] = Psi4Util.ProcessPsi4OptionsParameters("--psi4OptionsParams", Options["--psi4OptionsParams"])
 1047     OptionsInfo["Psi4RunParams"] = Psi4Util.ProcessPsi4RunParameters("--psi4RunParams", Options["--psi4RunParams"], InfileName = OptionsInfo["Infile"])
 1048 
 1049     # Conformer generation paramaters...
 1050     ParamsDefaultInfoOverride = {"MaxConfs": 50}
 1051     OptionsInfo["ConfGenerationParams"] = MiscUtil.ProcessOptionConformerParameters("--confParams", Options["--confParams"], ParamsDefaultInfoOverride)
 1052     
 1053     # Write energy parameters...
 1054     OptionsInfo["EnergyOut"] = True if re.match("^yes$", Options["--energyOut"], re.I) else False
 1055     OptionsInfo["EnergyUnits"] = Options["--energyUnits"]
 1056     
 1057     EnergyDataFieldLabel = Options["--energyDataFieldLabel"]
 1058     if re.match("^auto$", EnergyDataFieldLabel, re.I):
 1059         EnergyDataFieldLabel = "Psi4_Energy (%s)" % Options["--energyUnits"]
 1060     OptionsInfo["EnergyDataFieldLabel"] = EnergyDataFieldLabel
 1061     
 1062     OptionsInfo["MaxIters"] = int(Options["--maxIters"])
 1063 
 1064     OptionsInfo["MPMode"] = True if re.match("^yes$", Options["--mp"], re.I) else False
 1065     OptionsInfo["MPParams"] = MiscUtil.ProcessOptionMultiprocessingParameters("--mpParams", Options["--mpParams"])
 1066 
 1067     # Multiprocessing level...
 1068     MPLevelMoleculesMode = False
 1069     MPLevelConformersMode = False
 1070     MPLevel = Options["--mpLevel"]
 1071     if re.match("^Molecules$", MPLevel, re.I):
 1072         MPLevelMoleculesMode = True
 1073     elif re.match("^Conformers$", MPLevel, re.I):
 1074         MPLevelConformersMode = True
 1075     else:
 1076         MiscUtil.PrintError("The value, %s, specified for option \"--mpLevel\" is not valid. " % MPLevel)
 1077     OptionsInfo["MPLevel"] = MPLevel
 1078     OptionsInfo["MPLevelMoleculesMode"] = MPLevelMoleculesMode
 1079     OptionsInfo["MPLevelConformersMode"] = MPLevelConformersMode
 1080     
 1081     OptionsInfo["Precision"] = int(Options["--precision"])
 1082     OptionsInfo["QuietMode"] = True if re.match("^yes$", Options["--quiet"], re.I) else False
 1083 
 1084 def RetrieveOptions():
 1085     """Retrieve command line arguments and options"""
 1086     
 1087     # Get options...
 1088     global Options
 1089     Options = docopt(_docoptUsage_)
 1090 
 1091     # Set current working directory to the specified directory...
 1092     WorkingDir = Options["--workingdir"]
 1093     if WorkingDir:
 1094         os.chdir(WorkingDir)
 1095     
 1096     # Handle examples option...
 1097     if "--examples" in Options and Options["--examples"]:
 1098         MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_))
 1099         sys.exit(0)
 1100 
 1101 def ValidateOptions():
 1102     """Validate option values"""
 1103 
 1104     MiscUtil.ValidateOptionTextValue("--energyOut", Options["--energyOut"], "yes no")
 1105     MiscUtil.ValidateOptionTextValue("--energyUnits", Options["--energyUnits"], "Hartrees kcal/mol kJ/mol eV")
 1106     
 1107     MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"])
 1108     MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "sdf sd mol smi txt csv tsv")
 1109     
 1110     MiscUtil.ValidateOptionFilePath("-r, --reffile", Options["--reffile"])
 1111     MiscUtil.ValidateOptionFileExt("-r, --reffile", Options["--reffile"], "sdf sd mol")
 1112     
 1113     MiscUtil.ValidateOptionIntegerValue("--maxIters", Options["--maxIters"], {">": 0})
 1114     
 1115     MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "sdf sd")
 1116     MiscUtil.ValidateOptionsOutputFileOverwrite("-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"])
 1117     MiscUtil.ValidateOptionsDistinctFileNames("-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"])
 1118     
 1119     MiscUtil.ValidateOptionTextValue("--mp", Options["--mp"], "yes no")
 1120     MiscUtil.ValidateOptionTextValue("--mpLevel", Options["--mpLevel"], "Molecules Conformers")
 1121     
 1122     MiscUtil.ValidateOptionIntegerValue("-p, --precision", Options["--precision"], {">": 0})
 1123     MiscUtil.ValidateOptionTextValue("-q, --quiet", Options["--quiet"], "yes no")
 1124     
 1125     MiscUtil.ValidateOptionTextValue("--scaffoldRMSDOut", Options["--scaffoldRMSDOut"], "yes no")
 1126     
 1127 # Setup a usage string for docopt...
 1128 _docoptUsage_ = """
 1129 Psi4PerformConstrainedMinimization.py - Perform constrained minimization
 1130 
 1131 Usage:
 1132     Psi4PerformConstrainedMinimization.py [--basisSet <text>] [--confParams <Name,Value,...>] [--energyOut <yes or no>]
 1133                                           [--energyDataFieldLabel <text>] [--energyUnits <text>] [--infileParams <Name,Value,...>]
 1134                                           [--maxIters <number>] [--methodName <text>] [--mcsParams <Name,Value,...>]
 1135                                           [--mp <yes or no>] [--mpLevel <Molecules or Conformers>] [--mpParams <Name, Value,...>]
 1136                                           [ --outfileParams <Name,Value,...> ] [--overwrite] [--precision <number> ]
 1137                                           [--psi4OptionsParams <Name,Value,...>] [--psi4RunParams <Name,Value,...>]
 1138                                           [--quiet <yes or no>]  [--reference <text>] [--scaffold <auto or SMARTS>]
 1139                                           [--scaffoldRMSDOut <yes or no>] [-w <dir>] -i <infile> -r <reffile> -o <outfile> 
 1140     Psi4PerformConstrainedMinimization.py -h | --help | -e | --examples
 1141 
 1142 Description:
 1143     Generate 3D structures for molecules by performing a constrained energy
 1144     minimization against a reference molecule. The 3D structures for molecules
 1145     are generated using a combination of distance geometry and forcefield
 1146     minimization followed by geometry optimization using a quantum chemistry
 1147     method.
 1148 
 1149     An initial set of 3D conformers are generated for input molecules using
 1150     distance geometry. A common core scaffold, corresponding to a Maximum
 1151     Common Substructure (MCS) or an explicit SMARTS pattern,  is identified
 1152     between a pair of input and reference molecules. The core scaffold atoms in
 1153     input molecules are aligned against the same atoms in the reference molecule.
 1154     The energy of aligned structures are sequentially minimized using the forcefield
 1155     and a quantum chemistry method. The conformation with the lowest energy is
 1156     selected to represent the final structure.
 1157 
 1158     A Psi4 XYZ format geometry string is automatically generated for each molecule
 1159     in input file. It contains atom symbols and 3D coordinates for each atom in a
 1160     molecule. In addition, the formal charge and spin multiplicity are present in the
 1161     the geometry string. These values are either retrieved from molecule properties
 1162     named 'FormalCharge' and 'SpinMultiplicty' or dynamically calculated for a
 1163     molecule.
 1164 
 1165     The supported input file formats are: Mol (.mol), SD (.sdf, .sd), SMILES (.smi,
 1166     .csv, .tsv, .txt)
 1167 
 1168     The supported output file formats are: SD (.sdf, .sd)
 1169 
 1170 Options:
 1171     -b, --basisSet <text>  [default: auto]
 1172         Basis set to use for constrained energy minimization. Default: 6-31+G**
 1173         for sulfur containing molecules; Otherwise, 6-31G** [ Ref 150 ]. The specified 
 1174         value must be a valid Psi4 basis set. No validation is performed.
 1175         
 1176         The following list shows a representative sample of basis sets available
 1177         in Psi4:
 1178             
 1179             STO-3G, 6-31G, 6-31+G, 6-31++G, 6-31G*, 6-31+G*,  6-31++G*, 
 1180             6-31G**, 6-31+G**, 6-31++G**, 6-311G, 6-311+G, 6-311++G,
 1181             6-311G*, 6-311+G*, 6-311++G*, 6-311G**, 6-311+G**, 6-311++G**,
 1182             cc-pVDZ, cc-pCVDZ, aug-cc-pVDZ, cc-pVDZ-DK, cc-pCVDZ-DK, def2-SVP,
 1183             def2-SVPD, def2-TZVP, def2-TZVPD, def2-TZVPP, def2-TZVPPD
 1184             
 1185     --confParams <Name,Value,...>  [default: auto]
 1186         A comma delimited list of parameter name and value pairs for generating
 1187         initial 3D coordinates for molecules in input file. A common core scaffold is
 1188         identified between a pair of input and reference molecules. The atoms in
 1189         common core scaffold of input molecules are aligned against the reference
 1190         molecule followed by constrained energy minimization using forcefield
 1191         available in RDKit. The 3D structures are subsequently constrained and 
 1192         minimized by a quantum chemistry method available in Psi4.
 1193         
 1194         The supported parameter names along with their default values are shown
 1195         below:
 1196             
 1197             confMethod,ETKDG,
 1198             forceField,MMFF, forceFieldMMFFVariant,MMFF94,
 1199             enforceChirality,yes,embedRMSDCutoff,0.5,maxConfs,50,
 1200             useTethers,yes
 1201             
 1202             confMethod,ETKDG   [ Possible values: SDG, ETDG, KDG, ETKDG ]
 1203             forceField, MMFF   [ Possible values: UFF or MMFF ]
 1204             forceFieldMMFFVariant,MMFF94   [ Possible values: MMFF94 or MMFF94s ]
 1205             enforceChirality,yes   [ Possible values: yes or no ]
 1206             useTethers,yes   [ Possible values: yes or no ]
 1207             
 1208         confMethod: Conformation generation methodology for generating initial 3D
 1209         coordinates. Possible values: Standard Distance Geometry (SDG), Experimental
 1210         Torsion-angle preference with Distance Geometry (ETDG), basic Knowledge-terms
 1211         with Distance Geometry (KDG) and Experimental Torsion-angle preference
 1212         along with basic Knowledge-terms with Distance Geometry (ETKDG) [Ref 129] .
 1213         
 1214         forceField: Forcefield method to use for constrained energy minimization.
 1215         Possible values: Universal Force Field (UFF) [ Ref 81 ] or Merck Molecular
 1216         Mechanics Force Field [ Ref 83-87 ] .
 1217         
 1218         enforceChirality: Enforce chirality for defined chiral centers during
 1219         forcefield minimization.
 1220         
 1221         maxConfs: Maximum number of conformations to generate for each molecule
 1222         during the generation of an initial 3D conformation ensemble using conformation
 1223         generation methodology. The conformations are constrained and minimized using
 1224         the specified forcefield and a quantum chemistry method. The lowest energy
 1225         conformation is written to the output file.
 1226         
 1227         embedRMSDCutoff: RMSD cutoff for retaining initial set of conformers embedded
 1228         using distance geometry and forcefield minimization. All embedded conformers
 1229         are kept for 'None' value. Otherwise, only those conformers which are different
 1230         from each other by the specified RMSD cutoff, 0.5 by default, are kept. The first
 1231         embedded conformer is always retained.
 1232         
 1233         useTethers: Use tethers to optimize the final embedded conformation by
 1234         applying a series of extra forces to align matching atoms to the positions of
 1235         the core atoms. Otherwise, use simple distance constraints during the
 1236         optimization.
 1237     --energyOut <yes or no>  [default: yes]
 1238         Write out energy values.
 1239     --energyDataFieldLabel <text>  [default: auto]
 1240         Energy data field label for writing energy values. Default: Psi4_Energy (<Units>). 
 1241     --energyUnits <text>  [default: kcal/mol]
 1242         Energy units. Possible values: Hartrees, kcal/mol, kJ/mol, or eV.
 1243     -e, --examples
 1244         Print examples.
 1245     -h, --help
 1246         Print this help message.
 1247     -i, --infile <infile>
 1248         Input file name.
 1249     --infileParams <Name,Value,...>  [default: auto]
 1250         A comma delimited list of parameter name and value pairs for reading
 1251         molecules from files. The supported parameter names for different file
 1252         formats, along with their default values, are shown below:
 1253             
 1254             SD, MOL: removeHydrogens,no,sanitize,yes,strictParsing,yes
 1255             
 1256             SMILES: smilesColumn,1,smilesNameColumn,2,smilesDelimiter,space,
 1257                 smilesTitleLine,auto,sanitize,yes
 1258             
 1259         Possible values for smilesDelimiter: space, comma or tab.
 1260     --maxIters <number>  [default: 50]
 1261         Maximum number of iterations to perform for each molecule or conformer
 1262         during energy minimization by a quantum chemistry method.
 1263     -m, --methodName <text>  [default: auto]
 1264         Method to use for constrained energy minimization. Default: B3LYP [ Ref 150 ].
 1265         The specified value must be a valid Psi4 method name. No validation is
 1266         performed.
 1267         
 1268         The following list shows a representative sample of methods available
 1269         in Psi4:
 1270             
 1271             B1LYP, B2PLYP, B2PLYP-D3BJ, B2PLYP-D3MBJ, B3LYP, B3LYP-D3BJ,
 1272             B3LYP-D3MBJ, CAM-B3LYP, CAM-B3LYP-D3BJ, HF, HF-D3BJ,  HF3c, M05,
 1273             M06, M06-2x, M06-HF, M06-L, MN12-L, MN15, MN15-D3BJ,PBE, PBE0,
 1274             PBEH3c, PW6B95, PW6B95-D3BJ, WB97, WB97X, WB97X-D, WB97X-D3BJ
 1275             
 1276     --mcsParams <Name,Value,...>  [default: auto]
 1277         Parameter values to use for identifying a maximum common substructure
 1278         (MCS) in between a pair of reference and input molecules.In general, it is a
 1279         comma delimited list of parameter name and value pairs. The supported
 1280         parameter names along with their default values are shown below:
 1281             
 1282             atomCompare,CompareElements,bondCompare,CompareOrder,
 1283             maximizeBonds,yes,matchValences,yes,matchChiralTag,no,
 1284             minNumAtoms,1,minNumBonds,0,ringMatchesRingOnly,yes,
 1285             completeRingsOnly,yes,threshold,1.0,timeOut,3600,seedSMARTS,none
 1286             
 1287         Possible values for atomCompare: CompareAny, CompareElements,
 1288         CompareIsotopes. Possible values for bondCompare: CompareAny,
 1289         CompareOrder, CompareOrderExact.
 1290         
 1291         A brief description of MCS parameters taken from RDKit documentation is
 1292         as follows:
 1293             
 1294             atomCompare - Controls match between two atoms
 1295             bondCompare - Controls match between two bonds
 1296             maximizeBonds - Maximize number of bonds instead of atoms
 1297             matchValences - Include atom valences in the MCS match
 1298             matchChiralTag - Include atom chirality in the MCS match
 1299             minNumAtoms - Minimum number of atoms in the MCS match
 1300             minNumBonds - Minimum number of bonds in the MCS match
 1301             ringMatchesRingOnly - Ring bonds only match other ring bonds
 1302             completeRingsOnly - Partial rings not allowed during the match
 1303             threshold - Fraction of the dataset that must contain the MCS
 1304             seedSMARTS - SMARTS string as the seed of the MCS
 1305             timeout - Timeout for the MCS calculation in seconds
 1306             
 1307     --mp <yes or no>  [default: no]
 1308         Use multiprocessing.
 1309          
 1310         By default, input data is retrieved in a lazy manner via mp.Pool.imap()
 1311         function employing lazy RDKit data iterable. This allows processing of
 1312         arbitrary large data sets without any additional requirements memory.
 1313         
 1314         All input data may be optionally loaded into memory by mp.Pool.map()
 1315         before starting worker processes in a process pool by setting the value
 1316         of 'inputDataMode' to 'InMemory' in '--mpParams' option.
 1317         
 1318         A word to the wise: The default 'chunkSize' value of 1 during 'Lazy' input
 1319         data mode may adversely impact the performance. The '--mpParams' section
 1320         provides additional information to tune the value of 'chunkSize'.
 1321     --mpLevel <Molecules or Conformers>  [default: Molecules]
 1322         Perform multiprocessing at molecules or conformers level. Possible values:
 1323         Molecules or Conformers. The 'Molecules' value starts a process pool at the
 1324         molecules level. All conformers of a molecule are processed in a single
 1325         process. The 'Conformers' value, however, starts a process pool at the 
 1326         conformers level. Each conformer of a molecule is processed in an individual
 1327         process in the process pool. The default Psi4 'OutputFile' is set to 'quiet'
 1328         using '--psi4RunParams' for 'Conformers' level. Otherwise, it may generate
 1329         a large number of Psi4 output files.
 1330     --mpParams <Name,Value,...>  [default: auto]
 1331         A comma delimited list of parameter name and value pairs to configure
 1332         multiprocessing.
 1333         
 1334         The supported parameter names along with their default and possible
 1335         values are shown below:
 1336         
 1337             chunkSize, auto
 1338             inputDataMode, Lazy   [ Possible values: InMemory or Lazy ]
 1339             numProcesses, auto   [ Default: mp.cpu_count() ]
 1340         
 1341         These parameters are used by the following functions to configure and
 1342         control the behavior of multiprocessing: mp.Pool(), mp.Pool.map(), and
 1343         mp.Pool.imap().
 1344         
 1345         The chunkSize determines chunks of input data passed to each worker
 1346         process in a process pool by mp.Pool.map() and mp.Pool.imap() functions.
 1347         The default value of chunkSize is dependent on the value of 'inputDataMode'.
 1348         
 1349         The mp.Pool.map() function, invoked during 'InMemory' input data mode,
 1350         automatically converts RDKit data iterable into a list, loads all data into
 1351         memory, and calculates the default chunkSize using the following method
 1352         as shown in its code:
 1353         
 1354             chunkSize, extra = divmod(len(dataIterable), len(numProcesses) * 4)
 1355             if extra: chunkSize += 1
 1356         
 1357         For example, the default chunkSize will be 7 for a pool of 4 worker processes
 1358         and 100 data items.
 1359         
 1360         The mp.Pool.imap() function, invoked during 'Lazy' input data mode, employs
 1361         'lazy' RDKit data iterable to retrieve data as needed, without loading all the
 1362         data into memory. Consequently, the size of input data is not known a priori.
 1363         It's not possible to estimate an optimal value for the chunkSize. The default 
 1364         chunkSize is set to 1.
 1365         
 1366         The default value for the chunkSize during 'Lazy' data mode may adversely
 1367         impact the performance due to the overhead associated with exchanging
 1368         small chunks of data. It is generally a good idea to explicitly set chunkSize to
 1369         a larger value during 'Lazy' input data mode, based on the size of your input
 1370         data and number of processes in the process pool.
 1371         
 1372         The mp.Pool.map() function waits for all worker processes to process all
 1373         the data and return the results. The mp.Pool.imap() function, however,
 1374         returns the the results obtained from worker processes as soon as the
 1375         results become available for specified chunks of data.
 1376         
 1377         The order of data in the results returned by both mp.Pool.map() and 
 1378         mp.Pool.imap() functions always corresponds to the input data.
 1379     -o, --outfile <outfile>
 1380         Output file name.
 1381     --outfileParams <Name,Value,...>  [default: auto]
 1382         A comma delimited list of parameter name and value pairs for writing
 1383         molecules to files. The supported parameter names for different file
 1384         formats, along with their default values, are shown below:
 1385             
 1386             SD: kekulize,yes
 1387             
 1388     --overwrite
 1389         Overwrite existing files.
 1390     --precision <number>  [default: 6]
 1391         Floating point precision for writing energy values.
 1392     --psi4OptionsParams <Name,Value,...>  [default: none]
 1393         A comma delimited list of Psi4 option name and value pairs for setting
 1394         global and module options. The names are 'option_name' for global options
 1395         and 'module_name__option_name' for options local to a module. The
 1396         specified option names must be valid Psi4 names. No validation is
 1397         performed.
 1398         
 1399         The specified option name and  value pairs are processed and passed to
 1400         psi4.set_options() as a dictionary. The supported value types are float,
 1401         integer, boolean, or string. The float value string is converted into a float.
 1402         The valid values for a boolean string are yes, no, true, false, on, or off. 
 1403     --psi4RunParams <Name,Value,...>  [default: auto]
 1404         A comma delimited list of parameter name and value pairs for configuring
 1405         Psi4 jobs.
 1406         
 1407         The supported parameter names along with their default and possible
 1408         values are shown below:
 1409              
 1410             MemoryInGB, 1
 1411             NumThreads, 1
 1412             OutputFile, auto   [ Possible  values: stdout, quiet, or FileName ]
 1413             ScratchDir, auto   [ Possivle values: DirName]
 1414             RemoveOutputFile, yes   [ Possible values: yes, no, true, or false]
 1415             
 1416         These parameters control the runtime behavior of Psi4.
 1417         
 1418         The default file name for 'OutputFile' is <InFileRoot>_Psi4.out. The PID
 1419         is appended to output file name during multiprocessing as shown:
 1420         <InFileRoot>_Psi4_<PIDNum>.out. The 'stdout' value for 'OutputType'
 1421         sends Psi4 output to stdout. The 'quiet' or 'devnull' value suppresses
 1422         all Psi4 output. The 'OutputFile' is set to 'quiet' for 'auto' value during 
 1423         'Conformers' of '--mpLevel' option.
 1424         
 1425         The default 'Yes' value of 'RemoveOutputFile' option forces the removal
 1426         of any existing Psi4 before creating new files to append output from
 1427         multiple Psi4 runs.
 1428         
 1429         The option 'ScratchDir' is a directory path to the location of scratch
 1430         files. The default value corresponds to Psi4 default. It may be used to
 1431         override the deafult path.
 1432     -q, --quiet <yes or no>  [default: no]
 1433         Use quiet mode. The warning and information messages will not be printed.
 1434     -r, --reffile <reffile>
 1435         Reference input file name containing a 3D reference molecule. A common
 1436         core scaffold must be present in a pair of an input and reference molecules.
 1437         Otherwise, no constrained minimization is performed on the input molecule.
 1438     --reference <text>  [default: auto]
 1439         Reference wave function to use for energy calculation. Default: RHF or UHF.
 1440         The default values are Restricted Hartree-Fock (RHF) for closed-shell molecules
 1441         with all electrons paired and Unrestricted Hartree-Fock (UHF) for open-shell
 1442         molecules with unpaired electrons.
 1443         
 1444         The specified value must be a valid Psi4 reference wave function. No validation
 1445         is performed. For example: ROHF, CUHF, RKS, etc.
 1446         
 1447         The spin multiplicity determines the default value of reference wave function
 1448         for input molecules. It is calculated from number of free radical electrons using
 1449         Hund's rule of maximum multiplicity defined as 2S + 1 where S is the total
 1450         electron spin. The total spin is 1/2 the number of free radical electrons in a 
 1451         molecule. The value of 'SpinMultiplicity' molecule property takes precedence
 1452         over the calculated value of spin multiplicity.
 1453     -s, --scaffold <auto or SMARTS>  [default: auto]
 1454         Common core scaffold between a pair of input and reference molecules used for
 1455         constrained minimization of molecules in input file. Possible values: Auto or a
 1456         valid SMARTS pattern. The common core scaffold is automatically detected
 1457         corresponding to the Maximum Common Substructure (MCS) between a pair of
 1458         reference and input molecules. A valid SMARTS pattern may be optionally specified
 1459         for the common core scaffold.
 1460     --scaffoldRMSDOut <yes or no>  [default: No]
 1461         Write out RMSD value for common core alignment between a pair of input and
 1462         reference molecules.
 1463     -w, --workingdir <dir>
 1464         Location of working directory which defaults to the current directory.
 1465 
 1466 Examples:
 1467     To perform constrained energy minimization for molecules in a SMILES file against
 1468     a reference 3D molecule in a SD file using a common core scaffold between pairs of
 1469     input and reference molecules identified using MCS, generating up to 50 conformations
 1470     using ETKDG methodology followed by initial MMFF forcefield minimization and final
 1471     energy minimization using B3LYP/6-31G** and B3LYP/6-31+G** for non-sulfur and
 1472     sulfur containing molecules, and write out a SD file containing minimum energy
 1473     structure corresponding to each constrained molecule, type:
 1474 
 1475        %  Psi4PerformConstrainedMinimization.py  -i Psi4SampleAlkanes.smi
 1476           -r Psi4SampleEthane3D.sdf  -o Psi4SampleAlkanesOut.sdf
 1477 
 1478     To run the first example in a quiet mode and write out a SD file, type:
 1479 
 1480        %  Psi4PerformConstrainedMinimization.py  -q yes
 1481           -i Psi4SampleAlkanes.smi -r Psi4SampleEthane3D.sdf
 1482           -o Psi4SampleAlkanesOut.sdf
 1483 
 1484     To run the first example in multiprocessing mode at molecules level on all
 1485     available CPUs without loading all data into memory and write out a SD file,
 1486     type:
 1487 
 1488        %  Psi4PerformConstrainedMinimization.py  --mp yes
 1489           -i Psi4SampleAlkanes.smi -r Psi4SampleEthane3D.sdf
 1490           -o Psi4SampleAlkanesOut.sdf
 1491 
 1492     To run the first example in multiprocessing mode at conformers level on all
 1493     available CPUs without loading all data into memory and write out a SD file,
 1494     type:
 1495 
 1496        %  Psi4PerformConstrainedMinimization.py  --mp yes --mpLevel Conformers
 1497           -i Psi4SampleAlkanes.smi -r Psi4SampleEthane3D.sdf
 1498           -o Psi4SampleAlkanesOut.sdf
 1499 
 1500     To run the first example in multiprocessing mode at molecules level on all
 1501     available CPUs by loading all data into memory and write out a SD file, type:
 1502 
 1503        %  Psi4PerformConstrainedMinimization.py  --mp yes --mpParams
 1504           "inputDataMode,Lazy,numProcesses,4,chunkSize,8"
 1505           -i Psi4SampleAlkanes.smi -r Psi4SampleEthane3D.sdf
 1506           -o Psi4SampleAlkanesOut.sdf
 1507 
 1508     To rerun the first example using an explicit SMARTS string for a common core
 1509     scaffold and write out a SD file, type:
 1510 
 1511        %  Psi4PerformConstrainedMinimization.py  --scaffold "CC"
 1512           -i Psi4SampleAlkanes.smi -r Psi4SampleEthane3D.sdf
 1513           -o Psi4SampleAlkanesOut.sdf
 1514 
 1515     To run the first example using a specific set of parameters for generating
 1516     an initial set of conformers followed by energy minimization using forcefield
 1517     and a quantum chemistry method and write out a SD file type:
 1518 
 1519        %  Psi4PerformConstrainedMinimization.py  --confParams "
 1520           confMethod,ETKDG,forceField,MMFF, forceFieldMMFFVariant,MMFF94s,
 1521           maxConfs,20,embedRMSDCutoff,0.5" --energyUnits "kJ/mol" -m B3LYP
 1522           -b "6-31+G**" --maxIters 20 -i Psi4SampleAlkanes.smi -r Psi4SampleEthane3D.sdf
 1523           -o Psi4SampleAlkanesOut.sdf
 1524 
 1525     To run the first example for molecules in a CSV SMILES file, SMILES strings
 1526     in column 1, name column 2, and write out a SD file, type:
 1527 
 1528        %  Psi4PerformConstrainedMinimization.py  --infileParams
 1529           "smilesDelimiter,comma,smilesTitleLine,yes,smilesColumn,1,
 1530            smilesNameColumn,2" -i Psi4SampleAlkanes.csv
 1531            -r Psi4SampleEthane3D.sdf -o Psi4SampleAlkanesOut.sdf
 1532 
 1533 Author:
 1534 
 1535     Manish Sud(msud@san.rr.com)
 1536 
 1537 See also:
 1538     Psi4CalculateEnergy.py, Psi4CalculatePartialCharges.py, Psi4GenerateConformers.py,
 1539     Psi4GenerateConstrainedConformers.py, Psi4PerformMinimization.py
 1540 
 1541 Copyright:
 1542     Copyright (C) 2021 Manish Sud. All rights reserved.
 1543 
 1544     The functionality available in this script is implemented using Psi4, an
 1545     open source quantum chemistry software package, and RDKit, an open
 1546     source toolkit for cheminformatics developed by Greg Landrum.
 1547 
 1548     This file is part of MayaChemTools.
 1549 
 1550     MayaChemTools is free software; you can redistribute it and/or modify it under
 1551     the terms of the GNU Lesser General Public License as published by the Free
 1552     Software Foundation; either version 3 of the License, or (at your option) any
 1553     later version.
 1554 
 1555 """
 1556 
 1557 if __name__ == "__main__":
 1558     main()