1 #!/bin/env python 2 # 3 # File: Psi4PerformTorsionScan.py 4 # Author: Manish Sud <msud@san.rr.com> 5 # 6 # Copyright (C) 2024 Manish Sud. All rights reserved. 7 # 8 # The functionality available in this script is implemented using Psi4, an 9 # open source quantum chemistry software package, and RDKit, an open 10 # source toolkit for cheminformatics developed by Greg Landrum. 11 # 12 # This file is part of MayaChemTools. 13 # 14 # MayaChemTools is free software; you can redistribute it and/or modify it under 15 # the terms of the GNU Lesser General Public License as published by the Free 16 # Software Foundation; either version 3 of the License, or (at your option) any 17 # later version. 18 # 19 # MayaChemTools is distributed in the hope that it will be useful, but without 20 # any warranty; without even the implied warranty of merchantability of fitness 21 # for a particular purpose. See the GNU Lesser General Public License for more 22 # details. 23 # 24 # You should have received a copy of the GNU Lesser General Public License 25 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or 26 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, 27 # Boston, MA, 02111-1307, USA. 28 # 29 # 30 31 from __future__ import print_function 32 33 # Add local python path to the global path and import standard library modules... 34 import os 35 import sys; sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python")) 36 import time 37 import re 38 import glob 39 import shutil 40 import multiprocessing as mp 41 42 import matplotlib.pyplot as plt 43 import seaborn as sns 44 45 # Psi4 imports... 46 if (hasattr(shutil, 'which') and shutil.which("psi4") is None): 47 sys.stderr.write("\nWarning: Failed to find 'psi4' in your PATH indicating potential issues with your\n") 48 sys.stderr.write("Psi4 environment. The 'import psi4' directive in the global scope of the script\n") 49 sys.stderr.write("interferes with the multiprocessing functionality. It is imported later in the\n") 50 sys.stderr.write("local scope during the execution of the script and may fail. Check/update your\n") 51 sys.stderr.write("Psi4 environment and try again.\n\n") 52 53 # RDKit imports... 54 try: 55 from rdkit import rdBase 56 from rdkit import Chem 57 from rdkit.Chem import AllChem 58 from rdkit.Chem import rdMolAlign 59 from rdkit.Chem import rdMolTransforms 60 except ImportError as ErrMsg: 61 sys.stderr.write("\nFailed to import RDKit module/package: %s\n" % ErrMsg) 62 sys.stderr.write("Check/update your RDKit environment and try again.\n\n") 63 sys.exit(1) 64 65 # MayaChemTools imports... 66 try: 67 from docopt import docopt 68 import MiscUtil 69 import Psi4Util 70 import RDKitUtil 71 except ImportError as ErrMsg: 72 sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg) 73 sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n") 74 sys.exit(1) 75 76 ScriptName = os.path.basename(sys.argv[0]) 77 Options = {} 78 OptionsInfo = {} 79 80 def main(): 81 """Start execution of the script.""" 82 83 MiscUtil.PrintInfo("\n%s (Psi4: Imported later; RDKit v%s; MayaChemTools v%s; %s): Starting...\n" % (ScriptName, rdBase.rdkitVersion, MiscUtil.GetMayaChemToolsVersion(), time.asctime())) 84 85 (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime() 86 87 # Retrieve command line arguments and options... 88 RetrieveOptions() 89 90 # Process and validate command line arguments and options... 91 ProcessOptions() 92 93 # Perform actions required by the script... 94 PerformTorsionScan() 95 96 MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName) 97 MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime)) 98 99 def PerformTorsionScan(): 100 """Perform torsion scan.""" 101 102 # Setup a molecule reader for input file... 103 MiscUtil.PrintInfo("\nProcessing file %s..." % OptionsInfo["Infile"]) 104 OptionsInfo["InfileParams"]["AllowEmptyMols"] = True 105 Mols = RDKitUtil.ReadMolecules(OptionsInfo["Infile"], **OptionsInfo["InfileParams"]) 106 107 PlotExt = OptionsInfo["OutPlotParams"]["OutExt"] 108 FileDir, FileName, FileExt = MiscUtil.ParseFileName(Options["--outfile"]) 109 MiscUtil.PrintInfo("Generating output files %s_*.sdf, %s_*Torsion*Match*.sdf, %s_*Torsion*Match*Energies.csv, %s_*Torsion*Match*Plot.%s..." % (FileName, FileName, FileName, FileName, PlotExt)) 110 111 MolCount, ValidMolCount, MinimizationFailedCount, TorsionsMissingCount, TorsionsScanFailedCount = ProcessMolecules(Mols) 112 113 MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount) 114 MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount) 115 MiscUtil.PrintInfo("Number of molecules failed during initial minimization: %d" % MinimizationFailedCount) 116 MiscUtil.PrintInfo("Number of molecules without any matched torsions: %d" % TorsionsMissingCount) 117 MiscUtil.PrintInfo("Number of molecules failed during torsion scan: %d" % TorsionsScanFailedCount) 118 MiscUtil.PrintInfo("Number of ignored molecules: %d" % (MolCount - ValidMolCount + TorsionsMissingCount + MinimizationFailedCount + TorsionsScanFailedCount)) 119 120 def ProcessMolecules(Mols): 121 """Process molecules to perform torsion scan.""" 122 123 if OptionsInfo["MPMode"]: 124 return ProcessMoleculesUsingMultipleProcesses(Mols) 125 else: 126 return ProcessMoleculesUsingSingleProcess(Mols) 127 128 def ProcessMoleculesUsingSingleProcess(Mols): 129 """Process molecules to perform torsion scan using a single process.""" 130 131 # Intialize Psi4... 132 MiscUtil.PrintInfo("\nInitializing Psi4...") 133 Psi4Handle = Psi4Util.InitializePsi4(Psi4RunParams = OptionsInfo["Psi4RunParams"], Psi4OptionsParams = OptionsInfo["Psi4OptionsParams"], PrintVersion = True, PrintHeader = True) 134 OptionsInfo["psi4"] = Psi4Handle 135 136 # Setup max iterations global variable... 137 Psi4Util.UpdatePsi4OptionsParameters(Psi4Handle, {'GEOM_MAXITER': OptionsInfo["MaxIters"]}) 138 139 # Setup conversion factor for energy units... 140 SetupEnergyConversionFactor(Psi4Handle) 141 142 MolInfoText = "first molecule" 143 if not OptionsInfo["FirstMolMode"]: 144 MolInfoText = "all molecules" 145 146 if OptionsInfo["TorsionMinimize"]: 147 MiscUtil.PrintInfo("\nPerforming torsion scan on %s by generating conformation ensembles for specific torsion angles and constrained energy minimization of the ensembles..." % (MolInfoText)) 148 else: 149 MiscUtil.PrintInfo("\nPerforming torsion scan on %s by skipping generation of conformation ensembles for specific torsion angles and constrained energy minimization of the ensembles..." % (MolInfoText)) 150 151 SetupTorsionsPatternsInfo() 152 153 (MolCount, ValidMolCount, TorsionsMissingCount, MinimizationFailedCount, TorsionsScanFailedCount) = [0] * 5 154 155 for Mol in Mols: 156 MolCount += 1 157 158 if OptionsInfo["FirstMolMode"] and MolCount > 1: 159 MolCount -= 1 160 break 161 162 if not CheckAndValidateMolecule(Mol, MolCount): 163 continue 164 165 # Setup 2D coordinates for SMILES input file... 166 if OptionsInfo["SMILESInfileStatus"]: 167 AllChem.Compute2DCoords(Mol) 168 169 ValidMolCount += 1 170 171 Mol, MinimizationCalcStatus, TorsionsMatchStatus, TorsionsScanCalcStatus = PerformMinimizationAndTorsionScan(Mol, MolCount) 172 173 if not MinimizationCalcStatus: 174 MinimizationFailedCount += 1 175 continue 176 177 if not TorsionsMatchStatus: 178 TorsionsMissingCount += 1 179 continue 180 181 if not TorsionsScanCalcStatus: 182 TorsionsScanFailedCount += 1 183 continue 184 185 return (MolCount, ValidMolCount, MinimizationFailedCount, TorsionsMissingCount, TorsionsScanFailedCount) 186 187 def ProcessMoleculesUsingMultipleProcesses(Mols): 188 """Process and minimize molecules using multiprocessing.""" 189 190 if OptionsInfo["MPLevelTorsionAnglesMode"]: 191 return ProcessMoleculesUsingMultipleProcessesAtTorsionAnglesLevel(Mols) 192 elif OptionsInfo["MPLevelMoleculesMode"]: 193 return ProcessMoleculesUsingMultipleProcessesAtMoleculesLevel(Mols) 194 else: 195 MiscUtil.PrintError("The value, %s, option \"--mpLevel\" is not supported." % (OptionsInfo["MPLevel"])) 196 197 def ProcessMoleculesUsingMultipleProcessesAtMoleculesLevel(Mols): 198 """Process molecules to perform torsion scan using multiprocessing at molecules level.""" 199 200 MolInfoText = "first molecule" 201 if not OptionsInfo["FirstMolMode"]: 202 MolInfoText = "all molecules" 203 204 if OptionsInfo["TorsionMinimize"]: 205 MiscUtil.PrintInfo("\nPerforming torsion scan on %s using multiprocessing at molecules level by generating conformation ensembles for specific torsion angles and constrained energy minimization of the ensembles..." % (MolInfoText)) 206 else: 207 MiscUtil.PrintInfo("\nPerforming torsion scan %s using multiprocessing at molecules level by skipping generation of conformation ensembles for specific torsion angles and constrained energy minimization of the ensembles..." % (MolInfoText)) 208 209 MPParams = OptionsInfo["MPParams"] 210 211 # Setup data for initializing a worker process... 212 InitializeWorkerProcessArgs = (MiscUtil.ObjectToBase64EncodedString(Options), MiscUtil.ObjectToBase64EncodedString(OptionsInfo)) 213 214 if OptionsInfo["FirstMolMode"]: 215 Mol = Mols[0] 216 Mols = [Mol] 217 218 # Setup a encoded mols data iterable for a worker process... 219 WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStrings(Mols) 220 221 # Setup process pool along with data initialization for each process... 222 MiscUtil.PrintInfo("\nConfiguring multiprocessing using %s method..." % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()")) 223 MiscUtil.PrintInfo("NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n" % (MPParams["NumProcesses"], MPParams["InputDataMode"], ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"]))) 224 225 ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeWorkerProcess, InitializeWorkerProcessArgs) 226 227 # Start processing... 228 if re.match("^Lazy$", MPParams["InputDataMode"], re.I): 229 Results = ProcessPool.imap(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"]) 230 elif re.match("^InMemory$", MPParams["InputDataMode"], re.I): 231 Results = ProcessPool.map(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"]) 232 else: 233 MiscUtil.PrintError("The value, %s, specified for \"--inputDataMode\" is not supported." % (MPParams["InputDataMode"])) 234 235 (MolCount, ValidMolCount, TorsionsMissingCount, MinimizationFailedCount, TorsionsScanFailedCount) = [0] * 5 236 237 for Result in Results: 238 MolCount += 1 239 240 MolIndex, EncodedMol, MinimizationCalcStatus, TorsionsMatchStatus, TorsionsScanCalcStatus = Result 241 242 if EncodedMol is None: 243 continue 244 ValidMolCount += 1 245 246 if not MinimizationCalcStatus: 247 MinimizationFailedCount += 1 248 continue 249 250 if not TorsionsMatchStatus: 251 TorsionsMissingCount += 1 252 continue 253 254 if not TorsionsScanCalcStatus: 255 TorsionsScanFailedCount += 1 256 continue 257 258 Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol) 259 260 return (MolCount, ValidMolCount, MinimizationFailedCount, TorsionsMissingCount, TorsionsScanFailedCount) 261 262 def InitializeWorkerProcess(*EncodedArgs): 263 """Initialize data for a worker process.""" 264 265 global Options, OptionsInfo 266 267 if not OptionsInfo["QuietMode"]: 268 MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid()) 269 270 # Decode Options and OptionInfo... 271 Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0]) 272 OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1]) 273 274 # Initialize torsion patterns info... 275 SetupTorsionsPatternsInfo() 276 277 # Psi4 is initialized in the worker process to avoid creation of redundant Psi4 278 # output files for each process... 279 OptionsInfo["Psi4Initialized"] = False 280 281 def WorkerProcess(EncodedMolInfo): 282 """Process data for a worker process.""" 283 284 if not OptionsInfo["Psi4Initialized"]: 285 InitializePsi4ForWorkerProcess() 286 287 MolIndex, EncodedMol = EncodedMolInfo 288 289 MolNum = MolIndex + 1 290 (MinimizationCalcStatus, TorsionsMatchStatus, TorsionsScanCalcStatus) = [False] * 3 291 292 if EncodedMol is None: 293 return [MolIndex, None, MinimizationCalcStatus, TorsionsMatchStatus, TorsionsScanCalcStatus] 294 295 Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol) 296 if not CheckAndValidateMolecule(Mol, MolNum): 297 return [MolIndex, None, MinimizationCalcStatus, TorsionsMatchStatus, TorsionsScanCalcStatus] 298 299 # Setup 2D coordinates for SMILES input file... 300 if OptionsInfo["SMILESInfileStatus"]: 301 AllChem.Compute2DCoords(Mol) 302 303 Mol, MinimizationCalcStatus, TorsionsMatchStatus, TorsionsScanCalcStatus = PerformMinimizationAndTorsionScan(Mol, MolNum) 304 305 return [MolIndex, RDKitUtil.MolToBase64EncodedMolString(Mol, PropertyPickleFlags = Chem.PropertyPickleOptions.MolProps | Chem.PropertyPickleOptions.PrivateProps), MinimizationCalcStatus, TorsionsMatchStatus, TorsionsScanCalcStatus] 306 307 def ProcessMoleculesUsingMultipleProcessesAtTorsionAnglesLevel(Mols): 308 """Process molecules to perform torsion scan using multiprocessing at torsion angles level.""" 309 310 MolInfoText = "first molecule" 311 if not OptionsInfo["FirstMolMode"]: 312 MolInfoText = "all molecules" 313 314 if OptionsInfo["TorsionMinimize"]: 315 MiscUtil.PrintInfo("\nPerforming torsion scan on %s using multiprocessing at torsion angles level by generating conformation ensembles for specific torsion angles and constrained energy minimization of the ensembles..." % (MolInfoText)) 316 else: 317 MiscUtil.PrintInfo("\nPerforming torsion scan %s using multiprocessing at torsion angles level by skipping generation of conformation ensembles for specific torsion angles and constrained energy minimization of the ensembles..." % (MolInfoText)) 318 319 SetupTorsionsPatternsInfo() 320 321 (MolCount, ValidMolCount, TorsionsMissingCount, MinimizationFailedCount, TorsionsScanFailedCount) = [0] * 5 322 323 for Mol in Mols: 324 MolCount += 1 325 326 if OptionsInfo["FirstMolMode"] and MolCount > 1: 327 MolCount -= 1 328 break 329 330 if not CheckAndValidateMolecule(Mol, MolCount): 331 continue 332 333 # Setup 2D coordinates for SMILES input file... 334 if OptionsInfo["SMILESInfileStatus"]: 335 AllChem.Compute2DCoords(Mol) 336 337 ValidMolCount += 1 338 339 Mol, MinimizationCalcStatus, TorsionsMatchStatus, TorsionsScanCalcStatus = PerformMinimizationAndTorsionScan(Mol, MolCount, UseMultiProcessingAtTorsionAnglesLevel = True) 340 341 if not MinimizationCalcStatus: 342 MinimizationFailedCount += 1 343 continue 344 345 if not TorsionsMatchStatus: 346 TorsionsMissingCount += 1 347 continue 348 349 if not TorsionsScanCalcStatus: 350 TorsionsScanFailedCount += 1 351 continue 352 353 return (MolCount, ValidMolCount, MinimizationFailedCount, TorsionsMissingCount, TorsionsScanFailedCount) 354 355 def ScanSingleTorsionInMolUsingMultipleProcessesAtTorsionAnglesLevel(Mol, TorsionID, TorsionPattern, TorsionPatternMol, TorsionMatches, MolNum): 356 """Perform torsion scan for a molecule using multiple processses at torsion angles 357 level along with constrained energy minimization. 358 """ 359 360 if OptionsInfo["MPLevelMoleculesMode"]: 361 MiscUtil.PrintError("Single torison scanning for a molecule is not allowed in multiprocessing mode at molecules level.\n") 362 363 Mols, Angles = SetupMolsForSingleTorsionScanInMol(Mol, TorsionMatches, MolNum) 364 365 MPParams = OptionsInfo["MPParams"] 366 367 # Setup data for initializing a worker process... 368 369 # Track and avoid encoding TorsionsPatternsInfo as it contains RDKit molecule object... 370 TorsionsPatternsInfo = OptionsInfo["TorsionsPatternsInfo"] 371 OptionsInfo["TorsionsPatternsInfo"] = None 372 373 InitializeWorkerProcessArgs = (MiscUtil.ObjectToBase64EncodedString(Options), MiscUtil.ObjectToBase64EncodedString(OptionsInfo)) 374 375 # Restore TorsionsPatternsInfo... 376 OptionsInfo["TorsionsPatternsInfo"] = TorsionsPatternsInfo 377 378 # Setup a encoded mols data iterable for a worker process... 379 WorkerProcessDataIterable = GenerateBase64EncodedMolStringsWithTorsionScanInfo(Mol, (MolNum -1), Mols, Angles, TorsionID, TorsionPattern, TorsionPatternMol, TorsionMatches) 380 381 # Setup process pool along with data initialization for each process... 382 MiscUtil.PrintInfo("\nConfiguring multiprocessing using %s method..." % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()")) 383 MiscUtil.PrintInfo("NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n" % (MPParams["NumProcesses"], MPParams["InputDataMode"], ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"]))) 384 385 ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeTorsionAngleWorkerProcess, InitializeWorkerProcessArgs) 386 387 # Start processing... 388 if re.match("^Lazy$", MPParams["InputDataMode"], re.I): 389 Results = ProcessPool.imap(TorsionAngleWorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"]) 390 elif re.match("^InMemory$", MPParams["InputDataMode"], re.I): 391 Results = ProcessPool.map(TorsionAngleWorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"]) 392 else: 393 MiscUtil.PrintError("The value, %s, specified for \"--inputDataMode\" is not supported." % (MPParams["InputDataMode"])) 394 395 TorsionMols = [] 396 TorsionEnergies = [] 397 TorsionAngles = [] 398 399 for Result in Results: 400 EncodedTorsionMol, CalcStatus, Angle, Energy = Result 401 402 if not CalcStatus: 403 return (Mol, False, None, None, None) 404 405 if EncodedTorsionMol is None: 406 return (Mol, False, None, None, None) 407 TorsionMol = RDKitUtil.MolFromBase64EncodedMolString(EncodedTorsionMol) 408 409 TorsionMols.append(TorsionMol) 410 TorsionEnergies.append(Energy) 411 TorsionAngles.append(Angle) 412 413 return (Mol, True, TorsionMols, TorsionEnergies, TorsionAngles) 414 415 def InitializeTorsionAngleWorkerProcess(*EncodedArgs): 416 """Initialize data for a worker process.""" 417 418 global Options, OptionsInfo 419 420 if not OptionsInfo["QuietMode"]: 421 MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid()) 422 423 # Decode Options and OptionInfo... 424 Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0]) 425 OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1]) 426 427 # Initialize torsion patterns info... 428 SetupTorsionsPatternsInfo() 429 430 # Psi4 is initialized in the worker process to avoid creation of redundant Psi4 431 # output files for each process... 432 OptionsInfo["Psi4Initialized"] = False 433 434 def TorsionAngleWorkerProcess(EncodedMolInfo): 435 """Process data for a worker process.""" 436 437 if not OptionsInfo["Psi4Initialized"]: 438 InitializePsi4ForWorkerProcess() 439 440 MolIndex, EncodedMol, EncodedTorsionMol, TorsionAngle, TorsionID, TorsionPattern, EncodedTorsionPatternMol, TorsionMatches = EncodedMolInfo 441 442 if EncodedMol is None or EncodedTorsionMol is None or EncodedTorsionPatternMol is None: 443 return (None, False, None, None) 444 445 Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol) 446 TorsionMol = RDKitUtil.MolFromBase64EncodedMolString(EncodedTorsionMol) 447 TorsionPatternMol = RDKitUtil.MolFromBase64EncodedMolString(EncodedTorsionPatternMol) 448 449 TorsionMol, CalcStatus, Energy = MinimizeCalculateEnergyForTorsionMol(Mol, TorsionMol, TorsionAngle, TorsionID, TorsionPattern, TorsionPatternMol, TorsionMatches, (MolIndex + 1)) 450 451 return (RDKitUtil.MolToBase64EncodedMolString(TorsionMol, PropertyPickleFlags = Chem.PropertyPickleOptions.MolProps | Chem.PropertyPickleOptions.PrivateProps), CalcStatus, TorsionAngle, Energy) 452 453 def GenerateBase64EncodedMolStringsWithTorsionScanInfo(Mol, MolIndex, TorsionMols, TorsionAngles, TorsionID, TorsionPattern, TorsionPatternMol, TorsionMatches, PropertyPickleFlags = Chem.PropertyPickleOptions.AllProps): 454 """Set up an iterator for generating base64 encoded molecule string for 455 a torsion in a molecule along with appropriate trosion scan information. 456 """ 457 458 for Index, TorsionMol in enumerate(TorsionMols): 459 yield [MolIndex, None, None, TorsionAngles[Index], TorsionID, TorsionPattern, None, TorsionMatches] if (Mol is None or TorsionMol is None) else [MolIndex, RDKitUtil.MolToBase64EncodedMolString(Mol, PropertyPickleFlags), RDKitUtil.MolToBase64EncodedMolString(TorsionMol, PropertyPickleFlags), TorsionAngles[Index], TorsionID, TorsionPattern, RDKitUtil.MolToBase64EncodedMolString(TorsionPatternMol, PropertyPickleFlags), TorsionMatches] 460 461 def InitializePsi4ForWorkerProcess(): 462 """Initialize Psi4 for a worker process.""" 463 464 if OptionsInfo["Psi4Initialized"]: 465 return 466 467 OptionsInfo["Psi4Initialized"] = True 468 469 if OptionsInfo["MPLevelTorsionAnglesMode"] and re.match("auto", OptionsInfo["Psi4RunParams"]["OutputFileSpecified"], re.I): 470 # Run Psi4 in quiet mode during multiprocessing at Torsions level for 'auto' OutputFile... 471 OptionsInfo["Psi4RunParams"]["OutputFile"] = "quiet" 472 else: 473 # Update output file... 474 OptionsInfo["Psi4RunParams"]["OutputFile"] = Psi4Util.UpdatePsi4OutputFileUsingPID(OptionsInfo["Psi4RunParams"]["OutputFile"], os.getpid()) 475 476 # Intialize Psi4... 477 OptionsInfo["psi4"] = Psi4Util.InitializePsi4(Psi4RunParams = OptionsInfo["Psi4RunParams"], Psi4OptionsParams = OptionsInfo["Psi4OptionsParams"], PrintVersion = False, PrintHeader = True) 478 479 # Setup max iterations global variable... 480 Psi4Util.UpdatePsi4OptionsParameters(OptionsInfo["psi4"], {'GEOM_MAXITER': OptionsInfo["MaxIters"]}) 481 482 # Setup conversion factor for energy units... 483 SetupEnergyConversionFactor(OptionsInfo["psi4"]) 484 485 def PerformMinimizationAndTorsionScan(Mol, MolNum, UseMultiProcessingAtTorsionAnglesLevel = False): 486 """Perform minimization and torsions scan.""" 487 488 if not OptionsInfo["Infile3D"]: 489 # Add hydrogens... 490 Mol = Chem.AddHs(Mol, addCoords = True) 491 492 Mol, MinimizationCalcStatus = MinimizeMolecule(Mol, MolNum) 493 if not MinimizationCalcStatus: 494 return (Mol, False, False, False) 495 496 TorsionsMolInfo = SetupTorsionsMolInfo(Mol, MolNum) 497 if TorsionsMolInfo["NumOfMatches"] == 0: 498 return (Mol, True, False, False) 499 500 Mol, ScanCalcStatus = ScanAllTorsionsInMol(Mol, TorsionsMolInfo, MolNum, UseMultiProcessingAtTorsionAnglesLevel) 501 if not ScanCalcStatus: 502 return (Mol, True, True, False) 503 504 return (Mol, True, True, True) 505 506 def ScanAllTorsionsInMol(Mol, TorsionsMolInfo, MolNum, UseMultiProcessingAtTorsionAnglesLevel = False): 507 """Perform scans on all torsions in a molecule.""" 508 509 if TorsionsMolInfo["NumOfMatches"] == 0: 510 return Mol, True 511 512 MolName = RDKitUtil.GetMolName(Mol, MolNum) 513 514 FirstTorsionMode = OptionsInfo["FirstTorsionMode"] 515 TorsionsPatternsInfo = OptionsInfo["TorsionsPatternsInfo"] 516 517 TorsionPatternCount, TorsionScanCount, TorsionMatchCount = [0] * 3 518 TorsionMaxMatches = OptionsInfo["TorsionMaxMatches"] 519 520 for TorsionID in TorsionsPatternsInfo["IDs"]: 521 TorsionPatternCount += 1 522 TorsionPattern = TorsionsPatternsInfo["Pattern"][TorsionID] 523 TorsionPatternMol = TorsionsPatternsInfo["Mol"][TorsionID] 524 525 TorsionsMatches = TorsionsMolInfo["Matches"][TorsionID] 526 527 if TorsionsMatches is None: 528 continue 529 530 if FirstTorsionMode and TorsionPatternCount > 1: 531 if not OptionsInfo["QuietMode"]: 532 MiscUtil.PrintWarning("Already scaned first torsion pattern, \"%s\" for molecule %s during \"%s\" value of \"--modeTorsions\" option . Abandoning torsion scan...\n" % (TorsionPattern, MolName, OptionsInfo["ModeTorsions"])) 533 break 534 535 for Index, TorsionMatches in enumerate(TorsionsMatches): 536 TorsionMatchNum = Index + 1 537 TorsionMatchCount += 1 538 539 if TorsionMatchCount > TorsionMaxMatches: 540 if not OptionsInfo["QuietMode"]: 541 MiscUtil.PrintWarning("Already scaned a maximum of %s torsion matches for molecule %s specified by \"--torsionMaxMatches\" option. Abandoning torsion scan...\n" % (TorsionMaxMatches, MolName)) 542 break 543 544 TmpMol, TorsionScanStatus, TorsionMols, TorsionEnergies, TorsionAngles = ScanSingleTorsionInMol(Mol, TorsionID, TorsionPattern, TorsionPatternMol, TorsionMatches, TorsionMatchNum, MolNum, UseMultiProcessingAtTorsionAnglesLevel) 545 if not TorsionScanStatus: 546 continue 547 548 TorsionScanCount += 1 549 GenerateOutputFiles(Mol, MolNum, TorsionID, TorsionMatchNum, TorsionMols, TorsionEnergies, TorsionAngles) 550 551 if TorsionMatchCount > TorsionMaxMatches: 552 break 553 554 if TorsionScanCount: 555 GenerateStartingTorsionScanStructureOutfile(Mol, MolNum) 556 557 Status = True if TorsionScanCount else False 558 559 return (Mol, Status) 560 561 def ScanSingleTorsionInMol(Mol, TorsionID, TorsionPattern, TorsionPatternMol, TorsionMatches, TorsionMatchNum, MolNum, UseMultiProcessingAtTorsionAnglesLevel): 562 """Perform torsion scan for a molecule along with constrained energy minimization.""" 563 564 if not OptionsInfo["QuietMode"]: 565 MiscUtil.PrintInfo("\nProcessing torsion pattern, %s, match number, %s, in molecule %s..." % (TorsionPattern, TorsionMatchNum, RDKitUtil.GetMolName(Mol, MolNum))) 566 567 if OptionsInfo["TorsionMinimize"]: 568 MiscUtil.PrintInfo("Generating initial ensemble of constrained conformations by distance geometry and forcefield followed by Psi4 constraned minimization to select the lowest energy structure at specific torsion angles for molecule %s..." % (RDKitUtil.GetMolName(Mol, MolNum))) 569 else: 570 MiscUtil.PrintInfo("Calculating single point energy using Psi4 for molecule, %s, at specific torsion angles..." % (RDKitUtil.GetMolName(Mol, MolNum))) 571 572 if UseMultiProcessingAtTorsionAnglesLevel: 573 return ScanSingleTorsionInMolUsingMultipleProcessesAtTorsionAnglesLevel(Mol, TorsionID, TorsionPattern, TorsionPatternMol, TorsionMatches, MolNum) 574 else: 575 return ScanSingleTorsionInMolUsingSingleProcess(Mol, TorsionID, TorsionPattern, TorsionPatternMol, TorsionMatches, MolNum) 576 577 def ScanSingleTorsionInMolUsingSingleProcess(Mol, TorsionID, TorsionPattern, TorsionPatternMol, TorsionMatches, MolNum): 578 """Perform torsion scan for a molecule using single processs along with constrained 579 energy minimization.""" 580 581 TorsionMols = [] 582 TorsionEnergies = [] 583 TorsionAngles = [] 584 585 Mols, Angles = SetupMolsForSingleTorsionScanInMol(Mol, TorsionMatches, MolNum) 586 587 for Index, Angle in enumerate(Angles): 588 TorsionMol = Mols[Index] 589 TorsionMol, CalcStatus, Energy = MinimizeCalculateEnergyForTorsionMol(Mol, TorsionMol, Angle, TorsionID, TorsionPattern, TorsionPatternMol, TorsionMatches, MolNum) 590 591 if not CalcStatus: 592 return (Mol, False, None, None, None) 593 594 TorsionMols.append(TorsionMol) 595 TorsionEnergies.append(Energy) 596 TorsionAngles.append(Angle) 597 598 return (Mol, True, TorsionMols, TorsionEnergies, TorsionAngles) 599 600 def SetupMolsForSingleTorsionScanInMol(Mol, TorsionMatches, MolNum = None): 601 """Setup molecules corresponding to all torsion angles in a molecule.""" 602 603 AtomIndex1, AtomIndex2, AtomIndex3, AtomIndex4 = TorsionMatches 604 605 TorsionMols = [] 606 TorsionAngles = OptionsInfo["TorsionAngles"] 607 608 for Angle in TorsionAngles: 609 TorsionMol = Chem.Mol(Mol) 610 TorsionMolConf = TorsionMol.GetConformer(0) 611 612 rdMolTransforms.SetDihedralDeg(TorsionMolConf, AtomIndex1, AtomIndex2, AtomIndex3, AtomIndex4, Angle) 613 TorsionMols.append(TorsionMol) 614 615 return (TorsionMols, TorsionAngles) 616 617 def MinimizeCalculateEnergyForTorsionMol(Mol, TorsionMol, TorsionAngle, TorsionID, TorsionPattern, TorsionPatternMol, TorsionMatches, MolNum): 618 """"Calculate energy of a torsion molecule by performing an optional constrained 619 energy minimzation. 620 """ 621 622 if OptionsInfo["TorsionMinimize"]: 623 if not OptionsInfo["QuietMode"]: 624 MolName = RDKitUtil.GetMolName(Mol, MolNum) 625 MiscUtil.PrintInfo("\nProcessing torsion angle %s for molecule %s..." % (TorsionAngle, MolName)) 626 627 # Perform constrained minimization... 628 TorsionMatchesMol = RDKitUtil.MolFromSubstructureMatch(TorsionMol, TorsionPatternMol, TorsionMatches) 629 TorsionMol, CalcStatus, Energy = ConstrainAndMinimizeMolecule(TorsionMol, TorsionAngle, TorsionMatchesMol, TorsionMatches, MolNum) 630 631 if not CalcStatus: 632 if not OptionsInfo["QuietMode"]: 633 MolName = RDKitUtil.GetMolName(Mol, MolNum) 634 MiscUtil.PrintWarning("Failed to perform constrained minimization for molecule %s with torsion angle set to %s during torsion scan for torsion pattern %s. Abandoning torsion scan..." % (MolName, TorsionAngle, TorsionPattern)) 635 return (TorsionMol, False, None) 636 else: 637 # Setup a Psi4 molecule... 638 Psi4Mol = SetupPsi4Mol(OptionsInfo["psi4"], TorsionMol, MolNum) 639 if Psi4Mol is None: 640 return (TorsionMol, False, None) 641 642 # Calculate single point Psi4 energy... 643 CalcStatus, Energy = CalculateEnergyUsingPsi4(OptionsInfo["psi4"], Psi4Mol, TorsionMol, MolNum) 644 if not CalcStatus: 645 if not OptionsInfo["QuietMode"]: 646 MolName = RDKitUtil.GetMolName(Mol, MolNum) 647 MiscUtil.PrintWarning("Failed to calculate Psi4 energy for molecule %s with torsion angle set to %s during torsion scan for torsion pattern %s. Abandoning torsion scan..." % (MolName, TorsionAngle, TorsionPattern)) 648 return (TorsionMol, False, None) 649 650 return (TorsionMol, CalcStatus, Energy) 651 652 def SetupTorsionsMolInfo(Mol, MolNum = None): 653 """Setup torsions info for a molecule.""" 654 655 TorsionsPatternsInfo = OptionsInfo["TorsionsPatternsInfo"] 656 657 # Initialize... 658 TorsionsMolInfo = {} 659 TorsionsMolInfo["IDs"] = [] 660 TorsionsMolInfo["NumOfMatches"] = 0 661 TorsionsMolInfo["Matches"] = {} 662 for TorsionID in TorsionsPatternsInfo["IDs"]: 663 TorsionsMolInfo["IDs"].append(TorsionID) 664 TorsionsMolInfo["Matches"][TorsionID] = None 665 666 MolName = RDKitUtil.GetMolName(Mol, MolNum) 667 UseChirality = OptionsInfo["UseChirality"] 668 669 for TorsionID in TorsionsPatternsInfo["IDs"]: 670 # Match torsions.. 671 TorsionPattern = TorsionsPatternsInfo["Pattern"][TorsionID] 672 TorsionPatternMol = TorsionsPatternsInfo["Mol"][TorsionID] 673 TorsionsMatches = RDKitUtil.FilterSubstructureMatchesByAtomMapNumbers(Mol, TorsionPatternMol, Mol.GetSubstructMatches(TorsionPatternMol, useChirality = UseChirality)) 674 675 # Validate tosion matches... 676 ValidTorsionsMatches = [] 677 for Index, TorsionMatch in enumerate(TorsionsMatches): 678 if len(TorsionMatch) != 4: 679 if not OptionsInfo["QuietMode"]: 680 MiscUtil.PrintWarning("Ignoring invalid torsion match to atom indices, %s, for torsion pattern, %s, in molecule %s: It must match exactly 4 atoms." % (TorsionMatch, TorsionPattern, MolName)) 681 continue 682 683 if not RDKitUtil.AreAtomIndicesSequentiallyConnected(Mol, TorsionMatch): 684 if not OptionsInfo["QuietMode"]: 685 MiscUtil.PrintInfo("") 686 MiscUtil.PrintWarning("Invalid torsion match to atom indices, %s, for torsion pattern, %s, in molecule %s: Matched atom indices must be sequentially connected." % (TorsionMatch, TorsionPattern, MolName)) 687 MiscUtil.PrintWarning("Reordering matched atom indices in a sequentially connected manner...") 688 689 Status, ReorderdTorsionMatch = RDKitUtil.ReorderAtomIndicesInSequentiallyConnectedManner(Mol, TorsionMatch) 690 if Status: 691 TorsionMatch = ReorderdTorsionMatch 692 if not OptionsInfo["QuietMode"]: 693 MiscUtil.PrintWarning("Successfully reordered torsion match to atom indices, %s, for torsion pattern, %s, in molecule %s: Matched atom indices are now sequentially connected." % (TorsionMatch, TorsionPattern, MolName)) 694 else: 695 if not OptionsInfo["QuietMode"]: 696 MiscUtil.PrintWarning("Ignoring torsion match. Failed to reorder torsion match to atom indices, %s, for torsion pattern, %s, in molecule %s: Matched atom indices are not sequentially connected." % (TorsionMatch, TorsionPattern, MolName)) 697 continue 698 699 Bond = Mol.GetBondBetweenAtoms(TorsionMatch[1], TorsionMatch[2]) 700 if Bond.IsInRing(): 701 if not OptionsInfo["QuietMode"]: 702 MiscUtil.PrintWarning("Ignoring invalid torsion match to atom indices, %s, for torsion pattern, %s, in molecule %s: Matched atom indices, %s and %s, are not allowed to be in a ring." % (TorsionMatch, TorsionPattern, MolName, TorsionMatch[1], TorsionMatch[2])) 703 continue 704 705 # Filter matched torsions... 706 if OptionsInfo["FilterTorsionsByAtomIndicesMode"]: 707 InvalidAtomIndices = [] 708 for AtomIndex in TorsionMatch: 709 if AtomIndex not in OptionsInfo["TorsionsFilterByAtomIndicesList"]: 710 InvalidAtomIndices.append(AtomIndex) 711 if len(InvalidAtomIndices): 712 if not OptionsInfo["QuietMode"]: 713 MiscUtil.PrintWarning("Ignoring invalid torsion match to atom indices, %s, for torsion pattern, %s, in molecule %s: Matched atom indices, %s, must be present in the list, %s, specified using option \"--torsionsFilterbyAtomIndices\"." % (TorsionMatch, TorsionPattern, MolName, InvalidAtomIndices, OptionsInfo["TorsionsFilterByAtomIndicesList"])) 714 continue 715 716 ValidTorsionsMatches.append(TorsionMatch) 717 718 # Track valid matches... 719 if len(ValidTorsionsMatches): 720 TorsionsMolInfo["NumOfMatches"] += len(ValidTorsionsMatches) 721 TorsionsMolInfo["Matches"][TorsionID] = ValidTorsionsMatches 722 723 if TorsionsMolInfo["NumOfMatches"] == 0: 724 if not OptionsInfo["QuietMode"]: 725 MiscUtil.PrintWarning("Failed to match any torsions in molecule %s" % (MolName)) 726 727 return TorsionsMolInfo 728 729 def SetupTorsionsPatternsInfo(): 730 """Setup torsions patterns info.""" 731 732 TorsionsPatternsInfo = {} 733 TorsionsPatternsInfo["IDs"] = [] 734 TorsionsPatternsInfo["Pattern"] = {} 735 TorsionsPatternsInfo["Mol"] = {} 736 737 TorsionID = 0 738 for TorsionPattern in OptionsInfo["TorsionPatternsList"]: 739 TorsionID += 1 740 741 TorsionMol = Chem.MolFromSmarts(TorsionPattern) 742 if TorsionMol is None: 743 MiscUtil.PrintError("Failed to create torsion pattern molecule. The torsion SMILES/SMARTS pattern, \"%s\", specified using \"-t, --torsions\" option is not valid." % (TorsionPattern)) 744 745 TorsionsPatternsInfo["IDs"].append(TorsionID) 746 TorsionsPatternsInfo["Pattern"][TorsionID] = TorsionPattern 747 TorsionsPatternsInfo["Mol"][TorsionID] = TorsionMol 748 749 OptionsInfo["TorsionsPatternsInfo"] = TorsionsPatternsInfo 750 751 def MinimizeMolecule(Mol, MolNum = None): 752 """Minimize molecule.""" 753 754 return GenerateAndMinimizeConformersUsingForceField(Mol, MolNum) 755 756 def GenerateAndMinimizeConformersUsingForceField(Mol, MolNum = None): 757 """Generate and minimize conformers for a molecule to get the lowest energy conformer 758 as the minimized structure.""" 759 760 MolName = RDKitUtil.GetMolName(Mol, MolNum) 761 762 # Setup conformers... 763 ConfIDs = EmbedMolecule(Mol, MolNum) 764 if not len(ConfIDs): 765 if not OptionsInfo["QuietMode"]: 766 MiscUtil.PrintWarning("Minimization couldn't be performed for molecule %s: Embedding failed...\n" % MolName) 767 return (Mol, False) 768 769 if not OptionsInfo["QuietMode"]: 770 MiscUtil.PrintInfo("Performing initial minimization of molecule, %s, using forcefield by generating a conformation ensemble and selecting the lowest energy conformer - EmbedRMSDCutoff: %s; Size: %s; Size after RMSD filtering: %s" % (MolName, OptionsInfo["ConfGenerationParams"]["EmbedRMSDCutoff"], OptionsInfo["ConfGenerationParams"]["MaxConfs"], len(ConfIDs))) 771 772 # Minimize conformers... 773 CalcEnergyMap = {} 774 for ConfID in ConfIDs: 775 # Perform forcefield minimization... 776 Status, ConvergeStatus = MinimizeMoleculeUsingForceField(Mol, MolNum, ConfID) 777 if not Status: 778 return (Mol, False) 779 780 EnergyStatus, Energy = CalculateEnergyUsingForceField(Mol, ConfID) 781 if not EnergyStatus: 782 if not OptionsInfo["QuietMode"]: 783 MolName = RDKitUtil.GetMolName(Mol, MolNum) 784 MiscUtil.PrintWarning("Failed to retrieve calculated energy for conformation number %d of molecule %s. Try again after removing any salts or cleaing up the molecule...\n" % (ConfID, MolName)) 785 return (Mol, False) 786 787 if ConvergeStatus != 0: 788 if not OptionsInfo["QuietMode"]: 789 MiscUtil.PrintWarning("Minimization using forcefield failed to converge for molecule %s in %d steps. Try using higher value for \"maxIters\" in \"--confParams\" option...\n" % (MolName, OptionsInfo["ConfGenerationParams"]["MaxIters"])) 790 791 CalcEnergyMap[ConfID] = Energy 792 793 SortedConfIDs = sorted(ConfIDs, key = lambda ConfID: CalcEnergyMap[ConfID]) 794 MinEnergyConfID = SortedConfIDs[0] 795 796 for ConfID in [Conf.GetId() for Conf in Mol.GetConformers()]: 797 if ConfID == MinEnergyConfID: 798 continue 799 Mol.RemoveConformer(ConfID) 800 801 # Set ConfID to 0 for MinEnergyConf... 802 Mol.GetConformer(MinEnergyConfID).SetId(0) 803 804 return (Mol, True) 805 806 def ConstrainAndMinimizeMolecule(Mol, TorsionAngle, RefMolCore, RefMolMatches, MolNum = None): 807 """Constrain and minimize molecule.""" 808 809 # TorsionMol, CalcStatus, Energy 810 MolName = RDKitUtil.GetMolName(Mol, MolNum) 811 812 # Setup constrained conformers... 813 MolConfs, MolConfsStatus = ConstrainEmbedAndMinimizeMoleculeUsingRDKit(Mol, RefMolCore, RefMolMatches, MolNum) 814 if not MolConfsStatus: 815 if not OptionsInfo["QuietMode"]: 816 MiscUtil.PrintWarning("Conformation generation couldn't be performed for molecule %s: Constrained embedding failed...\n" % MolName) 817 return (Mol, False, None) 818 819 # Minimize conformers... 820 ConfNums = [] 821 CalcEnergyMap = {} 822 MolConfsMap = {} 823 824 for ConfNum, MolConf in enumerate(MolConfs): 825 if not OptionsInfo["QuietMode"]: 826 MiscUtil.PrintInfo("\nPerforming constrained minimization using Psi4 for molecule, %s, conformer number, %s, at torsion angle %s..." % (MolName, ConfNum, TorsionAngle)) 827 828 CalcStatus, Energy = ConstrainAndMinimizeMoleculeUsingPsi4(OptionsInfo["psi4"], MolConf, RefMolCore, RefMolMatches, MolNum) 829 if not CalcStatus: 830 if not OptionsInfo["QuietMode"]: 831 MiscUtil.PrintWarning("Minimization couldn't be performed for molecule %s\n" % (MolName)) 832 return (Mol, False, None) 833 834 ConfNums.append(ConfNum) 835 CalcEnergyMap[ConfNum] = Energy 836 MolConfsMap[ConfNum] = MolConf 837 838 SortedConfNums = sorted(ConfNums, key = lambda ConfNum: CalcEnergyMap[ConfNum]) 839 MinEnergyConfNum = SortedConfNums[0] 840 841 MinEnergy = CalcEnergyMap[MinEnergyConfNum] 842 MinEnergyMolConf = MolConfsMap[MinEnergyConfNum] 843 844 MinEnergyMolConf.ClearProp('EmbedRMS') 845 846 return (MinEnergyMolConf, True, MinEnergy) 847 848 def ConstrainAndMinimizeMoleculeUsingPsi4(Psi4Handle, Mol, RefMolCore, RefMolMatches, MolNum, ConfID = -1): 849 """Minimize molecule using Psi4.""" 850 851 # Setup a list for constrained atoms... 852 ConstrainedAtomIndices = SetupConstrainedAtomIndicesForPsi4(Mol, RefMolCore, RefMolMatches) 853 if len(ConstrainedAtomIndices) == 0: 854 return (False, None) 855 856 # Setup a Psi4Mol... 857 Psi4Mol = SetupPsi4Mol(Psi4Handle, Mol, MolNum, ConfID) 858 if Psi4Mol is None: 859 return (False, None) 860 861 # Setup reference wave function... 862 Reference = SetupReferenceWavefunction(Mol) 863 Psi4Handle.set_options({'Reference': Reference}) 864 865 # Setup method name and basis set... 866 MethodName, BasisSet = SetupMethodNameAndBasisSet(Mol) 867 868 # Setup freeze list for constrained atoms... 869 FreezeXYZList = [("%s xyz" % AtomIdex) for AtomIdex in ConstrainedAtomIndices] 870 FreezeXYZString = " %s " % " ".join(FreezeXYZList) 871 Psi4Handle.set_options({"OPTKING__frozen_cartesian": FreezeXYZString}) 872 873 # Optimize geometry... 874 Status, Energy, WaveFunction = Psi4Util.PerformGeometryOptimization(Psi4Handle, Psi4Mol, MethodName, BasisSet, ReturnWaveFunction = True, Quiet = OptionsInfo["QuietMode"]) 875 876 if not Status: 877 PerformPsi4Cleanup(Psi4Handle) 878 return (False, None) 879 880 # Update atom positions... 881 AtomPositions = Psi4Util.GetAtomPositions(Psi4Handle, WaveFunction, InAngstroms = True) 882 RDKitUtil.SetAtomPositions(Mol, AtomPositions, ConfID = ConfID) 883 884 # Convert energy units... 885 if OptionsInfo["ApplyEnergyConversionFactor"]: 886 Energy = Energy * OptionsInfo["EnergyConversionFactor"] 887 888 # Clean up 889 PerformPsi4Cleanup(Psi4Handle) 890 891 return (True, Energy) 892 893 def ConstrainEmbedAndMinimizeMoleculeUsingRDKit(Mol, RefMolCore, RefMolMatches, MolNum = None): 894 """Constrain, embed, and minimize molecule.""" 895 896 # Setup forcefield function to use for constrained minimization... 897 ForceFieldFunction = None 898 ForceFieldName = None 899 if OptionsInfo["ConfGenerationParams"]["UseUFF"]: 900 ForceFieldFunction = lambda mol, confId = -1 : AllChem.UFFGetMoleculeForceField(mol, confId = confId) 901 ForeceFieldName = "UFF" 902 else: 903 ForceFieldFunction = lambda mol, confId = -1 : AllChem.MMFFGetMoleculeForceField(mol, AllChem.MMFFGetMoleculeProperties(mol, mmffVariant = OptionsInfo["ConfGenerationParams"]["MMFFVariant"]) , confId = confId) 904 ForeceFieldName = "MMFF" 905 906 if ForceFieldFunction is None: 907 if not OptionsInfo["QuietMode"]: 908 MiscUtil.PrintWarning("Failed to setup forcefield %s for molecule: %s\n" % (ForceFieldName, RDKitUtil.GetMolName(Mol, MolNum))) 909 return (None, False) 910 911 MaxConfs = OptionsInfo["ConfGenerationParams"]["MaxConfsTorsions"] 912 EnforceChirality = OptionsInfo["ConfGenerationParams"]["EnforceChirality"] 913 UseExpTorsionAnglePrefs = OptionsInfo["ConfGenerationParams"]["UseExpTorsionAnglePrefs"] 914 ETVersion = OptionsInfo["ConfGenerationParams"]["ETVersion"] 915 UseBasicKnowledge = OptionsInfo["ConfGenerationParams"]["UseBasicKnowledge"] 916 UseTethers = OptionsInfo["ConfGenerationParams"]["UseTethers"] 917 918 MolConfs = [] 919 ConfIDs = [ConfID for ConfID in range(0, MaxConfs)] 920 921 for ConfID in ConfIDs: 922 try: 923 MolConf = Chem.Mol(Mol) 924 RDKitUtil.ConstrainAndEmbed(MolConf, RefMolCore, coreMatchesMol = RefMolMatches, useTethers = UseTethers, coreConfId = -1, randomseed = ConfID, getForceField = ForceFieldFunction, enforceChirality = EnforceChirality, useExpTorsionAnglePrefs = UseExpTorsionAnglePrefs, useBasicKnowledge = UseBasicKnowledge, ETversion = ETVersion) 925 except (ValueError, RuntimeError, Chem.rdchem.KekulizeException) as ErrMsg: 926 if not OptionsInfo["QuietMode"]: 927 MolName = RDKitUtil.GetMolName(Mol, MolNum) 928 MiscUtil.PrintWarning("Constrained embedding couldn't be performed for molecule %s:\n%s\n" % (RDKitUtil.GetMolName(Mol, MolNum), ErrMsg)) 929 return (None, False) 930 931 MolConfs.append(MolConf) 932 933 return FilterConstrainedConformationsByRMSD(Mol, MolConfs, MolNum) 934 935 def FilterConstrainedConformationsByRMSD(Mol, MolConfs, MolNum = None): 936 """Filter contarained conformations by RMSD.""" 937 938 EmbedRMSDCutoff = OptionsInfo["ConfGenerationParams"]["EmbedRMSDCutoff"] 939 if EmbedRMSDCutoff is None or EmbedRMSDCutoff <= 0: 940 if not OptionsInfo["QuietMode"]: 941 MiscUtil.PrintInfo("\nGenerating initial ensemble of constrained conformations by distance geometry and forcefield for %s - EmbedRMSDCutoff: None; Size: %s" % (RDKitUtil.GetMolName(Mol, MolNum), len(MolConfs))) 942 return (MolConfs, True) 943 944 FirstMolConf = True 945 SelectedMolConfs = [] 946 for MolConfIndex, MolConf in enumerate(MolConfs): 947 if FirstMolConf: 948 FirstMolConf = False 949 SelectedMolConfs.append(MolConf) 950 continue 951 952 # Compare RMSD against all selected conformers... 953 ProbeMolConf = Chem.RemoveHs(Chem.Mol(MolConf)) 954 IgnoreConf = False 955 for SelectedMolConfIndex, SelectedMolConf in enumerate(SelectedMolConfs): 956 RefMolConf = Chem.RemoveHs(Chem.Mol(SelectedMolConf)) 957 CalcRMSD = rdMolAlign.AlignMol(ProbeMolConf, RefMolConf) 958 959 if CalcRMSD < EmbedRMSDCutoff: 960 IgnoreConf = True 961 break 962 963 if IgnoreConf: 964 continue 965 966 SelectedMolConfs.append(MolConf) 967 968 if not OptionsInfo["QuietMode"]: 969 MiscUtil.PrintInfo("\nGenerating initial ensemble of constrained conformations by distance geometry and forcefield for %s - EmbedRMSDCutoff: %s; Size: %s; Size after RMSD filtering: %s" % (RDKitUtil.GetMolName(Mol, MolNum), EmbedRMSDCutoff, len(MolConfs), len(SelectedMolConfs))) 970 971 return (SelectedMolConfs, True) 972 973 def EmbedMolecule(Mol, MolNum = None): 974 """Embed conformations.""" 975 976 ConfIDs = [] 977 978 MaxConfs = OptionsInfo["ConfGenerationParams"]["MaxConfs"] 979 RandomSeed = OptionsInfo["ConfGenerationParams"]["RandomSeed"] 980 EnforceChirality = OptionsInfo["ConfGenerationParams"]["EnforceChirality"] 981 UseExpTorsionAnglePrefs = OptionsInfo["ConfGenerationParams"]["UseExpTorsionAnglePrefs"] 982 ETVersion = OptionsInfo["ConfGenerationParams"]["ETVersion"] 983 UseBasicKnowledge = OptionsInfo["ConfGenerationParams"]["UseBasicKnowledge"] 984 EmbedRMSDCutoff = OptionsInfo["ConfGenerationParams"]["EmbedRMSDCutoff"] 985 986 try: 987 ConfIDs = AllChem.EmbedMultipleConfs(Mol, numConfs = MaxConfs, randomSeed = RandomSeed, pruneRmsThresh = EmbedRMSDCutoff, enforceChirality = EnforceChirality, useExpTorsionAnglePrefs = UseExpTorsionAnglePrefs, useBasicKnowledge = UseBasicKnowledge, ETversion = ETVersion) 988 except ValueError as ErrMsg: 989 if not OptionsInfo["QuietMode"]: 990 MolName = RDKitUtil.GetMolName(Mol, MolNum) 991 MiscUtil.PrintWarning("Embedding failed for molecule %s:\n%s\n" % (MolName, ErrMsg)) 992 ConfIDs = [] 993 994 return ConfIDs 995 996 def MinimizeMoleculeUsingForceField(Mol, MolNum, ConfID = -1): 997 """Minimize molecule using forcefield available in RDKit.""" 998 999 try: 1000 if OptionsInfo["ConfGenerationParams"]["UseUFF"]: 1001 ConvergeStatus = AllChem.UFFOptimizeMolecule(Mol, confId = ConfID, maxIters = OptionsInfo["ConfGenerationParams"]["MaxIters"]) 1002 elif OptionsInfo["ConfGenerationParams"]["UseMMFF"]: 1003 ConvergeStatus = AllChem.MMFFOptimizeMolecule(Mol, confId = ConfID, maxIters = OptionsInfo["ConfGenerationParams"]["MaxIters"], mmffVariant = OptionsInfo["ConfGenerationParams"]["MMFFVariant"]) 1004 else: 1005 MiscUtil.PrintError("Minimization couldn't be performed: Specified forcefield, %s, is not supported" % OptionsInfo["ConfGenerationParams"]["ForceField"]) 1006 except (ValueError, RuntimeError, Chem.rdchem.KekulizeException) as ErrMsg: 1007 if not OptionsInfo["QuietMode"]: 1008 MolName = RDKitUtil.GetMolName(Mol, MolNum) 1009 MiscUtil.PrintWarning("Minimization using forcefield couldn't be performed for molecule %s:\n%s\n" % (MolName, ErrMsg)) 1010 return (False, None) 1011 1012 return (True, ConvergeStatus) 1013 1014 def CalculateEnergyUsingForceField(Mol, ConfID = None): 1015 """Calculate energy.""" 1016 1017 Status = True 1018 Energy = None 1019 1020 if ConfID is None: 1021 ConfID = -1 1022 1023 if OptionsInfo["ConfGenerationParams"]["UseUFF"]: 1024 UFFMoleculeForcefield = AllChem.UFFGetMoleculeForceField(Mol, confId = ConfID) 1025 if UFFMoleculeForcefield is None: 1026 Status = False 1027 else: 1028 Energy = UFFMoleculeForcefield.CalcEnergy() 1029 elif OptionsInfo["ConfGenerationParams"]["UseMMFF"]: 1030 MMFFMoleculeProperties = AllChem.MMFFGetMoleculeProperties(Mol, mmffVariant = OptionsInfo["ConfGenerationParams"]["MMFFVariant"]) 1031 MMFFMoleculeForcefield = AllChem.MMFFGetMoleculeForceField(Mol, MMFFMoleculeProperties, confId = ConfID) 1032 if MMFFMoleculeForcefield is None: 1033 Status = False 1034 else: 1035 Energy = MMFFMoleculeForcefield.CalcEnergy() 1036 else: 1037 MiscUtil.PrintError("Couldn't retrieve conformer energy: Specified forcefield, %s, is not supported" % OptionsInfo["ConfGenerationParams"]["ForceField"]) 1038 1039 return (Status, Energy) 1040 1041 def CalculateEnergyUsingPsi4(Psi4Handle, Psi4Mol, Mol, MolNum = None): 1042 """Calculate single point energy using Psi4.""" 1043 1044 Status = False 1045 Energy = None 1046 1047 # Setup reference wave function... 1048 Reference = SetupReferenceWavefunction(Mol) 1049 Psi4Handle.set_options({'Reference': Reference}) 1050 1051 # Setup method name and basis set... 1052 MethodName, BasisSet = SetupMethodNameAndBasisSet(Mol) 1053 1054 Status, Energy = Psi4Util.CalculateSinglePointEnergy(Psi4Handle, Psi4Mol, MethodName, BasisSet, Quiet = OptionsInfo["QuietMode"]) 1055 1056 # Convert energy units... 1057 if Status: 1058 if OptionsInfo["ApplyEnergyConversionFactor"]: 1059 Energy = Energy * OptionsInfo["EnergyConversionFactor"] 1060 1061 # Clean up 1062 PerformPsi4Cleanup(Psi4Handle) 1063 1064 return (Status, Energy) 1065 1066 def SetupConstrainedAtomIndicesForPsi4(Mol, RefMolCore, RefMolMatches, ConstrainHydrogens = False): 1067 """Setup a list of atom indices to be constrained during Psi4 minimizaiton.""" 1068 1069 AtomIndices = [] 1070 1071 if RefMolMatches is None: 1072 ConstrainAtomIndices = Mol.GetSubstructMatch(RefMolCore) 1073 else: 1074 ConstrainAtomIndices = RefMolMatches 1075 1076 # Collect matched heavy atoms along with attached hydrogens... 1077 for AtomIndex in ConstrainAtomIndices: 1078 Atom = Mol.GetAtomWithIdx(AtomIndex) 1079 if Atom.GetAtomicNum() == 1: 1080 continue 1081 1082 AtomIndices.append(AtomIndex) 1083 for AtomNbr in Atom.GetNeighbors(): 1084 if AtomNbr.GetAtomicNum() == 1: 1085 if ConstrainHydrogens: 1086 AtomNbrIndex = AtomNbr.GetIdx() 1087 AtomIndices.append(AtomNbrIndex) 1088 1089 AtomIndices = sorted(AtomIndices) 1090 1091 # Atom indices start from 1 for Psi4 instead 0 for RDKit... 1092 AtomIndices = [ AtomIndex + 1 for AtomIndex in AtomIndices] 1093 1094 return AtomIndices 1095 1096 def SetupPsi4Mol(Psi4Handle, Mol, MolNum, ConfID = -1): 1097 """Setup a Psi4 molecule object.""" 1098 1099 # Turn off recentering and reorientation to perform optimization in the 1100 # constrained coordinate space... 1101 MolGeometry = RDKitUtil.GetPsi4XYZFormatString(Mol, ConfID = ConfID, NoCom = True, NoReorient = True) 1102 1103 try: 1104 Psi4Mol = Psi4Handle.geometry(MolGeometry) 1105 except Exception as ErrMsg: 1106 Psi4Mol = None 1107 if not OptionsInfo["QuietMode"]: 1108 MiscUtil.PrintWarning("Failed to create Psi4 molecule from geometry string: %s\n" % ErrMsg) 1109 MolName = RDKitUtil.GetMolName(Mol, MolNum) 1110 MiscUtil.PrintWarning("Ignoring molecule: %s" % MolName) 1111 1112 return Psi4Mol 1113 1114 def PerformPsi4Cleanup(Psi4Handle): 1115 """Perform clean up.""" 1116 1117 # Clean up after Psi4 run... 1118 Psi4Handle.core.clean() 1119 1120 # Clean up any leftover scratch files... 1121 if OptionsInfo["MPMode"]: 1122 Psi4Util.RemoveScratchFiles(Psi4Handle, OptionsInfo["Psi4RunParams"]["OutputFile"]) 1123 1124 def CheckAndValidateMolecule(Mol, MolCount = None): 1125 """Validate molecule for Psi4 calculations.""" 1126 1127 if Mol is None: 1128 if not OptionsInfo["QuietMode"]: 1129 MiscUtil.PrintInfo("\nProcessing molecule number %s..." % MolCount) 1130 return False 1131 1132 MolName = RDKitUtil.GetMolName(Mol, MolCount) 1133 if not OptionsInfo["QuietMode"]: 1134 MiscUtil.PrintInfo("\nProcessing molecule %s..." % MolName) 1135 1136 if RDKitUtil.IsMolEmpty(Mol): 1137 if not OptionsInfo["QuietMode"]: 1138 MiscUtil.PrintWarning("Ignoring empty molecule: %s\n" % MolName) 1139 return False 1140 1141 if not RDKitUtil.ValidateElementSymbols(RDKitUtil.GetAtomSymbols(Mol)): 1142 if not OptionsInfo["QuietMode"]: 1143 MiscUtil.PrintWarning("Ignoring molecule containing invalid element symbols: %s\n" % MolName) 1144 return False 1145 1146 if OptionsInfo["Infile3D"]: 1147 if not Mol.GetConformer().Is3D(): 1148 if not OptionsInfo["QuietMode"]: 1149 MiscUtil.PrintWarning("3D tag is not set for molecule: %s\n" % MolName) 1150 1151 if OptionsInfo["Infile3D"]: 1152 # Otherwise, Hydrogens are always added... 1153 if RDKitUtil.AreHydrogensMissingInMolecule(Mol): 1154 if not OptionsInfo["QuietMode"]: 1155 MiscUtil.PrintWarning("Missing hydrogens in molecule: %s\n" % MolName) 1156 1157 return True 1158 1159 def SetupMethodNameAndBasisSet(Mol): 1160 """Setup method name and basis set.""" 1161 1162 MethodName = OptionsInfo["MethodName"] 1163 if OptionsInfo["MethodNameAuto"]: 1164 MethodName = "B3LYP" 1165 1166 BasisSet = OptionsInfo["BasisSet"] 1167 if OptionsInfo["BasisSetAuto"]: 1168 BasisSet = "6-31+G**" if RDKitUtil.IsAtomSymbolPresentInMol(Mol, "S") else "6-31G**" 1169 1170 return (MethodName, BasisSet) 1171 1172 def SetupReferenceWavefunction(Mol): 1173 """Setup reference wavefunction.""" 1174 1175 Reference = OptionsInfo["Reference"] 1176 if OptionsInfo["ReferenceAuto"]: 1177 Reference = 'UHF' if (RDKitUtil.GetSpinMultiplicity(Mol) > 1) else 'RHF' 1178 1179 return Reference 1180 1181 def SetupEnergyConversionFactor(Psi4Handle): 1182 """Setup converstion factor for energt units. The Psi4 energy units are Hartrees.""" 1183 1184 EnergyUnits = OptionsInfo["EnergyUnits"] 1185 1186 ApplyConversionFactor = True 1187 if re.match("^kcal\/mol$", EnergyUnits, re.I): 1188 ConversionFactor = Psi4Handle.constants.hartree2kcalmol 1189 elif re.match("^kJ\/mol$", EnergyUnits, re.I): 1190 ConversionFactor = Psi4Handle.constants.hartree2kJmol 1191 elif re.match("^eV$", EnergyUnits, re.I): 1192 ConversionFactor = Psi4Handle.constants.hartree2ev 1193 else: 1194 ApplyConversionFactor = False 1195 ConversionFactor = 1.0 1196 1197 OptionsInfo["ApplyEnergyConversionFactor"] = ApplyConversionFactor 1198 OptionsInfo["EnergyConversionFactor"] = ConversionFactor 1199 1200 def GenerateStartingTorsionScanStructureOutfile(Mol, MolNum): 1201 """Write out the structure of molecule used for starting tosion scan.""" 1202 1203 FileDir, FileName, FileExt = MiscUtil.ParseFileName(Options["--outfile"]) 1204 MolName = GetOutputFileMolName(Mol, MolNum) 1205 1206 Outfile = "%s_%s.%s" % (FileName, MolName, FileExt) 1207 1208 # Set up a molecule writer... 1209 Writer = RDKitUtil.MoleculesWriter(Outfile, **OptionsInfo["OutfileParams"]) 1210 if Writer is None: 1211 MiscUtil.PrintWarning("Failed to setup a writer for output fie %s " % Outfile) 1212 return 1213 1214 Writer.write(Mol) 1215 1216 if Writer is not None: 1217 Writer.close() 1218 1219 def GenerateOutputFiles(Mol, MolNum, TorsionID, TorsionMatchNum, TorsionMols, TorsionEnergies, TorsionAngles): 1220 """Generate output files.""" 1221 1222 StructureOutfile, EnergyTextOutfile, PlotOutfile = SetupOutputFileNames(Mol, MolNum, TorsionID, TorsionMatchNum) 1223 1224 GenerateScannedTorsionsStructureOutfile(StructureOutfile, Mol, MolNum, TorsionID, TorsionMatchNum, TorsionMols, TorsionEnergies, TorsionAngles) 1225 GenerateEnergyTextOutfile(EnergyTextOutfile, Mol, MolNum, TorsionID, TorsionMatchNum, TorsionMols, TorsionEnergies, TorsionAngles) 1226 GeneratePlotOutfile(PlotOutfile, Mol, MolNum, TorsionID, TorsionMatchNum, TorsionMols, TorsionEnergies, TorsionAngles) 1227 1228 def GenerateScannedTorsionsStructureOutfile(Outfile, Mol, MolNum, TorsionID, TorsionMatchNum, TorsionMols, TorsionEnergies, TorsionAngles): 1229 """Write out structures generated after torsion scan along with associated data.""" 1230 1231 # Set up a molecule writer... 1232 Writer = RDKitUtil.MoleculesWriter(Outfile, **OptionsInfo["OutfileParams"]) 1233 if Writer is None: 1234 MiscUtil.PrintWarning("Failed to setup a writer for output fie %s " % Outfile) 1235 return 1236 1237 MolName = RDKitUtil.GetMolName(Mol, MolNum) 1238 1239 RelativeTorsionEnergies = SetupRelativeEnergies(TorsionEnergies) 1240 for Index, TorsionMol in enumerate(TorsionMols): 1241 TorsionAngle = "%s" % TorsionAngles[Index] 1242 TorsionMol.SetProp("Torsion_Angle", TorsionAngle) 1243 1244 TorsionEnergy = "%.*f" % (OptionsInfo["Precision"], TorsionEnergies[Index]) 1245 TorsionMol.SetProp(OptionsInfo["EnergyDataFieldLabel"], TorsionEnergy) 1246 1247 RelativeTorsionEnergy = "%.*f" % (OptionsInfo["Precision"], RelativeTorsionEnergies[Index]) 1248 TorsionMol.SetProp(OptionsInfo["EnergyRelativeDataFieldLabel"], RelativeTorsionEnergy) 1249 1250 TorsionMolName = "%s_Deg%s" % (MolName, TorsionAngle) 1251 TorsionMol.SetProp("_Name", TorsionMolName) 1252 1253 Writer.write(TorsionMol) 1254 1255 if Writer is not None: 1256 Writer.close() 1257 1258 def GenerateEnergyTextOutfile(Outfile, Mol, MolNum, TorsionID, TorsionMatchNum, TorsionMols, TorsionEnergies, TorsionAngles): 1259 """Write out torsion angles and energies.""" 1260 1261 # Setup a writer... 1262 Writer = open(Outfile, "w") 1263 if Writer is None: 1264 MiscUtil.PrintError("Failed to setup a writer for output fie %s " % Outfile) 1265 1266 # Write headers... 1267 Writer.write("TorsionAngle,%s,%s\n" % (OptionsInfo["EnergyDataFieldLabel"], OptionsInfo["EnergyRelativeDataFieldLabel"])) 1268 1269 RelativeTorsionEnergies = SetupRelativeEnergies(TorsionEnergies) 1270 for Index, TorsionAngle in enumerate(TorsionAngles): 1271 TorsionEnergy = "%.*f" % (OptionsInfo["Precision"], TorsionEnergies[Index]) 1272 RelativeTorsionEnergy = "%.*f" % (OptionsInfo["Precision"], RelativeTorsionEnergies[Index]) 1273 Writer.write("%d,%s,%s\n" % (TorsionAngle, TorsionEnergy, RelativeTorsionEnergy)) 1274 1275 if Writer is not None: 1276 Writer.close() 1277 1278 def GeneratePlotOutfile(Outfile, Mol, MolNum, TorsionID, TorsionMatchNum, TorsionMols, TorsionEnergies, TorsionAngles): 1279 """Generate a plot corresponding to torsion angles and energies.""" 1280 1281 OutPlotParams = OptionsInfo["OutPlotParams"] 1282 1283 # Initialize seaborn and matplotlib paramaters... 1284 if not OptionsInfo["OutPlotInitialized"]: 1285 OptionsInfo["OutPlotInitialized"] = True 1286 RCParams = {"figure.figsize":(OutPlotParams["Width"], OutPlotParams["Height"]), 1287 "axes.titleweight": OutPlotParams["TitleWeight"], 1288 "axes.labelweight": OutPlotParams["LabelWeight"]} 1289 sns.set(context = OutPlotParams["Context"], style = OutPlotParams["Style"], palette = OutPlotParams["Palette"], font = OutPlotParams["Font"], font_scale = OutPlotParams["FontScale"], rc = RCParams) 1290 1291 # Create a new figure... 1292 plt.figure() 1293 1294 if OptionsInfo["OutPlotRelativeEnergy"]: 1295 TorsionEnergies = SetupRelativeEnergies(TorsionEnergies) 1296 1297 # Draw plot... 1298 PlotType = OutPlotParams["Type"] 1299 if re.match("linepoint", PlotType, re.I): 1300 Axis = sns.lineplot(x = TorsionAngles, y = TorsionEnergies, marker = "o", legend = False) 1301 elif re.match("scatter", PlotType, re.I): 1302 Axis = sns.scatterplot(x = TorsionAngles, y = TorsionEnergies, legend = False) 1303 elif re.match("line", PlotType, re.I): 1304 Axis = sns.lineplot(x = TorsionAngles, y = TorsionEnergies, legend = False) 1305 else: 1306 MiscUtil.PrintError("The value, %s, specified for \"type\" using option \"--outPlotParams\" is not supported. Valid plot types: linepoint, scatter or line" % (PlotType)) 1307 1308 # Setup title and labels... 1309 Title = OutPlotParams["Title"] 1310 if OptionsInfo["OutPlotTitleTorsionSpec"]: 1311 TorsionPattern = OptionsInfo["TorsionsPatternsInfo"]["Pattern"][TorsionID] 1312 Title = "%s: %s" % (OutPlotParams["Title"], TorsionPattern) 1313 1314 # Set labels and title... 1315 Axis.set(xlabel = OutPlotParams["XLabel"], ylabel = OutPlotParams["YLabel"], title = Title) 1316 1317 # Save figure... 1318 plt.savefig(Outfile) 1319 1320 # Close the plot... 1321 plt.close() 1322 1323 def SetupRelativeEnergies(Energies): 1324 """Set up a list of relative energies.""" 1325 1326 SortedEnergies = sorted(Energies) 1327 MinEnergy = SortedEnergies[0] 1328 RelativeEnergies = [(Energy - MinEnergy) for Energy in Energies] 1329 1330 return RelativeEnergies 1331 1332 def SetupOutputFileNames(Mol, MolNum, TorsionID, TorsionMatchNum): 1333 """Setup names of output files.""" 1334 1335 FileDir, FileName, FileExt = MiscUtil.ParseFileName(Options["--outfile"]) 1336 MolName = GetOutputFileMolName(Mol, MolNum) 1337 1338 OutfileRoot = "%s_%s_Torsion%s_Match%s" % (FileName, MolName, TorsionID, TorsionMatchNum) 1339 1340 StructureOutfile = "%s.%s" % (OutfileRoot, FileExt) 1341 EnergyTextOutfile = "%s_Energies.csv" % (OutfileRoot) 1342 PlotExt = OptionsInfo["OutPlotParams"]["OutExt"] 1343 PlotOutfile = "%s_Plot.%s" % (OutfileRoot, PlotExt) 1344 1345 return (StructureOutfile, EnergyTextOutfile, PlotOutfile) 1346 1347 def GetOutputFileMolName(Mol, MolNum): 1348 """Get output file prefix.""" 1349 1350 MolName = "Mol%s" % MolNum 1351 if OptionsInfo["OutfileMolName"]: 1352 MolName = re.sub("[^a-zA-Z0-9]", "_", RDKitUtil.GetMolName(Mol, MolNum), re.I) 1353 1354 return MolName 1355 1356 def ProcessTorsionRangeOptions(): 1357 """Process tosion range options. """ 1358 1359 TosionRangeMode = Options["--torsionRangeMode"] 1360 OptionsInfo["TosionRangeMode"] = TosionRangeMode 1361 1362 if re.match("^Range$", TosionRangeMode, re.I): 1363 ProcessTorsionRangeValues() 1364 elif re.match("^Angles$", TosionRangeMode, re.I): 1365 ProcessTorsionAnglesValues() 1366 else: 1367 MiscUtil.PrintError("The value, %s, option \"--torsionRangeMode\" is not supported." % TosionRangeMode) 1368 1369 1370 def ProcessTorsionRangeValues(): 1371 """Process tosion range values. """ 1372 1373 TorsionRange = Options["--torsionRange"] 1374 if re.match("^auto$", TorsionRange, re.I): 1375 TorsionRange = "0,360,5" 1376 TorsionRangeWords = TorsionRange.split(",") 1377 1378 TorsionStart = int(TorsionRangeWords[0]) 1379 TorsionStop = int(TorsionRangeWords[1]) 1380 TorsionStep = int(TorsionRangeWords[2]) 1381 1382 if TorsionStart >= TorsionStop: 1383 MiscUtil.PrintError("The start value, %d, specified for option \"--torsionRange\" in string \"%s\" must be less than stop value, %s." % (TorsionStart, Options["--torsionRange"], TorsionStop)) 1384 if TorsionStep == 0: 1385 MiscUtil.PrintError("The step value, %d, specified for option \"--torsonRange\" in string \"%s\" must be > 0." % (TorsionStep, Options["--torsionRange"])) 1386 if TorsionStep >= (TorsionStop - TorsionStart): 1387 MiscUtil.PrintError("The step value, %d, specified for option \"--torsonRange\" in string \"%s\" must be less than, %s." % (TorsionStep, Options["--torsionRange"], (TorsionStop - TorsionStart))) 1388 1389 if TorsionStart < 0: 1390 if TorsionStart < -180: 1391 MiscUtil.PrintError("The start value, %d, specified for option \"--torsionRange\" in string \"%s\" must be >= -180 to use scan range from -180 to 180." % (TorsionStart, Options["--torsionRange"])) 1392 if TorsionStop > 180: 1393 MiscUtil.PrintError("The stop value, %d, specified for option \"--torsionRange\" in string \"%s\" must be <= 180 to use scan range from -180 to 180." % (TorsionStop, Options["--torsionRange"])) 1394 else: 1395 if TorsionStop > 360: 1396 MiscUtil.PrintError("The stop value, %d, specified for option \"--torsionRange\" in string \"%s\" must be <= 360 to use scan range from 0 to 360." % (TorsionStop, Options["--torsionRange"])) 1397 1398 TorsionAngles = [Angle for Angle in range(TorsionStart, TorsionStop, TorsionStep)] 1399 TorsionAngles.append(TorsionStop) 1400 1401 OptionsInfo["TorsionRange"] = TorsionRange 1402 OptionsInfo["TorsionStart"] = TorsionStart 1403 OptionsInfo["TorsionStop"] = TorsionStop 1404 OptionsInfo["TorsionStep"] = TorsionStep 1405 1406 OptionsInfo["TorsionAngles"] = TorsionAngles 1407 1408 def ProcessTorsionAnglesValues(): 1409 """Process tosion angle values. """ 1410 1411 TorsionRange = Options["--torsionRange"] 1412 if re.match("^auto$", TorsionRange, re.I): 1413 MiscUtil.PrintError("The value specified, %s, for option \"--torsionRange\" is not valid." % (TorsionRange)) 1414 1415 TorsionAngles = [] 1416 1417 for TorsionAngle in TorsionRange.split(","): 1418 TorsionAngle = int(TorsionAngle) 1419 1420 if TorsionAngle < -180: 1421 MiscUtil.PrintError("The stop value, %d, specified for option \"--torsionRange\" in string \"%s\" must be >= -180." % (TorsionAngle, TorsionRange)) 1422 1423 if TorsionAngle > 360: 1424 MiscUtil.PrintError("The stop value, %d, specified for option \"--torsionRange\" in string \"%s\" must be <= 360." % (TorsionAngle, TorsionRange)) 1425 1426 TorsionAngles.append(TorsionAngle) 1427 1428 OptionsInfo["TorsionRange"] = TorsionRange 1429 OptionsInfo["TorsionStart"] = None 1430 OptionsInfo["TorsionStop"] = None 1431 OptionsInfo["TorsionStep"] = None 1432 1433 OptionsInfo["TorsionAngles"] = sorted(TorsionAngles) 1434 1435 def ProcessOptions(): 1436 """Process and validate command line arguments and options.""" 1437 1438 MiscUtil.PrintInfo("Processing options...") 1439 1440 # Validate options... 1441 ValidateOptions() 1442 1443 OptionsInfo["ModeMols"] = Options["--modeMols"] 1444 OptionsInfo["FirstMolMode"] = True if re.match("^First$", Options["--modeMols"], re.I) else False 1445 1446 OptionsInfo["ModeTorsions"] = Options["--modeTorsions"] 1447 OptionsInfo["FirstTorsionMode"] = True if re.match("^First$", Options["--modeTorsions"], re.I) else False 1448 1449 OptionsInfo["Infile"] = Options["--infile"] 1450 OptionsInfo["SMILESInfileStatus"] = True if MiscUtil.CheckFileExt(Options["--infile"], "smi csv tsv txt") else False 1451 ParamsDefaultInfoOverride = {"RemoveHydrogens": False} 1452 OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters("--infileParams", Options["--infileParams"], InfileName = Options["--infile"], ParamsDefaultInfo = ParamsDefaultInfoOverride) 1453 OptionsInfo["Infile3D"] = True if re.match("^yes$", Options["--infile3D"], re.I) else False 1454 1455 OptionsInfo["Outfile"] = Options["--outfile"] 1456 OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters("--outfileParams", Options["--outfileParams"]) 1457 1458 # Method, basis set, and reference wavefunction... 1459 OptionsInfo["BasisSet"] = Options["--basisSet"] 1460 OptionsInfo["BasisSetAuto"] = True if re.match("^auto$", Options["--basisSet"], re.I) else False 1461 1462 OptionsInfo["MethodName"] = Options["--methodName"] 1463 OptionsInfo["MethodNameAuto"] = True if re.match("^auto$", Options["--methodName"], re.I) else False 1464 1465 OptionsInfo["Reference"] = Options["--reference"] 1466 OptionsInfo["ReferenceAuto"] = True if re.match("^auto$", Options["--reference"], re.I) else False 1467 1468 # Run and options parameters... 1469 OptionsInfo["Psi4OptionsParams"] = Psi4Util.ProcessPsi4OptionsParameters("--psi4OptionsParams", Options["--psi4OptionsParams"]) 1470 OptionsInfo["Psi4RunParams"] = Psi4Util.ProcessPsi4RunParameters("--psi4RunParams", Options["--psi4RunParams"], InfileName = OptionsInfo["Infile"]) 1471 1472 # Conformer generation paramaters... 1473 ParamsDefaultInfoOverride = {"MaxConfs": 250, "MaxConfsTorsions": 50} 1474 OptionsInfo["ConfGenerationParams"] = MiscUtil.ProcessOptionConformerParameters("--confParams", Options["--confParams"], ParamsDefaultInfoOverride) 1475 1476 # Energy units and label... 1477 OptionsInfo["EnergyUnits"] = Options["--energyUnits"] 1478 1479 EnergyDataFieldLabel = Options["--energyDataFieldLabel"] 1480 if re.match("^auto$", EnergyDataFieldLabel, re.I): 1481 EnergyDataFieldLabel = "Psi4_Energy (%s)" % Options["--energyUnits"] 1482 OptionsInfo["EnergyDataFieldLabel"] = EnergyDataFieldLabel 1483 1484 EnergyRelativeDataFieldLabel = Options["--energyRelativeDataFieldLabel"] 1485 if re.match("^auto$", EnergyRelativeDataFieldLabel, re.I): 1486 EnergyRelativeDataFieldLabel = "Psi4_Relative_Energy (%s)" % Options["--energyUnits"] 1487 OptionsInfo["EnergyRelativeDataFieldLabel"] = EnergyRelativeDataFieldLabel 1488 1489 # Plot parameters... 1490 OptionsInfo["OutfileMolName"] = True if re.match("^yes$", Options["--outfileMolName"], re.I) else False 1491 OptionsInfo["OutPlotRelativeEnergy"] = True if re.match("^yes$", Options["--outPlotRelativeEnergy"], re.I) else False 1492 OptionsInfo["OutPlotTitleTorsionSpec"] = True if re.match("^yes$", Options["--outPlotTitleTorsionSpec"], re.I) else False 1493 1494 # The default width and height, 10.0 and 7.5, map to aspect raito of 16/9 (1.778)... 1495 if OptionsInfo["OutPlotRelativeEnergy"]: 1496 EnergyLabel = "Relative Energy (%s)" % OptionsInfo["EnergyUnits"] 1497 else: 1498 EnergyLabel = "Energy (%s)" % OptionsInfo["EnergyUnits"] 1499 1500 DefaultValues = {'Type': 'linepoint', 'Width': 10.0, 'Height': 5.6, 'Title': 'Psi4 Torsion Scan', 'XLabel': 'Torsion Angle (degrees)', 'YLabel': EnergyLabel} 1501 OptionsInfo["OutPlotParams"] = MiscUtil.ProcessOptionSeabornPlotParameters("--outPlotParams", Options["--outPlotParams"], DefaultValues) 1502 if not re.match("^(linepoint|scatter|Line)$", OptionsInfo["OutPlotParams"]["Type"], re.I): 1503 MiscUtil.PrintError("The value, %s, specified for \"type\" using option \"--outPlotParams\" is not supported. Valid plot types: linepoint, scatter or line" % (OptionsInfo["OutPlotParams"]["Type"])) 1504 1505 OptionsInfo["OutPlotInitialized"] = False 1506 1507 OptionsInfo["Overwrite"] = Options["--overwrite"] 1508 1509 OptionsInfo["MaxIters"] = int(Options["--maxIters"]) 1510 1511 OptionsInfo["MPMode"] = True if re.match("^yes$", Options["--mp"], re.I) else False 1512 OptionsInfo["MPParams"] = MiscUtil.ProcessOptionMultiprocessingParameters("--mpParams", Options["--mpParams"]) 1513 1514 # Multiprocessing level... 1515 MPLevelMoleculesMode = False 1516 MPLevelTorsionAnglesMode = False 1517 MPLevel = Options["--mpLevel"] 1518 if re.match("^Molecules$", MPLevel, re.I): 1519 MPLevelMoleculesMode = True 1520 elif re.match("^TorsionAngles$", MPLevel, re.I): 1521 MPLevelTorsionAnglesMode = True 1522 else: 1523 MiscUtil.PrintError("The value, %s, specified for option \"--mpLevel\" is not valid. " % MPLevel) 1524 OptionsInfo["MPLevel"] = MPLevel 1525 OptionsInfo["MPLevelMoleculesMode"] = MPLevelMoleculesMode 1526 OptionsInfo["MPLevelTorsionAnglesMode"] = MPLevelTorsionAnglesMode 1527 1528 OptionsInfo["Precision"] = int(Options["--precision"]) 1529 OptionsInfo["QuietMode"] = True if re.match("^yes$", Options["--quiet"], re.I) else False 1530 1531 # Procsss and validate specified SMILES/SMARTS torsion patterns... 1532 TorsionPatterns = Options["--torsions"] 1533 TorsionPatternsList = [] 1534 for TorsionPattern in TorsionPatterns.split(","): 1535 TorsionPattern = TorsionPattern.strip() 1536 if not len(TorsionPattern): 1537 MiscUtil.PrintError("Empty value specified for SMILES/SMARTS pattern in \"-t, --torsions\" option: %s" % TorsionPatterns) 1538 1539 TorsionMol = Chem.MolFromSmarts(TorsionPattern) 1540 if TorsionMol is None: 1541 MiscUtil.PrintError("Failed to create torsion pattern molecule. The torsion SMILES/SMARTS pattern, \"%s\", specified using \"-t, --torsions\" option, \"%s\", is not valid." % (TorsionPattern, TorsionPatterns)) 1542 TorsionPatternsList.append(TorsionPattern) 1543 1544 OptionsInfo["TorsionPatterns"] = TorsionPatterns 1545 OptionsInfo["TorsionPatternsList"] = TorsionPatternsList 1546 1547 # Process and validate any specified torsion atom indices for filtering torsion matches... 1548 TorsionsFilterByAtomIndices = Options["--torsionsFilterbyAtomIndices"] 1549 TorsionsFilterByAtomIndicesList = [] 1550 if not re.match("^None$", TorsionsFilterByAtomIndices, re.I): 1551 for AtomIndex in TorsionsFilterByAtomIndices.split(","): 1552 AtomIndex = AtomIndex.strip() 1553 if not MiscUtil.IsInteger(AtomIndex): 1554 MiscUtil.PrintError("The value specified, %s, for option \"--torsionsFilterbyAtomIndices\" must be an integer." % AtomIndex) 1555 AtomIndex = int(AtomIndex) 1556 if AtomIndex < 0: 1557 MiscUtil.PrintError("The value specified, %s, for option \"--torsionsFilterbyAtomIndices\" must be >= 0." % AtomIdex) 1558 TorsionsFilterByAtomIndicesList.append(AtomIndex) 1559 1560 if len(TorsionsFilterByAtomIndicesList) < 4: 1561 MiscUtil.PrintError("The number of values, %s, specified, %s, for option \"--torsionsFilterbyAtomIndices\" must be >=4." % (len(TorsionsFilterByAtomIndicesList), TorsionsFilterByAtomIndices)) 1562 1563 OptionsInfo["TorsionsFilterByAtomIndices"] = TorsionsFilterByAtomIndices 1564 OptionsInfo["TorsionsFilterByAtomIndicesList"] = TorsionsFilterByAtomIndicesList 1565 OptionsInfo["FilterTorsionsByAtomIndicesMode"] = True if len(TorsionsFilterByAtomIndicesList) > 0 else False 1566 1567 OptionsInfo["TorsionMaxMatches"] = int(Options["--torsionMaxMatches"]) 1568 OptionsInfo["TorsionMinimize"] = True if re.match("^yes$", Options["--torsionMinimize"], re.I) else False 1569 1570 ProcessTorsionRangeOptions() 1571 1572 TorsionRange = Options["--torsionRange"] 1573 TorsionRangeWords = TorsionRange.split(",") 1574 1575 OptionsInfo["UseChirality"] = True if re.match("^yes$", Options["--useChirality"], re.I) else False 1576 1577 def RetrieveOptions(): 1578 """Retrieve command line arguments and options.""" 1579 1580 # Get options... 1581 global Options 1582 Options = docopt(_docoptUsage_) 1583 1584 # Set current working directory to the specified directory... 1585 WorkingDir = Options["--workingdir"] 1586 if WorkingDir: 1587 os.chdir(WorkingDir) 1588 1589 # Handle examples option... 1590 if "--examples" in Options and Options["--examples"]: 1591 MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_)) 1592 sys.exit(0) 1593 1594 def ValidateOptions(): 1595 """Validate option values.""" 1596 1597 MiscUtil.ValidateOptionTextValue("--energyUnits", Options["--energyUnits"], "Hartrees kcal/mol kJ/mol eV") 1598 1599 MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"]) 1600 MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "sdf sd mol smi txt csv tsv") 1601 MiscUtil.ValidateOptionTextValue("--infile3D", Options["--infile3D"], "yes no") 1602 1603 MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "sdf sd") 1604 MiscUtil.ValidateOptionsOutputFileOverwrite("-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"]) 1605 MiscUtil.ValidateOptionsDistinctFileNames("-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"]) 1606 1607 if not Options["--overwrite"]: 1608 FileDir, FileName, FileExt = MiscUtil.ParseFileName(Options["--outfile"]) 1609 FileNames = glob.glob("%s_*" % FileName) 1610 if len(FileNames): 1611 MiscUtil.PrintError("The outfile names, %s_*, generated from file specified, %s, for option \"-o, --outfile\" already exist. Use option \"--overwrite\" or \"--ov\" and try again.\n" % (FileName, Options["--outfile"])) 1612 1613 MiscUtil.ValidateOptionTextValue("--outPlotRelativeEnergy", Options["--outPlotRelativeEnergy"], "yes no") 1614 MiscUtil.ValidateOptionTextValue("--outPlotTitleTorsionSpec", Options["--outPlotTitleTorsionSpec"], "yes no") 1615 1616 MiscUtil.ValidateOptionTextValue("--outfileMolName ", Options["--outfileMolName"], "yes no") 1617 1618 MiscUtil.ValidateOptionTextValue("--modeMols", Options["--modeMols"], "First All") 1619 MiscUtil.ValidateOptionTextValue("--modeTorsions", Options["--modeTorsions"], "First All") 1620 1621 MiscUtil.ValidateOptionIntegerValue("--maxIters", Options["--maxIters"], {">": 0}) 1622 1623 MiscUtil.ValidateOptionTextValue("--mp", Options["--mp"], "yes no") 1624 MiscUtil.ValidateOptionTextValue("--mpLevel", Options["--mpLevel"], "Molecules TorsionAngles") 1625 1626 MiscUtil.ValidateOptionIntegerValue("-p, --precision", Options["--precision"], {">": 0}) 1627 MiscUtil.ValidateOptionTextValue("-q, --quiet", Options["--quiet"], "yes no") 1628 1629 MiscUtil.ValidateOptionIntegerValue("--torsionMaxMatches", Options["--torsionMaxMatches"], {">": 0}) 1630 MiscUtil.ValidateOptionTextValue("--torsionMinimize", Options["--torsionMinimize"], "yes no") 1631 1632 MiscUtil.ValidateOptionTextValue("--torsionRangeMode", Options["--torsionRangeMode"], "Range or Angles") 1633 TorsionRange = Options["--torsionRange"] 1634 if re.match("^Range$", Options["--torsionRangeMode"], re.I): 1635 if not re.match("^auto$", TorsionRange, re.I): 1636 MiscUtil.ValidateOptionNumberValues("--torsionRange", TorsionRange, 3, ",", "integer", {}) 1637 else: 1638 if re.match("^auto$", TorsionRange, re.I): 1639 MiscUtil.PrintError("The value, %s, specified for option \"-torsionRange\" is not valid for \"%s\" value of \"--torsionRangeMode\" option. You must specify a torsion angle or a comma delimited list of torsion angles." % (TorsionRange, Options["--torsionRangeMode"])) 1640 TorsionAngles = [] 1641 for TorsionAngle in TorsionRange.split(","): 1642 TorsionAngle = TorsionAngle.strip() 1643 if not MiscUtil.IsInteger(TorsionAngle): 1644 MiscUtil.PrintError("The value specified, %s, for option \"--torsionRange\" in string \"%s\" must be an integer." % (TorsionAngle, TorsionRange)) 1645 if TorsionAngle in TorsionAngles: 1646 MiscUtil.PrintError("The value specified, %s, for option \"--torsionRange\" in string \"%s\" is a duplicate value." % (TorsionAngle, TorsionRange)) 1647 TorsionAngles.append(TorsionAngle) 1648 1649 MiscUtil.ValidateOptionTextValue("--useChirality", Options["--useChirality"], "yes no") 1650 1651 # Setup a usage string for docopt... 1652 _docoptUsage_ = """ 1653 Psi4PerformTorsionScan.py - Perform torsion scan 1654 1655 Usage: 1656 Psi4PerformTorsionScan.py [--basisSet <text>] [--confParams <Name,Value,...>] [--energyDataFieldLabel <text>] 1657 [--energyRelativeDataFieldLabel <text>] [--energyUnits <text>] [--infile3D <yes or no>] 1658 [--infileParams <Name,Value,...>] [--maxIters <number>] [--methodName <text>] 1659 [--modeMols <First or All>] [--modeTorsions <First or All>] [--mp <yes or no>] 1660 [--mpLevel <Molecules or TorsionAngles>] [--mpParams <Name,Value,...>] 1661 [--outfileMolName <yes or no>] [--outfileParams <Name,Value,...>] [--outPlotParams <Name,Value,...>] 1662 [--outPlotRelativeEnergy <yes or no>] [--outPlotTitleTorsionSpec <yes or no>] [--overwrite] 1663 [--precision <number>] [--psi4OptionsParams <Name,Value,...>] [--psi4RunParams <Name,Value,...>] 1664 [--quiet <yes or no>] [--reference <text>] [--torsionsFilterbyAtomIndices <Index1, Index2, ...>] 1665 [--torsionMaxMatches <number>] [--torsionMinimize <yes or no>] [--torsionRangeMode <Range or Angles>] 1666 [--torsionRange <Start,Stop,Step or Angle1,Angle2,...>] [--useChirality <yes or no>] 1667 [-w <dir>] -t <torsions> -i <infile> -o <outfile> 1668 Psi4PerformTorsionScan.py -h | --help | -e | --examples 1669 1670 Description: 1671 Perform torsion scan for molecules around torsion angles specified using 1672 SMILES/SMARTS patterns. A molecule is optionally minimized before performing 1673 a torsion scan using a forcefield. A set of initial 3D structures are generated for 1674 a molecule by scanning the torsion angle across the specified range and updating 1675 the 3D coordinates of the molecule. A conformation ensemble is optionally generated 1676 for each 3D structure representing a specific torsion angle using a combination of 1677 distance geometry and forcefield followed by constrained geometry optimization 1678 using a quantum chemistry method. The conformation with the lowest energy is 1679 selected to represent the torsion angle. An option is available to skip the generation 1680 of the conformation ensemble and simply calculate the energy for the initial 3D 1681 structure for a specific torsion torsion angle using a quantum chemistry method. 1682 1683 The torsions are specified using SMILES or SMARTS patterns. A substructure match 1684 is performed to select torsion atoms in a molecule. The SMILES pattern match must 1685 correspond to four torsion atoms. The SMARTS patterns containing atom map numbers 1686 may match more than four atoms. The atom map numbers, however, must match 1687 exactly four torsion atoms. For example: [s:1][c:2]([aX2,cH1])!@[CX3:3](O)=[O:4] for 1688 thiophene esters and carboxylates as specified in Torsion Library (TorLib) [Ref 146]. 1689 1690 A Psi4 XYZ format geometry string is automatically generated for each molecule 1691 in input file. It contains atom symbols and 3D coordinates for each atom in a 1692 molecule. In addition, the formal charge and spin multiplicity are present in the 1693 the geometry string. These values are either retrieved from molecule properties 1694 named 'FormalCharge' and 'SpinMultiplicty' or dynamically calculated for a 1695 molecule. 1696 1697 A set of four output files is generated for each torsion match in each 1698 molecule. The names of the output files are generated using the root of 1699 the specified output file. They may either contain sequential molecule 1700 numbers or molecule names as shown below: 1701 1702 <OutfileRoot>_Mol<Num>.sdf 1703 <OutfileRoot>_Mol<Num>_Torsion<Num>_Match<Num>.sdf 1704 <OutfileRoot>_Mol<Num>_Torsion<Num>_Match<Num>_Energies.csv 1705 <OutfileRoot>_Mol<Num>_Torsion<Num>_Match<Num>_Plot.<ImgExt> 1706 1707 or 1708 1709 <OutfileRoot>_<MolName>.sdf 1710 <OutfileRoot>_<MolName>_Torsion<Num>_Match<Num>.sdf 1711 <OutfileRoot>_<MolName>_Torsion<Num>_Match<Num>_Energies.csv 1712 <OutfileRoot>_<MolName>_Torsion<Num>_Match<Num>_Plot.<ImgExt> 1713 1714 The supported input file formats are: Mol (.mol), SD (.sdf, .sd), SMILES (.smi, 1715 .csv, .tsv, .txt) 1716 1717 The supported output file formats are: SD (.sdf, .sd) 1718 1719 Options: 1720 -b, --basisSet <text> [default: auto] 1721 Basis set to use for energy calculation or constrained energy minimization. 1722 Default: 6-31+G** for sulfur containing molecules; Otherwise, 6-31G** [ Ref 150 ]. 1723 The specified value must be a valid Psi4 basis set. No validation is performed. 1724 1725 The following list shows a representative sample of basis sets available 1726 in Psi4: 1727 1728 STO-3G, 6-31G, 6-31+G, 6-31++G, 6-31G*, 6-31+G*, 6-31++G*, 1729 6-31G**, 6-31+G**, 6-31++G**, 6-311G, 6-311+G, 6-311++G, 1730 6-311G*, 6-311+G*, 6-311++G*, 6-311G**, 6-311+G**, 6-311++G**, 1731 cc-pVDZ, cc-pCVDZ, aug-cc-pVDZ, cc-pVDZ-DK, cc-pCVDZ-DK, def2-SVP, 1732 def2-SVPD, def2-TZVP, def2-TZVPD, def2-TZVPP, def2-TZVPPD 1733 1734 --confParams <Name,Value,...> [default: auto] 1735 A comma delimited list of parameter name and value pairs for generating 1736 initial 3D coordinates for molecules in input file at specific torsion angles. A 1737 conformation ensemble is optionally generated for each 3D structure 1738 representing a specific torsion angle using a combination of distance geometry 1739 and forcefield followed by constrained geometry optimization using a quantum 1740 chemistry method. The conformation with the lowest energy is selected to 1741 represent the torsion angle. 1742 1743 The supported parameter names along with their default values are shown 1744 below: 1745 1746 confMethod,ETKDGv2, 1747 forceField,MMFF, forceFieldMMFFVariant,MMFF94, 1748 enforceChirality,yes,embedRMSDCutoff,0.5,maxConfs,250, 1749 maxConfsTorsions,50,useTethers,yes 1750 1751 confMethod,ETKDGv2 [ Possible values: SDG, KDG, ETDG, 1752 ETKDG , or ETKDGv2] 1753 forceField, MMFF [ Possible values: UFF or MMFF ] 1754 forceFieldMMFFVariant,MMFF94 [ Possible values: MMFF94 or MMFF94s ] 1755 enforceChirality,yes [ Possible values: yes or no ] 1756 useTethers,yes [ Possible values: yes or no ] 1757 1758 confMethod: Conformation generation methodology for generating initial 3D 1759 coordinates. Possible values: Standard Distance Geometry (SDG), Experimental 1760 Torsion-angle preference with Distance Geometry (ETDG), basic Knowledge-terms 1761 with Distance Geometry (KDG) and Experimental Torsion-angle preference 1762 along with basic Knowledge-terms and Distance Geometry (ETKDG or 1763 ETKDGv2) [Ref 129, 167] . 1764 1765 forceField: Forcefield method to use for energy minimization. Possible values: 1766 Universal Force Field (UFF) [ Ref 81 ] or Merck Molecular Mechanics Force 1767 Field [ Ref 83-87 ] . 1768 1769 enforceChirality: Enforce chirality for defined chiral centers during 1770 forcefield minimization. 1771 1772 maxConfs: Maximum number of conformations to generate for each molecule 1773 during the generation of an initial 3D conformation ensemble using a conformation 1774 generation methodology. The conformations are minimized using the specified 1775 forcefield. The lowest energy structure is selected for performing the torsion scan. 1776 1777 maxConfsTorsion: Maximum number of 3D conformations to generate for 1778 conformation ensemble representing a specific torsion. The conformations are 1779 constrained at specific torsions angles and minimized using the specified forcefield 1780 and a quantum chemistry method. The lowest energy conformation is selected to 1781 calculate final torsion energy and written to the output file. 1782 1783 embedRMSDCutoff: RMSD cutoff for retaining initial set of conformers embedded 1784 using distance geometry and forcefield minimization. All embedded conformers 1785 are kept for 'None' value. Otherwise, only those conformers which are different 1786 from each other by the specified RMSD cutoff, 0.5 by default, are kept. The first 1787 embedded conformer is always retained. 1788 1789 useTethers: Use tethers to optimize the final embedded conformation by 1790 applying a series of extra forces to align matching atoms to the positions of 1791 the core atoms. Otherwise, use simple distance constraints during the 1792 optimization. 1793 --energyDataFieldLabel <text> [default: auto] 1794 Energy data field label for writing energy values. Default: Psi4_Energy (<Units>). 1795 --energyRelativeDataFieldLabel <text> [default: auto] 1796 Relative energy data field label for writing energy values. Default: 1797 Psi4_Relative_Energy (<Units>). 1798 --energyUnits <text> [default: kcal/mol] 1799 Energy units. Possible values: Hartrees, kcal/mol, kJ/mol, or eV. 1800 -e, --examples 1801 Print examples. 1802 -h, --help 1803 Print this help message. 1804 -i, --infile <infile> 1805 Input file name. 1806 --infile3D <yes or no> [default: no] 1807 Skip generation and minimization of initial 3D structures for molecules in 1808 input file containing 3D coordinates. 1809 --infileParams <Name,Value,...> [default: auto] 1810 A comma delimited list of parameter name and value pairs for reading 1811 molecules from files. The supported parameter names for different file 1812 formats, along with their default values, are shown below: 1813 1814 SD, MOL: removeHydrogens,no,sanitize,yes,strictParsing,yes 1815 1816 SMILES: smilesColumn,1,smilesNameColumn,2,smilesDelimiter,space, 1817 smilesTitleLine,auto,sanitize,yes 1818 1819 Possible values for smilesDelimiter: space, comma or tab. 1820 --maxIters <number> [default: 50] 1821 Maximum number of iterations to perform for each molecule or conformer 1822 during constrained energy minimization by a quantum chemistry method. 1823 -m, --methodName <text> [default: auto] 1824 Method to use for energy calculation or constrained energy minimization. 1825 Default: B3LYP [ Ref 150 ]. The specified value must be a valid Psi4 method 1826 name. No validation is performed. 1827 1828 The following list shows a representative sample of methods available 1829 in Psi4: 1830 1831 B1LYP, B2PLYP, B2PLYP-D3BJ, B2PLYP-D3MBJ, B3LYP, B3LYP-D3BJ, 1832 B3LYP-D3MBJ, CAM-B3LYP, CAM-B3LYP-D3BJ, HF, HF-D3BJ, HF3c, M05, 1833 M06, M06-2x, M06-HF, M06-L, MN12-L, MN15, MN15-D3BJ,PBE, PBE0, 1834 PBEH3c, PW6B95, PW6B95-D3BJ, WB97, WB97X, WB97X-D, WB97X-D3BJ 1835 1836 --modeMols <First or All> [default: First] 1837 Perform torsion scan for the first molecule or all molecules in input 1838 file. 1839 --modeTorsions <First or All> [default: First] 1840 Perform torsion scan for the first or all specified torsion pattern in 1841 molecules up to a maximum number of matches for each torsion 1842 specification as indicated by '--torsionMaxMatches' option. 1843 --mp <yes or no> [default: no] 1844 Use multiprocessing. 1845 1846 By default, input data is retrieved in a lazy manner via mp.Pool.imap() 1847 function employing lazy RDKit data iterable. This allows processing of 1848 arbitrary large data sets without any additional requirements memory. 1849 1850 All input data may be optionally loaded into memory by mp.Pool.map() 1851 before starting worker processes in a process pool by setting the value 1852 of 'inputDataMode' to 'InMemory' in '--mpParams' option. 1853 1854 A word to the wise: The default 'chunkSize' value of 1 during 'Lazy' input 1855 data mode may adversely impact the performance. The '--mpParams' section 1856 provides additional information to tune the value of 'chunkSize'. 1857 --mpLevel <Molecules or TorsionAngles> [default: Molecules] 1858 Perform multiprocessing at molecules or torsion angles level. Possible values: 1859 Molecules or TorsionAngles. The 'Molecules' value starts a process pool at the 1860 molecules level. All torsion angles of a molecule are processed in a single 1861 process. The 'TorsionAngles' value, however, starts a process pool at the 1862 torsion angles level. Each torsion angle in a torsion match for a molecule is 1863 processed in an individual process in the process pool. 1864 --mpParams <Name,Value,...> [default: auto] 1865 A comma delimited list of parameter name and value pairs to configure 1866 multiprocessing. 1867 1868 The supported parameter names along with their default and possible 1869 values are shown below: 1870 1871 chunkSize, auto 1872 inputDataMode, Lazy [ Possible values: InMemory or Lazy ] 1873 numProcesses, auto [ Default: mp.cpu_count() ] 1874 1875 These parameters are used by the following functions to configure and 1876 control the behavior of multiprocessing: mp.Pool(), mp.Pool.map(), and 1877 mp.Pool.imap(). 1878 1879 The chunkSize determines chunks of input data passed to each worker 1880 process in a process pool by mp.Pool.map() and mp.Pool.imap() functions. 1881 The default value of chunkSize is dependent on the value of 'inputDataMode'. 1882 1883 The mp.Pool.map() function, invoked during 'InMemory' input data mode, 1884 automatically converts RDKit data iterable into a list, loads all data into 1885 memory, and calculates the default chunkSize using the following method 1886 as shown in its code: 1887 1888 chunkSize, extra = divmod(len(dataIterable), len(numProcesses) * 4) 1889 if extra: chunkSize += 1 1890 1891 For example, the default chunkSize will be 7 for a pool of 4 worker processes 1892 and 100 data items. 1893 1894 The mp.Pool.imap() function, invoked during 'Lazy' input data mode, employs 1895 'lazy' RDKit data iterable to retrieve data as needed, without loading all the 1896 data into memory. Consequently, the size of input data is not known a priori. 1897 It's not possible to estimate an optimal value for the chunkSize. The default 1898 chunkSize is set to 1. 1899 1900 The default value for the chunkSize during 'Lazy' data mode may adversely 1901 impact the performance due to the overhead associated with exchanging 1902 small chunks of data. It is generally a good idea to explicitly set chunkSize to 1903 a larger value during 'Lazy' input data mode, based on the size of your input 1904 data and number of processes in the process pool. 1905 1906 The mp.Pool.map() function waits for all worker processes to process all 1907 the data and return the results. The mp.Pool.imap() function, however, 1908 returns the the results obtained from worker processes as soon as the 1909 results become available for specified chunks of data. 1910 1911 The order of data in the results returned by both mp.Pool.map() and 1912 mp.Pool.imap() functions always corresponds to the input data. 1913 -o, --outfile <outfile> 1914 Output file name. The output file root is used for generating the names 1915 of the output files corresponding to structures, energies, and plots during 1916 the torsion scan. 1917 --outfileMolName <yes or no> [default: no] 1918 Append molecule name to output file root during the generation of the names 1919 for output files. The default is to use <MolNum>. The non alphabetical 1920 characters in molecule names are replaced by underscores. 1921 --outfileParams <Name,Value,...> [default: auto] 1922 A comma delimited list of parameter name and value pairs for writing 1923 molecules to files. The supported parameter names for different file 1924 formats, along with their default values, are shown below: 1925 1926 SD: kekulize,yes,forceV3000,no 1927 1928 --outPlotParams <Name,Value,...> [default: auto] 1929 A comma delimited list of parameter name and value pairs for generating 1930 plots using Seaborn module. The supported parameter names along with their 1931 default values are shown below: 1932 1933 type,linepoint,outExt,svg,width,10,height,5.6, 1934 title,auto,xlabel,auto,ylabel,auto,titleWeight,bold,labelWeight,bold 1935 style,darkgrid,palette,deep,font,sans-serif,fontScale,1, 1936 context,notebook 1937 1938 Possible values: 1939 1940 type: linepoint, scatter, or line. Both points and lines are drawn 1941 for linepoint plot type. 1942 outExt: Any valid format supported by Python module Matplotlib. 1943 For example: PDF (.pdf), PNG (.png), PS (.ps), SVG (.svg) 1944 titleWeight, labelWeight: Font weight for title and axes labels. 1945 Any valid value. 1946 style: darkgrid, whitegrid, dark, white, ticks 1947 palette: deep, muted, pastel, dark, bright, colorblind 1948 font: Any valid font name 1949 context: paper, notebook, talk, poster, or any valid name 1950 1951 --outPlotRelativeEnergy <yes or no> [default: yes] 1952 Plot relative energies in the torsion plot. The minimum energy value is 1953 subtracted from energy values to calculate relative energies. 1954 --outPlotTitleTorsionSpec <yes or no> [default: yes] 1955 Append torsion specification to the title of the torsion plot. 1956 --overwrite 1957 Overwrite existing files. 1958 --precision <number> [default: 6] 1959 Floating point precision for writing energy values. 1960 --psi4OptionsParams <Name,Value,...> [default: none] 1961 A comma delimited list of Psi4 option name and value pairs for setting 1962 global and module options. The names are 'option_name' for global options 1963 and 'module_name__option_name' for options local to a module. The 1964 specified option names must be valid Psi4 names. No validation is 1965 performed. 1966 1967 The specified option name and value pairs are processed and passed to 1968 psi4.set_options() as a dictionary. The supported value types are float, 1969 integer, boolean, or string. The float value string is converted into a float. 1970 The valid values for a boolean string are yes, no, true, false, on, or off. 1971 --psi4RunParams <Name,Value,...> [default: auto] 1972 A comma delimited list of parameter name and value pairs for configuring 1973 Psi4 jobs. 1974 1975 The supported parameter names along with their default and possible 1976 values are shown below: 1977 1978 MemoryInGB, 1 1979 NumThreads, 1 1980 OutputFile, auto [ Possible values: stdout, quiet, or FileName ] 1981 ScratchDir, auto [ Possivle values: DirName] 1982 RemoveOutputFile, yes [ Possible values: yes, no, true, or false] 1983 1984 These parameters control the runtime behavior of Psi4. 1985 1986 The default file name for 'OutputFile' is <InFileRoot>_Psi4.out. The PID 1987 is appended to output file name during multiprocessing as shown: 1988 <InFileRoot>_Psi4_<PIDNum>.out. The 'stdout' value for 'OutputType' 1989 sends Psi4 output to stdout. The 'quiet' or 'devnull' value suppresses 1990 all Psi4 output. The 'OutputFile' is set to 'quiet' for 'auto' value during 1991 'Conformers' of '--mpLevel' option. 1992 1993 The default 'Yes' value of 'RemoveOutputFile' option forces the removal 1994 of any existing Psi4 before creating new files to append output from 1995 multiple Psi4 runs. 1996 1997 The option 'ScratchDir' is a directory path to the location of scratch 1998 files. The default value corresponds to Psi4 default. It may be used to 1999 override the deafult path. 2000 -q, --quiet <yes or no> [default: no] 2001 Use quiet mode. The warning and information messages will not be printed. 2002 --reference <text> [default: auto] 2003 Reference wave function to use for energy calculation or constrained energy 2004 minimization. Default: RHF or UHF. The default values are Restricted Hartree-Fock 2005 (RHF) for closed-shell molecules with all electrons paired and Unrestricted 2006 Hartree-Fock (UHF) for open-shell molecules with unpaired electrons. 2007 2008 The specified value must be a valid Psi4 reference wave function. No validation 2009 is performed. For example: ROHF, CUHF, RKS, etc. 2010 2011 The spin multiplicity determines the default value of reference wave function 2012 for input molecules. It is calculated from number of free radical electrons using 2013 Hund's rule of maximum multiplicity defined as 2S + 1 where S is the total 2014 electron spin. The total spin is 1/2 the number of free radical electrons in a 2015 molecule. The value of 'SpinMultiplicity' molecule property takes precedence 2016 over the calculated value of spin multiplicity. 2017 -t, --torsions <SMILES/SMARTS,...,...> 2018 SMILES/SMARTS patterns corresponding to torsion specifications. It's a 2019 comma delimited list of valid SMILES/SMART patterns. 2020 2021 A substructure match is performed to select torsion atoms in a molecule. 2022 The SMILES pattern match must correspond to four torsion atoms. The 2023 SMARTS patterns containing atom map numbers may match more than four 2024 atoms. The atom map numbers, however, must match exactly four torsion 2025 atoms. For example: [s:1][c:2]([aX2,cH1])!@[CX3:3](O)=[O:4] for thiophene 2026 esters and carboxylates as specified in Torsion Library (TorLib) [Ref 146]. 2027 --torsionsFilterbyAtomIndices <Index1, Index2, ...> [default: none] 2028 Comma delimited list of atom indices for filtering torsion matches 2029 corresponding to torsion specifications "-t, --torsions". The atom indices 2030 must be valid. No explicit validation is performed. The list must contain at 2031 least 4 atom indices. 2032 2033 The torsion atom indices, matched by "-t, --torsions" specifications, must be 2034 present in the list. Otherwise, the torsion matches are ignored. 2035 --torsionMaxMatches <number> [default: 5] 2036 Maximum number of torsions to match for each torsion specification in a 2037 molecule. 2038 --torsionMinimize <yes or no> [default: no] 2039 Perform constrained energy minimization on a conformation ensemble 2040 for a specific torsion angle and select the lowest energy conformation 2041 representing the torsion angle. A conformation ensemble is generated for 2042 each 3D structure representing a specific torsion angle using a combination 2043 of distance geometry and forcefield followed by constrained geometry 2044 optimization using a quantum chemistry method. 2045 --torsionRangeMode <Range or Angles> [default: Range] 2046 Perform torsion scan using torsion angles corresponding to a torsion range 2047 or an explicit list of torsion angles. Possible values: Range or Angles. You 2048 may use '--torsionRange' option to specify values for torsion angle or 2049 torsion angles. 2050 --torsionRange <Start,Stop,Step or Angle1,Angle2...> [default: auto] 2051 Start, stop, and step size angles or a comma delimited list of angles in 2052 degrees for a torsion scan. 2053 2054 This value is '--torsionRangeMode' specific. It must be a triplet corresponding 2055 to 'start,Stop,Step' for 'Range' value of '--torsionRange' option. Otherwise, it 2056 is comma delimited list of one or more torsion angles for 'Angles' value of 2057 '--torsionRange' option. 2058 2059 The default values, based on '--torsionRangeMode' option, are shown below: 2060 2061 TorsionRangeMode Default value 2062 Range 0,360,5 2063 Angles None 2064 2065 You must explicitly provide a list of torsion angle(s) for 'Angles' of 2066 '--torsionRangeMode' option. 2067 --useChirality <yes or no> [default: no] 2068 Use chirrality during substructure matches for identification of torsions. 2069 -w, --workingdir <dir> 2070 Location of working directory which defaults to the current directory. 2071 2072 Examples: 2073 To perform a torsion scan on the first molecule in a SMILES file using a minimum 2074 energy structure of the molecule selected from an initial ensemble of conformations 2075 generated using distance geometry and forcefield, skip generation of conformation 2076 ensembles for specific torsion angles and constrained energy minimization of the 2077 ensemble, calculating single point at a specific torsion angle energy using B3LYP/6-31G** 2078 and B3LYP/6-31+G** for non-sulfur and sulfur containing molecules, generate output files 2079 corresponding to structure, energy and torsion plot, type: 2080 2081 % Psi4PerformTorsionScan.py -t "CCCC" -i Psi4SampleTorsionScan.smi 2082 -o SampleOut.sdf 2083 2084 To run the previous example for performing a torsion scan using a specific list 2085 of torsion angles, type: 2086 2087 % Psi4PerformTorsionScan.py -t "CCCC" -i Psi4SampleTorsionScan.smi 2088 -o SampleOut.sdf --torsionRangeMode Angles 2089 --torsionRange "0,180,360" 2090 2091 To run the previous example on the first molecule in a SD file containing 3D 2092 coordinates and skip the generations of initial 3D structure, type: 2093 2094 % Psi4PerformTorsionScan.py -t "CCCC" --infile3D yes 2095 -i Psi4SampleTorsionScan3D.sdf -o SampleOut.sdf 2096 2097 To run the first example on all molecules in a SD file, type: 2098 2099 % Psi4PerformTorsionScan.py -t "CCCC" --modeMols All 2100 -i Psi4SampleTorsionScan.sdf -o SampleOut.sdf 2101 2102 To run the first example on all molecules in a SD file containing 3D 2103 coordinates and skip the generation of initial 3D structures, type: 2104 2105 % Psi4PerformTorsionScan.py -t "CCCC" --infile3D yes 2106 --modeMols All -i Psi4SampleTorsionScan3D.sdf -o SampleOut.sdf 2107 2108 To perform a torsion scan on the first molecule in a SMILES file using a minimum 2109 energy structure of the molecule selected from an initial ensemble of conformations 2110 generated using distance geometry and forcefield, generate up to 50 conformations 2111 for specific torsion angles using ETKDGv2 methodology followed by initial MMFF 2112 forcefield minimization and final energy minimization using B3LYP/6-31G** and 2113 B3LYP/6-31+G** for non-sulfur and sulfur containing molecules, generate output files 2114 corresponding to minimum energy structure, energy and torsion plot, type: 2115 2116 % Psi4PerformTorsionScan.py -t "CCCC" --torsionMinimize Yes 2117 -i Psi4SampleTorsionScan.smi -o SampleOut.sdf 2118 2119 To run the previous example on all molecules in a SD file, type: 2120 2121 % Psi4PerformTorsionScan.py -t "CCCC" --modeMols All 2122 --torsionMinimize Yes -i Psi4SampleTorsionScan.sdf -o SampleOut.sdf 2123 2124 To run the previous example on all molecules in a SD file containing 3D 2125 coordinates and skip the generation of initial 3D structures, type: 2126 2127 % Psi4PerformTorsionScan.py -t "CCCC" --modeMols All 2128 --infile3D yes --modeMols All --torsionMinimize Yes 2129 -i Psi4SampleTorsionScan.sdf -o SampleOut.sdf 2130 2131 To run the previous example in multiprocessing mode at molecules level 2132 on all available CPUs without loading all data into memory and write out 2133 a SD file, type: 2134 2135 % Psi4PerformTorsionScan.py -t "CCCC" -i Psi4SampleTorsionScan.smi 2136 -o SampleOut.sdf --modeMols All --torsionMinimize Yes --mp yes 2137 2138 To run the previous example in multiprocessing mode at torsion angles level 2139 on all available CPUs without loading all data into memory and write out 2140 a SD file, type: 2141 2142 % Psi4PerformTorsionScan.py -t "CCCC" -i Psi4SampleTorsionScan.smi 2143 -o SampleOut.sdf --modeMols All --torsionMinimize Yes --mp yes 2144 --mpLevel TorsionAngles 2145 2146 To run the previous example in multiprocessing mode on all available CPUs 2147 by loading all data into memory and write out a SD file, type: 2148 2149 % Psi4PerformTorsionScan.py -t "CCCC" -i Psi4SampleTorsionScan.smi 2150 -o SampleOut.sdf --modeMols All --torsionMinimize Yes --mp yes 2151 --mpParams "inputDataMode,InMemory" 2152 2153 To run the previous example in multiprocessing mode on specific number of 2154 CPUs and chunk size without loading all data into memory and write out a SD file, 2155 type: 2156 2157 % Psi4PerformTorsionScan.py -t "CCCC" -i Psi4SampleTorsionScan.smi 2158 -o SampleOut.sdf --modeMols All --torsionMinimize Yes --mp yes 2159 --mpParams "inputDataMode,Lazy,numProcesses,4,chunkSize,8" 2160 2161 Author: 2162 Manish Sud(msud@san.rr.com) 2163 2164 See also: 2165 Psi4CalculateEnergy.py, Psi4GenerateConformers.py, 2166 Psi4GenerateConstrainedConformers.py, Psi4PerformConstrainedMinimization.py 2167 2168 Copyright: 2169 Copyright (C) 2024 Manish Sud. All rights reserved. 2170 2171 The functionality available in this script is implemented using RDKit, an 2172 open source toolkit for cheminformatics developed by Greg Landrum. 2173 2174 This file is part of MayaChemTools. 2175 2176 MayaChemTools is free software; you can redistribute it and/or modify it under 2177 the terms of the GNU Lesser General Public License as published by the Free 2178 Software Foundation; either version 3 of the License, or (at your option) any 2179 later version. 2180 2181 """ 2182 2183 if __name__ == "__main__": 2184 main()