1 #!/bin/env python 2 # 3 # File: Psi4PerformMinimization.py 4 # Author: Manish Sud <msud@san.rr.com> 5 # 6 # Copyright (C) 2024 Manish Sud. All rights reserved. 7 # 8 # The functionality available in this script is implemented using Psi4, an 9 # open source quantum chemistry software package, and RDKit, an open 10 # source toolkit for cheminformatics developed by Greg Landrum. 11 # 12 # This file is part of MayaChemTools. 13 # 14 # MayaChemTools is free software; you can redistribute it and/or modify it under 15 # the terms of the GNU Lesser General Public License as published by the Free 16 # Software Foundation; either version 3 of the License, or (at your option) any 17 # later version. 18 # 19 # MayaChemTools is distributed in the hope that it will be useful, but without 20 # any warranty; without even the implied warranty of merchantability of fitness 21 # for a particular purpose. See the GNU Lesser General Public License for more 22 # details. 23 # 24 # You should have received a copy of the GNU Lesser General Public License 25 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or 26 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, 27 # Boston, MA, 02111-1307, USA. 28 # 29 30 from __future__ import print_function 31 32 # Add local python path to the global path and import standard library modules... 33 import os 34 import sys; sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python")) 35 import time 36 import re 37 import shutil 38 import multiprocessing as mp 39 40 # Psi4 imports... 41 if (hasattr(shutil, 'which') and shutil.which("psi4") is None): 42 sys.stderr.write("\nWarning: Failed to find 'psi4' in your PATH indicating potential issues with your\n") 43 sys.stderr.write("Psi4 environment. The 'import psi4' directive in the global scope of the script\n") 44 sys.stderr.write("interferes with the multiprocessing functionality. It is imported later in the\n") 45 sys.stderr.write("local scope during the execution of the script and may fail. Check/update your\n") 46 sys.stderr.write("Psi4 environment and try again.\n\n") 47 48 # RDKit imports... 49 try: 50 from rdkit import rdBase 51 from rdkit import Chem 52 from rdkit.Chem import AllChem 53 except ImportError as ErrMsg: 54 sys.stderr.write("\nFailed to import RDKit module/package: %s\n" % ErrMsg) 55 sys.stderr.write("Check/update your RDKit environment and try again.\n\n") 56 sys.exit(1) 57 58 # MayaChemTools imports... 59 try: 60 from docopt import docopt 61 import MiscUtil 62 import Psi4Util 63 import RDKitUtil 64 except ImportError as ErrMsg: 65 sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg) 66 sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n") 67 sys.exit(1) 68 69 ScriptName = os.path.basename(sys.argv[0]) 70 Options = {} 71 OptionsInfo = {} 72 73 def main(): 74 """Start execution of the script.""" 75 76 MiscUtil.PrintInfo("\n%s (Psi4: Imported later; RDKit v%s; MayaChemTools v%s; %s): Starting...\n" % (ScriptName, rdBase.rdkitVersion, MiscUtil.GetMayaChemToolsVersion(), time.asctime())) 77 78 (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime() 79 80 # Retrieve command line arguments and options... 81 RetrieveOptions() 82 83 # Process and validate command line arguments and options... 84 ProcessOptions() 85 86 # Perform actions required by the script... 87 PerformMinimization() 88 89 MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName) 90 MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime)) 91 92 def PerformMinimization(): 93 """Perform minimization.""" 94 95 # Setup a molecule reader... 96 MiscUtil.PrintInfo("\nProcessing file %s..." % OptionsInfo["Infile"]) 97 Mols = RDKitUtil.ReadMolecules(OptionsInfo["Infile"], **OptionsInfo["InfileParams"]) 98 99 # Set up a molecule writer... 100 Writer = RDKitUtil.MoleculesWriter(OptionsInfo["Outfile"], **OptionsInfo["OutfileParams"]) 101 if Writer is None: 102 MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OptionsInfo["Outfile"]) 103 MiscUtil.PrintInfo("Generating file %s..." % OptionsInfo["Outfile"]) 104 105 MolCount, ValidMolCount, MinimizationFailedCount, WriteFailedCount = ProcessMolecules(Mols, Writer) 106 107 if Writer is not None: 108 Writer.close() 109 110 MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount) 111 MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount) 112 MiscUtil.PrintInfo("Number of molecules failed during conformation generation or minimization: %d" % MinimizationFailedCount) 113 MiscUtil.PrintInfo("Number of molecules failed during writing: %d" % WriteFailedCount) 114 MiscUtil.PrintInfo("Number of ignored molecules: %d" % (MolCount - ValidMolCount + MinimizationFailedCount + WriteFailedCount)) 115 116 def ProcessMolecules(Mols, Writer): 117 """Process and minimize molecules.""" 118 119 if OptionsInfo["MPMode"]: 120 return ProcessMoleculesUsingMultipleProcesses(Mols, Writer) 121 else: 122 return ProcessMoleculesUsingSingleProcess(Mols, Writer) 123 124 def ProcessMoleculesUsingSingleProcess(Mols, Writer): 125 """Process and minimize molecules using a single process.""" 126 127 # Intialize Psi4... 128 MiscUtil.PrintInfo("\nInitializing Psi4...") 129 Psi4Handle = Psi4Util.InitializePsi4(Psi4RunParams = OptionsInfo["Psi4RunParams"], Psi4OptionsParams = OptionsInfo["Psi4OptionsParams"], PrintVersion = True, PrintHeader = True) 130 if OptionsInfo["Psi4DDXSolvationMode"]: 131 Psi4Util.UpdatePsi4OptionsParameters(Psi4Handle, OptionsInfo["Psi4DDXSolvationOptions"]) 132 OptionsInfo["psi4"] = Psi4Handle 133 134 # Setup max iterations global variable... 135 Psi4Util.UpdatePsi4OptionsParameters(Psi4Handle, {'GEOM_MAXITER': OptionsInfo["MaxIters"]}) 136 137 # Setup conversion factor for energy units... 138 SetupEnergyConversionFactor(Psi4Handle) 139 140 if OptionsInfo["SkipConformerGeneration"]: 141 MiscUtil.PrintInfo("\nPerforming minimization without generation of conformers...") 142 else: 143 MiscUtil.PrintInfo("\nPerforming minimization with generation of conformers...") 144 145 (MolCount, ValidMolCount, MinimizationFailedCount, WriteFailedCount) = [0] * 4 146 for Mol in Mols: 147 MolCount += 1 148 149 if not CheckAndValidateMolecule(Mol, MolCount): 150 continue 151 152 # Setup 2D coordinates for SMILES input file... 153 if OptionsInfo["SMILESInfileStatus"]: 154 AllChem.Compute2DCoords(Mol) 155 156 ValidMolCount += 1 157 158 Mol, CalcStatus, ConfID, Energy, SolvationEnergy = MinimizeMoleculeOrConformers(Psi4Handle, Mol, MolCount) 159 160 if not CalcStatus: 161 if not OptionsInfo["QuietMode"]: 162 MolName = RDKitUtil.GetMolName(Mol, MolCount) 163 MiscUtil.PrintWarning("Failed to calculate energy for molecule %s" % MolName) 164 165 MinimizationFailedCount += 1 166 continue 167 168 WriteStatus = WriteMolecule(Writer, Mol, MolCount, ConfID, Energy, SolvationEnergy) 169 170 if not WriteStatus: 171 WriteFailedCount += 1 172 173 return (MolCount, ValidMolCount, MinimizationFailedCount, WriteFailedCount) 174 175 def ProcessMoleculesUsingMultipleProcesses(Mols, Writer): 176 """Process and minimize molecules using multiprocessing.""" 177 178 if OptionsInfo["MPLevelConformersMode"]: 179 return ProcessMoleculesUsingMultipleProcessesAtConformersLevel(Mols, Writer) 180 elif OptionsInfo["MPLevelMoleculesMode"]: 181 return ProcessMoleculesUsingMultipleProcessesAtMoleculesLevel(Mols, Writer) 182 else: 183 MiscUtil.PrintError("The value, %s, option \"--mpLevel\" is not supported." % (OptionsInfo["MPLevel"])) 184 185 def ProcessMoleculesUsingMultipleProcessesAtMoleculesLevel(Mols, Writer): 186 """Process and minimize molecules using multiprocessing at molecules level.""" 187 188 if OptionsInfo["SkipConformerGeneration"]: 189 MiscUtil.PrintInfo("\nPerforming minimization without generation of conformers using multiprocessing at molecules level...") 190 else: 191 MiscUtil.PrintInfo("\nPerforming minimization with generation of conformers using multiprocessing at molecules level...") 192 193 MPParams = OptionsInfo["MPParams"] 194 195 # Setup data for initializing a worker process... 196 InitializeWorkerProcessArgs = (MiscUtil.ObjectToBase64EncodedString(Options), MiscUtil.ObjectToBase64EncodedString(OptionsInfo)) 197 198 # Setup a encoded mols data iterable for a worker process... 199 WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStrings(Mols) 200 201 # Setup process pool along with data initialization for each process... 202 if not OptionsInfo["QuietMode"]: 203 MiscUtil.PrintInfo("\nConfiguring multiprocessing using %s method..." % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()")) 204 MiscUtil.PrintInfo("NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n" % (MPParams["NumProcesses"], MPParams["InputDataMode"], ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"]))) 205 206 ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeWorkerProcess, InitializeWorkerProcessArgs) 207 208 # Start processing... 209 if re.match("^Lazy$", MPParams["InputDataMode"], re.I): 210 Results = ProcessPool.imap(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"]) 211 elif re.match("^InMemory$", MPParams["InputDataMode"], re.I): 212 Results = ProcessPool.map(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"]) 213 else: 214 MiscUtil.PrintError("The value, %s, specified for \"--inputDataMode\" is not supported." % (MPParams["InputDataMode"])) 215 216 # Print out Psi4 version in the main process... 217 MiscUtil.PrintInfo("\nInitializing Psi4...\n") 218 Psi4Handle = Psi4Util.InitializePsi4(PrintVersion = True, PrintHeader = False) 219 OptionsInfo["psi4"] = Psi4Handle 220 221 (MolCount, ValidMolCount, MinimizationFailedCount, WriteFailedCount) = [0] * 4 222 for Result in Results: 223 MolCount += 1 224 MolIndex, EncodedMol, CalcStatus, ConfID, Energy, SolvationEnergy = Result 225 226 if EncodedMol is None: 227 continue 228 ValidMolCount += 1 229 230 if not CalcStatus: 231 MinimizationFailedCount += 1 232 continue 233 234 Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol) 235 WriteStatus = WriteMolecule(Writer, Mol, MolCount, ConfID, Energy, SolvationEnergy) 236 237 if not WriteStatus: 238 WriteFailedCount += 1 239 240 return (MolCount, ValidMolCount, MinimizationFailedCount, WriteFailedCount) 241 242 def InitializeWorkerProcess(*EncodedArgs): 243 """Initialize data for a worker process.""" 244 245 global Options, OptionsInfo 246 247 if not OptionsInfo["QuietMode"]: 248 MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid()) 249 250 # Decode Options and OptionInfo... 251 Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0]) 252 OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1]) 253 254 # Psi4 is initialized in the worker process to avoid creation of redundant Psi4 255 # output files for each process... 256 OptionsInfo["Psi4Initialized"] = False 257 258 def WorkerProcess(EncodedMolInfo): 259 """Process data for a worker process.""" 260 261 if not OptionsInfo["Psi4Initialized"]: 262 InitializePsi4ForWorkerProcess() 263 264 MolIndex, EncodedMol = EncodedMolInfo 265 266 MolNum = MolIndex + 1 267 CalcStatus = False 268 ConfID = None 269 Energy = None 270 SolvationEnergy = None 271 272 if EncodedMol is None: 273 return [MolIndex, None, CalcStatus, ConfID, Energy, SolvationEnergy] 274 275 Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol) 276 277 if not CheckAndValidateMolecule(Mol, MolNum): 278 return [MolIndex, None, CalcStatus, ConfID, Energy, SolvationEnergy] 279 280 # Setup 2D coordinates for SMILES input file... 281 if OptionsInfo["SMILESInfileStatus"]: 282 AllChem.Compute2DCoords(Mol) 283 284 Mol, CalcStatus, ConfID, Energy, SolvationEnergy = MinimizeMoleculeOrConformers(OptionsInfo["psi4"], Mol, MolNum) 285 286 return [MolIndex, RDKitUtil.MolToBase64EncodedMolString(Mol, PropertyPickleFlags = Chem.PropertyPickleOptions.MolProps | Chem.PropertyPickleOptions.PrivateProps), CalcStatus, ConfID, Energy, SolvationEnergy] 287 288 def ProcessMoleculesUsingMultipleProcessesAtConformersLevel(Mols, Writer): 289 """Process and minimize molecules using multiprocessing at conformers level.""" 290 291 MiscUtil.PrintInfo("\nPerforming minimization with generation of conformers using multiprocessing at conformers level...") 292 293 (MolCount, ValidMolCount, MinimizationFailedCount, WriteFailedCount) = [0] * 4 294 for Mol in Mols: 295 MolCount += 1 296 297 if not CheckAndValidateMolecule(Mol, MolCount): 298 continue 299 300 # Setup 2D coordinates for SMILES input file... 301 if OptionsInfo["SMILESInfileStatus"]: 302 AllChem.Compute2DCoords(Mol) 303 304 ValidMolCount += 1 305 306 Mol, CalcStatus, ConfID, Energy, SolvationEnergy = ProcessConformersUsingMultipleProcesses(Mol, MolCount) 307 308 if not CalcStatus: 309 if not OptionsInfo["QuietMode"]: 310 MolName = RDKitUtil.GetMolName(Mol, MolCount) 311 MiscUtil.PrintWarning("Failed to calculate energy for molecule %s" % MolName) 312 313 MinimizationFailedCount += 1 314 continue 315 316 WriteStatus = WriteMolecule(Writer, Mol, MolCount, ConfID, Energy, SolvationEnergy) 317 318 if not WriteStatus: 319 WriteFailedCount += 1 320 321 return (MolCount, ValidMolCount, MinimizationFailedCount, WriteFailedCount) 322 323 def ProcessConformersUsingMultipleProcesses(Mol, MolNum): 324 """Generate conformers and minimize them using multiple processes. """ 325 326 # Add hydrogens... 327 Mol = Chem.AddHs(Mol) 328 329 # Setup conformers... 330 ConfIDs = EmbedMolecule(Mol, MolNum) 331 if not len(ConfIDs): 332 if not OptionsInfo["QuietMode"]: 333 MolName = RDKitUtil.GetMolName(Mol, MolNum) 334 MiscUtil.PrintWarning("Minimization couldn't be performed for molecule %s: Embedding failed...\n" % MolName) 335 return (Mol, False, None, None, None) 336 337 MPParams = OptionsInfo["MPParams"] 338 339 # Setup data for initializing a worker process... 340 InitializeWorkerProcessArgs = (MiscUtil.ObjectToBase64EncodedString(Options), MiscUtil.ObjectToBase64EncodedString(OptionsInfo)) 341 342 # Setup a encoded mols data iterable for a worker process... 343 MolIndex = MolNum - 1 344 WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStringWithConfIDs(Mol, MolIndex, ConfIDs) 345 346 # Setup process pool along with data initialization for each process... 347 if not OptionsInfo["QuietMode"]: 348 MiscUtil.PrintInfo("\nConfiguring multiprocessing using %s method..." % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()")) 349 MiscUtil.PrintInfo("NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n" % (MPParams["NumProcesses"], MPParams["InputDataMode"], ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"]))) 350 351 ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeConformerWorkerProcess, InitializeWorkerProcessArgs) 352 353 # Start processing... 354 if re.match("^Lazy$", MPParams["InputDataMode"], re.I): 355 Results = ProcessPool.imap(ConformerWorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"]) 356 elif re.match("^InMemory$", MPParams["InputDataMode"], re.I): 357 Results = ProcessPool.map(ConformerWorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"]) 358 else: 359 MiscUtil.PrintError("The value, %s, specified for \"--inputDataMode\" is not supported." % (MPParams["InputDataMode"])) 360 361 CalcEnergyMap = {} 362 CalcSolvationEnergyMap = {} 363 CalcFailedCount = 0 364 for Result in Results: 365 MolIndex, EncodedMol, CalcStatus, ConfID, Energy, SolvationEnergy = Result 366 367 if EncodedMol is None: 368 CalcFailedCount += 1 369 continue 370 371 if not CalcStatus: 372 CalcFailedCount += 1 373 continue 374 375 # Retrieve minimized atom positions... 376 MinimizedMol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol) 377 AtomPositions = RDKitUtil.GetAtomPositions(MinimizedMol, ConfID = ConfID) 378 379 # Update atom positions... 380 RDKitUtil.SetAtomPositions(Mol, AtomPositions, ConfID = ConfID) 381 382 CalcEnergyMap[ConfID] = Energy 383 CalcSolvationEnergyMap[ConfID] = SolvationEnergy 384 385 if CalcFailedCount: 386 return (Mol, False, None, None, None) 387 388 SortedConfIDs = sorted(ConfIDs, key = lambda ConfID: CalcEnergyMap[ConfID]) 389 MinEnergyConfID = SortedConfIDs[0] 390 MinEnergy = CalcEnergyMap[MinEnergyConfID] 391 SolvationEnergy = CalcSolvationEnergyMap[MinEnergyConfID] 392 393 return (Mol, True, MinEnergyConfID, MinEnergy, SolvationEnergy) 394 395 def InitializeConformerWorkerProcess(*EncodedArgs): 396 """Initialize data for a conformer worker process.""" 397 398 global Options, OptionsInfo 399 400 if not OptionsInfo["QuietMode"]: 401 MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid()) 402 403 # Decode Options and OptionInfo... 404 Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0]) 405 OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1]) 406 407 # Psi4 is initialized in the worker process to avoid creation of redundant Psi4 408 # output files for each process... 409 OptionsInfo["Psi4Initialized"] = False 410 411 def ConformerWorkerProcess(EncodedMolInfo): 412 """Process data for a conformer worker process.""" 413 414 if not OptionsInfo["Psi4Initialized"]: 415 InitializePsi4ForWorkerProcess() 416 417 MolIndex, EncodedMol, ConfID = EncodedMolInfo 418 419 MolNum = MolIndex + 1 420 421 CalcStatus = False 422 Energy = None 423 SolvationEnergy = None 424 425 if EncodedMol is None: 426 return [MolIndex, None, CalcStatus, ConfID, Energy, SolvationEnergy] 427 428 Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol) 429 MolName = RDKitUtil.GetMolName(Mol, MolNum) 430 431 if not OptionsInfo["QuietMode"]: 432 MiscUtil.PrintInfo("Processing molecule %s conformer ID %s..." % (MolName, ConfID)) 433 434 Status, ConvergeStatus = MinimizeMoleculeUsingForceField(Mol, MolNum, ConfID) 435 if not Status: 436 return [MolIndex, EncodedMol, CalcStatus, ConfID, Energy, SolvationEnergy] 437 438 if ConvergeStatus != 0: 439 if not OptionsInfo["QuietMode"]: 440 MolName = RDKitUtil.GetMolName(Mol, (MolIndex + 1)) 441 MiscUtil.PrintWarning("Minimization using forcefield failed to converge for molecule %s in %d steps. Try using higher value for \"maxIters\" in \"--confParams\" option...\n" % (MolName, OptionsInfo["ConfGenerationParams"]["MaxIters"])) 442 443 # Perform Psi4 minimization... 444 CalcStatus, Energy, SolvationEnergy = MinimizeMoleculeUsingPsi4(OptionsInfo["psi4"], Mol, MolNum, ConfID) 445 if not CalcStatus: 446 if not OptionsInfo["QuietMode"]: 447 MiscUtil.PrintWarning("Minimization couldn't be performed for molecule %s\n" % (MolName)) 448 return [MolIndex, EncodedMol, False, ConfID, Energy, SolvationEnergy] 449 450 return [MolIndex, RDKitUtil.MolToBase64EncodedMolString(Mol, PropertyPickleFlags = Chem.PropertyPickleOptions.MolProps | Chem.PropertyPickleOptions.PrivateProps), CalcStatus, ConfID, Energy, SolvationEnergy] 451 452 def InitializePsi4ForWorkerProcess(): 453 """Initialize Psi4 for a worker process.""" 454 455 if OptionsInfo["Psi4Initialized"]: 456 return 457 458 OptionsInfo["Psi4Initialized"] = True 459 460 if OptionsInfo["MPLevelConformersMode"] and re.match("auto", OptionsInfo["Psi4RunParams"]["OutputFileSpecified"], re.I): 461 # Run Psi4 in quiet mode during multiprocessing at Conformers level for 'auto' OutputFile... 462 OptionsInfo["Psi4RunParams"]["OutputFile"] = "quiet" 463 else: 464 # Update output file... 465 OptionsInfo["Psi4RunParams"]["OutputFile"] = Psi4Util.UpdatePsi4OutputFileUsingPID(OptionsInfo["Psi4RunParams"]["OutputFile"], os.getpid()) 466 467 # Intialize Psi4... 468 OptionsInfo["psi4"] = Psi4Util.InitializePsi4(Psi4RunParams = OptionsInfo["Psi4RunParams"], Psi4OptionsParams = OptionsInfo["Psi4OptionsParams"], PrintVersion = False, PrintHeader = True) 469 470 if OptionsInfo["Psi4DDXSolvationMode"]: 471 Psi4Util.UpdatePsi4OptionsParameters(OptionsInfo["psi4"], OptionsInfo["Psi4DDXSolvationOptions"]) 472 473 # Setup max iterations global variable... 474 Psi4Util.UpdatePsi4OptionsParameters(OptionsInfo["psi4"], {'GEOM_MAXITER': OptionsInfo["MaxIters"]}) 475 476 # Setup conversion factor for energy units... 477 SetupEnergyConversionFactor(OptionsInfo["psi4"]) 478 479 def MinimizeMoleculeOrConformers(Psi4Handle, Mol, MolNum): 480 """Minimize molecule or conformers.""" 481 482 ConfID = None 483 if OptionsInfo["SkipConformerGeneration"]: 484 Mol, CalcStatus, Energy, SolvationEnergy = MinimizeMolecule(Psi4Handle, Mol, MolNum) 485 else: 486 Mol, CalcStatus, ConfID, Energy, SolvationEnergy = GenerateAndMinimizeConformers(Psi4Handle, Mol, MolNum) 487 488 return (Mol, CalcStatus, ConfID, Energy, SolvationEnergy) 489 490 def GenerateAndMinimizeConformers(Psi4Handle, Mol, MolNum = None): 491 """Generate and minimize conformers for a molecule to get the lowest energy conformer.""" 492 493 # Add hydrogens.. 494 Mol = Chem.AddHs(Mol) 495 496 MolName = RDKitUtil.GetMolName(Mol, MolNum) 497 498 # Setup conformers... 499 ConfIDs = EmbedMolecule(Mol, MolNum) 500 if not len(ConfIDs): 501 if not OptionsInfo["QuietMode"]: 502 MiscUtil.PrintWarning("Minimization couldn't be performed for molecule %s: Embedding failed...\n" % MolName) 503 return (Mol, False, None, None, None) 504 505 # Minimize conformers... 506 CalcEnergyMap = {} 507 CalcSolvationEnergyMap = {} 508 for ConfID in ConfIDs: 509 if not OptionsInfo["QuietMode"]: 510 MiscUtil.PrintInfo("Processing molecule %s conformer ID %s..." % (MolName, ConfID)) 511 512 # Perform forcefield minimization... 513 Status, ConvergeStatus = MinimizeMoleculeUsingForceField(Mol, MolNum, ConfID) 514 if not Status: 515 return (Mol, False, None, None, None) 516 517 if ConvergeStatus != 0: 518 if not OptionsInfo["QuietMode"]: 519 MiscUtil.PrintWarning("Minimization using forcefield failed to converge for molecule %s in %d steps. Try using higher value for \"maxIters\" in \"--confParams\" option...\n" % (MolName, OptionsInfo["ConfGenerationParams"]["MaxIters"])) 520 521 # Perform Psi4 minimization... 522 CalcStatus, Energy, SolvationEnergy = MinimizeMoleculeUsingPsi4(Psi4Handle, Mol, MolNum, ConfID) 523 if not CalcStatus: 524 if not OptionsInfo["QuietMode"]: 525 MiscUtil.PrintWarning("Minimization couldn't be performed for molecule %s\n" % (MolName)) 526 return (Mol, False, None, None, None) 527 528 CalcEnergyMap[ConfID] = Energy 529 CalcSolvationEnergyMap[ConfID] = SolvationEnergy 530 531 # Select the lowest energy conformer.. 532 SortedConfIDs = sorted(ConfIDs, key = lambda ConfID: CalcEnergyMap[ConfID]) 533 MinEnergyConfID = SortedConfIDs[0] 534 Energy = CalcEnergyMap[MinEnergyConfID] 535 SolvationEnergy = CalcSolvationEnergyMap[MinEnergyConfID] 536 537 return (Mol, True, MinEnergyConfID, Energy, SolvationEnergy) 538 539 def MinimizeMolecule(Psi4Handle, Mol, MolNum): 540 """Minimize molecule.""" 541 542 # Skip conformation generation and forcefield minimization... 543 CalcStatus, Energy, SolvationEnergy = MinimizeMoleculeUsingPsi4(Psi4Handle, Mol, MolNum) 544 545 return (Mol, CalcStatus, Energy, SolvationEnergy) 546 547 def MinimizeMoleculeUsingPsi4(Psi4Handle, Mol, MolNum, ConfID = -1): 548 """Minimize molecule using Psi4.""" 549 550 # Setup a Psi4Mol... 551 Psi4Mol = SetupPsi4Mol(Psi4Handle, Mol, MolNum, ConfID) 552 if Psi4Mol is None: 553 return (False, None, None) 554 555 # Setup reference wave function... 556 Reference = SetupReferenceWavefunction(Mol) 557 Psi4Handle.set_options({'Reference': Reference}) 558 559 # Setup method name and basis set... 560 MethodName, BasisSet = SetupMethodNameAndBasisSet(Mol) 561 562 # Optimize geometry... 563 Status, Energy, WaveFunction = Psi4Util.PerformGeometryOptimization(Psi4Handle, Psi4Mol, MethodName, BasisSet, ReturnWaveFunction = True, Quiet = OptionsInfo["QuietMode"]) 564 565 if not Status: 566 PerformPsi4Cleanup(Psi4Handle) 567 return (False, None, None) 568 569 SolvationEnergy = None 570 if OptionsInfo["Psi4DDXSolvationMode"]: 571 SolvationEnergy = RetrievePsi4DDXSolvationEnergy(WaveFunction) 572 573 # Update atom positions... 574 AtomPositions = Psi4Util.GetAtomPositions(Psi4Handle, WaveFunction, InAngstroms = True) 575 RDKitUtil.SetAtomPositions(Mol, AtomPositions, ConfID = ConfID) 576 577 # Convert energy units... 578 if OptionsInfo["ApplyEnergyConversionFactor"]: 579 Energy = Energy * OptionsInfo["EnergyConversionFactor"] 580 if SolvationEnergy is not None: 581 SolvationEnergy = SolvationEnergy * OptionsInfo["EnergyConversionFactor"] 582 583 # Clean up 584 PerformPsi4Cleanup(Psi4Handle) 585 586 return (True, Energy, SolvationEnergy) 587 588 def RetrievePsi4DDXSolvationEnergy(WaveFunction): 589 """Retrieve DDX solvation energy.""" 590 591 SolvationEnergy = None 592 try: 593 SolvationEnergy = WaveFunction.variable('DD Solvation Energy') 594 except Exception as ErrMsg: 595 SolvationEnergy = None 596 if not OptionsInfo["QuietMode"]: 597 MiscUtil.PrintWarning("Failed to retrieve Psi4 DD Solvation Energy using wave function: %s" % ErrMsg) 598 599 return SolvationEnergy 600 601 def MinimizeMoleculeUsingForceField(Mol, MolNum, ConfID = -1): 602 """Minimize molecule using forcefield available in RDKit.""" 603 604 try: 605 if OptionsInfo["ConfGenerationParams"]["UseUFF"]: 606 ConvergeStatus = AllChem.UFFOptimizeMolecule(Mol, confId = ConfID, maxIters = OptionsInfo["ConfGenerationParams"]["MaxIters"]) 607 elif OptionsInfo["ConfGenerationParams"]["UseMMFF"]: 608 ConvergeStatus = AllChem.MMFFOptimizeMolecule(Mol, confId = ConfID, maxIters = OptionsInfo["ConfGenerationParams"]["MaxIters"], mmffVariant = OptionsInfo["ConfGenerationParams"]["MMFFVariant"]) 609 else: 610 MiscUtil.PrintError("Minimization couldn't be performed: Specified forcefield, %s, is not supported" % OptionsInfo["ConfGenerationParams"]["ForceField"]) 611 except (ValueError, RuntimeError, Chem.rdchem.KekulizeException) as ErrMsg: 612 if not OptionsInfo["QuietMode"]: 613 MolName = RDKitUtil.GetMolName(Mol, MolNum) 614 MiscUtil.PrintWarning("Minimization using forcefield couldn't be performed for molecule %s:\n%s\n" % (MolName, ErrMsg)) 615 return (False, None) 616 617 return (True, ConvergeStatus) 618 619 def EmbedMolecule(Mol, MolNum = None): 620 """Embed conformations.""" 621 622 ConfIDs = [] 623 624 MaxConfs = OptionsInfo["ConfGenerationParams"]["MaxConfs"] 625 RandomSeed = OptionsInfo["ConfGenerationParams"]["RandomSeed"] 626 EnforceChirality = OptionsInfo["ConfGenerationParams"]["EnforceChirality"] 627 UseExpTorsionAnglePrefs = OptionsInfo["ConfGenerationParams"]["UseExpTorsionAnglePrefs"] 628 ETVersion = OptionsInfo["ConfGenerationParams"]["ETVersion"] 629 UseBasicKnowledge = OptionsInfo["ConfGenerationParams"]["UseBasicKnowledge"] 630 EmbedRMSDCutoff = OptionsInfo["ConfGenerationParams"]["EmbedRMSDCutoff"] 631 632 try: 633 ConfIDs = AllChem.EmbedMultipleConfs(Mol, numConfs = MaxConfs, randomSeed = RandomSeed, pruneRmsThresh = EmbedRMSDCutoff, enforceChirality = EnforceChirality, useExpTorsionAnglePrefs = UseExpTorsionAnglePrefs, useBasicKnowledge = UseBasicKnowledge, ETversion = ETVersion) 634 except ValueError as ErrMsg: 635 if not OptionsInfo["QuietMode"]: 636 MolName = RDKitUtil.GetMolName(Mol, MolNum) 637 MiscUtil.PrintWarning("Embedding failed for molecule %s:\n%s\n" % (MolName, ErrMsg)) 638 ConfIDs = [] 639 640 if not OptionsInfo["QuietMode"]: 641 if EmbedRMSDCutoff > 0: 642 MiscUtil.PrintInfo("Generating initial conformation ensemble by distance geometry for %s - EmbedRMSDCutoff: %s; Size: %s; Size after RMSD filtering: %s" % (RDKitUtil.GetMolName(Mol, MolNum), EmbedRMSDCutoff, MaxConfs, len(ConfIDs))) 643 else: 644 MiscUtil.PrintInfo("Generating initial conformation ensemble by distance geometry for %s - EmbedRMSDCutoff: None; Size: %s" % (RDKitUtil.GetMolName(Mol, MolNum), len(ConfIDs))) 645 646 return ConfIDs 647 648 def SetupPsi4Mol(Psi4Handle, Mol, MolNum, ConfID = -1): 649 """Setup a Psi4 molecule object.""" 650 651 if OptionsInfo["RecenterAndReorient"]: 652 MolGeometry = RDKitUtil.GetPsi4XYZFormatString(Mol, ConfID = ConfID, NoCom = False, NoReorient = False) 653 else: 654 MolGeometry = RDKitUtil.GetPsi4XYZFormatString(Mol, ConfID = ConfID, NoCom = True, NoReorient = True) 655 656 try: 657 Psi4Mol = Psi4Handle.geometry(MolGeometry) 658 except Exception as ErrMsg: 659 Psi4Mol = None 660 if not OptionsInfo["QuietMode"]: 661 MiscUtil.PrintWarning("Failed to create Psi4 molecule from geometry string: %s\n" % ErrMsg) 662 MolName = RDKitUtil.GetMolName(Mol, MolNum) 663 MiscUtil.PrintWarning("Ignoring molecule: %s" % MolName) 664 665 if OptionsInfo["Symmetrize"]: 666 Psi4Mol.symmetrize(OptionsInfo["SymmetrizeTolerance"]) 667 668 return Psi4Mol 669 670 def PerformPsi4Cleanup(Psi4Handle): 671 """Perform clean up.""" 672 673 # Clean up after Psi4 run... 674 Psi4Handle.core.clean() 675 676 # Clean up any leftover scratch files... 677 if OptionsInfo["MPMode"]: 678 Psi4Util.RemoveScratchFiles(Psi4Handle, OptionsInfo["Psi4RunParams"]["OutputFile"]) 679 680 def CheckAndValidateMolecule(Mol, MolCount = None): 681 """Validate molecule for Psi4 calculations.""" 682 683 if Mol is None: 684 if not OptionsInfo["QuietMode"]: 685 MiscUtil.PrintInfo("\nProcessing molecule number %s..." % MolCount) 686 return False 687 688 MolName = RDKitUtil.GetMolName(Mol, MolCount) 689 if not OptionsInfo["QuietMode"]: 690 MiscUtil.PrintInfo("\nProcessing molecule %s..." % MolName) 691 692 if RDKitUtil.IsMolEmpty(Mol): 693 if not OptionsInfo["QuietMode"]: 694 MiscUtil.PrintWarning("Ignoring empty molecule: %s\n" % MolName) 695 return False 696 697 if not RDKitUtil.ValidateElementSymbols(RDKitUtil.GetAtomSymbols(Mol)): 698 if not OptionsInfo["QuietMode"]: 699 MiscUtil.PrintWarning("Ignoring molecule containing invalid element symbols: %s\n" % MolName) 700 return False 701 702 # Check for 3D flag... 703 if OptionsInfo["SkipConformerGeneration"]: 704 if not Mol.GetConformer().Is3D(): 705 if not OptionsInfo["QuietMode"]: 706 MiscUtil.PrintWarning("3D tag is not set for molecule: %s\n" % MolName) 707 708 # Check for missing hydrogens... 709 if OptionsInfo["SkipConformerGeneration"]: 710 # Otherwise, Hydrogens are always added... 711 if RDKitUtil.AreHydrogensMissingInMolecule(Mol): 712 if not OptionsInfo["QuietMode"]: 713 MiscUtil.PrintWarning("Missing hydrogens in molecule: %s\n" % MolName) 714 715 return True 716 717 def SetupMethodNameAndBasisSet(Mol): 718 """Setup method name and basis set.""" 719 720 MethodName = OptionsInfo["MethodName"] 721 if OptionsInfo["MethodNameAuto"]: 722 MethodName = "B3LYP" 723 724 BasisSet = OptionsInfo["BasisSet"] 725 if OptionsInfo["BasisSetAuto"]: 726 BasisSet = "6-31+G**" if RDKitUtil.IsAtomSymbolPresentInMol(Mol, "S") else "6-31G**" 727 728 return (MethodName, BasisSet) 729 730 def SetupReferenceWavefunction(Mol): 731 """Setup reference wavefunction.""" 732 733 Reference = OptionsInfo["Reference"] 734 if OptionsInfo["ReferenceAuto"]: 735 Reference = 'UHF' if (RDKitUtil.GetSpinMultiplicity(Mol) > 1) else 'RHF' 736 737 return Reference 738 739 def SetupEnergyConversionFactor(Psi4Handle): 740 """Setup converstion factor for energt units. The Psi4 energy units are Hartrees.""" 741 742 EnergyUnits = OptionsInfo["EnergyUnits"] 743 744 ApplyConversionFactor = True 745 if re.match("^kcal\/mol$", EnergyUnits, re.I): 746 ConversionFactor = Psi4Handle.constants.hartree2kcalmol 747 elif re.match("^kJ\/mol$", EnergyUnits, re.I): 748 ConversionFactor = Psi4Handle.constants.hartree2kJmol 749 elif re.match("^eV$", EnergyUnits, re.I): 750 ConversionFactor = Psi4Handle.constants.hartree2ev 751 else: 752 ApplyConversionFactor = False 753 ConversionFactor = 1.0 754 755 OptionsInfo["ApplyEnergyConversionFactor"] = ApplyConversionFactor 756 OptionsInfo["EnergyConversionFactor"] = ConversionFactor 757 758 def WriteMolecule(Writer, Mol, MolNum = None, ConfID = None, Energy = None, SolvationEnergy = None): 759 """Write molecule.""" 760 761 if OptionsInfo["EnergyOut"] and Energy is not None: 762 Energy = "%.*f" % (OptionsInfo["Precision"], Energy) 763 Mol.SetProp(OptionsInfo["EnergyDataFieldLabel"], Energy) 764 765 if OptionsInfo["Psi4DDXSolvationMode"] and SolvationEnergy is not None: 766 SolvationEnergy = "%.*f" % (OptionsInfo["Precision"], SolvationEnergy) 767 Mol.SetProp(OptionsInfo["SolvationEnergyDataFieldLabel"], SolvationEnergy) 768 769 try: 770 if ConfID is None: 771 Writer.write(Mol) 772 else: 773 Writer.write(Mol, confId = ConfID) 774 except (ValueError, RuntimeError) as ErrMsg: 775 if not OptionsInfo["QuietMode"]: 776 MolName = RDKitUtil.GetMolName(Mol, MolNum) 777 MiscUtil.PrintWarning("Failed to write molecule %s:\n%s\n" % (MolName, ErrMsg)) 778 return False 779 780 return True 781 782 def CheckAndSetupDDX(): 783 """Load pyddx and return its handle.""" 784 785 try: 786 import pyddx 787 except ImportError as ErrMsg: 788 sys.stderr.write("\nFailed to import module/package pyddx for solvation: %s\n" % ErrMsg) 789 sys.stderr.write("Psi4 relies on pyddx module for DDX solvation calculation.\n") 790 sys.stderr.write("You may need to install it from pypi or conda-forge.\n") 791 sys.stderr.write("Check/update your environment and try again.\n\n") 792 sys.exit(1) 793 794 return pyddx 795 796 def ProcessDDXListSolventsOption(): 797 """List solvent information provided by tge DDX module for the calculation 798 of solvation energy.""" 799 800 PyDDXHandle = CheckAndSetupDDX() 801 802 MiscUtil.PrintInfo("Solvent name and dielectric constant information avaibale in the DDX module:\n") 803 MiscUtil.PrintInfo("Solvent\tEpsilon") 804 Count = 0 805 for Solvent in sorted(PyDDXHandle.data.solvent_epsilon.keys()): 806 Count += 1 807 Epsilon = PyDDXHandle.data.solvent_epsilon[Solvent] 808 MiscUtil.PrintInfo("%s\t%s" % (Solvent, Epsilon)) 809 810 MiscUtil.PrintInfo("\nTotal number of solvents: %s\n" % Count) 811 812 def ProcessOptions(): 813 """Process and validate command line arguments and options.""" 814 815 MiscUtil.PrintInfo("Processing options...") 816 817 # Validate options... 818 ValidateOptions() 819 820 OptionsInfo["Infile"] = Options["--infile"] 821 OptionsInfo["SMILESInfileStatus"] = True if MiscUtil.CheckFileExt(Options["--infile"], "smi csv tsv txt") else False 822 ParamsDefaultInfoOverride = {"RemoveHydrogens": False} 823 OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters("--infileParams", Options["--infileParams"], InfileName = Options["--infile"], ParamsDefaultInfo = ParamsDefaultInfoOverride) 824 825 OptionsInfo["Outfile"] = Options["--outfile"] 826 OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters("--outfileParams", Options["--outfileParams"]) 827 828 OptionsInfo["Overwrite"] = Options["--overwrite"] 829 830 # Method, basis set, and reference wavefunction... 831 OptionsInfo["BasisSet"] = Options["--basisSet"] 832 OptionsInfo["BasisSetAuto"] = True if re.match("^auto$", Options["--basisSet"], re.I) else False 833 834 OptionsInfo["MethodName"] = Options["--methodName"] 835 OptionsInfo["MethodNameAuto"] = True if re.match("^auto$", Options["--methodName"], re.I) else False 836 837 OptionsInfo["Reference"] = Options["--reference"] 838 OptionsInfo["ReferenceAuto"] = True if re.match("^auto$", Options["--reference"], re.I) else False 839 840 # Run and options parameters... 841 OptionsInfo["Psi4OptionsParams"] = Psi4Util.ProcessPsi4OptionsParameters("--psi4OptionsParams", Options["--psi4OptionsParams"]) 842 OptionsInfo["Psi4RunParams"] = Psi4Util.ProcessPsi4RunParameters("--psi4RunParams", Options["--psi4RunParams"], InfileName = OptionsInfo["Infile"]) 843 844 # DDX solvation parameters... 845 OptionsInfo["Psi4DDXSolvationMode"] = True if re.match("^yes$", Options["--psi4DDXSolvation"], re.I) else False 846 Psi4DDXSolvationParams = Psi4Util.ProcessPsi4DDXSolvationParameters("--psi4DDXSolvationParams", Options["--psi4DDXSolvationParams"]) 847 Psi4DDXSolvationOptions = Psi4Util.SetupPsi4DDXSolvationOptions(OptionsInfo["Psi4DDXSolvationMode"], Psi4DDXSolvationParams) 848 OptionsInfo["Psi4DDXSolvationParams"] = Psi4DDXSolvationParams 849 OptionsInfo["Psi4DDXSolvationOptions"] = Psi4DDXSolvationOptions 850 851 if OptionsInfo["Psi4DDXSolvationMode"]: 852 CheckAndSetupDDX() 853 854 # Conformer generation paramaters... 855 OptionsInfo["ConfGeneration"] = Options["--confGeneration"] 856 OptionsInfo["SkipConformerGeneration"] = False if re.match("^yes$", Options["--confGeneration"], re.I) else True 857 858 if OptionsInfo["SkipConformerGeneration"]: 859 if OptionsInfo["SMILESInfileStatus"]: 860 MiscUtil.PrintError("The value, %s, specified for option \"-c --confGeneration\" is not allowed during, %s, value for option \"-i, --infile\" . The input file must be a 3D SD or MOL file. " % (Options["--confGeneration"], Options["--infile"])) 861 862 ParamsDefaultInfoOverride = {"MaxConfs": 50, "MaxIters": 250} 863 OptionsInfo["ConfGenerationParams"] = MiscUtil.ProcessOptionConformerParameters("--confParams", Options["--confParams"], ParamsDefaultInfoOverride) 864 865 # Write energy parameters... 866 OptionsInfo["EnergyOut"] = True if re.match("^yes$", Options["--energyOut"], re.I) else False 867 OptionsInfo["EnergyUnits"] = Options["--energyUnits"] 868 869 EnergyDataFieldLabel = Options["--energyDataFieldLabel"] 870 if re.match("^auto$", EnergyDataFieldLabel, re.I): 871 EnergyDataFieldLabel = "Psi4_Energy (%s)" % Options["--energyUnits"] 872 OptionsInfo["EnergyDataFieldLabel"] = EnergyDataFieldLabel 873 874 SolvationEnergyDataFieldLabel = "Psi4_DDX_Solvation_Energy (%s)" % Options["--energyUnits"] 875 OptionsInfo["SolvationEnergyDataFieldLabel"] = SolvationEnergyDataFieldLabel 876 877 OptionsInfo["MaxIters"] = int(Options["--maxIters"]) 878 879 OptionsInfo["MPMode"] = True if re.match("^yes$", Options["--mp"], re.I) else False 880 OptionsInfo["MPParams"] = MiscUtil.ProcessOptionMultiprocessingParameters("--mpParams", Options["--mpParams"]) 881 882 # Multiprocessing level... 883 MPLevelMoleculesMode = False 884 MPLevelConformersMode = False 885 MPLevel = Options["--mpLevel"] 886 if re.match("^Molecules$", MPLevel, re.I): 887 MPLevelMoleculesMode = True 888 elif re.match("^Conformers$", MPLevel, re.I): 889 if OptionsInfo["SkipConformerGeneration"]: 890 MiscUtil.PrintError("The value, %s, specified for option \"--mpLevel\" is not allowed during, %s, value of option \"--confGeneration\" . " % (MPLevel, Options["--confGeneration"])) 891 MPLevelConformersMode = True 892 else: 893 MiscUtil.PrintError("The value, %s, specified for option \"--mpLevel\" is not valid. " % MPLevel) 894 OptionsInfo["MPLevel"] = MPLevel 895 OptionsInfo["MPLevelMoleculesMode"] = MPLevelMoleculesMode 896 OptionsInfo["MPLevelConformersMode"] = MPLevelConformersMode 897 898 OptionsInfo["Precision"] = int(Options["--precision"]) 899 OptionsInfo["QuietMode"] = True if re.match("^yes$", Options["--quiet"], re.I) else False 900 901 RecenterAndReorient = Options["--recenterAndReorient"] 902 if re.match("^auto$", RecenterAndReorient, re.I): 903 RecenterAndReorient = False if OptionsInfo["SkipConformerGeneration"] else True 904 else: 905 RecenterAndReorient = True if re.match("^yes$", RecenterAndReorient, re.I) else False 906 OptionsInfo["RecenterAndReorient"] = RecenterAndReorient 907 908 Symmetrize = Options["--symmetrize"] 909 if re.match("^auto$", Symmetrize, re.I): 910 Symmetrize = True if RecenterAndReorient else False 911 else: 912 Symmetrize = True if re.match("^yes$", Symmetrize, re.I) else False 913 OptionsInfo["Symmetrize"] = Symmetrize 914 915 OptionsInfo["SymmetrizeTolerance"] = float(Options["--symmetrizeTolerance"]) 916 917 def RetrieveOptions(): 918 """Retrieve command line arguments and options.""" 919 920 # Get options... 921 global Options 922 Options = docopt(_docoptUsage_) 923 924 # Set current working directory to the specified directory... 925 WorkingDir = Options["--workingdir"] 926 if WorkingDir: 927 os.chdir(WorkingDir) 928 929 # Handle examples option... 930 if "--examples" in Options and Options["--examples"]: 931 MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_)) 932 sys.exit(0) 933 934 # Handle listing of solvent information... 935 if Options and Options["--psi4DDXListSolvents"]: 936 ProcessDDXListSolventsOption() 937 sys.exit(0) 938 939 def ValidateOptions(): 940 """Validate option values.""" 941 942 MiscUtil.ValidateOptionTextValue("-c, --confGeneration", Options["--confGeneration"], "yes no") 943 944 MiscUtil.ValidateOptionTextValue("--energyOut", Options["--energyOut"], "yes no") 945 MiscUtil.ValidateOptionTextValue("--energyUnits", Options["--energyUnits"], "Hartrees kcal/mol kJ/mol eV") 946 947 MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"]) 948 MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "sdf sd mol smi txt csv tsv") 949 950 MiscUtil.ValidateOptionIntegerValue("--maxIters", Options["--maxIters"], {">": 0}) 951 952 MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "sdf sd") 953 MiscUtil.ValidateOptionsOutputFileOverwrite("-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"]) 954 MiscUtil.ValidateOptionsDistinctFileNames("-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"]) 955 956 MiscUtil.ValidateOptionTextValue("--mp", Options["--mp"], "yes no") 957 MiscUtil.ValidateOptionTextValue("--mpLevel", Options["--mpLevel"], "Molecules Conformers") 958 959 MiscUtil.ValidateOptionIntegerValue("-p, --precision", Options["--precision"], {">": 0}) 960 961 MiscUtil.ValidateOptionTextValue("--psi4DDXSolvation", Options["--psi4DDXSolvation"], "yes no") 962 963 MiscUtil.ValidateOptionTextValue("-q, --quiet", Options["--quiet"], "yes no") 964 965 MiscUtil.ValidateOptionTextValue("--recenterAndReorient", Options["--recenterAndReorient"], "yes no auto") 966 MiscUtil.ValidateOptionTextValue("--symmetrize", Options["--symmetrize"], "yes no auto") 967 MiscUtil.ValidateOptionFloatValue("--symmetrizeTolerance", Options["--symmetrizeTolerance"], {">": 0}) 968 969 # Setup a usage string for docopt... 970 _docoptUsage_ = """ 971 Psi4PerformMinimization.py - Perform structure minimization 972 973 Usage: 974 Psi4PerformMinimization.py [--basisSet <text>] [--confGeneration <yes or no> ] [--confParams <Name,Value,...>] 975 [--energyOut <yes or no>] [--energyDataFieldLabel <text>] [--energyUnits <text>] 976 [--infileParams <Name,Value,...>] [--maxIters <number>] [--methodName <text>] 977 [--mp <yes or no>] [--mpLevel <Molecules or Conformers>] [--mpParams <Name, Value,...>] 978 [ --outfileParams <Name,Value,...> ] [--overwrite] [--precision <number> ] 979 [--psi4OptionsParams <Name,Value,...>] [--psi4RunParams <Name,Value,...>] 980 [--psi4DDXSolvation <yes or no>] [--psi4DDXSolvationParams <Name,Value,...>] 981 [--quiet <yes or no>] [--reference <text>] [--recenterAndReorient <yes or no>] 982 [--symmetrize <yes or no>] [--symmetrizeTolerance <number>] [-w <dir>] -i <infile> -o <outfile> 983 Psi4PerformMinimization.py --psi4DDXListSolvents 984 Psi4PerformMinimization.py -h | --help | -e | --examples 985 986 Description: 987 Generate 3D structures for molecules using a combination of distance geometry 988 and forcefield minimization followed by geometry optimization using a quantum 989 chemistry method. A set of initial 3D structures are generated for a molecule 990 employing distance geometry. The 3D structures in the conformation ensemble 991 are sequentially minimized using forcefield and a quantum chemistry method. 992 The conformation with lowest energy is selected to represent the final structure. 993 An option is available to skip the generation of the conformation ensemble along 994 with forcefield minimization and simply perform minimization on the initial 3D 995 structure using a quantum chemistry method. 996 997 A Psi4 XYZ format geometry string is automatically generated for each molecule 998 in input file. It contains atom symbols and 3D coordinates for each atom in a 999 molecule. In addition, the formal charge and spin multiplicity are present in the 1000 the geometry string. These values are either retrieved from molecule properties 1001 named 'FormalCharge' and 'SpinMultiplicty' or dynamically calculated for a 1002 molecule. 1003 1004 The supported input file formats are: Mol (.mol), SD (.sdf, .sd), SMILES (.smi, 1005 .csv, .tsv, .txt) 1006 1007 The supported output file formats are: SD (.sdf, .sd) 1008 1009 Options: 1010 -b, --basisSet <text> [default: auto] 1011 Basis set to use for energy minimization. Default: 6-31+G** for sulfur 1012 containing molecules; Otherwise, 6-31G** [ Ref 150 ]. The specified 1013 value must be a valid Psi4 basis set. No validation is performed. 1014 1015 The following list shows a representative sample of basis sets available 1016 in Psi4: 1017 1018 STO-3G, 6-31G, 6-31+G, 6-31++G, 6-31G*, 6-31+G*, 6-31++G*, 1019 6-31G**, 6-31+G**, 6-31++G**, 6-311G, 6-311+G, 6-311++G, 1020 6-311G*, 6-311+G*, 6-311++G*, 6-311G**, 6-311+G**, 6-311++G**, 1021 cc-pVDZ, cc-pCVDZ, aug-cc-pVDZ, cc-pVDZ-DK, cc-pCVDZ-DK, def2-SVP, 1022 def2-SVPD, def2-TZVP, def2-TZVPD, def2-TZVPP, def2-TZVPPD 1023 1024 -c, --confGeneration <yes or no> [default: yes] 1025 Generate an initial 3D conformation ensemble using distance geometry and 1026 forcefield minimization before final geometry optimization by a specified 1027 method name and basis set. Possible values: yes or no. 1028 1029 The 'No' value skips the generation of conformations employing distance 1030 geometry and forcefield minimization. The 3D structures in input file are 1031 minimized by a quantum method. It is not allowed for SMILES input file. 1032 --confParams <Name,Value,...> [default: auto] 1033 A comma delimited list of parameter name and value pairs for generating 1034 initial sets of 3D conformations for molecules. The 3D conformation ensemble 1035 is generated using distance geometry and forcefield functionality available 1036 in RDKit. The 3D structures in the conformation ensemble are subsequently 1037 minimized by a quantum chemistry method available in Psi4. 1038 1039 The supported parameter names along with their default values are shown 1040 below: 1041 1042 confMethod,ETKDGv2, 1043 forceField,MMFF, forceFieldMMFFVariant,MMFF94, 1044 enforceChirality,yes,embedRMSDCutoff,0.5,maxConfs,50, 1045 maxIters,250,randomSeed,auto 1046 1047 confMethod,ETKDGv2 [ Possible values: SDG, KDG, ETDG, 1048 ETKDG , or ETKDGv2] 1049 forceField, MMFF [ Possible values: UFF or MMFF ] 1050 forceFieldMMFFVariant,MMFF94 [ Possible values: MMFF94 or MMFF94s ] 1051 enforceChirality,yes [ Possible values: yes or no ] 1052 embedRMSDCutoff,0.5 [ Possible values: number or None] 1053 1054 confMethod: Conformation generation methodology for generating initial 3D 1055 coordinates. Possible values: Standard Distance Geometry (SDG), Experimental 1056 Torsion-angle preference with Distance Geometry (ETDG), basic Knowledge-terms 1057 with Distance Geometry (KDG) and Experimental Torsion-angle preference 1058 along with basic Knowledge-terms and Distance Geometry (ETKDG or 1059 ETKDGv2) [Ref 129, 167] . 1060 1061 forceField: Forcefield method to use for energy minimization. Possible 1062 values: Universal Force Field (UFF) [ Ref 81 ] or Merck Molecular Mechanics 1063 Force Field [ Ref 83-87 ] . 1064 1065 enforceChirality: Enforce chirality for defined chiral centers during 1066 forcefield minimization. 1067 1068 maxConfs: Maximum number of conformations to generate for each molecule 1069 during the generation of an initial 3D conformation ensemble using 1070 conformation generation methodology. The conformations are minimized 1071 using the specified forcefield and a quantum chemistry method. The lowest 1072 energy conformation is written to the output file. 1073 1074 embedRMSDCutoff: RMSD cutoff for retaining initial set conformers embedded 1075 using distance geometry and before forcefield minimization. All embedded 1076 conformers are kept for 'None' value. Otherwise, only those conformers which 1077 are different from each other by the specified RMSD cutoff, 0.5 by default, 1078 are kept. The first embedded conformer is always retained. 1079 1080 maxIters: Maximum number of iterations to perform for each conformation 1081 during forcefield minimization. 1082 1083 randomSeed: Seed for the random number generator for reproducing initial 1084 3D coordinates in a conformation ensemble. Default is to use a random seed. 1085 --energyOut <yes or no> [default: yes] 1086 Write out energy values. 1087 --energyDataFieldLabel <text> [default: auto] 1088 Energy data field label for writing energy values. Default: Psi4_Energy (<Units>). 1089 --energyUnits <text> [default: kcal/mol] 1090 Energy units. Possible values: Hartrees, kcal/mol, kJ/mol, or eV. 1091 -e, --examples 1092 Print examples. 1093 -h, --help 1094 Print this help message. 1095 -i, --infile <infile> 1096 Input file name. 1097 --infileParams <Name,Value,...> [default: auto] 1098 A comma delimited list of parameter name and value pairs for reading 1099 molecules from files. The supported parameter names for different file 1100 formats, along with their default values, are shown below: 1101 1102 SD, MOL: removeHydrogens,no,sanitize,yes,strictParsing,yes 1103 SMILES: smilesColumn,1,smilesNameColumn,2,smilesDelimiter,space, 1104 smilesTitleLine,auto,sanitize,yes 1105 1106 Possible values for smilesDelimiter: space, comma or tab. 1107 --maxIters <number> [default: 50] 1108 Maximum number of iterations to perform for each molecule or conformer 1109 during energy minimization by a quantum chemistry method. 1110 -m, --methodName <text> [default: auto] 1111 Method to use for energy minimization. Default: B3LYP [ Ref 150 ]. The 1112 specified value must be a valid Psi4 method name. No validation is 1113 performed. 1114 1115 The following list shows a representative sample of methods available 1116 in Psi4: 1117 1118 B1LYP, B2PLYP, B2PLYP-D3BJ, B2PLYP-D3MBJ, B3LYP, B3LYP-D3BJ, 1119 B3LYP-D3MBJ, CAM-B3LYP, CAM-B3LYP-D3BJ, HF, HF-D3BJ, HF3c, M05, 1120 M06, M06-2x, M06-HF, M06-L, MN12-L, MN15, MN15-D3BJ,PBE, PBE0, 1121 PBEH3c, PW6B95, PW6B95-D3BJ, WB97, WB97X, WB97X-D, WB97X-D3BJ 1122 1123 --mp <yes or no> [default: no] 1124 Use multiprocessing. 1125 1126 By default, input data is retrieved in a lazy manner via mp.Pool.imap() 1127 function employing lazy RDKit data iterable. This allows processing of 1128 arbitrary large data sets without any additional requirements memory. 1129 1130 All input data may be optionally loaded into memory by mp.Pool.map() 1131 before starting worker processes in a process pool by setting the value 1132 of 'inputDataMode' to 'InMemory' in '--mpParams' option. 1133 1134 A word to the wise: The default 'chunkSize' value of 1 during 'Lazy' input 1135 data mode may adversely impact the performance. The '--mpParams' section 1136 provides additional information to tune the value of 'chunkSize'. 1137 --mpLevel <Molecules or Conformers> [default: Molecules] 1138 Perform multiprocessing at molecules or conformers level. Possible values: 1139 Molecules or Conformers. The 'Molecules' value starts a process pool at the 1140 molecules level. All conformers of a molecule are processed in a single 1141 process. The 'Conformers' value, however, starts a process pool at the 1142 conformers level. Each conformer of a molecule is processed in an individual 1143 process in the process pool. The default Psi4 'OutputFile' is set to 'quiet' 1144 using '--psi4RunParams' for 'Conformers' level. Otherwise, it may generate 1145 a large number of Psi4 output files. 1146 --mpParams <Name,Value,...> [default: auto] 1147 A comma delimited list of parameter name and value pairs to configure 1148 multiprocessing. 1149 1150 The supported parameter names along with their default and possible 1151 values are shown below: 1152 1153 chunkSize, auto 1154 inputDataMode, Lazy [ Possible values: InMemory or Lazy ] 1155 numProcesses, auto [ Default: mp.cpu_count() ] 1156 1157 These parameters are used by the following functions to configure and 1158 control the behavior of multiprocessing: mp.Pool(), mp.Pool.map(), and 1159 mp.Pool.imap(). 1160 1161 The chunkSize determines chunks of input data passed to each worker 1162 process in a process pool by mp.Pool.map() and mp.Pool.imap() functions. 1163 The default value of chunkSize is dependent on the value of 'inputDataMode'. 1164 1165 The mp.Pool.map() function, invoked during 'InMemory' input data mode, 1166 automatically converts RDKit data iterable into a list, loads all data into 1167 memory, and calculates the default chunkSize using the following method 1168 as shown in its code: 1169 1170 chunkSize, extra = divmod(len(dataIterable), len(numProcesses) * 4) 1171 if extra: chunkSize += 1 1172 1173 For example, the default chunkSize will be 7 for a pool of 4 worker processes 1174 and 100 data items. 1175 1176 The mp.Pool.imap() function, invoked during 'Lazy' input data mode, employs 1177 'lazy' RDKit data iterable to retrieve data as needed, without loading all the 1178 data into memory. Consequently, the size of input data is not known a priori. 1179 It's not possible to estimate an optimal value for the chunkSize. The default 1180 chunkSize is set to 1. 1181 1182 The default value for the chunkSize during 'Lazy' data mode may adversely 1183 impact the performance due to the overhead associated with exchanging 1184 small chunks of data. It is generally a good idea to explicitly set chunkSize to 1185 a larger value during 'Lazy' input data mode, based on the size of your input 1186 data and number of processes in the process pool. 1187 1188 The mp.Pool.map() function waits for all worker processes to process all 1189 the data and return the results. The mp.Pool.imap() function, however, 1190 returns the the results obtained from worker processes as soon as the 1191 results become available for specified chunks of data. 1192 1193 The order of data in the results returned by both mp.Pool.map() and 1194 mp.Pool.imap() functions always corresponds to the input data. 1195 -o, --outfile <outfile> 1196 Output file name. 1197 --outfileParams <Name,Value,...> [default: auto] 1198 A comma delimited list of parameter name and value pairs for writing 1199 molecules to files. The supported parameter names for different file 1200 formats, along with their default values, are shown below: 1201 1202 SD: kekulize,yes,forceV3000,no 1203 1204 --overwrite 1205 Overwrite existing files. 1206 --precision <number> [default: 6] 1207 Floating point precision for writing energy values. 1208 --psi4OptionsParams <Name,Value,...> [default: none] 1209 A comma delimited list of Psi4 option name and value pairs for setting 1210 global and module options. The names are 'option_name' for global options 1211 and 'module_name__option_name' for options local to a module. The 1212 specified option names must be valid Psi4 names. No validation is 1213 performed. 1214 1215 The specified option name and value pairs are processed and passed to 1216 psi4.set_options() as a dictionary. The supported value types are float, 1217 integer, boolean, or string. The float value string is converted into a float. 1218 The valid values for a boolean string are yes, no, true, false, on, or off. 1219 --psi4RunParams <Name,Value,...> [default: auto] 1220 A comma delimited list of parameter name and value pairs for configuring 1221 Psi4 jobs. 1222 1223 The supported parameter names along with their default and possible 1224 values are shown below: 1225 1226 MemoryInGB, 1 1227 NumThreads, 1 1228 OutputFile, auto [ Possible values: stdout, quiet, or FileName ] 1229 ScratchDir, auto [ Possivle values: DirName] 1230 RemoveOutputFile, yes [ Possible values: yes, no, true, or false] 1231 1232 These parameters control the runtime behavior of Psi4. 1233 1234 The default file name for 'OutputFile' is <InFileRoot>_Psi4.out. The PID 1235 is appended to output file name during multiprocessing as shown: 1236 <InFileRoot>_Psi4_<PIDNum>.out. The 'stdout' value for 'OutputType' 1237 sends Psi4 output to stdout. The 'quiet' or 'devnull' value suppresses 1238 all Psi4 output. The 'OutputFile' is set to 'quiet' for 'auto' value during 1239 'Conformers' of '--mpLevel' option. 1240 1241 The default 'Yes' value of 'RemoveOutputFile' option forces the removal 1242 of any existing Psi4 before creating new files to append output from 1243 multiple Psi4 runs. 1244 1245 The option 'ScratchDir' is a directory path to the location of scratch 1246 files. The default value corresponds to Psi4 default. It may be used to 1247 override the deafult path. 1248 --psi4DDXSolvation <yes or no> [default: no] 1249 Perform minimization in solution using domain-decomposition-based 1250 continuum solvation models [ Ref 160-161 ]. The script relies on Psi4 1251 interface to the DDX module to perform these calculations. The DDX library 1252 provides a linear-scaling implementation of standard continuum solvation 1253 models using a domain- decomposition ansatz. Two solvation models are 1254 supported: COnductor-like Screening MOdel (COSMO) [ Ref 162-163] and 1255 Polarizable Continuum Model (PCM) [ Ref 164-165 ]. 1256 1257 The solvation energy is included in the value of the total energy written to 1258 the output SD file. In addition, the value of solvation energy is written to the 1259 output file under its own data field label. 1260 1261 Psi4 relies on Python module PYDDX to calculate solvation energy. It must be 1262 present in your environment. 1263 --psi4DDXSolvationParams <Name,Value,...> [default: auto] 1264 A space delimited list of parameter name and value pairs for calculating 1265 solvation energy using the DDX module. The supported parameter names, 1266 along with their default values, are shown below: 1267 1268 solvationModel PCM [ Possible values: COSMO or PCM] 1269 solvent water [ Possible values: A valid solvent name] 1270 solventEpsilon None 1271 radiiSet UFF [ Possible values: Bondi or UFF] 1272 radiiScaling auto [ Default: 1.2 for Bondi; 1.1 for UFF] 1273 1274 solvationModel: Solvation model for calculating solvation energy. 1275 The corresponding Psi4 option is DDX_MODEL. 1276 1277 solvent: Solvent to use. The corresponding Ps4 option is 1278 DDX_SOLVENT 1279 1280 solventEpsilon: Dielectric constant of the solvent. The 1281 corresponding Psi4 option is DDX_SOLVENT_EPSILON. 1282 1283 radiiSet: Radius set for cavity spheres. The corresponding Psi4 1284 option is DDX_RADII_SET. 1285 1286 radiiScaling: Scaling factor for cavity spheres. The default value 1287 depends on radiiSet: 1.2 for Bondi; 1.1 for UFF. The corresponding 1288 Psi4 option is DDX_RADII_SCALING. 1289 1290 These parameter names are automatically mapped to appropriate DDX keywords. 1291 1292 You may specify the solvent either by directly providing a dielectric constant using 1293 'solventEpsilon' parameter or a solvent name corresponding to 'solvent' parameter. 1294 The solvent name must be a valid named supported by the DDX module. For example: 1295 water, ethanol, dimethylsulfoxide, cis-1,2-dimethylcyclohexane , etc. The 'solvent' 1296 parameter is ignored for non-zero value of 'solvent' option. 1297 1298 The DDX module contains a variety of addtional options to confgure the calculation 1299 of solvation energy. For example: DDX_MAXITER, DDX_RADII_SCALING, etc. You may 1300 use '--psi4OptionsParams' to modify values for additional DDX options. 1301 --psi4DDXListSolvents 1302 List solvent names, along with dielectric values, supported by the DDX module 1303 for the calculation of solvent energy without performing any calculation. 1304 -q, --quiet <yes or no> [default: no] 1305 Use quiet mode. The warning and information messages will not be printed. 1306 -r, --reference <text> [default: auto] 1307 Reference wave function to use for energy calculation. Default: RHF or UHF. 1308 The default values are Restricted Hartree-Fock (RHF) for closed-shell molecules 1309 with all electrons paired and Unrestricted Hartree-Fock (UHF) for open-shell 1310 molecules with unpaired electrons. 1311 1312 The specified value must be a valid Psi4 reference wave function. No validation 1313 is performed. For example: ROHF, CUHF, RKS, etc. 1314 1315 The spin multiplicity determines the default value of reference wave function 1316 for input molecules. It is calculated from number of free radical electrons using 1317 Hund's rule of maximum multiplicity defined as 2S + 1 where S is the total 1318 electron spin. The total spin is 1/2 the number of free radical electrons in a 1319 molecule. The value of 'SpinMultiplicity' molecule property takes precedence 1320 over the calculated value of spin multiplicity. 1321 --recenterAndReorient <yes or no> [default: auto] 1322 Recenter and reorient a molecule during creation of a Psi4 molecule from 1323 a geometry string. Default: 'No' during 'No' value of '--confGeneration'; 1324 Otherwise, 'Yes'. 1325 1326 The 'No' values allows the minimization of a molecule in its initial 3D 1327 coordinate space in input file or conformer ensemble. 1328 --symmetrize <yes or no> [default: auto] 1329 Symmetrize molecules before energy minimization. Default: 'Yes' during 1330 'Yes' value of '--recenterAndReorient'; Otherwise, 'No'. The psi4 function, 1331 psi4mol.symmetrize( SymmetrizeTolerance), is called to symmetrize 1332 the molecule before calling psi4.optimize(). 1333 1334 The 'No' value of '--symmetrize' during 'Yes' value of '--recenterAndReorient' 1335 may cause psi4.optimize() to fail with a 'Point group changed...' error 1336 message. 1337 --symmetrizeTolerance <number> [default: 0.01] 1338 Symmetry tolerance for '--symmetrize'. 1339 -w, --workingdir <dir> 1340 Location of working directory which defaults to the current directory. 1341 1342 Examples: 1343 To generate an initial conformer ensemble of up to 50 conformations using a 1344 combination of ETKDGv2 distance geometry methodology, applying RMSD cutoff 1345 of 0.5 and MMFF forcefield minimization, followed by energy minimization 1346 using B3LYP/6-31G** and B3LYP/6-31+G** for non-sulfur and sulfur containing 1347 molecules in a SMILES file and write out a SD file containing minimum energy 1348 structure corresponding to each molecule, type: 1349 1350 % Psi4PerformMinimization.py -i Psi4Sample.smi -o Psi4SampleOut.sdf 1351 1352 To run the first example in a quiet mode and write out a SD file, type: 1353 1354 % Psi4PerformMinimization.py -q yes -i Psi4Sample.smi -o 1355 Psi4SampleOut.sdf --psi4DDXSolvation yes 1356 1357 To run the first example along with performing energy mimimization in solution 1358 using default DDX solvation parameters for water abd write oyt a SD file, type: 1359 1360 % Psi4PerformMinimization.py -i Psi4Sample.smi -o Psi4SampleOut.sdf 1361 1362 To run the first example in multiprocessing mode at molecules level on all 1363 available CPUs without loading all data into memory and write out a SD file, 1364 type: 1365 1366 % Psi4PerformMinimization.py --mp yes -i Psi4Sample.smi -o 1367 Psi4SampleOut.sdf 1368 1369 To run the first example in multiprocessing mode at conformers level on all 1370 available CPUs without loading all data into memory and write out a SD file, 1371 type: 1372 1373 % Psi4PerformMinimization.py --mp yes --mpLevel Conformers 1374 -i Psi4Sample.smi -o Psi4SampleOut.sdf 1375 1376 To run the first example in multiprocessing mode at molecules level on all 1377 available CPUs by loading all data into memory and write out a SD file, type: 1378 1379 % Psi4PerformMinimization.py --mp yes --mpParams "inputDataMode, 1380 InMemory" -i Psi4Sample.smi -o Psi4SampleOut.sdf 1381 1382 To run the first example in multiprocessing mode at molecules level on specific 1383 number of CPUs and chunk size without loading all data into memory and write 1384 out a SD file, type: 1385 1386 % Psi4PerformMinimization.py --mp yes --mpParams "inputDataMode,Lazy, 1387 numProcesses,4,chunkSize,8" -i Psi4Sample.smi -o Psi4SampleOut.sdf 1388 1389 To generate an initial conformer ensemble of up to 20 conformations using a 1390 combination of ETKDGv2 distance geometry methodology and MMFF94s forcefield 1391 minimization followed by energy minimization for a maxium of 20 iterations 1392 using B3LYP/6-31+G** molecules in a SMILES file and write out a SD file 1393 containing minimum energy structure along with energy in specific units, 1394 type: 1395 1396 % Psi4PerformMinimization.py --confGeneration yes --confParams 1397 "confMethod,ETKDGv2,forceField,MMFF, forceFieldMMFFVariant,MMFF94s, 1398 maxConfs,20,embedRMSDCutoff,0.5" --energyUnits "kJ/mol" -m B3LYP 1399 -b "6-31+G**" --maxIters 20 -i Psi4Sample.smi -o Psi4SampleOut.sdf 1400 1401 To minimize molecules in a 3D files using B3LYP/6-31G** and B3LYP/6-31+G** 1402 for non-sulfur and sulfur containing molecules for a maximum of 25 iterations 1403 without generating any conformations and write out a SD file containing 1404 minimum energy structures corresponding to each molecule, type: 1405 1406 % Psi4PerformMinimization.py --confGeneration no --maxIters 25 1407 -i Psi4Sample3D.sdf -o Psi4Sample3DOut.sdf 1408 1409 To run the first example for molecules in a CSV SMILES file, SMILES strings 1410 in column 1, name column 2, and write out a SD file, type: 1411 1412 % Psi4PerformMinimization.py --infileParams "smilesDelimiter,comma, 1413 smilesTitleLine,yes,smilesColumn,1,smilesNameColumn,2" 1414 -i Psi4Sample.csv -o Psi4SampleOut.sdf 1415 1416 Author: 1417 1418 Manish Sud(msud@san.rr.com) 1419 1420 See also: 1421 Psi4CalculateEnergy.py, Psi4CalculatePartialCharges.py, Psi4GenerateConformers.py 1422 1423 Copyright: 1424 Copyright (C) 2024 Manish Sud. All rights reserved. 1425 1426 The functionality available in this script is implemented using Psi4, an 1427 open source quantum chemistry software package, and RDKit, an open 1428 source toolkit for cheminformatics developed by Greg Landrum. 1429 1430 This file is part of MayaChemTools. 1431 1432 MayaChemTools is free software; you can redistribute it and/or modify it under 1433 the terms of the GNU Lesser General Public License as published by the Free 1434 Software Foundation; either version 3 of the License, or (at your option) any 1435 later version. 1436 1437 """ 1438 1439 if __name__ == "__main__": 1440 main()