#!/bin/env python
#
# File: RDKitConvertFileFormat.py
# Author: Manish Sud <msud@san.rr.com>
#
# Copyright (C) 2026 Manish Sud. All rights reserved.
#
# The functionality available in this script is implemented using RDKit, an
# open source toolkit for cheminformatics developed by Greg Landrum.
#
# This file is part of MayaChemTools.
#
# MayaChemTools is free software; you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation; either version 3 of the License, or (at your option) any
# later version.
#
# MayaChemTools is distributed in the hope that it will be useful, but without
# any warranty; without even the implied warranty of merchantability or fitness
# for a particular purpose. See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
# write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
# Boston, MA, 02111-1307, USA.
#

from __future__ import print_function

import os
import sys
import time

# RDKit imports...
try:
    from rdkit import rdBase
except ImportError as ErrMsg:
    sys.stderr.write("\nFailed to import RDKit module/package: %s\n" % ErrMsg)
    sys.stderr.write("Check/update your RDKit environment and try again.\n\n")
    sys.exit(1)

# MayaChemTools imports...
#
# The library directory is located relative to the path used to invoke the
# script (sys.argv[0]) and is placed first on the module search path.
sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python"))
try:
    from docopt import docopt
    import MiscUtil
    import RDKitUtil
except ImportError as ErrMsg:
    sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg)
    sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n")
    sys.exit(1)

ScriptName = os.path.basename(sys.argv[0])

# Raw command line options as returned by docopt; set by RetrieveOptions().
Options = {}

# Processed option values used by the rest of the script; filled in by
# ProcessOptions().
OptionsInfo = {}


def main():
    """Start execution of the script.

    Prints a start-up banner, retrieves and processes the command line
    options, performs the file format conversion, and finally reports the
    total elapsed time.
    """

    MiscUtil.PrintInfo(
        "\n%s (RDKit v%s; MayaChemTools v%s; %s): Starting...\n"
        % (ScriptName, rdBase.rdkitVersion, MiscUtil.GetMayaChemToolsVersion(), time.asctime())
    )

    # Captured before any work is done so the final report covers the whole run.
    (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime()

    # Retrieve command line arguments and options...
    RetrieveOptions()

    # Process and validate command line arguments and options...
    ProcessOptions()

    # Perform actions required by the script...
    ConvertFileFormat()

    MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName)
    MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime))


def ConvertFileFormat():
    """Convert between file formats.

    Reads molecules from OptionsInfo["Infile"], writes them to
    OptionsInfo["Outfile"], and prints the total, processed, and ignored
    molecule counts. The reader and writer parameters come from
    OptionsInfo["InfileParams"] and OptionsInfo["OutfileParams"], so
    ProcessOptions() must have been called first.
    """

    Infile = OptionsInfo["Infile"]
    Outfile = OptionsInfo["Outfile"]

    # Read molecules...
    MiscUtil.PrintInfo("\nReading file %s..." % Infile)
    Mols = RDKitUtil.ReadMolecules(Infile, **OptionsInfo["InfileParams"])

    # Write molecules...
    MiscUtil.PrintInfo("\nGenerating file %s..." % Outfile)
    MolCount, ProcessedMolCount = RDKitUtil.WriteMolecules(Outfile, Mols, **OptionsInfo["OutfileParams"])

    MiscUtil.PrintInfo("Total number of molecules: %d" % MolCount)
    MiscUtil.PrintInfo("Number of molecules processed: %d" % ProcessedMolCount)
    MiscUtil.PrintInfo("Number of molecules ignored: %d" % (MolCount - ProcessedMolCount))


def ProcessOptions():
    """Process and validate command line arguments and options.

    Validates the raw values in the module-level Options and then fills the
    module-level OptionsInfo with the keys "Infile", "InfileParams",
    "Outfile", "OutfileParams", and "Overwrite".
    """

    MiscUtil.PrintInfo("Processing options...")

    # Validate options...
    ValidateOptions()

    # Process and setup options for RDKit functions...
    OptionsInfo["Infile"] = Options["--infile"]
    OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters(
        "--infileParams", Options["--infileParams"], Options["--infile"]
    )

    # The input file name is passed along with the output file name when
    # processing the output parameters.
    OptionsInfo["Outfile"] = Options["--outfile"]
    OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters(
        "--outfileParams", Options["--outfileParams"], Options["--infile"], Options["--outfile"]
    )

    OptionsInfo["Overwrite"] = Options["--overwrite"]


def RetrieveOptions():
    """Retrieve command line arguments and options.

    Parses the command line with docopt into the module-level Options,
    changes the current working directory when "--workingdir" is specified,
    and, when "--examples" is specified, prints the examples text and exits
    with status 0.
    """

    # Get options...
    global Options
    Options = docopt(_docoptUsage_)

    # Set current working directory to the specified directory...
    WorkingDir = Options["--workingdir"]
    if WorkingDir:
        os.chdir(WorkingDir)

    # Handle examples option...
    if "--examples" in Options and Options["--examples"]:
        MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_))
        sys.exit(0)


def ValidateOptions():
    """Validate option values.

    Checks the input file path and extension, the output file extension, the
    output file overwrite setting, and that the input and output file names
    are distinct. How a failed check is reported is decided by the MiscUtil
    validation functions (presumably an error message followed by exit --
    confirm in MiscUtil).
    """

    MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"])
    MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "sdf sd mol smi txt csv tsv mol2 pdb")

    MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "sdf sd mol smi pdb")
    MiscUtil.ValidateOptionsOutputFileOverwrite(
        "-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"]
    )
    MiscUtil.ValidateOptionsDistinctFileNames(
        "-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"]
    )


# Setup a usage string for docopt...
#
# NOTE: the layout of this text is significant to docopt:
#   - an option and its "[default: ...]" annotation must be separated by at
#     least two spaces, otherwise the default value is not picked up;
#   - every line inside the "Options:" section, including the visually blank
#     ones, must start with whitespace, because a completely empty line ends
#     the section and the options listed after it are no longer recognized.
#     Do not strip the trailing whitespace from those lines.
_docoptUsage_ = """
RDKitConvertFileFormat.py - Convert between molecular file formats

Usage:
    RDKitConvertFileFormat.py [--infileParams <Name,Value,...>]
                              [ --outfileParams <Name,Value,...> ] [--overwrite]
                              [-w <dir>] -i <infile> -o <outfile>
    RDKitConvertFileFormat.py -h | --help | -e | --examples

Description:
    Convert between molecular file formats.

    The supported input file formats are: Mol (.mol), SD (.sdf, .sd), SMILES (.smi,
    .txt, .csv, .tsv), MOL2 (.mol2), PDB (.pdb)

    The supported output file formats are: SD (.sdf, .sd), SMILES (.smi), PDB (.pdb)

Options:
    -e, --examples
        Print examples.
    -h, --help
        Print this help message.
    -i, --infile <infile>
        Input file name.
    --infileParams <Name,Value,...>  [default: auto]
        A comma delimited list of parameter name and value pairs for reading
        molecules from files. The supported parameter names for different file
        formats, along with their default values, are shown below:
            
            SD, MOL: removeHydrogens,yes,sanitize,yes,strictParsing,yes
            MOL2: removeHydrogens,yes,sanitize,yes
            SMILES: smilesColumn,1,smilesNameColumn,2,smilesDelimiter,space,
                smilesTitleLine,auto,sanitize,yes
            PDB: removeHydrogens,yes,sanitize,yes
            
        Possible values for smilesDelimiter: space, comma or tab.
    -o, --outfile <outfile>
        Output file name.
    --outfileParams <Name,Value,...>  [default: auto]
        A comma delimited list of parameter name and value pairs for writing
        molecules to files. The supported parameter names for different file
        formats, along with their default values, are shown below:
            
            SD: compute2DCoords,auto,kekulize,yes,forceV3000,no
            SMILES: smilesKekulize,no,smilesDelimiter,space, smilesIsomeric,yes,
                smilesTitleLine,yes,smilesMolName,yes,smilesMolProps,no
            
        Default value for compute2DCoords: yes for SMILES input file; no for all other
        file types.
    --overwrite
        Overwrite existing files.
    -w, --workingdir <dir>
        Location of working directory which defaults to the current directory.

Examples:
    To convert a SD file into a isomeric SMILES file, type:

        % RDKitConvertFileFormat.py -i Sample.sdf -o SampleOut.smi

    To convert a SD file into a non isomeric SMILES file, type

        % RDKitConvertFileFormat.py --outfileParams "smilesIsomeric,no"
          -i Sample.sdf -o SampleOut.smi

    To convert a SMILES file into a SD file along with calculation of 2D
    coordinates, type:

        % RDKitConvertFileFormat.py -i Sample.smi -o SampleOut.sdf

    To convert a MDL MOL file into a PDB file, type:

        % RDKitConvertFileFormat.py -i Sample.mol -o SampleOut.pdb

    To convert a CSV SMILES file with column headers, SMILES strings
    in column 1, and name in column 2 into a SD file containing 2D coordinates, type:

        % RDKitConvertFileFormat.py --infileParams "smilesDelimiter,comma,
          smilesTitleLine,yes,smilesColumn,1,smilesNameColumn,2" -i Sample.csv
          -o SampleOut.sdf

Author:
    Manish Sud(msud@san.rr.com)

See also:
    RDKitDrawMolecules.py, RDKitRemoveDuplicateMolecules.py, RDKitSearchFunctionalGroups.py,
    RDKitSearchSMARTS.py

Copyright:
    Copyright (C) 2026 Manish Sud. All rights reserved.

    The functionality available in this script is implemented using RDKit, an
    open source toolkit for cheminformatics developed by Greg Landrum.

    This file is part of MayaChemTools.

    MayaChemTools is free software; you can redistribute it and/or modify it under
    the terms of the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your option) any
    later version.

"""

if __name__ == "__main__":
    main()