MayaChemTools

   1 #!/usr/bin/perl -w
   2 #
   3 # $RCSfile: SDToMolFiles.pl,v $
   4 # $Date: 2008/01/30 21:45:03 $
   5 # $Revision: 1.18 $
   6 #
   7 # Author: Manish Sud <msud@san.rr.com>
   8 #
   9 # Copyright (C) 2004-2008 Manish Sud. All rights reserved.
  10 #
  11 # This file is part of MayaChemTools.
  12 #
  13 # MayaChemTools is free software; you can redistribute it and/or modify it under
  14 # the terms of the GNU Lesser General Public License as published by the Free
  15 # Software Foundation; either version 3 of the License, or (at your option) any
  16 # later version.
  17 #
  18 # MayaChemTools is distributed in the hope that it will be useful, but without
  19 # any warranty; without even the implied warranty of merchantability or fitness
  20 # for a particular purpose.  See the GNU Lesser General Public License for more
  21 # details.
  22 #
  23 # You should have received a copy of the GNU Lesser General Public License
  24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  26 # Boston, MA, 02111-1307, USA.
  27 #
  28 
  29 use 5.006;
  30 use strict;
  31 use FindBin; use lib "$FindBin::Bin/../lib";
  32 use Getopt::Long;
  33 use File::Basename;
  34 use Text::ParseWords;
  35 use Benchmark;
  36 use SDFileUtil;
  37 use FileUtil;
  38 
  39 my($ScriptName, %Options, $StartTime, $EndTime, $TotalTime);
  40 my($SDFile, @SDFilesList, $MOLFile, $MOLFileName, $MolEndId, $CmpdString, $CmpdCount, $Index, $FileDir, $FileName, $FileExt);
  41 
  42 # Autoflush STDOUT
  43 $| = 1;
  44 
  45 # Starting message...
  46 $ScriptName = basename $0;
  47 print "\n$ScriptName:Starting...\n\n";
  48 $StartTime = new Benchmark;
  49 
  50 # Get the options and setup script...
  51 SetupScriptUsage();
  52 if ($Options{help} || @ARGV < 1) {
  53   die GetUsageFromPod("$FindBin::Bin/$ScriptName");
  54 }
  55 
  56 @SDFilesList = ExpandFileNames(\@ARGV, "sdf sd");
  57 
  58 if (@SDFilesList > 1) {
  59   print "Processing SD files...\n";
  60 }
  61 FILELIST: for $Index (0 .. $#SDFilesList) {
  62   $SDFile = $SDFilesList[$Index];
  63   if (@SDFilesList > 1) {
  64     print "\nProcessing file $SDFile...\n";
  65   }
  66   else {
  67     print "Processing file $SDFile...\n"
  68   }
  69   if (!(-e $SDFile)) {
  70     warn "Warning: Ignoring file $SDFile: It doesn't exist\n";
  71     next FILELIST;
  72   }
  73   if (!CheckFileType($SDFile, "sd sdf")) {
  74     warn "Warning: Ignoring file $SDFile: It's not a SD file\n";
  75     next FILELIST;
  76   }
  77   if (!open SDFILE, "$SDFile") {
  78     warn "Warning: Ignoring file $SDFile: Couldn't open it: $! \n";
  79     next FILELIST;
  80   }
  81   $FileDir = ""; $FileName = ""; $FileExt = "";
  82   ($FileDir, $FileName, $FileExt) = ParseFileName($SDFile);
  83   $MOLFileName = $FileName;
  84   if ($Options{root} && (@SDFilesList == 1)) {
  85     my ($RootFileDir, $RootFileName, $RootFileExt) = ParseFileName($Options{root});
  86     if ($RootFileName && $RootFileExt) {
  87       $MOLFileName = $RootFileName;
  88     }
  89     else {
  90       $MOLFileName = $Options{root};
  91     }
  92   }
  93   $CmpdCount = 0;
  94   $MolEndId = "M  END";
  95   CMPDSTRING: while ($CmpdString = ReadCmpdString(\*SDFILE)) {
  96     $CmpdCount++;
  97     $MOLFile = $MOLFileName . "Cmpd" . "$CmpdCount" . ".mol";
  98     if (!$Options{overwrite}) {
  99       if (-e $MOLFile) {
 100 	warn "Warning: Ignoring compound number, $CmpdCount, in $SDFile: New MOL file, $MOLFile, already exists\n";
 101 	next CMPDSTRING;
 102       }
 103     }
 104     if (!($CmpdString =~ /$MolEndId/)) {
 105       warn "Warning: Ignoring compound number, $CmpdCount, in $SDFile: Invalid compound data\n";
 106       next CMPDSTRING;
 107     }
 108     print "Generating $MOLFile file\n";
 109     open MOLFILE, ">$MOLFile" or die "Error: Can't open $MOLFile: $! \n";
 110     ($CmpdString) = split "$MolEndId", $CmpdString;
 111     print MOLFILE "$CmpdString";
 112     print MOLFILE "$MolEndId\n";
 113     close MOLFILE;
 114   }
 115   close SDFILE;
 116 }
 117 print "$ScriptName:Done...\n\n";
 118 
 119 $EndTime = new Benchmark;
 120 $TotalTime = timediff ($EndTime, $StartTime);
 121 print "Total time: ", timestr($TotalTime), "\n";
 122 
 123 ###############################################################################
 124 
 125 # Setup script usage  and retrieve command line arguments specified using various options...
 126 sub SetupScriptUsage {
 127 
 128   # Retrieve all the options...
 129   %Options = ();
 130   if (!GetOptions(\%Options, "help|h", "overwrite|o", "root|r=s", "workingdir|w=s")) {
 131     die "\nTo get a list of valid options and their values, use \"$ScriptName -h\" or\n\"perl -S $ScriptName -h\" command and try again...\n";
 132   }
 133   if ($Options{workingdir}) {
 134     if (! -d $Options{workingdir}) {
 135       die "Error: The value specified, $Options{workingdir}, for option \"-w --workingdir\" is not a directory name.\n";
 136     }
 137     chdir $Options{workingdir} or die "Error: Couldn't chdir $Options{workingdir}: $! \n";
 138   }
 139 }
 140