MayaChemTools

   1 package SDFileIO;
   2 #
   3 # $RCSfile: SDFileIO.pm,v $
   4 # $Date: 2008/04/22 02:54:50 $
   5 # $Revision: 1.15 $
   6 #
   7 # Author: Manish Sud <msud@san.rr.com>
   8 #
   9 # Copyright (C) 2004-2008 Manish Sud. All rights reserved.
  10 #
  11 # This file is part of MayaChemTools.
  12 #
  13 # MayaChemTools is free software; you can redistribute it and/or modify it under
  14 # the terms of the GNU Lesser General Public License as published by the Free
  15 # Software Foundation; either version 3 of the License, or (at your option) any
  16 # later version.
  17 #
  18 # MayaChemTools is distributed in the hope that it will be useful, but without
  19 # any warranty; without even the implied warranty of merchantability or fitness
  20 # for a particular purpose.  See the GNU Lesser General Public License for more
  21 # details.
  22 #
  23 # You should have received a copy of the GNU Lesser General Public License
  24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  26 # Boston, MA, 02111-1307, USA.
  27 #
  28 use 5.006;
  29 use strict;
  30 use Carp;
  31 use Exporter;
  32 use Scalar::Util ();
  33 use TextUtil ();
  34 use FileUtil ();
  35 use SDFileUtil ();
  36 use FileIO::FileIO;
  37 use FileIO::MDLMolFileIO;
  38 use Molecule;
  39 
  # Package level variables used for versioning, inheritance and Exporter setup...
  our($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);

  $VERSION = '1.00';

  # Inherit from FileIO and Exporter...
  @ISA = ('FileIO', 'Exporter');

  # No default exports; IsSDFile is available on request or through the 'all' tag...
  @EXPORT = ();
  @EXPORT_OK = ('IsSDFile');
  %EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]);

  # Setup class variables: $ClassName is assigned by _InitializeClass...
  my $ClassName;
  _InitializeClass();
  52 
  # Class constructor...
  #
  # Parameters: class name (or an existing object) followed by property name and
  # value pairs. Returns the new object after default and caller supplied
  # properties have been applied.
  #
  sub new {
    my($Class, %NamesAndValues) = @_;

    # Obtain the object from the parent class constructor...
    my $This = $Class->SUPER::new();

    # Bless into the class of the invocant, whether it is an object or a class name...
    my $TargetClass = ref($Class) || $Class;
    bless $This, $TargetClass;

    # Local defaults go in first; caller specified names and values follow...
    $This->_InitializeSDFileIO();
    $This->_InitializeSDFileIOProperties(%NamesAndValues);

    return $This;
  }
  66 
  # Set up default values for object data local to this class and return the object...
  #
  sub _InitializeSDFileIO {
    my $This = shift;

    # MDL data fields are written out in their initial order unless this is set to
    # 'Yes', in which case they are written out alphabetically...
    $This->{SortMDLDataFieldsDuringOutput} = 'No';

    return $This;
  }
  77 
  # Initialize class: record the package name for use in messages and type checks...
  sub _InitializeClass {
    $ClassName = __PACKAGE__;

    return $ClassName;
  }
  84 
  # Apply caller supplied property names and values to the object and make sure a
  # SD file name has been specified...
  #
  # Each name is turned into a Set<Name> method call on the object. The original
  # notes state that these methods are implemented on-demand using ObjectProperty class.
  #
  # Croaks when no Name property is present or when the name doesn't pass IsSDFile.
  #
  sub _InitializeSDFileIOProperties {
    my($This, %NamesAndValues) = @_;

    for my $PropertyName (keys %NamesAndValues) {
      my $PropertyValue = $NamesAndValues{$PropertyName};
      my $SetterName = "Set${PropertyName}";
      $This->$SetterName($PropertyValue);
    }

    # A file name is mandatory...
    exists $NamesAndValues{Name}
      or croak "Error: ${ClassName}->New: Object can't be instantiated without specifying file name...";

    # Make sure it's a SD file...
    my $FileName = $NamesAndValues{Name};
    $This->IsSDFile($FileName)
      or croak "Error: ${ClassName}->New: Object can't be instantiated: File, $FileName, doesn't appear to be SDF format...";

    return $This;
  }
 110 
 # Is it a SD file?
 #
 # May be invoked as an object method, $Object->IsSDFile($FileName), or as a package
 # function, IsSDFile($FileName). Returns the status reported by
 # FileUtil::CheckFileType for the extensions "sd" and "sdf".
 #
 sub IsSDFile ($;$) {
   my($FirstParameter, $SecondParameter) = @_;

   # For an object method call, the file name is the second parameter...
   my $CalledOnObject = ((@_ == 2) && _IsSDFileIO($FirstParameter));
   my $FileName = $CalledOnObject ? $SecondParameter : $FirstParameter;

   # Check file extension...
   my $Status = FileUtil::CheckFileType($FileName, "sd sdf");

   return $Status;
 }
 128 
 # Read molecule from file and return molecule object...
 #
 # Retrieves the next compound string using the object's file handle and hands it to
 # ParseMoleculeString. The return value is whatever ParseMoleculeString returns.
 #
 sub ReadMolecule {
   my($This) = @_;

   my $FileHandle = $This->GetFileHandle();

   # Capture the compound string in a scalar before passing it along. Calling
   # ReadCmpdString directly in the argument list evaluates it in list context; an
   # empty list return would then leave ParseMoleculeString with the object as its
   # only argument, which it would treat as the molecule string.
   my $MoleculeString = SDFileUtil::ReadCmpdString($FileHandle);

   return $This->ParseMoleculeString($MoleculeString);
 }
 137 
 # Write compound data along with any data field label and values using Molecule object...
 #
 # Parameters: a molecule object. Returns the SDFileIO object in all cases.
 #
 # Nothing is written and a warning is issued when the parameter is undefined, is
 # not an object at all, or reports false from IsMolecule.
 #
 sub WriteMolecule {
   my($This, $Molecule) = @_;

   # Check for an object before calling IsMolecule: invoking a method on a plain
   # string or an unblessed reference would die instead of producing the warning...
   if (!(Scalar::Util::blessed($Molecule) && $Molecule->IsMolecule())) {
     carp "Warning: ${ClassName}->WriteMolecule: No data written: Molecule object is not specified...";
     return $This;
   }

   my $FileHandle = $This->GetFileHandle();

   # The generated string ends with the $$$$ delimiter; terminate that line...
   print {$FileHandle} $This->GenerateMoleculeString($Molecule) . "\n";

   return $This;
 }
 153 
 # Retrieve molecule string: return the next compound string read by
 # SDFileUtil::ReadCmpdString from the object's file handle, without parsing it...
 #
 sub ReadMoleculeString {
   my $This = shift;

   my $FileHandle = $This->GetFileHandle();

   return SDFileUtil::ReadCmpdString($FileHandle);
 }
 162 
 # Parse molecule string and return molecule object. ParseMoleculeString supports two
 # invocation methods: object method, $Object->ParseMoleculeString($MoleculeString), or
 # package function, ParseMoleculeString($MoleculeString).
 #
 # Returns undef for an empty molecule string or when no molecule object could be
 # generated from it; otherwise returns the molecule object with SD data field
 # labels and label/value pairs attached.
 #
 sub ParseMoleculeString {
   my($FirstParameter, $SecondParameter) = @_;

   # For an object method call, the molecule string is the second parameter...
   my $MoleculeString = ((@_ == 2) && _IsSDFileIO($FirstParameter)) ? $SecondParameter : $FirstParameter;
   if (!$MoleculeString) {
     return undef;
   }

   # Parse molecule data...
   my $Molecule = MDLMolFileIO::ParseMoleculeString($MoleculeString);

   # Without a molecule object there is nothing to attach data fields to; return
   # undef instead of failing on a method call against an undefined value...
   if (!defined($Molecule)) {
     return undef;
   }

   # Process data label/value pairs...
   my @MoleculeLines = split /\n/, $MoleculeString;
   my @DataLabels = SDFileUtil::GetCmpdDataHeaderLabels(\@MoleculeLines);
   my %DataLabelsAndValues = SDFileUtil::GetCmpdDataHeaderLabelsAndValues(\@MoleculeLines);

   # Store reference to data labels to keep track of their initial order in SD file...
   $Molecule->SetMDLDataFieldLabels(\@DataLabels);

   # Store reference to SD data label/value pairs hash as a generic property of molecule...
   $Molecule->SetMDLDataFieldLabelAndValues(\%DataLabelsAndValues);

   return $Molecule;
 }
 200 
 # Generate molecule string using molecule object. GenerateMoleculeString supports two
 # invocation methods: object method, $Object->GenerateMoleculeString($Molecule), or
 # package function, GenerateMoleculeString($Molecule).
 #
 # Returns undef for an undefined molecule. Otherwise returns the CTAB text, a
 # newline, any data field lines, a newline and the $$$$ delimiter; no newline
 # follows the delimiter.
 #
 sub GenerateMoleculeString {
   my($FirstParameter, $SecondParameter) = @_;
   my($This, $Molecule);

   if ((@_ == 2) && (_IsSDFileIO($FirstParameter))) {
     ($This, $Molecule) = ($FirstParameter, $SecondParameter);
   }
   else {
     # Package function call: no object is available...
     $Molecule = $FirstParameter;
   }
   if (!defined($Molecule)) {
     return undef;
   }

   # Generate CTAB data...
   my $CmpdString = MDLMolFileIO::GenerateMoleculeString($Molecule);

   # Generate any data field labels and values...
   my $DataFieldLabelsAndValuesString = '';
   if ($Molecule->HasProperty('MDLDataFieldLabels')) {
     # Sorting is only requested through an object's SortMDLDataFieldsDuringOutput
     # setting. Without an object or a defined setting, keep the initial order
     # instead of dereferencing an undefined value and matching against it...
     my $SortDataFields = 0;
     if (defined($This) && defined($This->{SortMDLDataFieldsDuringOutput})) {
       $SortDataFields = ($This->{SortMDLDataFieldsDuringOutput} =~ /^Yes$/i) ? 1 : 0;
     }

     my $DataFieldLabelsRef = $Molecule->GetMDLDataFieldLabels();
     my $DataFieldLabelAndValuesRef = $Molecule->GetMDLDataFieldLabelAndValues();
     $DataFieldLabelsAndValuesString = join "\n", SDFileUtil::GenerateCmpdDataHeaderLabelsAndValuesLines($DataFieldLabelsRef, $DataFieldLabelAndValuesRef, $SortDataFields);
   }

   return "${CmpdString}\n${DataFieldLabelsAndValuesString}\n\$\$\$\$";
 }
 236 
 237 
 # Is it a SDFileIO object? Returns 1 for a blessed reference whose class is, or
 # derives from, this class and 0 for anything else...
 sub _IsSDFileIO {
   my $Object = shift;

   # Anything that isn't a blessed reference can't be a SDFileIO object...
   return 0 unless Scalar::Util::blessed($Object);

   return $Object->isa($ClassName) ? 1 : 0;
 }
 244