package SDFileIO;
#
# $RCSfile: SDFileIO.pm,v $
# $Date: 2008/04/22 02:54:50 $
# $Revision: 1.15 $
#
# Author: Manish Sud <msud@san.rr.com>
#
# Copyright (C) 2004-2008 Manish Sud. All rights reserved.
#
# This file is part of MayaChemTools.
#
# MayaChemTools is free software; you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation; either version 3 of the License, or (at your option) any
# later version.
#
# MayaChemTools is distributed in the hope that it will be useful, but without
# any warranty; without even the implied warranty of merchantability of fitness
# for a particular purpose. See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
# write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
# Boston, MA, 02111-1307, USA.
#
use 5.006;
use strict;
use Carp;
use Exporter;
use Scalar::Util ();
use TextUtil ();
use FileUtil ();
use SDFileUtil ();
use FileIO::FileIO;
use FileIO::MDLMolFileIO;
use Molecule;

use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);

# Inheritance and export tables: nothing is exported by default; IsSDFile is
# available on request or through the ':all' tag...
$VERSION = '1.00';
@ISA = qw(FileIO Exporter);
@EXPORT = qw();
@EXPORT_OK = qw(IsSDFile);

%EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]);

# File-scoped class name; it is filled in by _InitializeClass at load time and
# used in error messages and in the _IsSDFileIO type check...
my($ClassName);
_InitializeClass();

# Class constructor: takes a list of property name and value pairs, obtains the
# underlying object from the parent class constructor, and hands the pairs over
# to _InitializeSDFileIOProperties...
sub new {
  my $Class = shift;
  my %NamesAndValues = @_;

  # The parent constructor supplies the object; bless it into the invoking
  # class. ref($Class) handles invocation on an existing object...
  my $This = bless $Class->SUPER::new(), (ref($Class) || $Class);

  $This->_InitializeSDFileIO();
  $This->_InitializeSDFileIOProperties(%NamesAndValues);

  return $This;
}

# Initialize any local object data...
#
# Sets the object's default for SortMDLDataFieldsDuringOutput and returns the
# object.
#
sub _InitializeSDFileIO {
  my($This) = @_;

  # Sorting of MDL data fields during output: Keep the initial order or write 'em out alphabetically...
  $This->{SortMDLDataFieldsDuringOutput} = 'No';

  return $This;
}

# Initialize class: record the package name used in error messages and in the
# _IsSDFileIO type check...
sub _InitializeClass {
  #Class name...
  $ClassName = __PACKAGE__;
}

# Initialize object values from the name and value pairs given to the constructor.
#
# Parameters: $This - object being initialized; %NamesAndValues - property names
# and values, which must include 'Name' (the file name).
# Returns: $This.
# Croaks when 'Name' is missing or when IsSDFile rejects the file name.
#
sub _InitializeSDFileIOProperties {
  my($This, %NamesAndValues) = @_;

  # Validate the required file name before touching the object, so that a bad
  # call fails without any setter having run...
  if (!exists $NamesAndValues{Name}) {
    croak "Error: ${ClassName}->New: Object can't be instantiated without specifying file name...";
  }

  # Make sure it's a SD file...
  my $Name = $NamesAndValues{Name};
  if (!$This->IsSDFile($Name)) {
    croak "Error: ${ClassName}->New: Object can't be instantiated: File, $Name, doesn't appear to be SDF format...";
  }

  # All other property names and values along with all Set/Get<PropertyName> methods
  # are implemented on-demand using ObjectProperty class.
  #
  # Setters are applied in sorted key order, so the sequence of calls is
  # reproducible from run to run instead of following hash iteration order...
  for my $PropertyName (sort keys %NamesAndValues) {
    my $MethodName = "Set${PropertyName}";
    $This->$MethodName($NamesAndValues{$PropertyName});
  }

  return $This;
}

# Is it a SD file?
#
# Supports two invocation forms: as a method, $Object->IsSDFile($FileName), or as
# a package function, IsSDFile($FileName). Returns the status reported by
# FileUtil::CheckFileType for the "sd sdf" type list.
#
# NOTE: the ($;$) prototype is kept for interface compatibility; Perl ignores
# prototypes on method calls.
#
sub IsSDFile ($;$) {
  my($FirstParameter, $SecondParameter) = @_;

  # With two arguments and a SDFileIO object up front, the file name is the
  # second argument; otherwise the first argument is taken as the file name...
  my $FileName = ((@_ == 2) && _IsSDFileIO($FirstParameter)) ? $SecondParameter : $FirstParameter;

  # Check file extension...
  my $Status = FileUtil::CheckFileType($FileName, "sd sdf");

  return $Status;
}

# Read molecule from file and return molecule object...
#
# Fetches the next compound string through SDFileUtil::ReadCmpdString using the
# object's file handle and returns whatever ParseMoleculeString makes of it.
#
sub ReadMolecule {
  my($This) = @_;

  my $FileHandle = $This->GetFileHandle();

  # Capture the compound string in a scalar first: ParseMoleculeString decides
  # between its method and function forms by counting arguments, so exactly
  # one string argument has to follow the object...
  my $MoleculeString = SDFileUtil::ReadCmpdString($FileHandle);

  return $This->ParseMoleculeString($MoleculeString);
}

# Write compound data along with any data field label and values using Molecule object...
#
# Parameters: $This - SDFileIO object supplying the file handle; $Molecule -
# molecule object to write.
# Returns: $This. When $Molecule is not an object answering true to IsMolecule,
# a warning is issued and nothing is written.
#
sub WriteMolecule {
  my($This, $Molecule) = @_;

  # Require a blessed reference before calling IsMolecule, so that any
  # non-object argument ends in the warning below rather than a method-call
  # failure...
  if (!(Scalar::Util::blessed($Molecule) && $Molecule->IsMolecule())) {
    carp "Warning: ${ClassName}->WriteMolecule: No data written: Molecule object is not specified...";
    return $This;
  }

  my $FileHandle = $This->GetFileHandle();

  # Braces mark $FileHandle unambiguously as the output handle...
  print {$FileHandle} $This->GenerateMoleculeString($Molecule) . "\n";

  return $This;
}

# Retrieve molecule string...
#
# Returns the result of SDFileUtil::ReadCmpdString for the object's file handle,
# without parsing it.
#
sub ReadMoleculeString {
  my($This) = @_;

  my $FileHandle = $This->GetFileHandle();

  return SDFileUtil::ReadCmpdString($FileHandle);
}

# Parse molecule string and return molecule object. ParseMoleculeString supports two invocation methods: class
# method or a package function.
#
# Returns undef when the molecule string is empty or when
# MDLMolFileIO::ParseMoleculeString yields no molecule. 'return undef' (rather
# than a bare return) is kept so that list-context callers keep receiving a
# single undef value.
#
sub ParseMoleculeString {
  my($FirstParameter, $SecondParameter) = @_;

  # Method form: (object, string); function form: (string)...
  my $MoleculeString = ((@_ == 2) && _IsSDFileIO($FirstParameter)) ? $SecondParameter : $FirstParameter;

  if (!$MoleculeString) {
    return undef;
  }

  # Parse molecule data...
  my $Molecule = MDLMolFileIO::ParseMoleculeString($MoleculeString);

  # Without a molecule there is nothing to attach data fields to; the failure
  # conditions of MDLMolFileIO::ParseMoleculeString are not visible here, so
  # this is a defensive check...
  if (!defined($Molecule)) {
    return undef;
  }

  # Process data label/value pairs...
  my @MoleculeLines = split /\n/, $MoleculeString;
  my @DataLabels = SDFileUtil::GetCmpdDataHeaderLabels(\@MoleculeLines);
  my %DataLabelsAndValues = SDFileUtil::GetCmpdDataHeaderLabelsAndValues(\@MoleculeLines);

  # Store reference to data labels to keep track of their initial order in SD file...
  $Molecule->SetMDLDataFieldLabels(\@DataLabels);

  # Store reference to SD data label/value pairs hash as a generic property of molecule...
  $Molecule->SetMDLDataFieldLabelAndValues(\%DataLabelsAndValues);

  return $Molecule;
}

# Generate molecule string using molecule object...
#
# Supports two invocation forms: $Object->GenerateMoleculeString($Molecule) or
# GenerateMoleculeString($Molecule). Returns the CTAB text produced by
# MDLMolFileIO::GenerateMoleculeString, followed by any data field lines and the
# '$$$$' record terminator; returns undef when no molecule is given.
#
sub GenerateMoleculeString {
  my($FirstParameter, $SecondParameter) = @_;
  my($This, $Molecule);

  if ((@_ == 2) && (_IsSDFileIO($FirstParameter))) {
    ($This, $Molecule) = ($FirstParameter, $SecondParameter);
  }
  else {
    ($This, $Molecule) = (undef, $FirstParameter);
  }

  if (!defined($Molecule)) {
    return undef;
  }

  # Generate CTAB data...
  my $CmpdString = MDLMolFileIO::GenerateMoleculeString($Molecule);

  # Generate any data field labels and values...
  my $DataFieldLabelsAndValuesString = '';
  if ($Molecule->HasProperty('MDLDataFieldLabels')) {
    # In the package-function form there is no object to consult, so the sort
    # option is looked up only when $This is defined; sorting is off otherwise...
    my $SortOption = defined($This) ? $This->{SortMDLDataFieldsDuringOutput} : undef;
    my $SortDataFields = (defined($SortOption) && $SortOption =~ /^Yes$/i) ? 1 : 0;

    my $DataFieldLabelsRef = $Molecule->GetMDLDataFieldLabels();
    my $DataFieldLabelAndValuesRef = $Molecule->GetMDLDataFieldLabelAndValues();
    $DataFieldLabelsAndValuesString = join "\n", SDFileUtil::GenerateCmpdDataHeaderLabelsAndValuesLines($DataFieldLabelsRef, $DataFieldLabelAndValuesRef, $SortDataFields);
  }

  return "${CmpdString}\n${DataFieldLabelsAndValuesString}\n\$\$\$\$";
}


# Is it a SDFileIO object?
#
# Returns 1 when $Object is a blessed reference whose class is, or inherits
# from, this class; 0 otherwise.
#
sub _IsSDFileIO {
  my($Object) = @_;

  return (Scalar::Util::blessed($Object) && $Object->isa($ClassName)) ? 1 : 0;
}
