package AminoAcids;
#
# $RCSfile: AminoAcids.pm,v $
# $Date: 2008/04/19 16:10:58 $
# $Revision: 1.13 $
#
# Author: Manish Sud <msud@san.rr.com>
#
# Copyright (C) 2004-2008 Manish Sud. All rights reserved.
#
# This file is part of MayaChemTools.
#
# MayaChemTools is free software; you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation; either version 3 of the License, or (at your option) any
# later version.
#
# MayaChemTools is distributed in the hope that it will be useful, but without
# any warranty; without even the implied warranty of merchantability of fitness
# for a particular purpose. See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
# write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
# Boston, MA, 02111-1307, USA.
#
use 5.006;
use strict;
use Carp;
# Exporter is named in @ISA below, so load it explicitly instead of relying on
# another module having pulled it in as a side effect.
use Exporter ();
use Text::ParseWords;
use TextUtil;
use FileUtil;

our $VERSION = '1.00';
our @ISA = qw(Exporter);

# Nothing is exported by default; every public function is available on
# request, individually or through the ':all' tag.
our @EXPORT = qw();
our @EXPORT_OK = qw(GetAminoAcids GetAminoAcidPropertiesData GetAminoAcidPropertiesNames IsAminoAcid IsAminoAcidProperty);

our %EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]);

#
# Load amino acids data...
#
# File-scoped state shared by all functions in this module:
#
# Three letter code (as spelled in the data file) => { property name => value }
my %AminoAcidDataMap;
# Lowercased three letter code / one letter code / name => three letter code
my %AminoAcidThreeLetterCodeMap;
my %AminoAcidOneLetterCodeMap;
my %AminoAcidNameMap;
# Property names in data file column order, and the same names as a lookup set
my @AminoAcidPropertyNames;
my %AminoAcidPropertyNamesMap;

# The data file is read once, when the module is loaded.
_LoadAminoAcidsData();

#
# Get a list of all known amino acids as one of these values:
# one letter code, three letter code, or amino acid name...
#
# The optional $NameType argument selects which value is returned:
# 'OneLetterCode' or 'AminoAcid' (matched case-insensitively); any other
# value, or no argument at all, selects the three letter codes.
#
# The names are de-duplicated and sorted. They are returned as a list in list
# context and as a reference to an array in scalar context.
#
sub GetAminoAcids {
  my $NameType = (@_ >= 1) ? $_[0] : 'ThreeLetterCode';

  # Decide once which property supplies the name; undef means "use the
  # three letter code itself".
  my $NameKey = ($NameType =~ /^OneLetterCode$/i) ? 'OneLetterCode'
              : ($NameType =~ /^AminoAcid$/i)     ? 'AminoAcid'
              :                                     undef;

  # Collect names, using a hash so that repeated values collapse...
  my %AminoAcidNamesMap = ();
  for my $ThreeLetterCode (keys %AminoAcidDataMap) {
    my $Name = defined($NameKey) ? $AminoAcidDataMap{$ThreeLetterCode}{$NameKey} : $ThreeLetterCode;
    $AminoAcidNamesMap{$Name} = $Name;
  }

  # Sort 'em out
  my @AminoAcidNames = sort keys %AminoAcidNamesMap;

  return (wantarray ? @AminoAcidNames : \@AminoAcidNames);
}


#
# Get all available properties data for an amino acid using any of these symbols:
# three letter code; one letter code; name.
#
# A reference to a hash is returned with keys and values representing property
# name and its values respectively; undef is returned for an unknown amino acid.
#
# The reference points at this module's own data, not a copy, so callers
# should treat it as read-only.
#
sub GetAminoAcidPropertiesData {
  my($AminoAcidID) = @_;

  my $ThreeLetterCode = _ValidateAminoAcidID($AminoAcidID);
  if (!$ThreeLetterCode) {
    return undef;
  }
  return $AminoAcidDataMap{$ThreeLetterCode};
}

#
# Get names of all available amino acid properties.
#
# The optional $Mode argument is honored only when exactly one argument is
# passed:
#   'Alphabetical' (case-insensitive) - ThreeLetterCode, OneLetterCode, and
#       AminoAcid first, followed by the remaining names in sorted order.
#   anything else (default 'ByGroup') - names in data file column order.
#
# The names are returned as a list in list context and as a reference to an
# array in scalar context.
#
sub GetAminoAcidPropertiesNames {
  my $Mode = 'ByGroup';
  if (@_ == 1) {
    ($Mode) = @_;
  }

  my @PropertyNames = ();
  if ($Mode =~ /^Alphabetical$/i) {
    # ThreeLetterCode, OneLetterCode, and AminoAcid are always listed first...
    @PropertyNames = (
      qw(ThreeLetterCode OneLetterCode AminoAcid),
      grep { !/^(ThreeLetterCode|OneLetterCode|AminoAcid)$/ } sort keys %AminoAcidPropertyNamesMap
    );
  }
  else {
    @PropertyNames = @AminoAcidPropertyNames;
  }
  return (wantarray ? @PropertyNames : \@PropertyNames);
}

#
# Is it a known amino acid? Input is either a one/three letter code or a name.
# Returns 1 or 0.
#
sub IsAminoAcid {
  my($AminoAcidID) = @_;

  return _ValidateAminoAcidID($AminoAcidID) ? 1 : 0;
}


#
# Is it an available amino acid property? The name must match a data file
# column label exactly. Returns 1 or 0.
#
sub IsAminoAcidProperty {
  my($PropertyName) = @_;

  return exists($AminoAcidPropertyNamesMap{$PropertyName}) ? 1 : 0;
}

#
# Implements GetAminoAcid<PropertyName> for a valid property name.
#
# The single argument is an amino acid ID (three letter code, one letter code,
# or name). The property value is returned, or undef for an unknown amino acid.
# Croaks when the function name does not start with Get or when
# <PropertyName> is not a known property.
#
sub AUTOLOAD {
  my($AminoAcidID) = @_;

  our $AUTOLOAD;
  my $FunctionName = $AUTOLOAD;
  $FunctionName =~ s/.*:://;

  # Only Get<PropertyName> functions are supported...
  if ($FunctionName !~ /^Get/) {
    croak "Error: Function, AminoAcids::$FunctionName, is not supported by AUTOLOAD in AminoAcids module: Only Get<PropertyName> functions are implemented...";
  }

  my $PropertyName = $FunctionName;
  $PropertyName =~ s/^GetAminoAcid//;
  if (!exists $AminoAcidPropertyNamesMap{$PropertyName}) {
    croak "Error: Function, AminoAcids::$FunctionName, is not supported by AUTOLOAD in AminoAcids module: Unknown amino acid property name, $PropertyName, specified...";
  }

  my $ThreeLetterCode = _ValidateAminoAcidID($AminoAcidID);
  if (!$ThreeLetterCode) {
    return undef;
  }
  return $AminoAcidDataMap{$ThreeLetterCode}{$PropertyName};
}


#
# Locate data/AminoAcidsData.csv under the directory reported by
# GetMayaChemToolsLibDirName() and load it. Croaks when the file is missing.
#
sub _LoadAminoAcidsData {
  my $MayaChemToolsLibDir = GetMayaChemToolsLibDirName();

  my $AminoAcidsDataFile = "$MayaChemToolsLibDir" . "/data/AminoAcidsData.csv";

  if (! -e $AminoAcidsDataFile) {
    croak "Error: MayaChemTools package file, $AminoAcidsDataFile, is missing: Possible installation problems...";
  }

  _LoadData($AminoAcidsDataFile);
}

#
# Load the specified amino acids data file, replacing any previously loaded data.
#
# Leading lines starting with # are skipped; the first other line supplies the
# column labels, which become the property names. Each following line that
# does not start with # describes one amino acid, keyed by its first field
# (the three letter code).
#
# Croaks when the file can't be opened, when no column labels are found, or
# when a data line doesn't have one field per column label. A repeated three
# letter code is ignored with a warning.
#
sub _LoadData {
  my($AminoAcidsDataFile) = @_;

  %AminoAcidDataMap = ();
  @AminoAcidPropertyNames = ();
  %AminoAcidPropertyNamesMap = ();
  %AminoAcidThreeLetterCodeMap = ();
  %AminoAcidOneLetterCodeMap = ();
  %AminoAcidNameMap = ();

  # Load property data for all amino acids...
  #
  # File Format:
  #"ThreeLetterCode","OneLetterCode","AminoAcid","AcidicBasic","PolarNonpolar","Charged","Aromatic","HydrophobicHydophilic","IsoelectricPoint","pKCOOH","pKNH3+","MolecularWeight","MolecularWeightMinusH2O(18.01524)","ExactMass","ExactMassMinusH2O(18.01056)","vanderWaalsVolume","%AccessibleResidues","%BuriedResidues","AlphaHelixChouAndFasman","AlphaHelixDeleageAndRoux","AlphaHelixLevitt","AminoAcidsComposition","AminoAcidsCompositionInSwissProt","AntiparallelBetaStrand","AverageAreaBuried","AverageFlexibility","BetaSheetChouAndFasman","BetaSheetDeleageAndRoux","BetaSheetLevitt","BetaTurnChouAndFasman","BetaTurnDeleageAndRoux","BetaTurnLevitt","Bulkiness","CoilDeleageAndRoux","HPLCHFBARetention","HPLCRetentionAtpH2.1","HPLCRetentionAtpH7.4","HPLCTFARetention","HydrophobicityAbrahamAndLeo","HydrophobicityBlack","HydrophobicityBullAndBreese","HydrophobicityChothia","HydrophobicityEisenbergAndOthers","HydrophobicityFauchereAndOthers","HydrophobicityGuy","HydrophobicityHPLCAtpH3.4Cowan","HydrophobicityHPLCAtpH7.5Cowan","HydrophobicityHPLCParkerAndOthers","HydrophobicityHPLCWilsonAndOthers","HydrophobicityHoppAndWoods","HydrophobicityJanin","HydrophobicityKyteAndDoolittle","HydrophobicityManavalanAndOthers","HydrophobicityMiyazawaAndOthers","HydrophobicityOMHSweetAndOthers","HydrophobicityRaoAndArgos","HydrophobicityRfMobility","HydrophobicityRoseAndOthers","HydrophobicityRoseman","HydrophobicityWellingAndOthers","HydrophobicityWolfendenAndOthers","MolecularWeight","NumberOfCodons","ParallelBetaStrand","PolarityGrantham","PolarityZimmerman","RatioHeteroEndToSide","RecognitionFactors","Refractivity","RelativeMutability","TotalBetaStrand","LinearStructure","LinearStructureAtpH7.4"
  #
  #
  my $InDelim = "\,";

  # Three-argument open on a lexical handle: the file name is never parsed
  # for a mode, and the handle is closed automatically if we croak below.
  open my $AminoAcidsDataFH, '<', $AminoAcidsDataFile or croak "Couldn't open $AminoAcidsDataFile: $! ...";

  # Skip lines up to column labels...
  my $Line;
  LINE: while ($Line = GetTextLine($AminoAcidsDataFH)) {
    if ($Line !~ /^#/) {
      last LINE;
    }
  }
  # Without a label line there are no property names, and every later lookup
  # would fail in a confusing way; report the broken file right here instead.
  if (!defined($Line) || !length($Line)) {
    croak "Error: No column labels found in $AminoAcidsDataFile: Possible installation problems...";
  }
  my @ColLabels = quotewords($InDelim, 0, $Line);
  my $NumOfCols = @ColLabels;

  # Extract property names from column labels...
  @AminoAcidPropertyNames = @ColLabels;
  for my $Name (@ColLabels) {
    $AminoAcidPropertyNamesMap{$Name} = $Name;
  }

  # Process amino acid data...
  LINE: while ($Line = GetTextLine($AminoAcidsDataFH)) {
    if ($Line =~ /^#/) {
      next LINE;
    }
    my @LineWords = quotewords($InDelim, 0, $Line);
    my $NumOfFields = @LineWords;
    if ($NumOfFields != $NumOfCols) {
      croak "Error: The number of data fields, $NumOfFields, in $AminoAcidsDataFile must be $NumOfCols.\nLine: $Line...";
    }
    my $ThreeLetterCode = $LineWords[0];
    if (exists $AminoAcidDataMap{$ThreeLetterCode}) {
      carp "Warning: Ignoring data for amino acid $ThreeLetterCode: It has already been loaded.\nLine: $Line....";
      next LINE;
    }

    # Store all the values; fields and property names line up one to one
    # because the field count was checked above. Should a column label be
    # repeated, the value from the later column is the one kept.
    my %PropertyValues = ();
    @PropertyValues{@AminoAcidPropertyNames} = @LineWords;
    $AminoAcidDataMap{$ThreeLetterCode} = \%PropertyValues;
  }
  close $AminoAcidsDataFH;

  # Setup one letter and amino acid name maps...
  _SetupAminoAcidIDMap();
}


#
# Setup lowercase three/one letter code and name maps pointing
# to three letter code as shown in data file.
#
# Rebuilds the three lookup maps from %AminoAcidDataMap. Every key is
# lowercased so that later lookups are case-insensitive; every value is the
# three letter code exactly as it is keyed in %AminoAcidDataMap.
#
sub _SetupAminoAcidIDMap {
  %AminoAcidThreeLetterCodeMap = ();
  %AminoAcidOneLetterCodeMap = ();
  %AminoAcidNameMap = ();

  for my $Code (keys %AminoAcidDataMap) {
    my $Record = $AminoAcidDataMap{$Code};

    $AminoAcidThreeLetterCodeMap{lc($Code)} = $Code;
    $AminoAcidOneLetterCodeMap{lc($Record->{OneLetterCode})} = $Code;
    $AminoAcidNameMap{lc($Record->{AminoAcid})} = $Code;
  }
}

#
# Validate amino acid ID...
#
# The length of the ID decides how it is interpreted: three characters is
# looked up as a three letter code, one character as a one letter code, and
# anything else as an amino acid name. Lookups are case-insensitive.
#
# Returns the matching three letter code, or undef when the ID is unknown.
#
sub _ValidateAminoAcidID {
  my($AminoAcidID) = @_;

  my $IDLength = length($AminoAcidID);
  my $LookupKey = lc($AminoAcidID);

  my $LookupMap = ($IDLength == 3) ? \%AminoAcidThreeLetterCodeMap
                : ($IDLength == 1) ? \%AminoAcidOneLetterCodeMap
                :                    \%AminoAcidNameMap;

  return (exists $LookupMap->{$LookupKey}) ? $LookupMap->{$LookupKey} : undef;
}
