MayaChemTools

   1 package AminoAcids;
   2 #
   3 # $RCSfile: AminoAcids.pm,v $
   4 # $Date: 2008/04/19 16:10:58 $
   5 # $Revision: 1.13 $
   6 #
   7 # Author: Manish Sud <msud@san.rr.com>
   8 #
   9 # Copyright (C) 2004-2008 Manish Sud. All rights reserved.
  10 #
  11 # This file is part of MayaChemTools.
  12 #
  13 # MayaChemTools is free software; you can redistribute it and/or modify it under
  14 # the terms of the GNU Lesser General Public License as published by the Free
  15 # Software Foundation; either version 3 of the License, or (at your option) any
  16 # later version.
  17 #
  18 # MayaChemTools is distributed in the hope that it will be useful, but without
  19 # any warranty; without even the implied warranty of merchantability of fitness
  20 # for a particular purpose.  See the GNU Lesser General Public License for more
  21 # details.
  22 #
  23 # You should have received a copy of the GNU Lesser General Public License
  24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  26 # Boston, MA, 02111-1307, USA.
  27 #
  28 use 5.006;
  29 use strict;
  30 use Carp;
  31 use Text::ParseWords;
  32 use TextUtil;
  33 use FileUtil;
  34 
  #
  # Exporter provides the import() method this package inherits through @ISA.
  # Load it explicitly so exporting does not depend on some other module having
  # already loaded it...
  #
  require Exporter;

  use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);

  $VERSION = '1.00';
  @ISA = qw(Exporter);

  # Nothing is exported by default; the functions listed below are exported on
  # request, individually or all together through the ':all' tag...
  @EXPORT = qw();
  @EXPORT_OK = qw(GetAminoAcids GetAminoAcidPropertiesData GetAminoAcidPropertiesNames IsAminoAcid IsAminoAcidProperty);

  %EXPORT_TAGS = (all  => [@EXPORT, @EXPORT_OK]);

  #
  # Load amino acids data...
  #
  # File level data shared by all functions in this package:
  #
  #   %AminoAcidDataMap            - three letter code => { property name => value }
  #   %AminoAcidThreeLetterCodeMap - lowercase three letter code => three letter code
  #   %AminoAcidOneLetterCodeMap   - lowercase one letter code => three letter code
  #   %AminoAcidNameMap            - lowercase amino acid name => three letter code
  #   @AminoAcidPropertyNames      - property names in data file column order
  #   %AminoAcidPropertyNamesMap   - property name => property name
  #
  my(%AminoAcidDataMap, %AminoAcidThreeLetterCodeMap, %AminoAcidOneLetterCodeMap, %AminoAcidNameMap, @AminoAcidPropertyNames, %AminoAcidPropertyNamesMap);
  _LoadAminoAcidsData();
  49 
  #
  # Get a sorted list of all known amino acids identified by one of these values:
  # one letter code, three letter code, or amino acid name...
  #
  # The optional first argument selects the value type: 'OneLetterCode' or
  # 'AminoAcid' (case insensitive); anything else, or no argument at all,
  # selects three letter codes.
  #
  # Returns the list in list context and a reference to it otherwise.
  #
  sub GetAminoAcids {
    my $NameType = (@_ >= 1) ? $_[0] : 'ThreeLetterCode';

    # Decide once which stored property supplies the names; undef means the
    # three letter code, which is the data map key itself...
    my $NameProperty =
        ($NameType =~ /^OneLetterCode$/i) ? 'OneLetterCode'
      : ($NameType =~ /^AminoAcid$/i)     ? 'AminoAcid'
      :                                     undef;

    # Collect names; the hash folds duplicates into a single entry...
    my %UniqueNames = ();
    for my $Code (keys %AminoAcidDataMap) {
      my $Label = defined($NameProperty) ? $AminoAcidDataMap{$Code}{$NameProperty} : $Code;
      $UniqueNames{$Label} = $Label;
    }

    # Sort 'em out
    my @SortedNames = sort keys %UniqueNames;

    return (wantarray ? @SortedNames : \@SortedNames);
  }
  81 
  82 
  #
  # Get all available properties data for an amino acid identified by any of
  # these: three letter code; one letter code; name.
  #
  # Returns a reference to a hash with property names as keys and property
  # values as values, or undef for an unknown amino acid. The reference points
  # at the hash held in this package's data map and not at a copy.
  #
  sub GetAminoAcidPropertiesData {
    my($AminoAcidID) = @_;

    my $Code = _ValidateAminoAcidID($AminoAcidID);
    if (!$Code) {
      return undef;
    }
    return \%{$AminoAcidDataMap{$Code}};
  }
 101 
 #
 # Get names of all available amino acid properties.
 #
 # When exactly one argument is passed, it is used as the ordering mode:
 # 'Alphabetical' (case insensitive) lists ThreeLetterCode, OneLetterCode and
 # AminoAcid first, followed by the remaining names in sorted order. Any other
 # mode, or any other number of arguments, returns the names in the order in
 # which they were loaded from the data file.
 #
 # Returns the list in list context and a reference to it otherwise.
 #
 sub GetAminoAcidPropertiesNames {
   my $Mode = (@_ == 1) ? $_[0] : 'ByGroup';

   my @Names = ();
   if ($Mode =~ /^Alphabetical$/i) {
     # ThreeLetterCode, OneLetterCode, and AminoAcid are always listed first...
     @Names = (
       qw(ThreeLetterCode OneLetterCode AminoAcid),
       grep { $_ !~ /^(ThreeLetterCode|OneLetterCode|AminoAcid)$/ } sort keys %AminoAcidPropertyNamesMap
     );
   }
   else {
     @Names = @AminoAcidPropertyNames;
   }

   return (wantarray ? @Names : \@Names);
 }
 131 
 #
 # Is it a known amino acid? Input is a one letter code, a three letter code,
 # or an amino acid name. Returns 1 for a known amino acid and 0 otherwise.
 #
 sub IsAminoAcid {
   my($AminoAcidID) = @_;

   return _ValidateAminoAcidID($AminoAcidID) ? 1 : 0;
 }
 143 
 144 
 #
 # Is it an available amino acid property? The name is compared exactly as
 # given against the loaded property names. Returns 1 or 0.
 #
 sub IsAminoAcidProperty {
   my($PropertyName) = @_;

   if (exists $AminoAcidPropertyNamesMap{$PropertyName}) {
     return 1;
   }
   return 0;
 }
 156 
 #
 # Implements GetAminoAcid<PropertyName> for a valid property name.
 #
 # The first argument identifies the amino acid by three letter code, one
 # letter code, or name. Returns the value of the property for that amino acid,
 # or undef for an unknown amino acid.
 #
 # Croaks when the called function name does not start with Get, or when what
 # remains after removing the GetAminoAcid prefix is not a loaded property name.
 #
 sub AUTOLOAD {
   my($AminoAcidID) = @_;
   my($FunctionName, $PropertyName, $ThreeLetterCode);

   # $AUTOLOAD holds the fully qualified name of the function that was called;
   # drop the package qualifier...
   use vars qw($AUTOLOAD);
   $FunctionName = $AUTOLOAD;
   $FunctionName =~ s/.*:://;

   # Only GetAminoAcid<PropertyName> functions are supported...
   if ($FunctionName !~ /^Get/) {
     croak "Error: Function, AminoAcids::$FunctionName, is not supported by AUTOLOAD in AminoAcids module: Only GetAminoAcid<PropertyName> functions are implemented...";
   }

   $PropertyName = $FunctionName;
   $PropertyName =~  s/^GetAminoAcid//;
   if (!exists $AminoAcidPropertyNamesMap{$PropertyName}) {
     croak "Error: Function, AminoAcids::$FunctionName, is not supported by AUTOLOAD in AminoAcids module: Unknown amino acid property name, $PropertyName, specified...";
   }

   if (!($ThreeLetterCode = _ValidateAminoAcidID($AminoAcidID))) {
     return undef;
   }
   return $AminoAcidDataMap{$ThreeLetterCode}{$PropertyName};
 }
 187 
 188 
 #
 # Locate data/AminoAcidsData.csv under the directory returned by
 # GetMayaChemToolsLibDirName and load it. Croaks when the file does not exist.
 #
 sub _LoadAminoAcidsData {
   my $MayaChemToolsLibDir = GetMayaChemToolsLibDirName();
   my $AminoAcidsDataFile = "$MayaChemToolsLibDir" . "/data/AminoAcidsData.csv";

   unless (-e "$AminoAcidsDataFile") {
     croak "Error: MayaChemTools package file, $AminoAcidsDataFile, is missing: Possible installation problems...";
   }

   _LoadData($AminoAcidsDataFile);
 }
 205 
 #
 # Load amino acids property data from the specified CSV file into the file
 # level data maps, replacing anything loaded earlier.
 #
 # Leading lines starting with # are skipped; the first line after those
 # supplies the column labels, which become the property names. Each following
 # line that does not start with # holds the data for one amino acid, keyed by
 # the value in its first column.
 #
 # Croaks when the file can't be opened, when no column labels line is found,
 # or when a data line does not have one field per column label. Data for a
 # three letter code that has already been loaded is ignored with a warning.
 #
 sub _LoadData {
   my($AminoAcidsDataFile) = @_;

   %AminoAcidDataMap = ();
   @AminoAcidPropertyNames = ();
   %AminoAcidPropertyNamesMap = ();
   %AminoAcidThreeLetterCodeMap = ();
   %AminoAcidOneLetterCodeMap = ();
   %AminoAcidNameMap = ();

   # Load property data for all amino acids...
   #
   # File Format:
   #"ThreeLetterCode","OneLetterCode","AminoAcid","AcidicBasic","PolarNonpolar","Charged","Aromatic","HydrophobicHydophilic","IsoelectricPoint","pKCOOH","pKNH3+","MolecularWeight","MolecularWeightMinusH2O(18.01524)","ExactMass","ExactMassMinusH2O(18.01056)","vanderWaalsVolume","%AccessibleResidues","%BuriedResidues","AlphaHelixChouAndFasman","AlphaHelixDeleageAndRoux","AlphaHelixLevitt","AminoAcidsComposition","AminoAcidsCompositionInSwissProt","AntiparallelBetaStrand","AverageAreaBuried","AverageFlexibility","BetaSheetChouAndFasman","BetaSheetDeleageAndRoux","BetaSheetLevitt","BetaTurnChouAndFasman","BetaTurnDeleageAndRoux","BetaTurnLevitt","Bulkiness","CoilDeleageAndRoux","HPLCHFBARetention","HPLCRetentionAtpH2.1","HPLCRetentionAtpH7.4","HPLCTFARetention","HydrophobicityAbrahamAndLeo","HydrophobicityBlack","HydrophobicityBullAndBreese","HydrophobicityChothia","HydrophobicityEisenbergAndOthers","HydrophobicityFauchereAndOthers","HydrophobicityGuy","HydrophobicityHPLCAtpH3.4Cowan","HydrophobicityHPLCAtpH7.5Cowan","HydrophobicityHPLCParkerAndOthers","HydrophobicityHPLCWilsonAndOthers","HydrophobicityHoppAndWoods","HydrophobicityJanin","HydrophobicityKyteAndDoolittle","HydrophobicityManavalanAndOthers","HydrophobicityMiyazawaAndOthers","HydrophobicityOMHSweetAndOthers","HydrophobicityRaoAndArgos","HydrophobicityRfMobility","HydrophobicityRoseAndOthers","HydrophobicityRoseman","HydrophobicityWellingAndOthers","HydrophobicityWolfendenAndOthers","MolecularWeight","NumberOfCodons","ParallelBetaStrand","PolarityGrantham","PolarityZimmerman","RatioHeteroEndToSide","RecognitionFactors","Refractivity","RelativeMutability","TotalBetaStrand","LinearStructure","LinearStructureAtpH7.4"
   #
   #
   my($ThreeLetterCode, $Line, $NumOfCols, $NumOfFields, $InDelim, $Index, $Name, @LineWords, @ColLabels, $DataFileHandle);

   $InDelim = "\,";

   # Three argument open: the file name is never interpreted as an open mode...
   open($DataFileHandle, '<', $AminoAcidsDataFile) or croak "Couldn't open $AminoAcidsDataFile: $! ...";

   # Skip lines up to column labels...
   LINE: while ($Line = GetTextLine($DataFileHandle)) {
     if ($Line !~ /^#/) {
       last LINE;
     }
   }
   # A false value here means the loop above ran out of lines without finding
   # a line to use as column labels...
   if (!$Line) {
     close $DataFileHandle;
     croak "Error: No column labels line found in $AminoAcidsDataFile: Possible installation problems...";
   }
   @ColLabels = quotewords($InDelim, 0, $Line);
   $NumOfCols = @ColLabels;

   # Extract property names from column labels...
   for $Name (@ColLabels) {
     push @AminoAcidPropertyNames, $Name;

     # Store property names...
     $AminoAcidPropertyNamesMap{$Name} = $Name;
   }

   # Process amino acid data...
   LINE: while ($Line = GetTextLine($DataFileHandle)) {
     if ($Line =~ /^#/) {
       next LINE;
     }
     @LineWords = quotewords($InDelim, 0, $Line);

     # Report the number of fields found, not the fields themselves...
     $NumOfFields = @LineWords;
     if ($NumOfFields != $NumOfCols) {
       croak "Error: The number of data fields, $NumOfFields, in $AminoAcidsDataFile must be $NumOfCols.\nLine: $Line...";
     }

     $ThreeLetterCode = $LineWords[0];
     if (exists $AminoAcidDataMap{$ThreeLetterCode}) {
       carp "Warning: Ignoring data for amino acid $ThreeLetterCode: It has already been loaded.\nLine: $Line....";
       next LINE;
     }

     # Store all the values; a column label appearing more than once keeps the
     # value from its last column...
     %{$AminoAcidDataMap{$ThreeLetterCode}} = ();
     for $Index (0 .. $#LineWords) {
       $AminoAcidDataMap{$ThreeLetterCode}{$AminoAcidPropertyNames[$Index]} = $LineWords[$Index];
     }
   }
   close $DataFileHandle;

   # Setup one letter and amino acid name maps...
   _SetupAminoAcidIDMap();
 }
 278 
 279 
 #
 # Rebuild the lowercase three letter code, one letter code, and name maps.
 # Each maps a lowercased ID to the three letter code as shown in the data
 # file, which is the key used by the amino acids data map.
 #
 sub _SetupAminoAcidIDMap {
   %AminoAcidThreeLetterCodeMap = ();
   %AminoAcidOneLetterCodeMap = ();
   %AminoAcidNameMap = ();

   for my $Code (keys %AminoAcidDataMap) {
     my $PropertiesRef = $AminoAcidDataMap{$Code};

     $AminoAcidThreeLetterCodeMap{lc($Code)} = $Code;
     $AminoAcidOneLetterCodeMap{lc($PropertiesRef->{OneLetterCode})} = $Code;
     $AminoAcidNameMap{lc($PropertiesRef->{AminoAcid})} = $Code;
   }
 }
 300 
 #
 # Validate amino acid ID...
 #
 # The length of the ID decides how it is interpreted: three characters as a
 # three letter code, one character as a one letter code, and anything else as
 # an amino acid name. Lookup is case insensitive.
 #
 # Returns the corresponding three letter code, or undef for an unknown ID.
 #
 sub _ValidateAminoAcidID {
   my($AminoAcidID) = @_;

   # Pick the map matching the kind of ID...
   my $IDLength = length($AminoAcidID);
   my $IDMapRef =
       ($IDLength == 3) ? \%AminoAcidThreeLetterCodeMap
     : ($IDLength == 1) ? \%AminoAcidOneLetterCodeMap
     :                    \%AminoAcidNameMap;

   my $LookupKey = lc($AminoAcidID);
   if (! exists $IDMapRef->{$LookupKey}) {
     return undef;
   }
   return $IDMapRef->{$LookupKey};
 }
 327 
 328