MayaChemTools

   1 package TextUtil;
   2 #
   3 # $RCSfile: TextUtil.pm,v $
   4 # $Date: 2011/12/16 00:04:12 $
   5 # $Revision: 1.37 $
   6 #
   7 # Author: Manish Sud <msud@san.rr.com>
   8 #
   9 # Copyright (C) 2004-2012 Manish Sud. All rights reserved.
  10 #
  11 # This file is part of MayaChemTools.
  12 #
  13 # MayaChemTools is free software; you can redistribute it and/or modify it under
  14 # the terms of the GNU Lesser General Public License as published by the Free
  15 # Software Foundation; either version 3 of the License, or (at your option) any
  16 # later version.
  17 #
  18 # MayaChemTools is distributed in the hope that it will be useful, but without
  19 # any warranty; without even the implied warranty of merchantability of fitness
  20 # for a particular purpose.  See the GNU Lesser General Public License for more
  21 # details.
  22 #
  23 # You should have received a copy of the GNU Lesser General Public License
  24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  26 # Boston, MA, 02111-1307, USA.
  27 #
  28 
  29 use strict;
  30 use Exporter;
  31 
  32 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
  33 
  34 @ISA = qw(Exporter);
  35 @EXPORT = qw(AddNumberSuffix ContainsWhiteSpaces GetTextLine HashCode IsEmpty IsNumberPowerOfNumber IsInteger IsPositiveInteger IsFloat IsNotEmpty IsNumerical JoinWords SplitWords  QuoteAWord RemoveLeadingWhiteSpaces RemoveTrailingWhiteSpaces RemoveLeadingAndTrailingWhiteSpaces WrapText);
  36 @EXPORT_OK = qw();
  37 %EXPORT_TAGS = (all  => [@EXPORT, @EXPORT_OK]);
  38 
  39 # Add number suffix...
  40 sub AddNumberSuffix {
  41   my($Value) = @_;
  42   my($ValueWithSuffix, $Suffix);
  43 
  44   $ValueWithSuffix = $Value;
  45   if (!IsPositiveInteger($Value)) {
  46     return $ValueWithSuffix;
  47   }
  48   $Suffix = "th";
  49   if ($Value < 10 || $Value > 20) {
  50     my $Remainder = $Value % 10;
  51     $Suffix = ($Remainder == 1) ? "st" : (($Remainder == 2) ? "nd" : (($Remainder == 3) ? "rd" : "th"));
  52   }
  53   $ValueWithSuffix = "${ValueWithSuffix}${Suffix}";
  54   return $ValueWithSuffix;
  55 }
  56 
  57 # Check out the string: Doen it contain any white space characters?
  58 sub ContainsWhiteSpaces {
  59   my($TheString) = @_;
  60   my($Status) = 0;
  61 
  62   if (defined($TheString) && length($TheString)) {
  63     $Status = ($TheString =~ /[ \t\r\n\f]/ ) ? 1 : 0;
  64   }
  65   return $Status;
  66 }
  67 
  68 # Read the line, change to UNIX new line char, and chop off new line char as well...
  69 sub GetTextLine {
  70   my($TextFileRef) = @_;
  71   my($Line) = "";
  72 
  73   # Get the next non empty line...
  74   LINE: while (defined($_ = <$TextFileRef>)) {
  75       # Change Windows and Mac new line char to UNIX...
  76       s/(\r\n)|(\r)/\n/g;
  77       chomp;
  78       $Line = $_;
  79       if (length($Line)) {
  80         last LINE;
  81       }
  82       else {
  83         next LINE;
  84       }
  85     }
  86   return $Line;
  87 }
  88 
  89 # Returns a 32 bit integer hash code using One-at-a-time algorithm By Bob Jenkins [Ref 38]. It's also implemented in
  90 # Perl for internal hash keys in hv.h include file.
  91 #
  92 # It's not clear how to force Perl perform unsigned integer arithmetic irrespective of the OS/Platform and
  93 # the value of use64bitint flag used during its compilation.
  94 #
  95 # In order to generate a consistent 32 bit has code across OS/platforms, the following methodology appear
  96 # to work:
  97 #
  98 #    o Use MaxHashCodeMask to retrieve appropriate bits after left shifting by bit operators and additions
  99 #    o Stay away from "use integer" to avoid signed integer arithmetic for bit operators
 100 #
 101 #
 102 #   MaxHashCodeMask (2147483647) corresponds to the maximum value which can be stored in 31 bits
 103 #
 104 my($MaxHashCodeMask);
 105 $MaxHashCodeMask = 2**31 - 1;
 106 
 107 sub HashCode {
 108   my($String) = @_;
 109   my($HashCode, $Value, $ShiftedHashCode);
 110 
 111   $HashCode = 0;
 112   for $Value (unpack('C*', $String)) {
 113     $HashCode += $Value;
 114 
 115     $ShiftedHashCode = $HashCode << 10;
 116     if ($ShiftedHashCode > $MaxHashCodeMask) {
 117       $ShiftedHashCode = $ShiftedHashCode & $MaxHashCodeMask;
 118     }
 119 
 120     $HashCode += $ShiftedHashCode;
 121     if ($HashCode > $MaxHashCodeMask) {
 122       $HashCode = $HashCode & $MaxHashCodeMask;
 123     }
 124 
 125     $HashCode ^= ($HashCode >> 6);
 126   }
 127 
 128   $ShiftedHashCode = $HashCode << 3;
 129   if ($ShiftedHashCode > $MaxHashCodeMask) {
 130     $ShiftedHashCode = $ShiftedHashCode & $MaxHashCodeMask;
 131   }
 132 
 133   $HashCode += $ShiftedHashCode;
 134   if ($HashCode > $MaxHashCodeMask) {
 135     $HashCode = $HashCode & $MaxHashCodeMask;
 136   }
 137   $HashCode ^= ($HashCode >> 11);
 138 
 139   $ShiftedHashCode = $HashCode << 15;
 140   if ($ShiftedHashCode > $MaxHashCodeMask) {
 141     $ShiftedHashCode = $ShiftedHashCode & $MaxHashCodeMask;
 142   }
 143 
 144   $HashCode += $ShiftedHashCode;
 145   if ($HashCode > $MaxHashCodeMask) {
 146     $HashCode = $HashCode & $MaxHashCodeMask;
 147   }
 148   return $HashCode;
 149 }
 150 
 151 # Check out the string: Is it defined and has a non zero length?
 152 sub IsEmpty {
 153   my($TheString) = @_;
 154   my($Status) = 1;
 155 
 156   $Status = (defined($TheString) && length($TheString)) ? 0 : 1;
 157 
 158   return $Status;
 159 }
 160 
 161 # Is first specified number power of second specified number...
 162 sub IsNumberPowerOfNumber {
 163   my($FirstNum, $SecondNum) = @_;
 164   my($PowerValue);
 165 
 166   $PowerValue = log($FirstNum)/log($SecondNum);
 167 
 168   return IsInteger($PowerValue) ? 1 : 0;
 169 }
 170 
 171 # Check out the string: Is it an integer?
 172 sub IsInteger {
 173   my($TheString) = @_;
 174   my($Status) = 0;
 175 
 176   if (defined($TheString) && length($TheString)) {
 177     $TheString = RemoveLeadingAndTrailingWhiteSpaces($TheString);
 178     $TheString =~ s/^[+-]//;
 179     $Status = ($TheString =~ /[^0-9]/) ? 0 : 1;
 180   }
 181   return $Status;
 182 }
 183 
 184 # Check out the string: Is it an integer with value > 0?
 185 sub IsPositiveInteger {
 186   my($TheString) = @_;
 187   my($Status) = 0;
 188 
 189   $Status = IsInteger($TheString) ? ($TheString > 0 ? 1 : 0) : 0;
 190 
 191   return $Status;
 192 }
 193 
 194 
 195 # Check out the string: Is it a float?
 196 sub IsFloat {
 197   my($TheString) = @_;
 198   my($Status) = 0;
 199 
 200   if (defined($TheString) && length($TheString)) {
 201     $TheString = RemoveLeadingAndTrailingWhiteSpaces($TheString);
 202     $TheString =~ s/^[+-]//;
 203     $Status = ($TheString =~ /[^0-9.eE]/) ? 0 : (((length($TheString) == 1) && ($TheString =~ /[.eE]/)) ? 0 : 1);
 204   }
 205   return $Status;
 206 }
 207 
 208 # Check out the string: Is it defined and has a non zero length?
 209 sub IsNotEmpty {
 210   my($TheString) = @_;
 211   my($Status);
 212 
 213   $Status = IsEmpty($TheString) ? 0 : 1;
 214 
 215   return $Status;
 216 }
 217 
 218 # Check out the string: Does it only contain numerical data?
 219 sub IsNumerical {
 220   my($TheString) = @_;
 221   my($Status) = 0;
 222 
 223   if (defined($TheString) && length($TheString)) {
 224     $TheString = RemoveLeadingAndTrailingWhiteSpaces($TheString);
 225     $TheString =~ s/^[+-]//;
 226     $Status = ($TheString =~ /[^0-9.eE]/) ? 0 : (((length($TheString) == 1) && ($TheString =~ /[.eE]/)) ? 0 : 1);
 227   }
 228   return $Status;
 229 }
 230 
 231 # Join different words using delimiter and quote parameters. And return as
 232 # a string value.
 233 sub JoinWords {
 234   my($Words, $Delim, $Quote) = @_;
 235 
 236   if (!@$Words) {
 237     return "";
 238   }
 239 
 240   $Quote = $Quote ? "\"" : "";
 241   my(@NewWords) = map { (defined($_) && length($_)) ? "${Quote}$_${Quote}" : "${Quote}${Quote}" } @$Words;
 242 
 243   return join $Delim, @NewWords;
 244 }
 245 
 246 # Split string value containing quoted or unquoted words in to an array containing
 247 # unquoted words.
 248 #
 249 # This function is used to split strings generated by JoinWords.
 250 #
 251 sub SplitWords {
 252   my($Line, $Delim) = @_;
 253 
 254   if (!$Line) {
 255     return ();
 256   }
 257 
 258   # Is it a quoted string?
 259   if ($Line =~ /^\"/) {
 260     # Take out first and last quote...
 261     $Line =~ s/^\"//; $Line =~ s/\"$//;
 262 
 263     $Delim = "\"$Delim\"";
 264   }
 265   return split /$Delim/, $Line;
 266 }
 267 
 268 # Based on quote parameter, figure out what to do
 269 sub QuoteAWord {
 270   my($Word, $Quote) = @_;
 271   my($QuotedWord);
 272 
 273   $QuotedWord = "";
 274   if ($Word) {
 275     $QuotedWord = $Word;
 276   }
 277   if ($Quote) {
 278     $QuotedWord = "\"$QuotedWord\"";
 279   }
 280   return ($QuotedWord);
 281 }
 282 
 283 # Remove leading white space characters from the string...
 284 sub RemoveLeadingWhiteSpaces {
 285   my($InString) = @_;
 286   my($OutString, $TrailingString, $LeadingWhiteSpace);
 287 
 288   $OutString = $InString;
 289   if (length($InString) && ContainsWhiteSpaces($InString)) {
 290     $OutString =~ s/^([ \t\r\n\f]*)(.*?)$/$2/;
 291   }
 292   return $OutString;
 293 }
 294 
 295 # Remove Trailing white space characters from the string...
 296 sub RemoveTrailingWhiteSpaces {
 297   my($InString) = @_;
 298   my($OutString, $LeadingString, $TrailingWhiteSpace);
 299 
 300   $OutString = $InString;
 301   if (length($InString) && ContainsWhiteSpaces($InString)) {
 302     $OutString =~ s/^(.*?)([ \t\r\n\f]*)$/$1/;
 303   }
 304   return $OutString;
 305 }
 306 
 307 # Remove both leading and trailing white space characters from the string...
 308 sub RemoveLeadingAndTrailingWhiteSpaces {
 309   my($InString) = @_;
 310   my($OutString);
 311 
 312   $OutString = $InString;
 313   if (length($InString) && ContainsWhiteSpaces($InString)) {
 314     $OutString =~ s/^([ \t\r\n\f]*)(.*?)([ \t\r\n\f]*)$/$2/;
 315   }
 316   return $OutString;
 317 }
 318 
 319 # Wrap text string...
 320 sub WrapText {
 321   my($InString, $WrapLength, $WrapDelimiter);
 322   my($OutString);
 323 
 324   $WrapLength = 40;
 325   $WrapDelimiter = "\n";
 326   if (@_ == 3) {
 327     ($InString, $WrapLength, $WrapDelimiter) = @_;
 328   }
 329   elsif (@_ == 2) {
 330     ($InString, $WrapLength) = @_;
 331   }
 332   else {
 333     ($InString, $WrapLength) = @_;
 334   }
 335   $OutString = $InString;
 336   if ($InString && (length($InString) > $WrapLength)) {
 337     $OutString = "";
 338     my($Index, $Length, $FirstPiece, $StringPiece);
 339     $Index = 0; $Length = length($InString);
 340     $FirstPiece = 1;
 341     for ($Index = 0; $Index < $Length; $Index += $WrapLength) {
 342       if (($Index + $WrapLength) < $Length) {
 343         $StringPiece = substr($InString, $Index, $WrapLength);
 344       }
 345       else {
 346         # Last piece of the string...
 347         $StringPiece = substr($InString, $Index, $WrapLength);
 348       }
 349       if ($FirstPiece) {
 350         $FirstPiece = 0;
 351         $OutString = $StringPiece;
 352       }
 353       else {
 354         $OutString .= "${WrapDelimiter}${StringPiece}";
 355       }
 356     }
 357   }
 358   return $OutString;
 359 }
 360