MayaChemTools

   1 package TextUtil;
   2 #
   3 # $RCSfile: TextUtil.pm,v $
   4 # $Date: 2008/04/30 02:33:39 $
   5 # $Revision: 1.25 $
   6 #
   7 # Author: Manish Sud <msud@san.rr.com>
   8 #
   9 # Copyright (C) 2004-2008 Manish Sud. All rights reserved.
  10 #
  11 # This file is part of MayaChemTools.
  12 #
  13 # MayaChemTools is free software; you can redistribute it and/or modify it under
  14 # the terms of the GNU Lesser General Public License as published by the Free
  15 # Software Foundation; either version 3 of the License, or (at your option) any
  16 # later version.
  17 #
  18 # MayaChemTools is distributed in the hope that it will be useful, but without
  19 # any warranty; without even the implied warranty of merchantability of fitness
  20 # for a particular purpose.  See the GNU Lesser General Public License for more
  21 # details.
  22 #
  23 # You should have received a copy of the GNU Lesser General Public License
  24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  26 # Boston, MA, 02111-1307, USA.
  27 #
  28 use 5.006;
  29 use strict;
  30 use Exporter;
  31 
  32 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
  33 
  34 $VERSION = '1.00';
  35 @ISA = qw(Exporter);
  36 @EXPORT = qw(AddNumberSuffix ContainsWhiteSpaces GetTextLine HashCode IsEmpty IsNumberPowerOfNumber IsInteger IsPositiveInteger IsFloat IsNotEmpty IsNumerical JoinWords MonthNumberToName QuoteAWord RemoveLeadingWhiteSpaces RemoveTrailingWhiteSpaces RemoveLeadingAndTrailingWhiteSpaces WrapText);
  37 @EXPORT_OK = qw();
  38 %EXPORT_TAGS = (all  => [@EXPORT, @EXPORT_OK]);
  39 
  40 # Add number suffix...
  41 sub AddNumberSuffix {
  42   my($Value) = @_;
  43   my($ValueWithSuffix, $Suffix);
  44 
  45   $ValueWithSuffix = $Value;
  46   if (!IsPositiveInteger($Value)) {
  47     return $ValueWithSuffix;
  48   }
  49   $Suffix = "th";
  50   if ($Value < 10 || $Value > 20) {
  51     my $Remainder = $Value % 10;
  52     $Suffix = ($Remainder == 1) ? "st" : (($Remainder == 2) ? "nd" : (($Remainder == 3) ? "rd" : "th"));
  53   }
  54   $ValueWithSuffix = "${ValueWithSuffix}${Suffix}";
  55   return $ValueWithSuffix;
  56 }
  57 
  58 # Check out the string: Doen it contain any white space characters?
  59 sub ContainsWhiteSpaces {
  60   my($TheString) = @_;
  61   my($Status) = 0;
  62 
  63   if (defined($TheString) && length($TheString)) {
  64     $Status = ($TheString =~ /[ \t\r\n\f]/ ) ? 1 : 0;
  65   }
  66   return $Status;
  67 }
  68 
  69 # Read the line, change to UNIX new line char, and chop off new line char as well...
  70 sub GetTextLine {
  71   my($TextFileRef) = @_;
  72   my($Line) = "";
  73 
  74   # Get the next non empty line...
  75   LINE: while (defined($_ = <$TextFileRef>)) {
  76       # Change Windows and Mac new line char to UNIX...
  77       s/(\r\n)|(\r)/\n/g;
  78       chomp;
  79       $Line = $_;
  80       if (length($Line)) {
  81 	last LINE;
  82       }
  83       else {
  84 	next LINE;
  85       }
  86     }
  87   return $Line;
  88 }
  89 
  90 # Returns a 32 bit integer hash code using One-at-a-time algorithm By Bob Jenkins [Ref 38]. It's also implemented in
  91 # Perl for internal hash keys in hv.h include file.
  92 #
  93 # It's not clear how to force Perl perform unsigned integer arithmetic irrespective of the OS/Platform and
  94 # the value of use64bitint flag used during its compilation.
  95 #
  96 # In order to generate a consistent 32 bit has code across OS/platforms, the following methodology appear
  97 # to work:
  98 #
  99 #    o Use MaxHashCodeMask to retrieve appropriate bits after left shifting by bit operators and additions
 100 #    o Stay away from "use integer" to avoid signed integer arithmetic for bit operators
 101 #
 102 #
 103 my($MaxHashCodeMask);
 104 $MaxHashCodeMask = 2**31 - 1;
 105 
 106 sub HashCode {
 107   my($String) = @_;
 108   my($HashCode, $Value, $ShiftedHashCode);
 109 
 110   $HashCode = 0;
 111   for $Value (unpack('C*', $String)) {
 112     $HashCode += $Value;
 113 
 114     $ShiftedHashCode = $HashCode << 10;
 115     if ($ShiftedHashCode > $MaxHashCodeMask) {
 116       $ShiftedHashCode = $ShiftedHashCode & $MaxHashCodeMask;
 117     }
 118 
 119     $HashCode += $ShiftedHashCode;
 120     if ($HashCode > $MaxHashCodeMask) {
 121       $HashCode = $HashCode & $MaxHashCodeMask;
 122     }
 123 
 124     $HashCode ^= ($HashCode >> 6);
 125   }
 126 
 127   $ShiftedHashCode = $HashCode << 3;
 128   if ($ShiftedHashCode > $MaxHashCodeMask) {
 129     $ShiftedHashCode = $ShiftedHashCode & $MaxHashCodeMask;
 130   }
 131 
 132   $HashCode += $ShiftedHashCode;
 133   if ($HashCode > $MaxHashCodeMask) {
 134     $HashCode = $HashCode & $MaxHashCodeMask;
 135   }
 136   $HashCode ^= ($HashCode >> 11);
 137 
 138   $ShiftedHashCode = $HashCode << 15;
 139   if ($ShiftedHashCode > $MaxHashCodeMask) {
 140     $ShiftedHashCode = $ShiftedHashCode & $MaxHashCodeMask;
 141   }
 142 
 143   $HashCode += $ShiftedHashCode;
 144   if ($HashCode > $MaxHashCodeMask) {
 145     $HashCode = $HashCode & $MaxHashCodeMask;
 146   }
 147   return $HashCode;
 148 }
 149 
 150 # Check out the string: Is it defined and has a non zero length?
 151 sub IsEmpty {
 152   my($TheString) = @_;
 153   my($Status) = 1;
 154 
 155   $Status = (defined($TheString) && length($TheString)) ? 0 : 1;
 156 
 157   return $Status;
 158 }
 159 
 160 # Is first specified number power of second specified number...
 161 sub IsNumberPowerOfNumber {
 162   my($FirstNum, $SecondNum) = @_;
 163   my($PowerValue);
 164 
 165   $PowerValue = log($FirstNum)/log($SecondNum);
 166 
 167   return IsInteger($PowerValue) ? 1 : 0;
 168 }
 169 
 170 # Check out the string: Is it an integer?
 171 sub IsInteger {
 172   my($TheString) = @_;
 173   my($Status) = 0;
 174 
 175   if (defined($TheString) && length($TheString)) {
 176     $TheString = RemoveLeadingAndTrailingWhiteSpaces($TheString);
 177     $TheString =~ s/^[+-]//;
 178     $Status = ($TheString =~ /[^0-9]/) ? 0 : 1;
 179   }
 180   return $Status;
 181 }
 182 
 183 # Check out the string: Is it an integer with value > 0?
 184 sub IsPositiveInteger {
 185   my($TheString) = @_;
 186   my($Status) = 0;
 187 
 188   $Status = IsInteger($TheString) ? ($TheString > 0 ? 1 : 0) : 0;
 189 
 190   return $Status;
 191 }
 192 
 193 
 194 # Check out the string: Is it a float?
 195 sub IsFloat {
 196   my($TheString) = @_;
 197   my($Status) = 0;
 198 
 199   if (defined($TheString) && length($TheString)) {
 200     $TheString = RemoveLeadingAndTrailingWhiteSpaces($TheString);
 201     $TheString =~ s/^[+-]//;
 202     $Status = ($TheString =~ /[^0-9.eE]/) ? 0 : (((length($TheString) == 1) && ($TheString =~ /[.eE]/)) ? 0 : 1);
 203   }
 204   return $Status;
 205 }
 206 
 207 # Check out the string: Is it defined and has a non zero length?
 208 sub IsNotEmpty {
 209   my($TheString) = @_;
 210   my($Status);
 211 
 212   $Status = IsEmpty($TheString) ? 0 : 1;
 213 
 214   return $Status;
 215 }
 216 
 217 # Check out the string: Does it only contain numerical data?
 218 sub IsNumerical {
 219   my($TheString) = @_;
 220   my($Status) = 0;
 221 
 222   if (defined($TheString) && length($TheString)) {
 223     $TheString = RemoveLeadingAndTrailingWhiteSpaces($TheString);
 224     $TheString =~ s/^[+-]//;
 225     $Status = ($TheString =~ /[^0-9.eE]/) ? 0 : (((length($TheString) == 1) && ($TheString =~ /[.eE]/)) ? 0 : 1);
 226   }
 227   return $Status;
 228 }
 229 
 230 # Join different words using delimiter and quote parameters. And return as
 231 # a string value.
 232 sub JoinWords {
 233   my($Words, $Delim, $Quote) = @_;
 234   my($JoinedWords) = "";
 235 
 236   if (@$Words) {
 237     my(@NewWords) = map { (defined($_) && length($_)) ? $_ : "" } @$Words;
 238     if ($Quote) {
 239       @NewWords = map { "\"$_\"" } @NewWords;
 240     }
 241     $JoinedWords = join $Delim, @NewWords;
 242   }
 243   return ($JoinedWords);
 244 }
 245 
 246 # Return month name from month number...
 247 sub MonthNumberToName {
 248   my($Number) = @_;
 249   my(%NumberToName) = (1 => 'January', 2 => 'February', 3 => 'March', 4 => 'April',
 250 		      5 => 'May', 6 => 'June', 7 => 'July', 8 => 'August',
 251 		      9 => 'September', 10 => 'October', 11 => 'November', 12 => 'December');
 252 
 253   return (exists $NumberToName{$Number}) ? $NumberToName{$Number} : '';
 254 }
 255 
 256 # Based on quote parameter, figure out what to do
 257 sub QuoteAWord {
 258   my($Word, $Quote) = @_;
 259   my($QuotedWord);
 260 
 261   $QuotedWord = "";
 262   if ($Word) {
 263     $QuotedWord = $Word;
 264   }
 265   if ($Quote) {
 266     $QuotedWord = "\"$QuotedWord\"";
 267   }
 268   return ($QuotedWord);
 269 }
 270 
 271 # Remove leading white space characters from the string...
 272 sub RemoveLeadingWhiteSpaces {
 273   my($InString) = @_;
 274   my($OutString, $TrailingString, $LeadingWhiteSpace);
 275 
 276   $OutString = $InString;
 277   if (length($InString) && ContainsWhiteSpaces($InString)) {
 278     $OutString =~ s/^([ \t\r\n\f]*)(.*?)$/$2/;
 279   }
 280   return $OutString;
 281 }
 282 
 283 # Remove Trailing white space characters from the string...
 284 sub RemoveTrailingWhiteSpaces {
 285   my($InString) = @_;
 286   my($OutString, $LeadingString, $TrailingWhiteSpace);
 287 
 288   $OutString = $InString;
 289   if (length($InString) && ContainsWhiteSpaces($InString)) {
 290     $OutString =~ s/^(.*?)([ \t\r\n\f]*)$/$1/;
 291   }
 292   return $OutString;
 293 }
 294 
 295 # Remove both leading and trailing white space characters from the string...
 296 sub RemoveLeadingAndTrailingWhiteSpaces {
 297   my($InString) = @_;
 298   my($OutString);
 299 
 300   $OutString = $InString;
 301   if (length($InString) &&ContainsWhiteSpaces($InString)) {
 302     $OutString =~ s/^([ \t\r\n\f]*)(.*?)([ \t\r\n\f]*)$/$2/;
 303   }
 304   return $OutString;
 305 }
 306 
 307 # Wrap text string...
 308 sub WrapText {
 309   my($InString, $WrapLength, $WrapDelimiter);
 310   my($OutString);
 311 
 312   $WrapLength = 40;
 313   $WrapDelimiter = "\n";
 314   if (@_ == 3) {
 315     ($InString, $WrapLength, $WrapDelimiter) = @_;
 316   }
 317   elsif (@_ == 2) {
 318     ($InString, $WrapLength) = @_;
 319   }
 320   else {
 321     ($InString, $WrapLength) = @_;
 322   }
 323   $OutString = $InString;
 324   if ($InString && (length($InString) > $WrapLength)) {
 325     $OutString = "";
 326     my($Index, $Length, $FirstPiece, $StringPiece);
 327     $Index = 0; $Length = length($InString);
 328     $FirstPiece = 1;
 329     for ($Index = 0; $Index < $Length; $Index += $WrapLength) {
 330       if (($Index + $WrapLength) < $Length) {
 331 	$StringPiece = substr($InString, $Index, $WrapLength);
 332       }
 333       else {
 334 	# Last piece of the string...
 335 	$StringPiece = substr($InString, $Index, $WrapLength);
 336       }
 337       if ($FirstPiece) {
 338 	$FirstPiece = 0;
 339 	$OutString = $StringPiece;
 340       }
 341       else {
 342 	$OutString .= "${WrapDelimiter}$ {StringPiece}";
 343       }
 344     }
 345   }
 346   return $OutString;
 347 }
 348