1 package TextUtil; 2 # 3 # $RCSfile: TextUtil.pm,v $ 4 # $Date: 2008/04/30 02:33:39 $ 5 # $Revision: 1.25 $ 6 # 7 # Author: Manish Sud <msud@san.rr.com> 8 # 9 # Copyright (C) 2004-2008 Manish Sud. All rights reserved. 10 # 11 # This file is part of MayaChemTools. 12 # 13 # MayaChemTools is free software; you can redistribute it and/or modify it under 14 # the terms of the GNU Lesser General Public License as published by the Free 15 # Software Foundation; either version 3 of the License, or (at your option) any 16 # later version. 17 # 18 # MayaChemTools is distributed in the hope that it will be useful, but without 19 # any warranty; without even the implied warranty of merchantability of fitness 20 # for a particular purpose. See the GNU Lesser General Public License for more 21 # details. 22 # 23 # You should have received a copy of the GNU Lesser General Public License 24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or 25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, 26 # Boston, MA, 02111-1307, USA. 27 # 28 use 5.006; 29 use strict; 30 use Exporter; 31 32 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); 33 34 $VERSION = '1.00'; 35 @ISA = qw(Exporter); 36 @EXPORT = qw(AddNumberSuffix ContainsWhiteSpaces GetTextLine HashCode IsEmpty IsNumberPowerOfNumber IsInteger IsPositiveInteger IsFloat IsNotEmpty IsNumerical JoinWords MonthNumberToName QuoteAWord RemoveLeadingWhiteSpaces RemoveTrailingWhiteSpaces RemoveLeadingAndTrailingWhiteSpaces WrapText); 37 @EXPORT_OK = qw(); 38 %EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]); 39 40 # Add number suffix... 41 sub AddNumberSuffix { 42 my($Value) = @_; 43 my($ValueWithSuffix, $Suffix); 44 45 $ValueWithSuffix = $Value; 46 if (!IsPositiveInteger($Value)) { 47 return $ValueWithSuffix; 48 } 49 $Suffix = "th"; 50 if ($Value < 10 || $Value > 20) { 51 my $Remainder = $Value % 10; 52 $Suffix = ($Remainder == 1) ? "st" : (($Remainder == 2) ? "nd" : (($Remainder == 3) ? "rd" : "th")); 53 } 54 $ValueWithSuffix = "${ValueWithSuffix}${Suffix}"; 55 return $ValueWithSuffix; 56 } 57 58 # Check out the string: Doen it contain any white space characters? 59 sub ContainsWhiteSpaces { 60 my($TheString) = @_; 61 my($Status) = 0; 62 63 if (defined($TheString) && length($TheString)) { 64 $Status = ($TheString =~ /[ \t\r\n\f]/ ) ? 1 : 0; 65 } 66 return $Status; 67 } 68 69 # Read the line, change to UNIX new line char, and chop off new line char as well... 70 sub GetTextLine { 71 my($TextFileRef) = @_; 72 my($Line) = ""; 73 74 # Get the next non empty line... 75 LINE: while (defined($_ = <$TextFileRef>)) { 76 # Change Windows and Mac new line char to UNIX... 77 s/(\r\n)|(\r)/\n/g; 78 chomp; 79 $Line = $_; 80 if (length($Line)) { 81 last LINE; 82 } 83 else { 84 next LINE; 85 } 86 } 87 return $Line; 88 } 89 90 # Returns a 32 bit integer hash code using One-at-a-time algorithm By Bob Jenkins [Ref 38]. It's also implemented in 91 # Perl for internal hash keys in hv.h include file. 92 # 93 # It's not clear how to force Perl perform unsigned integer arithmetic irrespective of the OS/Platform and 94 # the value of use64bitint flag used during its compilation. 95 # 96 # In order to generate a consistent 32 bit has code across OS/platforms, the following methodology appear 97 # to work: 98 # 99 # o Use MaxHashCodeMask to retrieve appropriate bits after left shifting by bit operators and additions 100 # o Stay away from "use integer" to avoid signed integer arithmetic for bit operators 101 # 102 # 103 my($MaxHashCodeMask); 104 $MaxHashCodeMask = 2**31 - 1; 105 106 sub HashCode { 107 my($String) = @_; 108 my($HashCode, $Value, $ShiftedHashCode); 109 110 $HashCode = 0; 111 for $Value (unpack('C*', $String)) { 112 $HashCode += $Value; 113 114 $ShiftedHashCode = $HashCode << 10; 115 if ($ShiftedHashCode > $MaxHashCodeMask) { 116 $ShiftedHashCode = $ShiftedHashCode & $MaxHashCodeMask; 117 } 118 119 $HashCode += $ShiftedHashCode; 120 if ($HashCode > $MaxHashCodeMask) { 121 $HashCode = $HashCode & $MaxHashCodeMask; 122 } 123 124 $HashCode ^= ($HashCode >> 6); 125 } 126 127 $ShiftedHashCode = $HashCode << 3; 128 if ($ShiftedHashCode > $MaxHashCodeMask) { 129 $ShiftedHashCode = $ShiftedHashCode & $MaxHashCodeMask; 130 } 131 132 $HashCode += $ShiftedHashCode; 133 if ($HashCode > $MaxHashCodeMask) { 134 $HashCode = $HashCode & $MaxHashCodeMask; 135 } 136 $HashCode ^= ($HashCode >> 11); 137 138 $ShiftedHashCode = $HashCode << 15; 139 if ($ShiftedHashCode > $MaxHashCodeMask) { 140 $ShiftedHashCode = $ShiftedHashCode & $MaxHashCodeMask; 141 } 142 143 $HashCode += $ShiftedHashCode; 144 if ($HashCode > $MaxHashCodeMask) { 145 $HashCode = $HashCode & $MaxHashCodeMask; 146 } 147 return $HashCode; 148 } 149 150 # Check out the string: Is it defined and has a non zero length? 151 sub IsEmpty { 152 my($TheString) = @_; 153 my($Status) = 1; 154 155 $Status = (defined($TheString) && length($TheString)) ? 0 : 1; 156 157 return $Status; 158 } 159 160 # Is first specified number power of second specified number... 161 sub IsNumberPowerOfNumber { 162 my($FirstNum, $SecondNum) = @_; 163 my($PowerValue); 164 165 $PowerValue = log($FirstNum)/log($SecondNum); 166 167 return IsInteger($PowerValue) ? 1 : 0; 168 } 169 170 # Check out the string: Is it an integer? 171 sub IsInteger { 172 my($TheString) = @_; 173 my($Status) = 0; 174 175 if (defined($TheString) && length($TheString)) { 176 $TheString = RemoveLeadingAndTrailingWhiteSpaces($TheString); 177 $TheString =~ s/^[+-]//; 178 $Status = ($TheString =~ /[^0-9]/) ? 0 : 1; 179 } 180 return $Status; 181 } 182 183 # Check out the string: Is it an integer with value > 0? 184 sub IsPositiveInteger { 185 my($TheString) = @_; 186 my($Status) = 0; 187 188 $Status = IsInteger($TheString) ? ($TheString > 0 ? 1 : 0) : 0; 189 190 return $Status; 191 } 192 193 194 # Check out the string: Is it a float? 195 sub IsFloat { 196 my($TheString) = @_; 197 my($Status) = 0; 198 199 if (defined($TheString) && length($TheString)) { 200 $TheString = RemoveLeadingAndTrailingWhiteSpaces($TheString); 201 $TheString =~ s/^[+-]//; 202 $Status = ($TheString =~ /[^0-9.eE]/) ? 0 : (((length($TheString) == 1) && ($TheString =~ /[.eE]/)) ? 0 : 1); 203 } 204 return $Status; 205 } 206 207 # Check out the string: Is it defined and has a non zero length? 208 sub IsNotEmpty { 209 my($TheString) = @_; 210 my($Status); 211 212 $Status = IsEmpty($TheString) ? 0 : 1; 213 214 return $Status; 215 } 216 217 # Check out the string: Does it only contain numerical data? 218 sub IsNumerical { 219 my($TheString) = @_; 220 my($Status) = 0; 221 222 if (defined($TheString) && length($TheString)) { 223 $TheString = RemoveLeadingAndTrailingWhiteSpaces($TheString); 224 $TheString =~ s/^[+-]//; 225 $Status = ($TheString =~ /[^0-9.eE]/) ? 0 : (((length($TheString) == 1) && ($TheString =~ /[.eE]/)) ? 0 : 1); 226 } 227 return $Status; 228 } 229 230 # Join different words using delimiter and quote parameters. And return as 231 # a string value. 232 sub JoinWords { 233 my($Words, $Delim, $Quote) = @_; 234 my($JoinedWords) = ""; 235 236 if (@$Words) { 237 my(@NewWords) = map { (defined($_) && length($_)) ? $_ : "" } @$Words; 238 if ($Quote) { 239 @NewWords = map { "\"$_\"" } @NewWords; 240 } 241 $JoinedWords = join $Delim, @NewWords; 242 } 243 return ($JoinedWords); 244 } 245 246 # Return month name from month number... 247 sub MonthNumberToName { 248 my($Number) = @_; 249 my(%NumberToName) = (1 => 'January', 2 => 'February', 3 => 'March', 4 => 'April', 250 5 => 'May', 6 => 'June', 7 => 'July', 8 => 'August', 251 9 => 'September', 10 => 'October', 11 => 'November', 12 => 'December'); 252 253 return (exists $NumberToName{$Number}) ? $NumberToName{$Number} : ''; 254 } 255 256 # Based on quote parameter, figure out what to do 257 sub QuoteAWord { 258 my($Word, $Quote) = @_; 259 my($QuotedWord); 260 261 $QuotedWord = ""; 262 if ($Word) { 263 $QuotedWord = $Word; 264 } 265 if ($Quote) { 266 $QuotedWord = "\"$QuotedWord\""; 267 } 268 return ($QuotedWord); 269 } 270 271 # Remove leading white space characters from the string... 272 sub RemoveLeadingWhiteSpaces { 273 my($InString) = @_; 274 my($OutString, $TrailingString, $LeadingWhiteSpace); 275 276 $OutString = $InString; 277 if (length($InString) && ContainsWhiteSpaces($InString)) { 278 $OutString =~ s/^([ \t\r\n\f]*)(.*?)$/$2/; 279 } 280 return $OutString; 281 } 282 283 # Remove Trailing white space characters from the string... 284 sub RemoveTrailingWhiteSpaces { 285 my($InString) = @_; 286 my($OutString, $LeadingString, $TrailingWhiteSpace); 287 288 $OutString = $InString; 289 if (length($InString) && ContainsWhiteSpaces($InString)) { 290 $OutString =~ s/^(.*?)([ \t\r\n\f]*)$/$1/; 291 } 292 return $OutString; 293 } 294 295 # Remove both leading and trailing white space characters from the string... 296 sub RemoveLeadingAndTrailingWhiteSpaces { 297 my($InString) = @_; 298 my($OutString); 299 300 $OutString = $InString; 301 if (length($InString) &&ContainsWhiteSpaces($InString)) { 302 $OutString =~ s/^([ \t\r\n\f]*)(.*?)([ \t\r\n\f]*)$/$2/; 303 } 304 return $OutString; 305 } 306 307 # Wrap text string... 308 sub WrapText { 309 my($InString, $WrapLength, $WrapDelimiter); 310 my($OutString); 311 312 $WrapLength = 40; 313 $WrapDelimiter = "\n"; 314 if (@_ == 3) { 315 ($InString, $WrapLength, $WrapDelimiter) = @_; 316 } 317 elsif (@_ == 2) { 318 ($InString, $WrapLength) = @_; 319 } 320 else { 321 ($InString, $WrapLength) = @_; 322 } 323 $OutString = $InString; 324 if ($InString && (length($InString) > $WrapLength)) { 325 $OutString = ""; 326 my($Index, $Length, $FirstPiece, $StringPiece); 327 $Index = 0; $Length = length($InString); 328 $FirstPiece = 1; 329 for ($Index = 0; $Index < $Length; $Index += $WrapLength) { 330 if (($Index + $WrapLength) < $Length) { 331 $StringPiece = substr($InString, $Index, $WrapLength); 332 } 333 else { 334 # Last piece of the string... 335 $StringPiece = substr($InString, $Index, $WrapLength); 336 } 337 if ($FirstPiece) { 338 $FirstPiece = 0; 339 $OutString = $StringPiece; 340 } 341 else { 342 $OutString .= "${WrapDelimiter}$ {StringPiece}"; 343 } 344 } 345 } 346 return $OutString; 347 } 348