1 package TextUtil; 2 # 3 # $RCSfile: TextUtil.pm,v $ 4 # $Date: 2011/12/16 00:04:12 $ 5 # $Revision: 1.37 $ 6 # 7 # Author: Manish Sud <msud@san.rr.com> 8 # 9 # Copyright (C) 2004-2012 Manish Sud. All rights reserved. 10 # 11 # This file is part of MayaChemTools. 12 # 13 # MayaChemTools is free software; you can redistribute it and/or modify it under 14 # the terms of the GNU Lesser General Public License as published by the Free 15 # Software Foundation; either version 3 of the License, or (at your option) any 16 # later version. 17 # 18 # MayaChemTools is distributed in the hope that it will be useful, but without 19 # any warranty; without even the implied warranty of merchantability of fitness 20 # for a particular purpose. See the GNU Lesser General Public License for more 21 # details. 22 # 23 # You should have received a copy of the GNU Lesser General Public License 24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or 25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, 26 # Boston, MA, 02111-1307, USA. 27 # 28 29 use strict; 30 use Exporter; 31 32 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); 33 34 @ISA = qw(Exporter); 35 @EXPORT = qw(AddNumberSuffix ContainsWhiteSpaces GetTextLine HashCode IsEmpty IsNumberPowerOfNumber IsInteger IsPositiveInteger IsFloat IsNotEmpty IsNumerical JoinWords SplitWords QuoteAWord RemoveLeadingWhiteSpaces RemoveTrailingWhiteSpaces RemoveLeadingAndTrailingWhiteSpaces WrapText); 36 @EXPORT_OK = qw(); 37 %EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]); 38 39 # Add number suffix... 40 sub AddNumberSuffix { 41 my($Value) = @_; 42 my($ValueWithSuffix, $Suffix); 43 44 $ValueWithSuffix = $Value; 45 if (!IsPositiveInteger($Value)) { 46 return $ValueWithSuffix; 47 } 48 $Suffix = "th"; 49 if ($Value < 10 || $Value > 20) { 50 my $Remainder = $Value % 10; 51 $Suffix = ($Remainder == 1) ? "st" : (($Remainder == 2) ? "nd" : (($Remainder == 3) ? "rd" : "th")); 52 } 53 $ValueWithSuffix = "${ValueWithSuffix}${Suffix}"; 54 return $ValueWithSuffix; 55 } 56 57 # Check out the string: Doen it contain any white space characters? 58 sub ContainsWhiteSpaces { 59 my($TheString) = @_; 60 my($Status) = 0; 61 62 if (defined($TheString) && length($TheString)) { 63 $Status = ($TheString =~ /[ \t\r\n\f]/ ) ? 1 : 0; 64 } 65 return $Status; 66 } 67 68 # Read the line, change to UNIX new line char, and chop off new line char as well... 69 sub GetTextLine { 70 my($TextFileRef) = @_; 71 my($Line) = ""; 72 73 # Get the next non empty line... 74 LINE: while (defined($_ = <$TextFileRef>)) { 75 # Change Windows and Mac new line char to UNIX... 76 s/(\r\n)|(\r)/\n/g; 77 chomp; 78 $Line = $_; 79 if (length($Line)) { 80 last LINE; 81 } 82 else { 83 next LINE; 84 } 85 } 86 return $Line; 87 } 88 89 # Returns a 32 bit integer hash code using One-at-a-time algorithm By Bob Jenkins [Ref 38]. It's also implemented in 90 # Perl for internal hash keys in hv.h include file. 91 # 92 # It's not clear how to force Perl perform unsigned integer arithmetic irrespective of the OS/Platform and 93 # the value of use64bitint flag used during its compilation. 94 # 95 # In order to generate a consistent 32 bit has code across OS/platforms, the following methodology appear 96 # to work: 97 # 98 # o Use MaxHashCodeMask to retrieve appropriate bits after left shifting by bit operators and additions 99 # o Stay away from "use integer" to avoid signed integer arithmetic for bit operators 100 # 101 # 102 # MaxHashCodeMask (2147483647) corresponds to the maximum value which can be stored in 31 bits 103 # 104 my($MaxHashCodeMask); 105 $MaxHashCodeMask = 2**31 - 1; 106 107 sub HashCode { 108 my($String) = @_; 109 my($HashCode, $Value, $ShiftedHashCode); 110 111 $HashCode = 0; 112 for $Value (unpack('C*', $String)) { 113 $HashCode += $Value; 114 115 $ShiftedHashCode = $HashCode << 10; 116 if ($ShiftedHashCode > $MaxHashCodeMask) { 117 $ShiftedHashCode = $ShiftedHashCode & $MaxHashCodeMask; 118 } 119 120 $HashCode += $ShiftedHashCode; 121 if ($HashCode > $MaxHashCodeMask) { 122 $HashCode = $HashCode & $MaxHashCodeMask; 123 } 124 125 $HashCode ^= ($HashCode >> 6); 126 } 127 128 $ShiftedHashCode = $HashCode << 3; 129 if ($ShiftedHashCode > $MaxHashCodeMask) { 130 $ShiftedHashCode = $ShiftedHashCode & $MaxHashCodeMask; 131 } 132 133 $HashCode += $ShiftedHashCode; 134 if ($HashCode > $MaxHashCodeMask) { 135 $HashCode = $HashCode & $MaxHashCodeMask; 136 } 137 $HashCode ^= ($HashCode >> 11); 138 139 $ShiftedHashCode = $HashCode << 15; 140 if ($ShiftedHashCode > $MaxHashCodeMask) { 141 $ShiftedHashCode = $ShiftedHashCode & $MaxHashCodeMask; 142 } 143 144 $HashCode += $ShiftedHashCode; 145 if ($HashCode > $MaxHashCodeMask) { 146 $HashCode = $HashCode & $MaxHashCodeMask; 147 } 148 return $HashCode; 149 } 150 151 # Check out the string: Is it defined and has a non zero length? 152 sub IsEmpty { 153 my($TheString) = @_; 154 my($Status) = 1; 155 156 $Status = (defined($TheString) && length($TheString)) ? 0 : 1; 157 158 return $Status; 159 } 160 161 # Is first specified number power of second specified number... 162 sub IsNumberPowerOfNumber { 163 my($FirstNum, $SecondNum) = @_; 164 my($PowerValue); 165 166 $PowerValue = log($FirstNum)/log($SecondNum); 167 168 return IsInteger($PowerValue) ? 1 : 0; 169 } 170 171 # Check out the string: Is it an integer? 172 sub IsInteger { 173 my($TheString) = @_; 174 my($Status) = 0; 175 176 if (defined($TheString) && length($TheString)) { 177 $TheString = RemoveLeadingAndTrailingWhiteSpaces($TheString); 178 $TheString =~ s/^[+-]//; 179 $Status = ($TheString =~ /[^0-9]/) ? 0 : 1; 180 } 181 return $Status; 182 } 183 184 # Check out the string: Is it an integer with value > 0? 185 sub IsPositiveInteger { 186 my($TheString) = @_; 187 my($Status) = 0; 188 189 $Status = IsInteger($TheString) ? ($TheString > 0 ? 1 : 0) : 0; 190 191 return $Status; 192 } 193 194 195 # Check out the string: Is it a float? 196 sub IsFloat { 197 my($TheString) = @_; 198 my($Status) = 0; 199 200 if (defined($TheString) && length($TheString)) { 201 $TheString = RemoveLeadingAndTrailingWhiteSpaces($TheString); 202 $TheString =~ s/^[+-]//; 203 $Status = ($TheString =~ /[^0-9.eE]/) ? 0 : (((length($TheString) == 1) && ($TheString =~ /[.eE]/)) ? 0 : 1); 204 } 205 return $Status; 206 } 207 208 # Check out the string: Is it defined and has a non zero length? 209 sub IsNotEmpty { 210 my($TheString) = @_; 211 my($Status); 212 213 $Status = IsEmpty($TheString) ? 0 : 1; 214 215 return $Status; 216 } 217 218 # Check out the string: Does it only contain numerical data? 219 sub IsNumerical { 220 my($TheString) = @_; 221 my($Status) = 0; 222 223 if (defined($TheString) && length($TheString)) { 224 $TheString = RemoveLeadingAndTrailingWhiteSpaces($TheString); 225 $TheString =~ s/^[+-]//; 226 $Status = ($TheString =~ /[^0-9.eE]/) ? 0 : (((length($TheString) == 1) && ($TheString =~ /[.eE]/)) ? 0 : 1); 227 } 228 return $Status; 229 } 230 231 # Join different words using delimiter and quote parameters. And return as 232 # a string value. 233 sub JoinWords { 234 my($Words, $Delim, $Quote) = @_; 235 236 if (!@$Words) { 237 return ""; 238 } 239 240 $Quote = $Quote ? "\"" : ""; 241 my(@NewWords) = map { (defined($_) && length($_)) ? "${Quote}$_${Quote}" : "${Quote}${Quote}" } @$Words; 242 243 return join $Delim, @NewWords; 244 } 245 246 # Split string value containing quoted or unquoted words in to an array containing 247 # unquoted words. 248 # 249 # This function is used to split strings generated by JoinWords. 250 # 251 sub SplitWords { 252 my($Line, $Delim) = @_; 253 254 if (!$Line) { 255 return (); 256 } 257 258 # Is it a quoted string? 259 if ($Line =~ /^\"/) { 260 # Take out first and last quote... 261 $Line =~ s/^\"//; $Line =~ s/\"$//; 262 263 $Delim = "\"$Delim\""; 264 } 265 return split /$Delim/, $Line; 266 } 267 268 # Based on quote parameter, figure out what to do 269 sub QuoteAWord { 270 my($Word, $Quote) = @_; 271 my($QuotedWord); 272 273 $QuotedWord = ""; 274 if ($Word) { 275 $QuotedWord = $Word; 276 } 277 if ($Quote) { 278 $QuotedWord = "\"$QuotedWord\""; 279 } 280 return ($QuotedWord); 281 } 282 283 # Remove leading white space characters from the string... 284 sub RemoveLeadingWhiteSpaces { 285 my($InString) = @_; 286 my($OutString, $TrailingString, $LeadingWhiteSpace); 287 288 $OutString = $InString; 289 if (length($InString) && ContainsWhiteSpaces($InString)) { 290 $OutString =~ s/^([ \t\r\n\f]*)(.*?)$/$2/; 291 } 292 return $OutString; 293 } 294 295 # Remove Trailing white space characters from the string... 296 sub RemoveTrailingWhiteSpaces { 297 my($InString) = @_; 298 my($OutString, $LeadingString, $TrailingWhiteSpace); 299 300 $OutString = $InString; 301 if (length($InString) && ContainsWhiteSpaces($InString)) { 302 $OutString =~ s/^(.*?)([ \t\r\n\f]*)$/$1/; 303 } 304 return $OutString; 305 } 306 307 # Remove both leading and trailing white space characters from the string... 308 sub RemoveLeadingAndTrailingWhiteSpaces { 309 my($InString) = @_; 310 my($OutString); 311 312 $OutString = $InString; 313 if (length($InString) && ContainsWhiteSpaces($InString)) { 314 $OutString =~ s/^([ \t\r\n\f]*)(.*?)([ \t\r\n\f]*)$/$2/; 315 } 316 return $OutString; 317 } 318 319 # Wrap text string... 320 sub WrapText { 321 my($InString, $WrapLength, $WrapDelimiter); 322 my($OutString); 323 324 $WrapLength = 40; 325 $WrapDelimiter = "\n"; 326 if (@_ == 3) { 327 ($InString, $WrapLength, $WrapDelimiter) = @_; 328 } 329 elsif (@_ == 2) { 330 ($InString, $WrapLength) = @_; 331 } 332 else { 333 ($InString, $WrapLength) = @_; 334 } 335 $OutString = $InString; 336 if ($InString && (length($InString) > $WrapLength)) { 337 $OutString = ""; 338 my($Index, $Length, $FirstPiece, $StringPiece); 339 $Index = 0; $Length = length($InString); 340 $FirstPiece = 1; 341 for ($Index = 0; $Index < $Length; $Index += $WrapLength) { 342 if (($Index + $WrapLength) < $Length) { 343 $StringPiece = substr($InString, $Index, $WrapLength); 344 } 345 else { 346 # Last piece of the string... 347 $StringPiece = substr($InString, $Index, $WrapLength); 348 } 349 if ($FirstPiece) { 350 $FirstPiece = 0; 351 $OutString = $StringPiece; 352 } 353 else { 354 $OutString .= "${WrapDelimiter}${StringPiece}"; 355 } 356 } 357 } 358 return $OutString; 359 } 360