MayaChemTools

   1 #!/usr/bin/perl -w
   2 #
   3 # $RCSfile: ExtractFromTextFiles.pl,v $
   4 # $Date: 2010/06/23 20:59:29 $
   5 # $Revision: 1.32 $
   6 #
   7 # Author: Manish Sud <msud@san.rr.com>
   8 #
   9 # Copyright (C) 2004-2010 Manish Sud. All rights reserved.
  10 #
  11 # This file is part of MayaChemTools.
  12 #
  13 # MayaChemTools is free software; you can redistribute it and/or modify it under
  14 # the terms of the GNU Lesser General Public License as published by the Free
  15 # Software Foundation; either version 3 of the License, or (at your option) any
  16 # later version.
  17 #
  18 # MayaChemTools is distributed in the hope that it will be useful, but without
  19 # any warranty; without even the implied warranty of merchantability of fitness
  20 # for a particular purpose.  See the GNU Lesser General Public License for more
  21 # details.
  22 #
  23 # You should have received a copy of the GNU Lesser General Public License
  24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  26 # Boston, MA, 02111-1307, USA.
  27 #
  28 
  29 use strict;
  30 use FindBin; use lib "$FindBin::Bin/../lib";
  31 use Getopt::Long;
  32 use File::Basename;
  33 use Text::ParseWords;
  34 use FileHandle;
  35 use Benchmark;
  36 use FileUtil;
  37 use TextUtil;
  38 
  39 my($ScriptName, %Options, $StartTime, $EndTime, $TotalTime);
  40 
  41 # Autoflush STDOUT
  42 $| = 1;
  43 
  44 $StartTime = new Benchmark;
  45 
  46 # Starting message...
  47 $ScriptName = basename $0;
  48 print "\n$ScriptName:Starting...\n\n";
  49 
  50 # Get the options and setup script...
  51 SetupScriptUsage();
  52 if ($Options{help} || @ARGV < 1) {
  53   die GetUsageFromPod("$FindBin::Bin/$ScriptName");
  54 }
  55 
  56 my(@TextFilesList);
  57 @TextFilesList = ExpandFileNames(\@ARGV, "csv tsv");
  58 
  59 my($OutDelim, $OutQuote, $SpecifiedCategoryCol, $SpecifiedRowsMode, @SpecifiedColumns, @SpecifiedRowValues);
  60 ProcessOptions();
  61 
  62 # Collect column information for all the text files...
  63 print "Checking input text file(s)...\n";
  64 my(@TextFilesOkay, @TextFilesColCount, @TextFilesColLabels, @TextFilesColLabelToNumMap, @TextFilesInDelim, @TextFilesOutFile, @TextFilesOutFileExt, @TextFilesCategoryOutFileRoot);
  65 RetrieveTextFilesInfo();
  66 
  67 # Make sure the specified columns exists in text files...
  68 my(@TextFilesCategoryColNum, @TextFilesColNumsToExtract);
  69 ProcessColumnsInfo();
  70 
  71 # Process specified rows info...
  72 my(@TextFilesRowValues);
  73 ProcessRowsInfo();
  74 
  75 # Generate output files...
  76 my($Index, $TextFile);
  77 if (@TextFilesList > 1) {
  78   print "Processing text files...\n";
  79 }
  80 for $Index (0 .. $#TextFilesList) {
  81   if ($TextFilesOkay[$Index]) {
  82     $TextFile = $TextFilesList[$Index];
  83     if (@TextFilesList > 1) {
  84       print "\nProcessing file $TextFile...\n";
  85     }
  86     else {
  87       print "Processing file $TextFile...\n"
  88     }
  89     if ($Options{mode} =~ /^categories$/i) {
  90       ExtractCategoryData($Index);
  91     }
  92     elsif ($Options{mode} =~ /^rows$/i){
  93       ExtractRowsData($Index);
  94     }
  95     else {
  96       ExtractColumnData($Index);
  97     }
  98   }
  99 }
 100 
 101 print "$ScriptName:Done...\n\n";
 102 
 103 $EndTime = new Benchmark;
 104 $TotalTime = timediff ($EndTime, $StartTime);
 105 print "Total time: ", timestr($TotalTime), "\n";
 106 
 107 ###############################################################################
 108 
 109 # Geneate category files...
 110 sub ExtractCategoryData {
 111   my($Index) = @_;
 112   my($TextFile, $CategoryCol, $NewTextFile, $InDelim, @ColLabels);
 113 
 114   $TextFile = $TextFilesList[$Index];
 115   $NewTextFile =$TextFilesOutFile[$Index];
 116   $CategoryCol = $TextFilesCategoryColNum[$Index];
 117   $InDelim = $TextFilesInDelim[$Index];
 118   @ColLabels = @{$TextFilesColLabels[$Index]};
 119 
 120   my($Line, @LineWords, $CategoryName, $CategoryCount, %CategoriesNameToCountMap, %CategoriesNameToLinesMap);
 121   # Collect category data...
 122   open TEXTFILE, "$TextFile" or die "Couldn't open $TextFile: $! \n";
 123   # Skip label line...
 124   $_ = <TEXTFILE>;
 125 
 126   %CategoriesNameToCountMap = ();
 127   %CategoriesNameToLinesMap = ();
 128   while ($Line = GetTextLine(\*TEXTFILE)) {
 129     @LineWords = quotewords($InDelim, 0, $Line);
 130     $CategoryName = ($CategoryCol <= @LineWords) ? $LineWords[$CategoryCol] : "";
 131     if (exists($CategoriesNameToCountMap{$CategoryName})) {
 132       $CategoriesNameToCountMap{$CategoryName} += 1;
 133       push @{$CategoriesNameToLinesMap{$CategoryName}}, $Line;
 134     }
 135     else {
 136       $CategoriesNameToCountMap{$CategoryName} = 1;
 137       @{$CategoriesNameToLinesMap{$CategoryName}} = ();
 138       push @{$CategoriesNameToLinesMap{$CategoryName}}, $Line;
 139     }
 140   }
 141   close TEXTFILE;
 142 
 143   # Setup file names for individual category files...
 144   my(%CategoriesNameToFileHandleMap, %CategoriesNameToFileNameMap, $CategoryFile, $CategoryFileHandle);
 145   %CategoriesNameToFileHandleMap = ();
 146   %CategoriesNameToFileNameMap = ();
 147   for $CategoryName (keys %CategoriesNameToCountMap) {
 148     $CategoryFile = $TextFilesCategoryOutFileRoot[$Index] . "$CategoryName" . ".$TextFilesOutFileExt[$Index]";;
 149     $CategoryFile =~ s/ //g;
 150     $CategoryFileHandle = new FileHandle;
 151     open $CategoryFileHandle, ">$CategoryFile" or die "Couldn't open $CategoryFile: $! \n";
 152     $CategoriesNameToFileNameMap{$CategoryName} = $CategoryFile;
 153     $CategoriesNameToFileHandleMap{$CategoryName} = $CategoryFileHandle;
 154   }
 155 
 156   # Write out summary file...
 157   print "Generating file $NewTextFile...\n";
 158   open NEWTEXTFILE, ">$NewTextFile" or die "Couldn't open $NewTextFile: $! \n";
 159   # Write out column labels...
 160   @LineWords = ("Category","Count");
 161   $Line = JoinWords(\@LineWords, $OutDelim, $OutQuote);
 162   print NEWTEXTFILE "$Line\n";
 163 
 164   # Write out the category names and count...
 165   for $CategoryName (sort { lc($a) cmp lc($b) } keys %CategoriesNameToCountMap) {
 166     $CategoryCount = $CategoriesNameToCountMap{$CategoryName};
 167     @LineWords = ("$CategoryName","$CategoryCount");
 168     $Line = JoinWords(\@LineWords, $OutDelim, $OutQuote);
 169     print NEWTEXTFILE "$Line\n";
 170   }
 171   close NEWTEXTFILE;
 172 
 173   # Write out a file for each category...
 174   my($ColLabelLine, $LineIndex);
 175   $ColLabelLine = JoinWords(\@ColLabels, $OutDelim, $OutQuote);
 176   print "\nGenerating text files for each category...\n";
 177   for $CategoryName (sort { lc($a) cmp lc($b) } keys %CategoriesNameToCountMap) {
 178     print "Generating file $CategoriesNameToFileNameMap{$CategoryName}...\n";
 179     $CategoryFileHandle = $CategoriesNameToFileHandleMap{$CategoryName};
 180     print $CategoryFileHandle "$ColLabelLine\n";
 181     for $LineIndex (0 .. $#{$CategoriesNameToLinesMap{$CategoryName}}) {
 182       $Line = ${$CategoriesNameToLinesMap{$CategoryName}}[$LineIndex];
 183       @LineWords = quotewords($InDelim, 0, $Line);
 184       $Line = JoinWords(\@LineWords, $OutDelim, $OutQuote);
 185       print $CategoryFileHandle "$Line\n";
 186     }
 187     close $CategoryFileHandle;
 188   }
 189 }
 190 
 191 # Extract data for specific columns...
 192 sub ExtractColumnData {
 193   my($Index) = @_;
 194   my($TextFile, @ColNumsToExtract, $NewTextFile, $InDelim);
 195 
 196   $TextFile = $TextFilesList[$Index];
 197   $NewTextFile =$TextFilesOutFile[$Index];
 198   $InDelim = $TextFilesInDelim[$Index];
 199   @ColNumsToExtract = @{$TextFilesColNumsToExtract[$Index]};
 200 
 201   print "Generating file $NewTextFile...\n";
 202   open TEXTFILE, "$TextFile" or die "Couldn't open $TextFile: $! \n";
 203   open NEWTEXTFILE, ">$NewTextFile" or die "Couldn't open $NewTextFile: $! \n";
 204   $_ = <TEXTFILE>;
 205   # Write out column labels...
 206   my($Line, @LineWords, @ColLabels, $ColLabelLine, @ColValues, $ColValuesLine, $ColNum, $ColValue);
 207   @ColLabels = (); $ColLabelLine = "";
 208   for $ColNum (@ColNumsToExtract) {
 209     push @ColLabels, $TextFilesColLabels[$Index][$ColNum];
 210   }
 211   $ColLabelLine = JoinWords(\@ColLabels, $OutDelim, $OutQuote);
 212   print NEWTEXTFILE "$ColLabelLine\n";
 213   while ($Line = GetTextLine(\*TEXTFILE)) {
 214     @LineWords = quotewords($InDelim, 0, $Line);
 215     @ColValues = (); $ColValuesLine = "";
 216     for $ColNum (@ColNumsToExtract) {
 217       $ColValue = "";
 218       if ($ColNum < @LineWords) {
 219 	$ColValue = (defined $LineWords[$ColNum]) ? $LineWords[$ColNum] : "";
 220       }
 221       push @ColValues, $ColValue;
 222     }
 223     $ColValuesLine = JoinWords(\@ColValues, $OutDelim, $OutQuote);
 224     print NEWTEXTFILE "$ColValuesLine\n";
 225   }
 226   close NEWTEXTFILE;
 227   close TEXTFILE;
 228 }
 229 
 230 # Extract data for specific rows...
 231 sub ExtractRowsData {
 232   my($Index) = @_;
 233   my($TextFile, $NewTextFile, $InDelim);
 234 
 235   $TextFile = $TextFilesList[$Index];
 236   $NewTextFile =$TextFilesOutFile[$Index];
 237   $InDelim = $TextFilesInDelim[$Index];
 238 
 239   print "Generating file $NewTextFile...\n";
 240   open TEXTFILE, "$TextFile" or die "Couldn't open $TextFile: $! \n";
 241   open NEWTEXTFILE, ">$NewTextFile" or die "Couldn't open $NewTextFile: $! \n";
 242 
 243   my($Line, $RowCount, @LineWords, @ColLabels);
 244 
 245   # Write out column labels...
 246   $Line = <TEXTFILE>;
 247   push @ColLabels, @{$TextFilesColLabels[$Index]};
 248   $Line = JoinWords(\@ColLabels, $OutDelim, $OutQuote);
 249   print NEWTEXTFILE "$Line\n";
 250 
 251   if ($SpecifiedRowsMode =~ /^rowsbycolvalue$/i) {
 252     ExtractRowsByColValue($Index, \*TEXTFILE, \*NEWTEXTFILE);
 253   }
 254   elsif ($SpecifiedRowsMode =~ /^rowsbycolvaluelist$/i) {
 255     ExtractRowsByColValueList($Index, \*TEXTFILE, \*NEWTEXTFILE);
 256   }
 257   elsif ($SpecifiedRowsMode =~ /^rowsbycolvaluerange$/i) {
 258     ExtractRowsByColValueRange($Index, \*TEXTFILE, \*NEWTEXTFILE);
 259   }
 260   elsif ($SpecifiedRowsMode =~ /^(rowbymincolvalue|rowbymaxcolvalue)$/i) {
 261     ExtractRowByMinOrMaxColValue($Index, \*TEXTFILE, \*NEWTEXTFILE);
 262   }
 263   elsif ($SpecifiedRowsMode =~ /^rownums$/i) {
 264     ExtractRowsByRowNums($Index, \*TEXTFILE, \*NEWTEXTFILE);
 265   }
 266   elsif ($SpecifiedRowsMode =~ /^rownumrange$/i) {
 267     ExtractRowsByRowNumRange($Index, \*TEXTFILE, \*NEWTEXTFILE);
 268   }
 269 
 270   close NEWTEXTFILE;
 271   close TEXTFILE;
 272 }
 273 
 274 # Extract rows by column value...
 275 sub ExtractRowsByColValue {
 276   my($Index, $TextFileRef, $NewTextFileRef) = @_;
 277 
 278   my($Line, $ColNum, $ColValue, $Criterion, $Value, $ValueIndex, $InDelim, @LineWords);
 279   $InDelim = $TextFilesInDelim[$Index];
 280 
 281   LINE: while ($Line = GetTextLine($TextFileRef)) {
 282     @LineWords = quotewords($InDelim, 0, $Line);
 283     for ($ValueIndex = 0; $ValueIndex < @{$TextFilesRowValues[$Index]}; $ValueIndex = $ValueIndex + 3) {
 284       $ColNum = $TextFilesRowValues[$Index][$ValueIndex];
 285       $ColValue = $TextFilesRowValues[$Index][$ValueIndex + 1];
 286       $Criterion = $TextFilesRowValues[$Index][$ValueIndex + 2];
 287       if ($ColNum > $#LineWords) {
 288 	next LINE;
 289       }
 290       $Value = $LineWords[$ColNum];
 291       if ($Criterion =~ /^le$/i) {
 292 	if ($Value > $ColValue) {
 293 	  next LINE;
 294 	}
 295       }
 296       elsif ($Criterion =~ /^ge$/i) {
 297 	if ($Value < $ColValue) {
 298 	  next LINE;
 299 	}
 300       }
 301       elsif ($Criterion =~ /^eq$/i) {
 302 	if ($Value ne $ColValue) {
 303 	  next LINE;
 304 	}
 305       }
 306     }
 307     # Write it out...
 308     $Line = JoinWords(\@LineWords, $OutDelim, $OutQuote);
 309     print $NewTextFileRef "$Line\n";
 310   }
 311 }
 312 # Extract rows by column value list...
 313 sub ExtractRowsByColValueList {
 314   my($Index, $TextFileRef, $NewTextFileRef) = @_;
 315 
 316   my($Line, $ColNum, $ColValue, $ValueIndex, $Value, $InDelim, %ColValueMap, @LineWords);
 317   $InDelim = $TextFilesInDelim[$Index];
 318   $ColNum = $TextFilesRowValues[$Index][0];
 319 
 320   # Setup a col value map...
 321   %ColValueMap = ();
 322   for $ValueIndex (1 .. $#{$TextFilesRowValues[$Index]}) {
 323     $Value = $TextFilesRowValues[$Index][$ValueIndex];
 324     $ColValueMap{$Value} = $Value;
 325   }
 326 
 327   LINE: while ($Line = GetTextLine($TextFileRef)) {
 328     @LineWords = quotewords($InDelim, 0, $Line);
 329     if ($ColNum > $#LineWords) {
 330       next LINE;
 331     }
 332     $ColValue = $LineWords[$ColNum];
 333     if (exists $ColValueMap{$ColValue}) {
 334       $Line = JoinWords(\@LineWords, $OutDelim, $OutQuote);
 335       print $NewTextFileRef "$Line\n";
 336     }
 337   }
 338 }
 339 
 340 # Extract row by minimum column value...
 341 sub ExtractRowByMinOrMaxColValue {
 342   my($Index, $TextFileRef, $NewTextFileRef) = @_;
 343 
 344   my($Line, $ColNum, $ColValue, $FirstValue, $ValueLine, $InDelim, @LineWords);
 345   $InDelim = $TextFilesInDelim[$Index];
 346   $ColNum = $TextFilesRowValues[$Index][0];
 347 
 348   $ValueLine = ''; $ColValue = ''; $FirstValue = 1;
 349   LINE: while ($Line = GetTextLine($TextFileRef)) {
 350     @LineWords = quotewords($InDelim, 0, $Line);
 351     if ($ColNum > $#LineWords) {
 352       next LINE;
 353     }
 354     if ($FirstValue) {
 355       $FirstValue = 0;
 356       $ColValue = $LineWords[$ColNum];
 357       $ValueLine = $Line;
 358       next LINE;
 359     }
 360     if ($SpecifiedRowsMode =~ /^rowbymaxcolvalue$/i) {
 361       if ($LineWords[$ColNum] > $ColValue) {
 362 	$ColValue = $LineWords[$ColNum];
 363 	$ValueLine = $Line;
 364       }
 365     }
 366     else {
 367       if ($LineWords[$ColNum] < $ColValue) {
 368 	$ColValue = $LineWords[$ColNum];
 369 	$ValueLine = $Line;
 370       }
 371     }
 372   }
 373   if ($ValueLine) {
 374     @LineWords = quotewords($InDelim, 0, $ValueLine);
 375     $Line = JoinWords(\@LineWords, $OutDelim, $OutQuote);
 376     print $NewTextFileRef "$Line\n";
 377   }
 378 }
 379 
 380 # Extract rows by column value range...
 381 sub ExtractRowsByColValueRange {
 382   my($Index, $TextFileRef, $NewTextFileRef) = @_;
 383 
 384   my($Line, $ColNum, $ColValue, $MinValue, $MaxValue, $InDelim, @LineWords);
 385   $InDelim = $TextFilesInDelim[$Index];
 386   $ColNum = $TextFilesRowValues[$Index][0];
 387   $MinValue = $TextFilesRowValues[$Index][1];
 388   $MaxValue = $TextFilesRowValues[$Index][2];
 389 
 390   LINE: while ($Line = GetTextLine($TextFileRef)) {
 391     @LineWords = quotewords($InDelim, 0, $Line);
 392     if ($ColNum > $#LineWords) {
 393       next LINE;
 394     }
 395     $ColValue = $LineWords[$ColNum];
 396     if ($ColValue >= $MinValue && $ColValue <= $MaxValue) {
 397       $Line = JoinWords(\@LineWords, $OutDelim, $OutQuote);
 398       print $NewTextFileRef "$Line\n";
 399     }
 400   }
 401 }
 402 
 403 # Extract rows by row number range...
 404 sub ExtractRowsByRowNumRange {
 405   my($Index, $TextFileRef, $NewTextFileRef) = @_;
 406 
 407   my($Line, $MinRowNum, $MaxRowNum, $RowCount, $InDelim, @LineWords);
 408   $InDelim = $TextFilesInDelim[$Index];
 409   $MinRowNum = $TextFilesRowValues[$Index][0];
 410   $MaxRowNum = $TextFilesRowValues[$Index][1];
 411 
 412   $RowCount = 1;
 413   LINE: while ($Line = GetTextLine($TextFileRef)) {
 414     $RowCount++;
 415     if ($RowCount >= $MinRowNum && $RowCount <= $MaxRowNum) {
 416       @LineWords = quotewords($InDelim, 0, $Line);
 417       $Line = JoinWords(\@LineWords, $OutDelim, $OutQuote);
 418       print $NewTextFileRef "$Line\n";
 419     }
 420     elsif ($RowCount > $MaxRowNum) {
 421       last LINE;
 422     }
 423   }
 424 }
 425 
 426 # Extract rows by row numbers...
 427 sub ExtractRowsByRowNums {
 428   my($Index, $TextFileRef, $NewTextFileRef) = @_;
 429 
 430   my($Line, $RowNum, $MaxRowNum, $RowCount, $InDelim, %RowNumMap, @LineWords);
 431   $InDelim = $TextFilesInDelim[$Index];
 432 
 433   # Setup a row nums map...
 434   %RowNumMap = ();
 435   $MaxRowNum = $TextFilesRowValues[$Index][0];
 436   for $RowNum (@{$TextFilesRowValues[$Index]}) {
 437     if ($RowNum > $MaxRowNum) {
 438       $MaxRowNum = $RowNum;
 439     }
 440     $RowNumMap{$RowNum} = $RowNum;
 441   }
 442 
 443   $RowCount = 1;
 444   LINE: while ($Line = GetTextLine($TextFileRef)) {
 445     $RowCount++;
 446     if (exists $RowNumMap{$RowCount}) {
 447       @LineWords = quotewords($InDelim, 0, $Line);
 448       $Line = JoinWords(\@LineWords, $OutDelim, $OutQuote);
 449       print $NewTextFileRef "$Line\n";
 450     }
 451     elsif ($RowCount > $MaxRowNum) {
 452       last LINE;
 453     }
 454   }
 455 }
 456 
 457 # Process option values...
 458 sub ProcessOptions {
 459   $SpecifiedCategoryCol = "";
 460   if (defined $Options{categorycol}) {
 461     my(@SpecifiedValues) = split ",", $Options{categorycol};
 462     if (@SpecifiedValues != 1) {
 463       die "Error: Invalid number of values, ",scalar(@SpecifiedValues), " using \"--categorycol\" option: Only one value is allowed.\n";
 464     }
 465     $SpecifiedCategoryCol = $SpecifiedValues[0];
 466     if ($Options{colmode} =~ /^colnum$/i) {
 467       if (!IsPositiveInteger($SpecifiedCategoryCol)) {
 468 	die "Error: Category column value, $SpecifiedCategoryCol, specified using \"--categorycol\" is not valid. Allowed integer values: > 0.\n";
 469       }
 470     }
 471   }
 472   @SpecifiedColumns = ();
 473   if (defined $Options{columns}) {
 474     my(@SpecifiedValues) = split ",", $Options{columns};
 475     if ($Options{colmode} =~ /^colnum$/i) {
 476       my($ColValue);
 477       for $ColValue (@SpecifiedValues) {
 478 	if (!IsPositiveInteger($ColValue)) {
 479 	  die "Error: Column value, $ColValue, specified using \"--columns\" is not valid: Allowed integer values: > 0.\n";
 480 	}
 481       }
 482     }
 483     push @SpecifiedColumns, @SpecifiedValues;
 484   }
 485   $OutDelim = ($Options{outdelim} =~ /^tab$/i ) ? "\t" : (($Options{outdelim} =~ /^semicolon$/i) ? "\;" : "\,");
 486   $OutQuote = ($Options{quote} =~ /^yes$/i) ? 1 : 0;
 487 
 488   # Process any specified rows values...
 489   @SpecifiedRowValues = ();
 490   $SpecifiedRowsMode = $Options{rowsmode};
 491   if (defined $Options{rows}) {
 492     (@SpecifiedRowValues) = split ",", $Options{rows};
 493   }
 494   else {
 495     if ($Options{rowsmode} !~ /^rownums$/i) {
 496       die "Error: Specify value for \"--rows\" option with \"--rowsmode\" value of $Options{rowsmode}.\n";
 497     }
 498     push @SpecifiedRowValues, "1";
 499   }
 500 
 501   my($SpecifiedColID, $SpecifiedRowID);
 502   # Make sure specified values are okay...
 503   if ($Options{rowsmode} =~ /^rowsbycolvalue$/i) {
 504     if (@SpecifiedRowValues % 3) {
 505       die "Error: Invalid number of values, ", scalar(@SpecifiedRowValues) , ", specified by \"--rows\" option with \"--rowsmode\" value of $Options{rowsmode}.\nIt must contain triplets.\n";
 506     }
 507     # Triplet format: colid,value,criteria. Criterion: le,ge,eq
 508     my($Index, $ColID, $Criterion, $Value);
 509     for ($Index = 0; $Index < @SpecifiedRowValues; $Index = $Index + 3) {
 510       $ColID = $SpecifiedRowValues[$Index];
 511       $Value = $SpecifiedRowValues[$Index + 1];
 512       $Criterion = $SpecifiedRowValues[$Index + 2];
 513       if ($Options{colmode} =~ /^colnum$/i) {
 514 	if (!IsPositiveInteger($ColID)) {
 515 	  die "Error: Invalid column id, $ColID, specified in triplet, \"$ColID,$Criterion,$Value\", using \"--rows\" option with \"--rowsmode\" value of $Options{rowsmode} is not valid. Allowed integer values: > 0.\n";
 516 	}
 517       }
 518       if ($Criterion !~ /^(eq|le|ge)$/i) {
 519 	die "Error: Invalid criterion value, $Criterion, specified in triplet, \"$ColID,$Criterion,$Value\", using \"--rows\" option with \"--rowsmode\" value of $Options{rowsmode} is not valid. Allowed values: le, ge, or eq.\n";
 520       }
 521     }
 522   }
 523   elsif ($Options{rowsmode} =~ /^rowsbycolvaluelist$/i) {
 524     ($SpecifiedColID) = $SpecifiedRowValues[0];
 525     if ($Options{colmode} =~ /^colnum$/i) {
 526       if (!IsPositiveInteger($SpecifiedColID)) {
 527 	die "Error: Rows value, $SpecifiedColID, specified using \"--rows\" option with \"--rowsmode\" value of $Options{rowsmode} is not valid. Allowed integer values: > 0.\n";
 528       }
 529     }
 530     if (@SpecifiedRowValues == 1) {
 531       die "Error: Invalid number of values, ", scalar(@SpecifiedRowValues) , ", specified by \"--rows\" option with \"--rowsmode\" value of $Options{rowsmode}.\nIt must contain more than one value\n";
 532     }
 533   }
 534   elsif ($Options{rowsmode} =~ /^rowsbycolvaluerange$/i) {
 535     if (@SpecifiedRowValues != 3) {
 536       die "Error: Invalid number of values, ", scalar(@SpecifiedRowValues) , ", specified by \"--rows\" option with \"--rowsmode\" value of $Options{rowsmode}.\nIt must contain three values\n";
 537     }
 538     ($SpecifiedColID) = $SpecifiedRowValues[0];
 539     if ($Options{colmode} =~ /^colnum$/i) {
 540       if (!IsPositiveInteger($SpecifiedColID)) {
 541 	die "Error: Rows value, $SpecifiedColID, specified using \"--rows\" option with \"--rowsmode\" value of $Options{rowsmode} is not valid. Allowed integer values: > 0.\n";
 542       }
 543     }
 544     if ($SpecifiedRowValues[1] >= $SpecifiedRowValues[2]) {
 545       die "Error: Invalid value triplet - ", JoinWords(\@SpecifiedRowValues, ',', 0) , " - specified by \"--rows\" option with \"--rowsmode\" value of $Options{rowsmode}.\nAllowed values: second value < third value\n";
 546     }
 547   }
 548   elsif ($Options{rowsmode} =~ /^(rowbymincolvalue|rowbymaxcolvalue)$/i) {
 549     if (@SpecifiedRowValues != 1) {
 550       die "Error: Invalid number of values, ", scalar(@SpecifiedRowValues) , ", specified by \"--rows\" option with \"--rowsmode\" value of $Options{rowsmode}.\nOnly one value is allowed.\n";
 551     }
 552     ($SpecifiedColID) = $SpecifiedRowValues[0];
 553     if ($Options{colmode} =~ /^colnum$/i) {
 554       if (!IsPositiveInteger($SpecifiedColID)) {
 555 	die "Error: Rows value, $SpecifiedColID, specified using \"--rows\" option with \"--rowsmode\" value of $Options{rowsmode} is not valid. Allowed integer values: > 0.\n";
 556       }
 557     }
 558   }
 559   elsif ($Options{rowsmode} =~ /^rownums$/i) {
 560     for $SpecifiedRowID (@SpecifiedRowValues) {
 561       if (!IsPositiveInteger($SpecifiedRowID)) {
 562 	die "Error: Rows value, $SpecifiedRowID, specified using \"--rows\" option with \"--rowsmode\" value of $Options{rowsmode} is not valid. Allowed integer values: > 0.\n";
 563       }
 564     }
 565   }
 566   elsif ($Options{rowsmode} =~ /^rownumrange$/i) {
 567     if (@SpecifiedRowValues != 2) {
 568       die "Error: Invalid number of values, ", scalar(@SpecifiedRowValues) , ", specified by \"--rows\" option with \"--rowsmode\" value of $Options{rowsmode}.\nIt must contain only two values.\n";
 569     }
 570     for $SpecifiedRowID (@SpecifiedRowValues) {
 571       if (!IsPositiveInteger($SpecifiedRowID)) {
 572 	die "Error: Rows value, $SpecifiedRowID, specified using \"--rows\" option with \"--rowsmode\" value of $Options{rowsmode} is not valid. Allowed integer values: > 0.\n";
 573       }
 574     }
 575     if ($SpecifiedRowValues[0] >= $SpecifiedRowValues[1]) {
 576       die "Error: Invalid value pair -  ", JoinWords(\@SpecifiedRowValues, ',', 0) , " - specified by \"--rows\" option with \"--rowsmode\" value of $Options{rowsmode}.\nAllowed values: First value < second value\n";
 577     }
 578   }
 579 }
 580 
 581 # Retrieve information about input text files...
 582 sub RetrieveTextFilesInfo {
 583   my($Index, $TextFile, $FileDir, $FileName, $FileExt, $InDelim, $Line, @ColLabels, $OutFileRoot, $CategoryOutFileRoot, $OutFile, $ColNum, $ColLabel);
 584 
 585   @TextFilesOkay = ();
 586   @TextFilesColCount = (); @TextFilesColLabels = ();
 587   @TextFilesColLabelToNumMap = ();
 588   @TextFilesInDelim = ();
 589   @TextFilesOutFile = (); @TextFilesOutFileExt = (); @TextFilesCategoryOutFileRoot = ();
 590 
 591  FILELIST: for $Index (0 .. $#TextFilesList) {
 592     $TextFile = $TextFilesList[$Index];
 593     $TextFilesOkay[$Index] = 0;
 594     $TextFilesColCount[$Index] = 0;
 595     $TextFilesInDelim[$Index] = "";
 596     $TextFilesOutFile[$Index] = "";
 597     $TextFilesOutFileExt[$Index] = "";
 598     $TextFilesCategoryOutFileRoot[$Index] = "";
 599     @{$TextFilesColLabels[$Index]} = ();
 600     %{$TextFilesColLabelToNumMap[$Index]} = ();
 601     if (!(-e $TextFile)) {
 602       warn "Warning: Ignoring file $TextFile: It doesn't exist\n";
 603       next FILELIST;
 604     }
 605     if (!CheckFileType($TextFile, "csv tsv")) {
 606       warn "Warning: Ignoring file $TextFile: It's not a csv or tsv file\n";
 607       next FILELIST;
 608     }
 609     ($FileDir, $FileName, $FileExt) = ParseFileName($TextFile);
 610     if ($FileExt =~ /^tsv$/i) {
 611       $InDelim = "\t";
 612     }
 613     else {
 614       $InDelim = "\,";
 615       if (!($Options{indelim} =~ /^(comma|semicolon)$/i)) {
 616 	warn "Warning: Ignoring file $TextFile: The value specified, $Options{indelim}, for option \"--indelim\" is not valid for csv files\n";
 617 	next FILELIST;
 618       }
 619       if ($Options{indelim} =~ /^semicolon$/i) {
 620 	$InDelim = "\;";
 621       }
 622     }
 623 
 624     if (!open TEXTFILE, "$TextFile") {
 625       warn "Warning: Ignoring file $TextFile: Couldn't open it: $! \n";
 626       next FILELIST;
 627     }
 628 
 629     $Line = GetTextLine(\*TEXTFILE);
 630     @ColLabels = quotewords($InDelim, 0, $Line);
 631     close TEXTFILE;
 632 
 633     $FileDir = ""; $FileName = ""; $FileExt = "";
 634     ($FileDir, $FileName, $FileExt) = ParseFileName($TextFile);
 635     $FileExt = "csv";
 636     if ($Options{outdelim} =~ /^tab$/i) {
 637       $FileExt = "tsv";
 638     }
 639     if ($Options{root} && (@TextFilesList == 1)) {
 640       my ($RootFileDir, $RootFileName, $RootFileExt) = ParseFileName($Options{root});
 641       if ($RootFileName && $RootFileExt) {
 642 	$FileName = $RootFileName;
 643       }
 644       else {
 645 	$FileName = $Options{root};
 646       }
 647       $OutFileRoot .= $FileName;
 648     }
 649     else {
 650       $OutFileRoot = $FileName;
 651       $OutFileRoot .= ($Options{mode} =~ /^categories$/i) ? "CategoriesSummary" : (($Options{mode} =~ /^rows$/i) ? "ExtractedRows" : "ExtractedColumns");
 652     }
 653     $CategoryOutFileRoot = "$FileName" . "Category";
 654 
 655     $OutFile = $OutFileRoot . ".$FileExt";
 656     if (lc($OutFile) eq lc($TextFile)) {
 657       warn "Warning: Ignoring file $TextFile:Output file name, $OutFile, is same as input text file name, $TextFile\n";
 658       next FILELIST;
 659     }
 660     if (!$Options{overwrite}) {
 661       if (-e $OutFile) {
 662 	warn "Warning: Ignoring file $TextFile: The file $OutFile already exists\n";
 663 	next FILELIST;
 664       }
 665     }
 666 
 667     $TextFilesOkay[$Index] = 1;
 668     $TextFilesInDelim[$Index] = $InDelim;
 669     $TextFilesCategoryOutFileRoot[$Index] = "$CategoryOutFileRoot";
 670     $TextFilesOutFile[$Index] = "$OutFile";
 671     $TextFilesOutFileExt[$Index] = "$FileExt";
 672 
 673     $TextFilesColCount[$Index] = @ColLabels;
 674     push @{$TextFilesColLabels[$Index]}, @ColLabels;
 675     for $ColNum (0 .. $#ColLabels) {
 676       $ColLabel = $ColLabels[$ColNum];
 677       $TextFilesColLabelToNumMap[$Index]{$ColLabel} = $ColNum;
 678     }
 679   }
 680 }
 681 
 682 # Make sure the specified columns exists in text files...
 683 sub ProcessColumnsInfo {
 684   my($Index, @ColNumsToExtract, $TextFile);
 685 
 686   @TextFilesCategoryColNum = ();
 687   @TextFilesColNumsToExtract = ();
 688  FILELIST: for $Index (0 .. $#TextFilesList) {
 689     $TextFile = $TextFilesList[$Index];
 690 
 691     $TextFilesCategoryColNum[$Index] = 0;
 692     @{$TextFilesColNumsToExtract[$Index]} = ();
 693 
 694     if ($TextFilesOkay[$Index]) {
 695       if ($Options{mode} =~ /^categories$/i) {
 696 	my($CategoryColNum, $CategoryColValid);
 697 
 698 	$CategoryColNum = 0;
 699 	$CategoryColValid = 1;
 700 	if ($SpecifiedCategoryCol) {
 701 	  if ($Options{colmode} =~ /^colnum$/i) {
 702 	    if ($SpecifiedCategoryCol <= $TextFilesColCount[$Index]) {
 703 	      $CategoryColNum = $SpecifiedCategoryCol - 1;
 704 	    }
 705 	    else {
 706 	      $CategoryColValid = 0;
 707 	    }
 708 	  }
 709 	  else {
 710 	    if (exists($TextFilesColLabelToNumMap[$Index]{$SpecifiedCategoryCol})) {
 711 	      $CategoryColNum =  $TextFilesColLabelToNumMap[$Index]{$SpecifiedCategoryCol};
 712 	    }
 713 	    else {
 714 	      $CategoryColValid = 0;
 715 	    }
 716 	  }
 717 	}
 718 	if ($CategoryColValid) {
 719 	  $TextFilesCategoryColNum[$Index] = $CategoryColNum;
 720 	}
 721 	else {
 722 	  warn "Warning: Ignoring file $TextFile: Category column specified, $SpecifiedCategoryCol, using \"--categorycol\" option doesn't exist\n";
 723 	  $TextFilesOkay[$Index] = 0;
 724 	}
 725       }
 726       elsif ($Options{mode} =~ /^columns$/i) {
 727 	my($SpecifiedColNum, $ColNum);
 728 	$ColNum = 0;
 729 	@ColNumsToExtract = ();
 730 	if (@SpecifiedColumns) {
 731 	  if ($Options{colmode} =~ /^colnum$/i) {
 732 	    for $SpecifiedColNum (@SpecifiedColumns) {
 733 	      if ($SpecifiedColNum >=1 && $SpecifiedColNum <= $TextFilesColCount[$Index]) {
 734 		$ColNum = $SpecifiedColNum - 1;
 735 		push @ColNumsToExtract, $ColNum;
 736 	      }
 737 	    }
 738 	  }
 739 	  else {
 740 	    my($ColLabel);
 741 	    for $ColLabel (@SpecifiedColumns) {
 742 	      if (exists($TextFilesColLabelToNumMap[$Index]{$ColLabel})) {
 743 		push @ColNumsToExtract, $TextFilesColLabelToNumMap[$Index]{$ColLabel};
 744 	      }
 745 	    }
 746 	  }
 747 	}
 748 	else {
 749 	  push @ColNumsToExtract, $ColNum;
 750 	}
 751 	if (@ColNumsToExtract) {
 752 	  push @{$TextFilesColNumsToExtract[$Index]}, @ColNumsToExtract;
 753 	}
 754 	else {
 755 	  warn "Warning: Ignoring file $TextFile: None of the columns specified, @SpecifiedColumns, using \"--columns\" option exist\n";
 756 	  $TextFilesOkay[$Index] = 0;
 757 	}
 758       }
 759     }
 760   }
 761 }
 762 
 763 # Process specified rows info...
 sub ProcessRowsInfo {
   # Translate the row selection specified using "--rows" into per-file row
   # values stored in @{$TextFilesRowValues[$Index]}, one entry per file in
   # @TextFilesList.
   #
   # Takes no arguments. Works on file-level data: %Options, @TextFilesList,
   # @TextFilesOkay, @TextFilesColLabelToNumMap, @TextFilesColCount and
   # @SpecifiedRowValues.
   #
   # Files are skipped, and left with an empty row values list, unless mode is
   # "rows" and the file is still marked okay. A file for which no row values
   # could be set up is reported with a warning and marked not okay in
   # @TextFilesOkay.
   #
   # Layout of the stored row values by "--rowsmode":
   #   rowsbycolvalue:       (ColNum, Value, Criterion) triplets
   #   rowsbycolvaluelist, rowsbycolvaluerange,
   #   rowbymincolvalue, rowbymaxcolvalue:
   #                         ColNum followed by the remaining specified values
   #   rownums, rownumrange: specified values as is
   # ColNum is a zero-based column number.
   my($Index, $TextFile, $ResolveColID, @RowValues, @MissingColIDs);

   # Map a column ID, a label or a one-based number depending on "--colmode",
   # to a zero-based column number for a file. Returns a one element list on
   # success and an empty list when the file has no such column.
   $ResolveColID = sub {
     my($FileIndex, $ColID) = @_;

     if ($Options{colmode} =~ /^collabel$/i) {
       if (exists $TextFilesColLabelToNumMap[$FileIndex]{$ColID}) {
         return ($TextFilesColLabelToNumMap[$FileIndex]{$ColID});
       }
       return ();
     }
     if ($ColID >= 1 && $ColID <= $TextFilesColCount[$FileIndex]) {
       return ($ColID - 1);
     }
     return ();
   };

   @TextFilesRowValues = ();

   FILELIST: for $Index (0 .. $#TextFilesList) {
     $TextFile = $TextFilesList[$Index];
     @{$TextFilesRowValues[$Index]} = ();

     if ($Options{mode} !~ /^rows$/i) {
       next FILELIST;
     }
     if (!$TextFilesOkay[$Index]) {
       next FILELIST;
     }

     @RowValues = ();
     @MissingColIDs = ();

     if ($Options{rowsmode} =~ /^rowsbycolvalue$/i) {
       # Specified values are triplets: column ID, value and criterion. Triplets
       # referring to a column missing from this file are dropped...
       my($ValueIndex, $ColID, $Value, $Criterion, @ColNum);
       for ($ValueIndex = 0; $ValueIndex < @SpecifiedRowValues; $ValueIndex += 3) {
         ($ColID, $Value, $Criterion) = @SpecifiedRowValues[$ValueIndex .. $ValueIndex + 2];

         @ColNum = $ResolveColID->($Index, $ColID);
         if (@ColNum) {
           push @RowValues, ($ColNum[0], $Value, $Criterion);
         }
         else {
           push @MissingColIDs, $ColID;
         }
       }
     }
     elsif ($Options{rowsmode} =~ /^(rowsbycolvaluelist|rowsbycolvaluerange|rowbymincolvalue|rowbymaxcolvalue)$/i) {
       # Process column id: it's the first specified value...
       if (@SpecifiedRowValues) {
         my($ColID, @ColNum);
         $ColID = $SpecifiedRowValues[0];

         @ColNum = $ResolveColID->($Index, $ColID);
         if (@ColNum) {
           # Column number followed by rest of the specified values; the slice
           # is empty when only the column ID was specified...
           push @RowValues, $ColNum[0], @SpecifiedRowValues[1 .. $#SpecifiedRowValues];
         }
         else {
           push @MissingColIDs, $ColID;
         }
       }
     }
     elsif ($Options{rowsmode} =~ /^(rownums|rownumrange)$/i) {
       push @RowValues, @SpecifiedRowValues;
     }

     if (@RowValues) {
       push @{$TextFilesRowValues[$Index]}, @RowValues;
     }
     else {
       # Report the column IDs which actually failed to resolve; no column ID is
       # involved when nothing usable was specified at all...
       if (@MissingColIDs) {
         warn "Warning: Ignoring file $TextFile: Column specified, @MissingColIDs, using \"--rows\" option doesn't exist\n";
       }
       else {
         warn "Warning: Ignoring file $TextFile: No usable row values specified using \"--rows\" option\n";
       }
       $TextFilesOkay[$Index] = 0;
     }
   }
 }
 844 
 845 # Setup script usage  and retrieve command line arguments specified using various options...
 sub SetupScriptUsage {
   # Reset file-level %Options to default values, overlay the command line
   # options retrieved by GetOptions and validate the values of options which
   # accept only a fixed set of values.
   #
   # Takes no arguments. When "-w --workingdir" is specified, the current
   # directory is changed to it.
   #
   # Dies with an error message when: options can't be parsed; the working
   # directory isn't a directory or can't be entered; or a value specified for
   # mode, colmode, indelim, outdelim, quote or rowsmode isn't an allowed value.

   # Setup default and retrieve all the options...
   %Options = ();
   $Options{colmode} = "colnum";
   $Options{indelim} = "comma";
   $Options{mode} = "columns";
   $Options{outdelim} = "comma";
   $Options{quote} = "yes";
   $Options{rowsmode} = "rownums";

   if (!GetOptions(\%Options,
                   "categorycol=s", "columns=s", "colmode|c=s", "help|h",
                   "indelim=s", "mode|m=s", "outdelim=s", "overwrite|o",
                   "quote|q=s", "root|r=s", "rows=s", "rowsmode=s",
                   "workingdir|w=s")) {
     die "\nTo get a list of valid options and their values, use \"$ScriptName -h\" or\n\"perl -S $ScriptName -h\" command and try again...\n";
   }
   if ($Options{workingdir}) {
     if (! -d $Options{workingdir}) {
       die "Error: The value specified, $Options{workingdir}, for option \"-w --workingdir\" is not a directory name.\n";
     }
     # Low precedence "or" is required here: "||" binds to the directory name,
     # which is always true at this point, and a failed chdir would go unnoticed...
     chdir $Options{workingdir} or die "Error: Couldn't chdir $Options{workingdir}: $! \n";
   }
   if ($Options{mode} !~ /^(columns|rows|categories)$/i) {
     die "Error: The value specified, $Options{mode}, for option \"-m --mode\" is not valid. Allowed values: columns, rows or categories \n";
   }
   if ($Options{colmode} !~ /^(colnum|collabel)$/i) {
     die "Error: The value specified, $Options{colmode}, for option \"--colmode\" is not valid. Allowed values: colnum or collabel \n";
   }
   if ($Options{indelim} !~ /^(comma|semicolon)$/i) {
     die "Error: The value specified, $Options{indelim}, for option \"--indelim\" is not valid. Allowed values: comma or semicolon\n";
   }
   if ($Options{outdelim} !~ /^(comma|semicolon|tab)$/i) {
     die "Error: The value specified, $Options{outdelim}, for option \"--outdelim\" is not valid. Allowed values: comma, tab, or semicolon\n";
   }
   if ($Options{quote} !~ /^(yes|no)$/i) {
     die "Error: The value specified, $Options{quote}, for option \"-q --quote\" is not valid. Allowed values: yes or no\n";
   }
   if ($Options{rowsmode} !~ /^(rowsbycolvalue|rowsbycolvaluelist|rowsbycolvaluerange|rowbymincolvalue|rowbymaxcolvalue|rownums|rownumrange)$/i) {
     # The allowed values listed in the message match the ones accepted above...
     die "Error: The value specified, $Options{rowsmode}, for option \"--rowsmode\" is not valid. Allowed values: rowsbycolvalue, rowsbycolvaluelist, rowsbycolvaluerange, rowbymincolvalue, rowbymaxcolvalue, rownums, rownumrange\n";
   }
 }