MayaChemTools

   1 #!/usr/bin/perl -w
   2 #
   3 # $RCSfile: ExtractFromTextFiles.pl,v $
   4 # $Date: 2008/01/30 21:44:46 $
   5 # $Revision: 1.26 $
   6 #
   7 # Author: Manish Sud <msud@san.rr.com>
   8 #
   9 # Copyright (C) 2004-2008 Manish Sud. All rights reserved.
  10 #
  11 # This file is part of MayaChemTools.
  12 #
  13 # MayaChemTools is free software; you can redistribute it and/or modify it under
  14 # the terms of the GNU Lesser General Public License as published by the Free
  15 # Software Foundation; either version 3 of the License, or (at your option) any
  16 # later version.
  17 #
  18 # MayaChemTools is distributed in the hope that it will be useful, but without
   19 # any warranty; without even the implied warranty of merchantability or fitness
  20 # for a particular purpose.  See the GNU Lesser General Public License for more
  21 # details.
  22 #
  23 # You should have received a copy of the GNU Lesser General Public License
  24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  26 # Boston, MA, 02111-1307, USA.
  27 #
  28 
  29 use 5.006;
  30 use strict;
  31 use FindBin; use lib "$FindBin::Bin/../lib";
  32 use Getopt::Long;
  33 use File::Basename;
  34 use Text::ParseWords;
  35 use FileHandle;
  36 use Benchmark;
  37 use FileUtil;
  38 use TextUtil;
  39 
  40 my($ScriptName, %Options, $StartTime, $EndTime, $TotalTime);
  41 
  42 # Autoflush STDOUT
  43 $| = 1;
  44 
  45 $StartTime = new Benchmark;
  46 
  47 # Starting message...
  48 $ScriptName = basename $0;
  49 print "\n$ScriptName:Starting...\n\n";
  50 
  51 # Get the options and setup script...
  52 SetupScriptUsage();
  53 if ($Options{help} || @ARGV < 1) {
  54   die GetUsageFromPod("$FindBin::Bin/$ScriptName");
  55 }
  56 
  57 my(@TextFilesList);
  58 @TextFilesList = ExpandFileNames(\@ARGV, "csv tsv");
  59 
  60 my($OutDelim, $OutQuote, $SpecifiedCategoryCol, $SpecifiedRowsMode, @SpecifiedColumns, @SpecifiedRowValues);
  61 ProcessOptions();
  62 
  63 # Collect column information for all the text files...
  64 print "Checking input text file(s)...\n";
  65 my(@TextFilesOkay, @TextFilesColCount, @TextFilesColLabels, @TextFilesColLabelToNumMap, @TextFilesInDelim, @TextFilesOutFile, @TextFilesOutFileExt, @TextFilesCategoryOutFileRoot);
  66 RetrieveTextFilesInfo();
  67 
  68 # Make sure the specified columns exists in text files...
  69 my(@TextFilesCategoryColNum, @TextFilesColNumsToExtract);
  70 ProcessColumnsInfo();
  71 
  72 # Process specified rows info...
  73 my(@TextFilesRowValues);
  74 ProcessRowsInfo();
  75 
  76 # Generate output files...
  77 my($Index, $TextFile);
  78 if (@TextFilesList > 1) {
  79   print "Processing text files...\n";
  80 }
  81 for $Index (0 .. $#TextFilesList) {
  82   if ($TextFilesOkay[$Index]) {
  83     $TextFile = $TextFilesList[$Index];
  84     if (@TextFilesList > 1) {
  85       print "\nProcessing file $TextFile...\n";
  86     }
  87     else {
  88       print "Processing file $TextFile...\n"
  89     }
  90     if ($Options{mode} =~ /^categories$/i) {
  91       ExtractCategoryData($Index);
  92     }
  93     elsif ($Options{mode} =~ /^rows$/i){
  94       ExtractRowsData($Index);
  95     }
  96     else {
  97       ExtractColumnData($Index);
  98     }
  99   }
 100 }
 101 
 102 print "$ScriptName:Done...\n\n";
 103 
 104 $EndTime = new Benchmark;
 105 $TotalTime = timediff ($EndTime, $StartTime);
 106 print "Total time: ", timestr($TotalTime), "\n";
 107 
 108 ###############################################################################
 109 
 110 # Geneate category files...
 111 sub ExtractCategoryData {
 112   my($Index) = @_;
 113   my($TextFile, $CategoryCol, $NewTextFile, $InDelim, @ColLabels);
 114 
 115   $TextFile = $TextFilesList[$Index];
 116   $NewTextFile =$TextFilesOutFile[$Index];
 117   $CategoryCol = $TextFilesCategoryColNum[$Index];
 118   $InDelim = $TextFilesInDelim[$Index];
 119   @ColLabels = @{$TextFilesColLabels[$Index]};
 120 
 121   my($Line, @LineWords, $CategoryName, $CategoryCount, %CategoriesNameToCountMap, %CategoriesNameToLinesMap);
 122   # Collect category data...
 123   open TEXTFILE, "$TextFile" or die "Couldn't open $TextFile: $! \n";
 124   # Skip label line...
 125   $_ = <TEXTFILE>;
 126 
 127   %CategoriesNameToCountMap = ();
 128   %CategoriesNameToLinesMap = ();
 129   while ($Line = GetTextLine(\*TEXTFILE)) {
 130     @LineWords = quotewords($InDelim, 0, $Line);
 131     $CategoryName = ($CategoryCol <= @LineWords) ? $LineWords[$CategoryCol] : "";
 132     if (exists($CategoriesNameToCountMap{$CategoryName})) {
 133       $CategoriesNameToCountMap{$CategoryName} += 1;
 134       push @{$CategoriesNameToLinesMap{$CategoryName}}, $Line;
 135     }
 136     else {
 137       $CategoriesNameToCountMap{$CategoryName} = 1;
 138       @{$CategoriesNameToLinesMap{$CategoryName}} = ();
 139       push @{$CategoriesNameToLinesMap{$CategoryName}}, $Line;
 140     }
 141   }
 142   close TEXTFILE;
 143 
 144   # Setup file names for individual category files...
 145   my(%CategoriesNameToFileHandleMap, %CategoriesNameToFileNameMap, $CategoryFile, $CategoryFileHandle);
 146   %CategoriesNameToFileHandleMap = ();
 147   %CategoriesNameToFileNameMap = ();
 148   for $CategoryName (keys %CategoriesNameToCountMap) {
 149     $CategoryFile = $TextFilesCategoryOutFileRoot[$Index] . "$CategoryName" . ".$TextFilesOutFileExt[$Index]";;
 150     $CategoryFile =~ s/ //g;
 151     $CategoryFileHandle = new FileHandle;
 152     open $CategoryFileHandle, ">$CategoryFile" or die "Couldn't open $CategoryFile: $! \n";
 153     $CategoriesNameToFileNameMap{$CategoryName} = $CategoryFile;
 154     $CategoriesNameToFileHandleMap{$CategoryName} = $CategoryFileHandle;
 155   }
 156 
 157   # Write out summary file...
 158   print "Generating file $NewTextFile...\n";
 159   open NEWTEXTFILE, ">$NewTextFile" or die "Couldn't open $NewTextFile: $! \n";
 160   # Write out column labels...
 161   @LineWords = ("Category","Count");
 162   $Line = JoinWords(\@LineWords, $OutDelim, $OutQuote);
 163   print NEWTEXTFILE "$Line\n";
 164 
 165   # Write out the category names and count...
 166   for $CategoryName (sort { lc($a) cmp lc($b) } keys %CategoriesNameToCountMap) {
 167     $CategoryCount = $CategoriesNameToCountMap{$CategoryName};
 168     @LineWords = ("$CategoryName","$CategoryCount");
 169     $Line = JoinWords(\@LineWords, $OutDelim, $OutQuote);
 170     print NEWTEXTFILE "$Line\n";
 171   }
 172   close NEWTEXTFILE;
 173 
 174   # Write out a file for each category...
 175   my($ColLabelLine, $LineIndex);
 176   $ColLabelLine = JoinWords(\@ColLabels, $OutDelim, $OutQuote);
 177   print "\nGenerating text files for each category...\n";
 178   for $CategoryName (sort { lc($a) cmp lc($b) } keys %CategoriesNameToCountMap) {
 179     print "Generating file $CategoriesNameToFileNameMap{$CategoryName}...\n";
 180     $CategoryFileHandle = $CategoriesNameToFileHandleMap{$CategoryName};
 181     print $CategoryFileHandle "$ColLabelLine\n";
 182     for $LineIndex (0 .. $#{@{$CategoriesNameToLinesMap{$CategoryName}}}) {
 183       $Line = ${$CategoriesNameToLinesMap{$CategoryName}}[$LineIndex];
 184       @LineWords = quotewords($InDelim, 0, $Line);
 185       $Line = JoinWords(\@LineWords, $OutDelim, $OutQuote);
 186       print $CategoryFileHandle "$Line\n";
 187     }
 188     close $CategoryFileHandle;
 189   }
 190 }
 191 
 192 # Extract data for specific columns...
 193 sub ExtractColumnData {
 194   my($Index) = @_;
 195   my($TextFile, @ColNumsToExtract, $NewTextFile, $InDelim);
 196 
 197   $TextFile = $TextFilesList[$Index];
 198   $NewTextFile =$TextFilesOutFile[$Index];
 199   $InDelim = $TextFilesInDelim[$Index];
 200   @ColNumsToExtract = @{$TextFilesColNumsToExtract[$Index]};
 201 
 202   print "Generating file $NewTextFile...\n";
 203   open TEXTFILE, "$TextFile" or die "Couldn't open $TextFile: $! \n";
 204   open NEWTEXTFILE, ">$NewTextFile" or die "Couldn't open $NewTextFile: $! \n";
 205   $_ = <TEXTFILE>;
 206   # Write out column labels...
 207   my($Line, @LineWords, @ColLabels, $ColLabelLine, @ColValues, $ColValuesLine, $ColNum, $ColValue);
 208   @ColLabels = (); $ColLabelLine = "";
 209   for $ColNum (@ColNumsToExtract) {
 210     push @ColLabels, $TextFilesColLabels[$Index][$ColNum];
 211   }
 212   $ColLabelLine = JoinWords(\@ColLabels, $OutDelim, $OutQuote);
 213   print NEWTEXTFILE "$ColLabelLine\n";
 214   while ($Line = GetTextLine(\*TEXTFILE)) {
 215     @LineWords = quotewords($InDelim, 0, $Line);
 216     @ColValues = (); $ColValuesLine = "";
 217     for $ColNum (@ColNumsToExtract) {
 218       $ColValue = "";
 219       if ($ColNum < @LineWords) {
 220 	$ColValue = (defined $LineWords[$ColNum]) ? $LineWords[$ColNum] : "";
 221       }
 222       push @ColValues, $ColValue;
 223     }
 224     $ColValuesLine = JoinWords(\@ColValues, $OutDelim, $OutQuote);
 225     print NEWTEXTFILE "$ColValuesLine\n";
 226   }
 227   close NEWTEXTFILE;
 228   close TEXTFILE;
 229 }
 230 
 231 # Extract data for specific rows...
 232 sub ExtractRowsData {
 233   my($Index) = @_;
 234   my($TextFile, $NewTextFile, $InDelim);
 235 
 236   $TextFile = $TextFilesList[$Index];
 237   $NewTextFile =$TextFilesOutFile[$Index];
 238   $InDelim = $TextFilesInDelim[$Index];
 239 
 240   print "Generating file $NewTextFile...\n";
 241   open TEXTFILE, "$TextFile" or die "Couldn't open $TextFile: $! \n";
 242   open NEWTEXTFILE, ">$NewTextFile" or die "Couldn't open $NewTextFile: $! \n";
 243 
 244   my($Line, $RowCount, @LineWords, @ColLabels);
 245 
 246   # Write out column labels...
 247   $Line = <TEXTFILE>;
 248   push @ColLabels, @{$TextFilesColLabels[$Index]};
 249   $Line = JoinWords(\@ColLabels, $OutDelim, $OutQuote);
 250   print NEWTEXTFILE "$Line\n";
 251 
 252   if ($SpecifiedRowsMode =~ /^rowsbycolvalue$/i) {
 253     ExtractRowsByColValue($Index, \*TEXTFILE, \*NEWTEXTFILE);
 254   }
 255   elsif ($SpecifiedRowsMode =~ /^rowsbycolvaluelist$/i) {
 256     ExtractRowsByColValueList($Index, \*TEXTFILE, \*NEWTEXTFILE);
 257   }
 258   elsif ($SpecifiedRowsMode =~ /^rowsbycolvaluerange$/i) {
 259     ExtractRowsByColValueRange($Index, \*TEXTFILE, \*NEWTEXTFILE);
 260   }
 261   elsif ($SpecifiedRowsMode =~ /^(rowbymincolvalue|rowbymaxcolvalue)$/i) {
 262     ExtractRowByMinOrMaxColValue($Index, \*TEXTFILE, \*NEWTEXTFILE);
 263   }
 264   elsif ($SpecifiedRowsMode =~ /^rownums$/i) {
 265     ExtractRowsByRowNums($Index, \*TEXTFILE, \*NEWTEXTFILE);
 266   }
 267   elsif ($SpecifiedRowsMode =~ /^rownumrange$/i) {
 268     ExtractRowsByRowNumRange($Index, \*TEXTFILE, \*NEWTEXTFILE);
 269   }
 270 
 271   close NEWTEXTFILE;
 272   close TEXTFILE;
 273 }
 274 
 275 # Extract rows by column value...
 276 sub ExtractRowsByColValue {
 277   my($Index, $TextFileRef, $NewTextFileRef) = @_;
 278 
 279   my($Line, $ColNum, $ColValue, $Criterion, $Value, $ValueIndex, $InDelim, @LineWords);
 280   $InDelim = $TextFilesInDelim[$Index];
 281 
 282   LINE: while ($Line = GetTextLine($TextFileRef)) {
 283     @LineWords = quotewords($InDelim, 0, $Line);
 284     for ($ValueIndex = 0; $ValueIndex < @{$TextFilesRowValues[$Index]}; $ValueIndex = $ValueIndex + 3) {
 285       $ColNum = $TextFilesRowValues[$Index][$ValueIndex];
 286       $ColValue = $TextFilesRowValues[$Index][$ValueIndex + 1];
 287       $Criterion = $TextFilesRowValues[$Index][$ValueIndex + 2];
 288       if ($ColNum > $#LineWords) {
 289 	next LINE;
 290       }
 291       $Value = $LineWords[$ColNum];
 292       if ($Criterion =~ /^le$/i) {
 293 	if ($Value > $ColValue) {
 294 	  next LINE;
 295 	}
 296       }
 297       elsif ($Criterion =~ /^ge$/i) {
 298 	if ($Value < $ColValue) {
 299 	  next LINE;
 300 	}
 301       }
 302       elsif ($Criterion =~ /^eq$/i) {
 303 	if ($Value ne $ColValue) {
 304 	  next LINE;
 305 	}
 306       }
 307     }
 308     # Write it out...
 309     $Line = JoinWords(\@LineWords, $OutDelim, $OutQuote);
 310     print $NewTextFileRef "$Line\n";
 311   }
 312 }
 313 # Extract rows by column value list...
 314 sub ExtractRowsByColValueList {
 315   my($Index, $TextFileRef, $NewTextFileRef) = @_;
 316 
 317   my($Line, $ColNum, $ColValue, $ValueIndex, $Value, $InDelim, %ColValueMap, @LineWords);
 318   $InDelim = $TextFilesInDelim[$Index];
 319   $ColNum = $TextFilesRowValues[$Index][0];
 320 
 321   # Setup a col value map...
 322   %ColValueMap = ();
 323   for $ValueIndex (1 .. $#{$TextFilesRowValues[$Index]}) {
 324     $Value = $TextFilesRowValues[$Index][$ValueIndex];
 325     $ColValueMap{$Value} = $Value;
 326   }
 327 
 328   LINE: while ($Line = GetTextLine($TextFileRef)) {
 329     @LineWords = quotewords($InDelim, 0, $Line);
 330     if ($ColNum > $#LineWords) {
 331       next LINE;
 332     }
 333     $ColValue = $LineWords[$ColNum];
 334     if (exists $ColValueMap{$ColValue}) {
 335       $Line = JoinWords(\@LineWords, $OutDelim, $OutQuote);
 336       print $NewTextFileRef "$Line\n";
 337     }
 338   }
 339 }
 340 
 341 # Extract row by minimum column value...
 342 sub ExtractRowByMinOrMaxColValue {
 343   my($Index, $TextFileRef, $NewTextFileRef) = @_;
 344 
 345   my($Line, $ColNum, $ColValue, $FirstValue, $ValueLine, $InDelim, @LineWords);
 346   $InDelim = $TextFilesInDelim[$Index];
 347   $ColNum = $TextFilesRowValues[$Index][0];
 348 
 349   $ValueLine = ''; $ColValue = ''; $FirstValue = 1;
 350   LINE: while ($Line = GetTextLine($TextFileRef)) {
 351     @LineWords = quotewords($InDelim, 0, $Line);
 352     if ($ColNum > $#LineWords) {
 353       next LINE;
 354     }
 355     if ($FirstValue) {
 356       $FirstValue = 0;
 357       $ColValue = $LineWords[$ColNum];
 358       $ValueLine = $Line;
 359       next LINE;
 360     }
 361     if ($SpecifiedRowsMode =~ /^rowbymaxcolvalue$/i) {
 362       if ($LineWords[$ColNum] > $ColValue) {
 363 	$ColValue = $LineWords[$ColNum];
 364 	$ValueLine = $Line;
 365       }
 366     }
 367     else {
 368       if ($LineWords[$ColNum] < $ColValue) {
 369 	$ColValue = $LineWords[$ColNum];
 370 	$ValueLine = $Line;
 371       }
 372     }
 373   }
 374   if ($ValueLine) {
 375     @LineWords = quotewords($InDelim, 0, $ValueLine);
 376     $Line = JoinWords(\@LineWords, $OutDelim, $OutQuote);
 377     print $NewTextFileRef "$Line\n";
 378   }
 379 }
 380 
 381 # Extract rows by column value range...
 382 sub ExtractRowsByColValueRange {
 383   my($Index, $TextFileRef, $NewTextFileRef) = @_;
 384 
 385   my($Line, $ColNum, $ColValue, $MinValue, $MaxValue, $InDelim, @LineWords);
 386   $InDelim = $TextFilesInDelim[$Index];
 387   $ColNum = $TextFilesRowValues[$Index][0];
 388   $MinValue = $TextFilesRowValues[$Index][1];
 389   $MaxValue = $TextFilesRowValues[$Index][2];
 390 
 391   LINE: while ($Line = GetTextLine($TextFileRef)) {
 392     @LineWords = quotewords($InDelim, 0, $Line);
 393     if ($ColNum > $#LineWords) {
 394       next LINE;
 395     }
 396     $ColValue = $LineWords[$ColNum];
 397     if ($ColValue >= $MinValue && $ColValue <= $MaxValue) {
 398       $Line = JoinWords(\@LineWords, $OutDelim, $OutQuote);
 399       print $NewTextFileRef "$Line\n";
 400     }
 401   }
 402 }
 403 
 404 # Extract rows by row number range...
 405 sub ExtractRowsByRowNumRange {
 406   my($Index, $TextFileRef, $NewTextFileRef) = @_;
 407 
 408   my($Line, $MinRowNum, $MaxRowNum, $RowCount, $InDelim, @LineWords);
 409   $InDelim = $TextFilesInDelim[$Index];
 410   $MinRowNum = $TextFilesRowValues[$Index][0];
 411   $MaxRowNum = $TextFilesRowValues[$Index][1];
 412 
 413   $RowCount = 1;
 414   LINE: while ($Line = GetTextLine($TextFileRef)) {
 415     $RowCount++;
 416     @LineWords = quotewords($InDelim, 0, $Line);
 417     if ($RowCount >= $MinRowNum && $RowCount <= $MaxRowNum) {
 418       $Line = JoinWords(\@LineWords, $OutDelim, $OutQuote);
 419       print $NewTextFileRef "$Line\n";
 420     }
 421     elsif ($RowCount > $MaxRowNum) {
 422       last LINE;
 423     }
 424   }
 425 }
 426 
 427 # Extract rows by row numbers...
 428 sub ExtractRowsByRowNums {
 429   my($Index, $TextFileRef, $NewTextFileRef) = @_;
 430 
 431   my($Line, $RowNum, $MaxRowNum, $RowCount, $InDelim, %RowNumMap, @LineWords);
 432   $InDelim = $TextFilesInDelim[$Index];
 433 
 434   # Setup a row nums map...
 435   %RowNumMap = ();
 436   $MaxRowNum = $TextFilesRowValues[$Index][0];
 437   for $RowNum (@{$TextFilesRowValues[$Index]}) {
 438     if ($RowNum > $MaxRowNum) {
 439       $MaxRowNum = $RowNum;
 440     }
 441     $RowNumMap{$RowNum} = $RowNum;
 442   }
 443 
 444   $RowCount = 1;
 445   LINE: while ($Line = GetTextLine($TextFileRef)) {
 446     $RowCount++;
 447     @LineWords = quotewords($InDelim, 0, $Line);
 448     if (exists $RowNumMap{$RowCount}) {
 449       $Line = JoinWords(\@LineWords, $OutDelim, $OutQuote);
 450       print $NewTextFileRef "$Line\n";
 451     }
 452     elsif ($RowCount > $MaxRowNum) {
 453       last LINE;
 454     }
 455   }
 456 }
 457 
 458 # Process option values...
 459 sub ProcessOptions {
 460   $SpecifiedCategoryCol = "";
 461   if (defined $Options{categorycol}) {
 462     my(@SpecifiedValues) = split ",", $Options{categorycol};
 463     if (@SpecifiedValues != 1) {
 464       die "Error: Invalid number of values, ",scalar(@SpecifiedValues), " using \"--categorycol\" option: Only one value is allowed.\n";
 465     }
 466     $SpecifiedCategoryCol = $SpecifiedValues[0];
 467     if ($Options{colmode} =~ /^colnum$/i) {
 468       if (!IsPositiveInteger($SpecifiedCategoryCol)) {
 469 	die "Error: Category column value, $SpecifiedCategoryCol, specified using \"--categorycol\" is not valid. Allowed integer values: > 0.\n";
 470       }
 471     }
 472   }
 473   @SpecifiedColumns = ();
 474   if (defined $Options{columns}) {
 475     my(@SpecifiedValues) = split ",", $Options{columns};
 476     if ($Options{colmode} =~ /^colnum$/i) {
 477       my($ColValue);
 478       for $ColValue (@SpecifiedValues) {
 479 	if (!IsPositiveInteger($ColValue)) {
 480 	  die "Error: Column value, $ColValue, specified using \"--columns\" is not valid: Allowed integer values: > 0.\n";
 481 	}
 482       }
 483     }
 484     push @SpecifiedColumns, @SpecifiedValues;
 485   }
 486   $OutDelim = ($Options{outdelim} =~ /^tab$/i ) ? "\t" : (($Options{outdelim} =~ /^semicolon$/i) ? "\;" : "\,");
 487   $OutQuote = ($Options{quote} =~ /^yes$/i) ? 1 : 0;
 488 
 489   # Process any specified rows values...
 490   @SpecifiedRowValues = ();
 491   $SpecifiedRowsMode = $Options{rowsmode};
 492   if (defined $Options{rows}) {
 493     (@SpecifiedRowValues) = split ",", $Options{rows};
 494   }
 495   else {
 496     if ($Options{rowsmode} !~ /^rownums$/i) {
 497       die "Error: Specify value for \"--rows\" option with \"--rowsmode\" value of $Options{rowsmode}.\n";
 498     }
 499     push @SpecifiedRowValues, "1";
 500   }
 501 
 502   my($SpecifiedColID, $SpecifiedRowID);
 503   # Make sure specified values are okay...
 504   if ($Options{rowsmode} =~ /^rowsbycolvalue$/i) {
 505     if (@SpecifiedRowValues % 3) {
 506       die "Error: Invalid number of values, ", scalar(@SpecifiedRowValues) , ", specified by \"--rows\" option with \"--rowsmode\" value of $Options{rowsmode}.\nIt must contain triplets.\n";
 507     }
 508     # Triplet format: colid,value,criteria. Criterion: le,ge,eq
 509     my($Index, $ColID, $Criterion, $Value);
 510     for ($Index = 0; $Index < @SpecifiedRowValues; $Index = $Index + 3) {
 511       $ColID = $SpecifiedRowValues[$Index];
 512       $Value = $SpecifiedRowValues[$Index + 1];
 513       $Criterion = $SpecifiedRowValues[$Index + 2];
 514       if ($Options{colmode} =~ /^colnum$/i) {
 515 	if (!IsPositiveInteger($ColID)) {
 516 	  die "Error: Invalid column id, $ColID, specified in triplet, \"$ColID,$Criterion,$Value\", using \"--rows\" option with \"--rowsmode\" value of $Options{rowsmode} is not valid. Allowed integer values: > 0.\n";
 517 	}
 518       }
 519       if ($Criterion !~ /^(eq|le|ge)$/i) {
 520 	die "Error: Invalid criterion value, $Criterion, specified in triplet, \"$ColID,$Criterion,$Value\", using \"--rows\" option with \"--rowsmode\" value of $Options{rowsmode} is not valid. Allowed values: le, ge, or eq.\n";
 521       }
 522     }
 523   }
 524   elsif ($Options{rowsmode} =~ /^rowsbycolvaluelist$/i) {
 525     ($SpecifiedColID) = $SpecifiedRowValues[0];
 526     if ($Options{colmode} =~ /^colnum$/i) {
 527       if (!IsPositiveInteger($SpecifiedColID)) {
 528 	die "Error: Rows value, $SpecifiedColID, specified using \"--rows\" option with \"--rowsmode\" value of $Options{rowsmode} is not valid. Allowed integer values: > 0.\n";
 529       }
 530     }
 531     if (@SpecifiedRowValues == 1) {
 532       die "Error: Invalid number of values, ", scalar(@SpecifiedRowValues) , ", specified by \"--rows\" option with \"--rowsmode\" value of $Options{rowsmode}.\nIt must contain more than one value\n";
 533     }
 534   }
 535   elsif ($Options{rowsmode} =~ /^rowsbycolvaluerange$/i) {
 536     if (@SpecifiedRowValues != 3) {
 537       die "Error: Invalid number of values, ", scalar(@SpecifiedRowValues) , ", specified by \"--rows\" option with \"--rowsmode\" value of $Options{rowsmode}.\nIt must contain three values\n";
 538     }
 539     ($SpecifiedColID) = $SpecifiedRowValues[0];
 540     if ($Options{colmode} =~ /^colnum$/i) {
 541       if (!IsPositiveInteger($SpecifiedColID)) {
 542 	die "Error: Rows value, $SpecifiedColID, specified using \"--rows\" option with \"--rowsmode\" value of $Options{rowsmode} is not valid. Allowed integer values: > 0.\n";
 543       }
 544     }
 545     if ($SpecifiedRowValues[1] >= $SpecifiedRowValues[2]) {
 546       die "Error: Invalid value triplet - ", JoinWords(\@SpecifiedRowValues, ',', 0) , " - specified by \"--rows\" option with \"--rowsmode\" value of $Options{rowsmode}.\nAllowed values: second value < third value\n";
 547     }
 548   }
 549   elsif ($Options{rowsmode} =~ /^(rowbymincolvalue|rowbymaxcolvalue)$/i) {
 550     if (@SpecifiedRowValues != 1) {
 551       die "Error: Invalid number of values, ", scalar(@SpecifiedRowValues) , ", specified by \"--rows\" option with \"--rowsmode\" value of $Options{rowsmode}.\nOnly one value is allowed.\n";
 552     }
 553     ($SpecifiedColID) = $SpecifiedRowValues[0];
 554     if ($Options{colmode} =~ /^colnum$/i) {
 555       if (!IsPositiveInteger($SpecifiedColID)) {
 556 	die "Error: Rows value, $SpecifiedColID, specified using \"--rows\" option with \"--rowsmode\" value of $Options{rowsmode} is not valid. Allowed integer values: > 0.\n";
 557       }
 558     }
 559   }
 560   elsif ($Options{rowsmode} =~ /^rownums$/i) {
 561     for $SpecifiedRowID (@SpecifiedRowValues) {
 562       if (!IsPositiveInteger($SpecifiedRowID)) {
 563 	die "Error: Rows value, $SpecifiedRowID, specified using \"--rows\" option with \"--rowsmode\" value of $Options{rowsmode} is not valid. Allowed integer values: > 0.\n";
 564       }
 565     }
 566   }
 567   elsif ($Options{rowsmode} =~ /^rownumrange$/i) {
 568     if (@SpecifiedRowValues != 2) {
 569       die "Error: Invalid number of values, ", scalar(@SpecifiedRowValues) , ", specified by \"--rows\" option with \"--rowsmode\" value of $Options{rowsmode}.\nIt must contain only two values.\n";
 570     }
 571     for $SpecifiedRowID (@SpecifiedRowValues) {
 572       if (!IsPositiveInteger($SpecifiedRowID)) {
 573 	die "Error: Rows value, $SpecifiedRowID, specified using \"--rows\" option with \"--rowsmode\" value of $Options{rowsmode} is not valid. Allowed integer values: > 0.\n";
 574       }
 575     }
 576     if ($SpecifiedRowValues[0] >= $SpecifiedRowValues[1]) {
 577       die "Error: Invalid value pair -  ", JoinWords(\@SpecifiedRowValues, ',', 0) , " - specified by \"--rows\" option with \"--rowsmode\" value of $Options{rowsmode}.\nAllowed values: First value < second value\n";
 578     }
 579   }
 580 }
 581 
 582 # Retrieve information about input text files...
 583 sub RetrieveTextFilesInfo {
 584   my($Index, $TextFile, $FileDir, $FileName, $FileExt, $InDelim, $Line, @ColLabels, $OutFileRoot, $CategoryOutFileRoot, $OutFile, $ColNum, $ColLabel);
 585 
 586   @TextFilesOkay = ();
 587   @TextFilesColCount = (); @TextFilesColLabels = ();
 588   @TextFilesColLabelToNumMap = ();
 589   @TextFilesInDelim = ();
 590   @TextFilesOutFile = (); @TextFilesOutFileExt = (); @TextFilesCategoryOutFileRoot = ();
 591 
 592  FILELIST: for $Index (0 .. $#TextFilesList) {
 593     $TextFile = $TextFilesList[$Index];
 594     $TextFilesOkay[$Index] = 0;
 595     $TextFilesColCount[$Index] = 0;
 596     $TextFilesInDelim[$Index] = "";
 597     $TextFilesOutFile[$Index] = "";
 598     $TextFilesOutFileExt[$Index] = "";
 599     $TextFilesCategoryOutFileRoot[$Index] = "";
 600     @{$TextFilesColLabels[$Index]} = ();
 601     %{$TextFilesColLabelToNumMap[$Index]} = ();
 602     if (!(-e $TextFile)) {
 603       warn "Warning: Ignoring file $TextFile: It doesn't exist\n";
 604       next FILELIST;
 605     }
 606     if (!CheckFileType($TextFile, "csv tsv")) {
 607       warn "Warning: Ignoring file $TextFile: It's not a csv or tsv file\n";
 608       next FILELIST;
 609     }
 610     ($FileDir, $FileName, $FileExt) = ParseFileName($TextFile);
 611     if ($FileExt =~ /^tsv$/i) {
 612       $InDelim = "\t";
 613     }
 614     else {
 615       $InDelim = "\,";
 616       if (!($Options{indelim} =~ /^(comma|semicolon)$/i)) {
 617 	warn "Warning: Ignoring file $TextFile: The value specified, $Options{indelim}, for option \"--indelim\" is not valid for csv files\n";
 618 	next FILELIST;
 619       }
 620       if ($Options{indelim} =~ /^semicolon$/i) {
 621 	$InDelim = "\;";
 622       }
 623     }
 624 
 625     if (!open TEXTFILE, "$TextFile") {
 626       warn "Warning: Ignoring file $TextFile: Couldn't open it: $! \n";
 627       next FILELIST;
 628     }
 629 
 630     $Line = GetTextLine(\*TEXTFILE);
 631     @ColLabels = quotewords($InDelim, 0, $Line);
 632     close TEXTFILE;
 633 
 634     $FileDir = ""; $FileName = ""; $FileExt = "";
 635     ($FileDir, $FileName, $FileExt) = ParseFileName($TextFile);
 636     $FileExt = "csv";
 637     if ($Options{outdelim} =~ /^tab$/i) {
 638       $FileExt = "tsv";
 639     }
 640     if ($Options{root} && (@TextFilesList == 1)) {
 641       my ($RootFileDir, $RootFileName, $RootFileExt) = ParseFileName($Options{root});
 642       if ($RootFileName && $RootFileExt) {
 643 	$FileName = $RootFileName;
 644       }
 645       else {
 646 	$FileName = $Options{root};
 647       }
 648       $OutFileRoot .= $FileName;
 649     }
 650     else {
 651       $OutFileRoot = $FileName;
 652       $OutFileRoot .= ($Options{mode} =~ /^categories$/i) ? "CategoriesSummary" : (($Options{mode} =~ /^rows$/i) ? "ExtractedRows" : "ExtractedColumns");
 653     }
 654     $CategoryOutFileRoot = "$FileName" . "Category";
 655 
 656     $OutFile = $OutFileRoot . ".$FileExt";
 657     if (lc($OutFile) eq lc($TextFile)) {
 658       warn "Warning: Ignoring file $TextFile:Output file name, $OutFile, is same as input text file name, $TextFile\n";
 659       next FILELIST;
 660     }
 661     if (!$Options{overwrite}) {
 662       if (-e $OutFile) {
 663 	warn "Warning: Ignoring file $TextFile: The file $OutFile already exists\n";
 664 	next FILELIST;
 665       }
 666     }
 667 
 668     $TextFilesOkay[$Index] = 1;
 669     $TextFilesInDelim[$Index] = $InDelim;
 670     $TextFilesCategoryOutFileRoot[$Index] = "$CategoryOutFileRoot";
 671     $TextFilesOutFile[$Index] = "$OutFile";
 672     $TextFilesOutFileExt[$Index] = "$FileExt";
 673 
 674     $TextFilesColCount[$Index] = @ColLabels;
 675     push @{$TextFilesColLabels[$Index]}, @ColLabels;
 676     for $ColNum (0 .. $#ColLabels) {
 677       $ColLabel = $ColLabels[$ColNum];
 678       $TextFilesColLabelToNumMap[$Index]{$ColLabel} = $ColNum;
 679     }
 680   }
 681 }
 682 
 683 # Make sure the specified columns exists in text files...
 684 sub ProcessColumnsInfo {
 685   my($Index, @ColNumsToExtract, $TextFile);
 686 
 687   @TextFilesCategoryColNum = ();
 688   @TextFilesColNumsToExtract = ();
 689  FILELIST: for $Index (0 .. $#TextFilesList) {
 690     $TextFile = $TextFilesList[$Index];
 691 
 692     $TextFilesCategoryColNum[$Index] = 0;
 693     @{$TextFilesColNumsToExtract[$Index]} = ();
 694 
 695     if ($TextFilesOkay[$Index]) {
 696       if ($Options{mode} =~ /^categories$/i) {
 697 	my($CategoryColNum, $CategoryColValid);
 698 
 699 	$CategoryColNum = 0;
 700 	$CategoryColValid = 1;
 701 	if ($SpecifiedCategoryCol) {
 702 	  if ($Options{colmode} =~ /^colnum$/i) {
 703 	    if ($SpecifiedCategoryCol <= $TextFilesColCount[$Index]) {
 704 	      $CategoryColNum = $SpecifiedCategoryCol - 1;
 705 	    }
 706 	    else {
 707 	      $CategoryColValid = 0;
 708 	    }
 709 	  }
 710 	  else {
 711 	    if (exists($TextFilesColLabelToNumMap[$Index]{$SpecifiedCategoryCol})) {
 712 	      $CategoryColNum =  $TextFilesColLabelToNumMap[$Index]{$SpecifiedCategoryCol};
 713 	    }
 714 	    else {
 715 	      $CategoryColValid = 0;
 716 	    }
 717 	  }
 718 	}
 719 	if ($CategoryColValid) {
 720 	  $TextFilesCategoryColNum[$Index] = $CategoryColNum;
 721 	}
 722 	else {
 723 	  warn "Warning: Ignoring file $TextFile: Category column specified, $SpecifiedCategoryCol, using \"--categorycol\" option doesn't exist\n";
 724 	  $TextFilesOkay[$Index] = 0;
 725 	}
 726       }
 727       elsif ($Options{mode} =~ /^columns$/i) {
 728 	my($SpecifiedColNum, $ColNum);
 729 	$ColNum = 0;
 730 	@ColNumsToExtract = ();
 731 	if (@SpecifiedColumns) {
 732 	  if ($Options{colmode} =~ /^colnum$/i) {
 733 	    for $SpecifiedColNum (@SpecifiedColumns) {
 734 	      if ($SpecifiedColNum >=1 && $SpecifiedColNum <= $TextFilesColCount[$Index]) {
 735 		$ColNum = $SpecifiedColNum - 1;
 736 		push @ColNumsToExtract, $ColNum;
 737 	      }
 738 	    }
 739 	  }
 740 	  else {
 741 	    my($ColLabel);
 742 	    for $ColLabel (@SpecifiedColumns) {
 743 	      if (exists($TextFilesColLabelToNumMap[$Index]{$ColLabel})) {
 744 		push @ColNumsToExtract, $TextFilesColLabelToNumMap[$Index]{$ColLabel};
 745 	      }
 746 	    }
 747 	  }
 748 	}
 749 	else {
 750 	  push @ColNumsToExtract, $ColNum;
 751 	}
 752 	if (@ColNumsToExtract) {
 753 	  push @{$TextFilesColNumsToExtract[$Index]}, @ColNumsToExtract;
 754 	}
 755 	else {
 756 	  warn "Warning: Ignoring file $TextFile: None of the columns specified, @SpecifiedColumns, using \"--columns\" option exist\n";
 757 	  $TextFilesOkay[$Index] = 0;
 758 	}
 759       }
 760     }
 761   }
 762 }
 763 
 764 # Process specified rows info...
 765 sub ProcessRowsInfo {
 766   my($Index, $TextFile, $ColID, $ColIDOkay, $Value, $Criterion, $ColNum, @RowValues);
 767 
 768   @TextFilesRowValues = ();
 769 
 770   FILELIST: for $Index (0 .. $#TextFilesList) {
 771     $TextFile = $TextFilesList[$Index];
 772     @{$TextFilesRowValues[$Index]} = ();
 773 
 774     if ($Options{mode} !~ /^rows$/i) {
 775       next FILELIST;
 776     }
 777     if (!$TextFilesOkay[$Index]) {
 778       next FILELIST;
 779     }
 780     @RowValues = ();
 781     if ($Options{rowsmode} =~ /^rowsbycolvalue$/i) {
 782       my($ValueIndex);
 783       for ($ValueIndex = 0; $ValueIndex < @SpecifiedRowValues; $ValueIndex = $ValueIndex + 3) {
 784 	$ColID = $SpecifiedRowValues[$ValueIndex];
 785 	$Value = $SpecifiedRowValues[$ValueIndex + 1];
 786 	$Criterion = $SpecifiedRowValues[$ValueIndex + 2];
 787 
 788 	$ColIDOkay = 0;
 789 	if ($Options{colmode} =~ /^collabel$/i) {
 790 	  if (exists $TextFilesColLabelToNumMap[$Index]{$ColID}) {
 791 	    $ColIDOkay = 1;
 792 	    $ColNum = $TextFilesColLabelToNumMap[$Index]{$ColID};
 793 	  }
 794 	}
 795 	else {
 796 	  if ($ColID >=1 && $ColID <= $TextFilesColCount[$Index]) {
 797 	    $ColNum = $ColID - 1;
 798 	    $ColIDOkay = 1;
 799 	  }
 800 	}
 801 	if ($ColIDOkay) {
 802 	  push @RowValues, ($ColNum, $Value, $Criterion);
 803 	}
 804       }
 805     }
 806     elsif ($Options{rowsmode} =~ /^(rowsbycolvaluelist|rowsbycolvaluerange|rowbymincolvalue|rowbymaxcolvalue)$/i) {
 807       # Process coulumn id...
 808       $ColID = $SpecifiedRowValues[0];
 809       $ColIDOkay = 0;
 810       if ($Options{colmode} =~ /^collabel$/i) {
 811 	if (exists $TextFilesColLabelToNumMap[$Index]{$ColID}) {
 812 	  $ColIDOkay = 1;
 813 	  $ColNum = $TextFilesColLabelToNumMap[$Index]{$ColID};
 814 	}
 815       }
 816       else {
 817 	if ($ColID >=1 && $ColID <= $TextFilesColCount[$Index]) {
 818 	  $ColIDOkay = 1;
 819 	  $ColNum = $ColID - 1;
 820 	}
 821       }
 822       if ($ColIDOkay) {
 823 	push @RowValues, $ColNum;
 824 	# Get rest of the specified values...
 825 	if (@SpecifiedRowValues > 1) {
 826 	  for $Index (1 .. $#SpecifiedRowValues) {
 827 	    push @RowValues, $SpecifiedRowValues[$Index];
 828 	  }
 829 	}
 830       }
 831     }
 832     elsif ($Options{rowsmode} =~ /^(rownums|rownumrange)$/i) {
 833       push @RowValues, @SpecifiedRowValues;
 834     }
 835 
 836     if (@RowValues) {
 837       push @{$TextFilesRowValues[$Index]}, @RowValues;
 838     }
 839     else {
 840       warn "Warning: Ignoring file $TextFile: Column specified, $ColID, using \"--rows\" option doesn't exist\n";
 841       $TextFilesOkay[$Index] = 0;
 842     }
 843   }
 844 }
 845 
 846 # Setup script usage  and retrieve command line arguments specified using various options...
 847 sub SetupScriptUsage {
 848 
 849   # Setup default and retrieve all the options...
 850   %Options = ();
 851   $Options{colmode} = "colnum";
 852   $Options{indelim} = "comma";
 853   $Options{mode} = "columns";
 854   $Options{outdelim} = "comma";
 855   $Options{quote} = "yes";
 856   $Options{rowsmode} = "rownums";
 857 
 858   if (!GetOptions(\%Options, "categorycol=s", "columns=s", "colmode|c=s", "help|h", "indelim=s", "mode|m=s", "outdelim=s", "overwrite|o", "quote|q=s", "root|r=s", "rows=s", "rowsmode=s", "workingdir|w=s")) {
 859     die "\nTo get a list of valid options and their values, use \"$ScriptName -h\" or\n\"perl -S $ScriptName -h\" command and try again...\n";
 860   }
 861   if ($Options{workingdir}) {
 862     if (! -d $Options{workingdir}) {
 863       die "Error: The value specified, $Options{workingdir}, for option \"-w --workingdir\" is not a directory name.\n";
 864     }
 865     chdir $Options{workingdir} || die "Error: Couldn't chdir $Options{workingdir}: $! \n";
 866   }
 867   if ($Options{mode} !~ /(^(columns|rows|categories)$)/i) {
 868     die "Error: The value specified, $Options{mode}, for option \"-m --mode\" is not valid. Allowed values: columns, rows or categories \n";
 869   }
 870   if ($Options{colmode} !~ /(^(colnum|collabel)$)/i) {
 871     die "Error: The value specified, $Options{colmode}, for option \"--colmode\" is not valid. Allowed values: colnum or collabel \n";
 872   }
 873   if ($Options{indelim} !~ /^(comma|semicolon)$/i) {
 874     die "Error: The value specified, $Options{indelim}, for option \"--indelim\" is not valid. Allowed values: comma or semicolon\n";
 875   }
 876   if ($Options{outdelim} !~ /^(comma|semicolon|tab)$/i) {
 877     die "Error: The value specified, $Options{outdelim}, for option \"--outdelim\" is not valid. Allowed values: comma, tab, or semicolon\n";
 878   }
 879   if ($Options{quote} !~ /^(yes|no)$/i) {
 880     die "Error: The value specified, $Options{quote}, for option \"-q --quote\" is not valid. Allowed values: yes or no\n";
 881   }
 882   if ($Options{rowsmode} !~ /^(rowsbycolvalue|rowsbycolvaluelist|rowsbycolvaluerange|rowbymincolvalue|rowbymaxcolvalue|rownums|rownumrange)$/i) {
 883     die "Error: The value specified, $Options{rowsmode}, for option \"--rowsmode\" is not valid. Allowed values: rowsbycolvalue, rowsbycolvaluelist, rowsbycolvaluerange, rowbymincolvalue, rowbymaxcolvalue, rownum, rownumrange\n";
 884   }
 885 }