MayaChemTools

   1 #!/usr/bin/perl -w
   2 #
   3 # $RCSfile: ModifyPDBFiles.pl,v $
   4 # $Date: 2008/01/30 21:44:49 $
   5 # $Revision: 1.11 $
   6 #
   7 # Author: Manish Sud <msud@san.rr.com>
   8 #
   9 # Copyright (C) 2004-2008 Manish Sud. All rights reserved.
  10 #
  11 # This file is part of MayaChemTools.
  12 #
  13 # MayaChemTools is free software; you can redistribute it and/or modify it under
  14 # the terms of the GNU Lesser General Public License as published by the Free
  15 # Software Foundation; either version 3 of the License, or (at your option) any
  16 # later version.
  17 #
  18 # MayaChemTools is distributed in the hope that it will be useful, but without
   19 # any warranty; without even the implied warranty of merchantability or fitness
  20 # for a particular purpose.  See the GNU Lesser General Public License for more
  21 # details.
  22 #
  23 # You should have received a copy of the GNU Lesser General Public License
  24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  26 # Boston, MA, 02111-1307, USA.
  27 #
  28 
  29 use 5.006;
  30 use strict;
  31 use FindBin; use lib "$FindBin::Bin/../lib";
  32 use Getopt::Long;
  33 use File::Basename;
  34 use Text::ParseWords;
  35 use Benchmark;
  36 use FileUtil;
  37 use TextUtil;
  38 use PDBFileUtil;
  39 
  40 my($ScriptName, %Options, $StartTime, $EndTime, $TotalTime);
  41 
  42 # Autoflush STDOUT
  43 $| = 1;
  44 
  45 # Starting message...
  46 $ScriptName = basename($0);
  47 print "\n$ScriptName: Starting...\n\n";
  48 $StartTime = new Benchmark;
  49 
  50 # Get the options and setup script...
  51 SetupScriptUsage();
  52 if ($Options{help} || @ARGV < 1) {
  53   die GetUsageFromPod("$FindBin::Bin/$ScriptName");
  54 }
  55 
  56 my(@PDBFilesList);
  57 @PDBFilesList = ExpandFileNames(\@ARGV, "pdb");
  58 
  59 # Process options...
  60 my(%OptionsInfo);
  61 print "Processing options...\n";
  62 ProcessOptions();
  63 
  64 # Setup information about input files...
  65 print "Checking input PDB file(s)...\n";
  66 my(%PDBFilesInfo);
  67 RetrievePDBFilesInfo();
  68 
  69 # Process input files..
  70 my($FileIndex, $PDBFile, $FileProcessingMsg);
  71 $FileProcessingMsg = "Processing file";
  72 if (@PDBFilesList > 1) {
  73   print "Processing PDB files...\n";
  74   $FileProcessingMsg = "\n$FileProcessingMsg";
  75 }
  76 
  77 for $FileIndex (0 .. $#PDBFilesList) {
  78   if ($PDBFilesInfo{FileOkay}[$FileIndex]) {
  79     $PDBFile = $PDBFilesList[$FileIndex];
  80     print "$FileProcessingMsg $PDBFile...\n";
  81     ModifyPDBFiles($FileIndex);
  82   }
  83 }
  84 
  85 print "$ScriptName:Done...\n\n";
  86 
  87 $EndTime = new Benchmark;
  88 $TotalTime = timediff ($EndTime, $StartTime);
  89 print "Total time: ", timestr($TotalTime), "\n";
  90 
  91 ###############################################################################
  92 
  93 # Modify appropriate information...
  94 sub ModifyPDBFiles {
  95   my($FileIndex) = @_;
  96   my($PDBFile, $PDBRecordLinesRef);
  97 
  98   # Get PDB data...
  99   $PDBFile = $PDBFilesList[$FileIndex];
 100   $PDBRecordLinesRef = ReadPDBFile($PDBFile);
 101 
 102   if ($OptionsInfo{Mode} =~ /^RenumberAtoms$/i) {
 103     RenumberAtoms($FileIndex, $PDBRecordLinesRef);
 104   }
 105   elsif ($OptionsInfo{Mode} =~ /^RenumberResidues$/i) {
 106     RenumberResidues($FileIndex, $PDBRecordLinesRef);
 107   }
 108   elsif ($OptionsInfo{Mode} =~ /^RenumberWaters$/i) {
 109     RenumberWaters($FileIndex, $PDBRecordLinesRef);
 110   }
 111   elsif ($OptionsInfo{Mode} =~ /^RenameChainIDs$/i) {
 112     RenameChainsIDs($FileIndex, $PDBRecordLinesRef);
 113   }
 114 }
 115 
 116 # Renumber atom and hetro atom numbers...
 117 sub RenumberAtoms {
 118   my($FileIndex, $PDBRecordLinesRef) = @_;
 119   my($PDBFileName,  $RecordLine, $ConectRecordLinesRef, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge, $NewAtomNumber, $RecordType, %OldToNewAtomNumbersMap);
 120 
 121   $PDBFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][0];
 122   print "Generating PDBFileName file $PDBFileName...\n";
 123   open OUTFILE, ">$PDBFileName" or die "Error: Can't open $PDBFileName: $! \n";
 124 
 125   # Write out header and other older recors...
 126   WriteHeaderAndOlderRecords(\*OUTFILE, $PDBRecordLinesRef);
 127 
 128   # Write out all ATOM records along with TER and model records to indicate
 129   # chains and multiple models..
 130   %OldToNewAtomNumbersMap = ();
 131   $NewAtomNumber = $OptionsInfo{StartingAtomNumber};
 132   for $RecordLine (@{$PDBRecordLinesRef}) {
 133     if (IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine)) {
 134       $RecordType = GetPDBRecordType($RecordLine);
 135 
 136       ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseAtomOrHetatmRecordLine($RecordLine);
 137 
 138       print OUTFILE GenerateAtomOrHetatmRecordLine($RecordType, $NewAtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge), "\n";
 139 
 140       $OldToNewAtomNumbersMap{$AtomNumber} = $NewAtomNumber;
 141       $NewAtomNumber++;
 142     }
 143     elsif (IsTerRecordType($RecordLine)) {
 144       $NewAtomNumber++;
 145       print OUTFILE GenerateTerRecordLine($NewAtomNumber, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode), "\n";
 146     }
 147     elsif (IsModelRecordType($RecordLine)) {
 148       print OUTFILE "$RecordLine\n";
 149     }
 150     elsif (IsEndmdlRecordType($RecordLine)) {
 151       print OUTFILE "$RecordLine\n";
 152       # Restart numbering...
 153       $NewAtomNumber = $OptionsInfo{StartingAtomNumber};
 154     }
 155   }
 156 
 157   # Write out modified CONECT records...
 158   my($ModifiedConectAtomNum, $ConectAtomNum, @ConectAtomNums, @ModifiedConectAtomNums);
 159   LINE: for $RecordLine (@{$PDBRecordLinesRef}) {
 160     if (!IsConectRecordType($RecordLine)) {
 161       next LINE;
 162     }
 163     @ConectAtomNums = ();
 164     @ModifiedConectAtomNums = ();
 165     push @ConectAtomNums, ParseConectRecordLine($RecordLine);
 166     ATOMNUMBER: for $ConectAtomNum (@ConectAtomNums) {
 167       $ModifiedConectAtomNum = $ConectAtomNum;
 168       if (defined($ConectAtomNum)) {
 169 	$AtomNumber = $ConectAtomNum;
 170 	if ($AtomNumber) {
 171 	  if (exists $OldToNewAtomNumbersMap{$AtomNumber}) {
 172 	    $ModifiedConectAtomNum = $OldToNewAtomNumbersMap{$AtomNumber};
 173 	  }
 174 	}
 175       }
 176       push @ModifiedConectAtomNums, $ModifiedConectAtomNum;
 177     }
 178     # Write out the record...
 179     print OUTFILE GenerateConectRecordLine(@ModifiedConectAtomNums), "\n";
 180   }
 181 
 182   # Write out END record...
 183   print OUTFILE GenerateEndRecordLine(), "\n";
 184 
 185   close OUTFILE;
 186 }
 187 
 188 # Renumber residues...
 189 sub RenumberResidues {
 190   my($FileIndex, $PDBRecordLinesRef) = @_;
 191   my($PDBFileName,  $RecordLine, $ConectRecordLinesRef, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge, $NewResidueNumber, $NewHetatmResidueNumber, $TERCount, $TotalTERCount, $PreviousResidueNumber, $PreviousHetatmResidueNumber, $RecordType);
 192 
 193   $PDBFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][0];
 194   print "Generating PDBFileName file $PDBFileName...\n";
 195   open OUTFILE, ">$PDBFileName" or die "Error: Can't open $PDBFileName: $! \n";
 196 
 197   # Write out header and other older recors...
 198   WriteHeaderAndOlderRecords(\*OUTFILE, $PDBRecordLinesRef);
 199 
 200   # Do a quick count of all TER records...
 201   $TotalTERCount = 0;
 202   for $RecordLine (@{$PDBRecordLinesRef}) {
 203     if (IsTerRecordType($RecordLine)) {
 204       $TotalTERCount++;
 205     }
 206   }
 207 
 208   # Write out all ATOM records along with TER and model records to indicate
 209   # chains and multiple models..
 210   $NewResidueNumber = $OptionsInfo{StartingResidueNumber};
 211   $NewHetatmResidueNumber = $OptionsInfo{StartingHetatmResidueNumber};
 212 
 213   $TERCount = 0;
 214   $PreviousResidueNumber = 0;
 215   $PreviousHetatmResidueNumber = 0;
 216 
 217   for $RecordLine (@{$PDBRecordLinesRef}) {
 218     if (IsAtomRecordType($RecordLine) || (IsHetatmRecordType($RecordLine) && ($TERCount < $TotalTERCount || $OptionsInfo{HetatmResidueNumberMode} =~ /^Automatic$/i))) {
 219       $RecordType = GetPDBRecordType($RecordLine);
 220       ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseAtomOrHetatmRecordLine($RecordLine);
 221 
 222       if ($PreviousResidueNumber && $PreviousResidueNumber != $ResidueNumber) {
 223 	$PreviousResidueNumber = $ResidueNumber;
 224 	$NewResidueNumber++;
 225       }
 226       else {
 227 	# First residue in a chain...
 228 	$PreviousResidueNumber = $ResidueNumber;
 229       }
 230       print OUTFILE GenerateAtomOrHetatmRecordLine($RecordType, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $NewResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge), "\n";
 231 
 232     }
 233     elsif (IsHetatmRecordType($RecordLine)) {
 234       ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseHetatmRecordLine($RecordLine);
 235 
 236       # User HETATM residue numbers...
 237       if ($PreviousHetatmResidueNumber && $PreviousHetatmResidueNumber != $ResidueNumber) {
 238 	$PreviousHetatmResidueNumber = $ResidueNumber;
 239 	$NewHetatmResidueNumber++;
 240       }
 241       else {
 242 	# First HETATM residue outside a chain...
 243 	$PreviousHetatmResidueNumber = $ResidueNumber;
 244       }
 245 
 246       print OUTFILE GenerateHetatmRecordLine($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $NewHetatmResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge), "\n";
 247     }
 248     elsif (IsTerRecordType($RecordLine)) {
 249       $TERCount++;
 250       $AtomNumber++;
 251       print OUTFILE GenerateTerRecordLine($AtomNumber, $ResidueName, $ChainID, $NewResidueNumber, $InsertionCode), "\n";
 252       # For per chain numbering, start over again...
 253       if ($OptionsInfo{ResidueNumberMode} =~ /^PerChain$/i) {
 254 	if ($TERCount < $TotalTERCount ) {
 255 	  $NewResidueNumber = $OptionsInfo{StartingResidueNumber};
 256 	}
 257 	$PreviousResidueNumber = 0;
 258       }
 259     }
 260     elsif (IsModelRecordType($RecordLine)) {
 261       print OUTFILE "$RecordLine\n";
 262     }
 263     elsif (IsEndmdlRecordType($RecordLine)) {
 264       print OUTFILE "$RecordLine\n";
 265     }
 266   }
 267 
 268   # Write out CONECT records...
 269   for $RecordLine (@{$PDBRecordLinesRef}) {
 270     if (IsConectRecordType($RecordLine)) {
 271       print OUTFILE "$RecordLine\n";
 272     }
 273   }
 274 
 275   # Write out END record...
 276   print OUTFILE GenerateEndRecordLine(), "\n";
 277 
 278   close OUTFILE;
 279 }
 280 
 281 # Renumber water residues...
 282 sub RenumberWaters {
 283   my($FileIndex, $PDBRecordLinesRef) = @_;
 284   my($PDBFileName,  $RecordLine, $ConectRecordLinesRef, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge, $NewResidueNumber, $RecordType);
 285 
 286   $PDBFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][0];
 287   print "Generating PDBFileName file $PDBFileName...\n";
 288   open OUTFILE, ">$PDBFileName" or die "Error: Can't open $PDBFileName: $! \n";
 289 
 290   # Write out header and other older recors...
 291   WriteHeaderAndOlderRecords(\*OUTFILE, $PDBRecordLinesRef);
 292 
 293   # Write out all ATOM records along with TER and model records to indicate
 294   # chains and multiple models..
 295   $NewResidueNumber = $OptionsInfo{StartingWaterResidueNumber};
 296   for $RecordLine (@{$PDBRecordLinesRef}) {
 297     if (IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine)) {
 298       $RecordType = GetPDBRecordType($RecordLine);
 299 
 300       ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseAtomOrHetatmRecordLine($RecordLine);
 301 
 302       if (exists $OptionsInfo{SpecifiedWaterResiduesMap}{$ResidueName}) {
 303 	$ResidueNumber = $NewResidueNumber;
 304 	print OUTFILE GenerateAtomOrHetatmRecordLine($RecordType, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge), "\n";
 305 	$NewResidueNumber++;
 306       }
 307       else {
 308 	print OUTFILE "$RecordLine\n";
 309       }
 310     }
 311     elsif (IsTerRecordType($RecordLine)) {
 312       print OUTFILE "$RecordLine\n";
 313     }
 314     elsif (IsModelRecordType($RecordLine)) {
 315       print OUTFILE "$RecordLine\n";
 316     }
 317     elsif (IsEndmdlRecordType($RecordLine)) {
 318       print OUTFILE "$RecordLine\n";
 319     }
 320   }
 321 
 322   # Write out CONECT records...
 323   for $RecordLine (@{$PDBRecordLinesRef}) {
 324     if (IsConectRecordType($RecordLine)) {
 325       print OUTFILE "$RecordLine\n";
 326     }
 327   }
 328 
 329   # Write out END record...
 330   print OUTFILE GenerateEndRecordLine(), "\n";
 331 
 332   close OUTFILE;
 333 }
 334 
 335 # Rename chain IDs...
 336 sub RenameChainsIDs {
 337   my($FileIndex, $PDBRecordLinesRef) = @_;
 338   my($PDBFileName,  $RecordLine, $ConectRecordLinesRef, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge, $RecordType, $PreviousChainID, $FirstChainID, $NewChainID, $NewChainIDCounter, %OldToNewChainIDsMap);
 339 
 340   $PDBFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][0];
 341   print "Generating PDBFileName file $PDBFileName...\n";
 342   open OUTFILE, ">$PDBFileName" or die "Error: Can't open $PDBFileName: $! \n";
 343 
 344   # Write out header and other older recors...
 345   WriteHeaderAndOlderRecords(\*OUTFILE, $PDBRecordLinesRef);
 346 
 347   # Write out all ATOM records along with TER and model records to indicate
 348   # chains and multiple models..
 349   %OldToNewChainIDsMap = ();
 350   $NewChainIDCounter = $OptionsInfo{StartingChainID};
 351   $FirstChainID = 1;
 352   $PreviousChainID = '';
 353   LINE: for $RecordLine (@{$PDBRecordLinesRef}) {
 354     if (IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine)) {
 355       $RecordType = GetPDBRecordType($RecordLine);
 356 
 357       ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseAtomOrHetatmRecordLine($RecordLine);
 358 
 359       if (exists $OptionsInfo{SpecifiedWaterResiduesMap}{$ResidueName}) {
 360 	# Chain IDs are not assigned to water residues...
 361 	print OUTFILE "$RecordLine\n";
 362 	next LINE;
 363       }
 364 
 365       if ($FirstChainID) {
 366 	$FirstChainID = 0;
 367 	$PreviousChainID = $ChainID;
 368 	if ($ChainID || (!$ChainID && $OptionsInfo{RenameEmptyChainIDs})) {
 369 	  $NewChainID = $NewChainIDCounter;
 370 	  $OldToNewChainIDsMap{$ChainID} = $NewChainID;
 371 	}
 372 	else {
 373 	  $NewChainID = '';
 374 	}
 375       }
 376       elsif ($PreviousChainID ne $ChainID) {
 377 	if ($ChainID || (!$ChainID && $OptionsInfo{RenameEmptyChainIDs})) {
 378 	  $PreviousChainID = $ChainID;
 379 	  if (exists $OldToNewChainIDsMap{$ChainID}) {
 380 	    $NewChainID = $OldToNewChainIDsMap{$ChainID};
 381 	  }
 382 	  else {
 383 	    $NewChainIDCounter++;
 384 	    $NewChainID = $NewChainIDCounter;
 385 	    $OldToNewChainIDsMap{$ChainID} = $NewChainID;
 386 	  }
 387 	}
 388 	else {
 389 	  $NewChainID = '';
 390 	}
 391       }
 392 
 393       print OUTFILE GenerateAtomOrHetatmRecordLine($RecordType, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $NewChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge), "\n";
 394     }
 395     elsif (IsTerRecordType($RecordLine)) {
 396       $AtomNumber++;
 397       print OUTFILE GenerateTerRecordLine($AtomNumber, $ResidueName, $NewChainID, $ResidueNumber, $InsertionCode), "\n";
 398     }
 399     elsif (IsModelRecordType($RecordLine)) {
 400       print OUTFILE "$RecordLine\n";
 401     }
 402     elsif (IsEndmdlRecordType($RecordLine)) {
 403       print OUTFILE "$RecordLine\n";
 404     }
 405   }
 406 
 407   # Write out CONECT records...
 408   for $RecordLine (@{$PDBRecordLinesRef}) {
 409     if (IsConectRecordType($RecordLine)) {
 410       print OUTFILE "$RecordLine\n";
 411     }
 412   }
 413 
 414   # Write out END record...
 415   print OUTFILE GenerateEndRecordLine(), "\n";
 416 
 417   close OUTFILE;
 418 }
 419 
 420 
 421 # Write out modifed header and other older records...
 422 sub WriteHeaderAndOlderRecords {
 423   my($OutFileRef, $PDBRecordLinesRef) = @_;
 424 
 425   if ($OptionsInfo{ModifyHeaderRecord}) {
 426     # Write out modified HEADER record...
 427     my($Classification, $DepositionDate, $IDCode) = GetHeaderRecordInformation($PDBRecordLinesRef);
 428     $Classification = 'Data modified using MayaChemTools';
 429     print $OutFileRef GenerateHeaderRecordLine($IDCode, $Classification), "\n";
 430   }
 431   else {
 432     print $OutFileRef $PDBRecordLinesRef->[0], "\n";
 433   }
 434 
 435   # Write out any old records...
 436   if ($OptionsInfo{KeepOldRecords}) {
 437     my($RecordLineIndex, $RecordLine);
 438     # Skip HEADER record and write out older records all the way upto first MODEL/ATOM/HETATM records from input file...
 439     RECORDLINE: for $RecordLineIndex (1 .. $#{$PDBRecordLinesRef}) {
 440       $RecordLine = $PDBRecordLinesRef->[$RecordLineIndex];
 441       if (IsModelRecordType($RecordLine) || IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine)) {
 442 	last RECORDLINE;
 443       }
 444       print $OutFileRef "$RecordLine\n";
 445     }
 446   }
 447 }
 448 
 449 # Get header record information assuming it's the first record...
 450 sub GetHeaderRecordInformation {
 451   my($PDBRecordLinesRef) = @_;
 452   my($Classification, $DepositionDate, $IDCode, $HeaderRecordLine);
 453 
 454   ($Classification, $DepositionDate, $IDCode) = ('') x 3;
 455   $HeaderRecordLine = $PDBRecordLinesRef->[0];
 456   if (IsHeaderRecordType($HeaderRecordLine)) {
 457     ($Classification, $DepositionDate, $IDCode) = ParseHeaderRecordLine($HeaderRecordLine);
 458   }
 459   return ($Classification, $DepositionDate, $IDCode);
 460 }
 461 
 462 
 463 # Process option values...
 464 sub ProcessOptions {
 465   %OptionsInfo = ();
 466   $OptionsInfo{Mode} = $Options{mode};
 467 
 468   $OptionsInfo{StartingAtomNumber} = $Options{atomnumberstart};
 469   $OptionsInfo{StartingChainID} = $Options{chainidstart};
 470   $OptionsInfo{RenameEmptyChainIDs} = ($Options{chainidrenameempty} =~ /^Yes$/i) ? 1 : 0;
 471 
 472   $OptionsInfo{KeepOldRecords} = ($Options{keepoldrecords} =~ /^Yes$/i) ? 1 : 0;
 473   $OptionsInfo{ModifyHeaderRecord} = ($Options{modifyheader} =~ /^Yes$/i) ? 1 : 0;
 474 
 475   $OptionsInfo{ResidueNumberMode} = $Options{residuenumbermode};
 476   $OptionsInfo{StartingResidueNumber} = $Options{residuenumberstart};
 477 
 478   $OptionsInfo{HetatmResidueNumberMode} = $Options{residuenumberhetatmmode};
 479   $OptionsInfo{StartingHetatmResidueNumber} = $Options{residuenumberstarthetatm};
 480 
 481   $OptionsInfo{OverwriteFiles} = $Options{overwrite} ? 1 : 0;
 482   $OptionsInfo{OutFileRoot} = $Options{root} ? $Options{root} : 0;
 483 
 484   $OptionsInfo{WaterResidueNames} = $Options{waterresiduenames};
 485   $OptionsInfo{StartingWaterResidueNumber} = $Options{waterresiduestart};
 486   @{$OptionsInfo{SpecifiedWaterResiduesList}} = ();
 487   %{$OptionsInfo{SpecifiedWaterResiduesMap}} = ();
 488 
 489   my(@SpecifiedWaterResiduesList);
 490   @SpecifiedWaterResiduesList = ();
 491   my($WaterResidueName);
 492   if ($OptionsInfo{WaterResidueNames} =~ /Automatic/i) {
 493     push @SpecifiedWaterResiduesList, ('HOH', 'WAT', 'H2O');
 494   }
 495   else {
 496     @SpecifiedWaterResiduesList = split /\,/, $Options{waterresiduenames};
 497   }
 498   for $WaterResidueName (@SpecifiedWaterResiduesList) {
 499     $OptionsInfo{SpecifiedWaterResiduesMap}{$WaterResidueName} = $WaterResidueName;
 500   }
 501   push @{$OptionsInfo{SpecifiedWaterResiduesList}}, @SpecifiedWaterResiduesList;
 502 }
 503 
 504 # Retrieve information about PDB files...
 505 sub RetrievePDBFilesInfo {
 506   my($Index, $PDBFile, $PDBRecordLinesRef, $ChainsAndResiduesInfoRef, $FileDir, $FileName, $FileExt, $OutFileName, $OutFileRoot,  $Mode, $OutFileMode, @OutFileNames);
 507 
 508   %PDBFilesInfo = ();
 509   @{$PDBFilesInfo{FileOkay}} = ();
 510   @{$PDBFilesInfo{OutFileRoot}} = ();
 511   @{$PDBFilesInfo{OutFileNames}} = ();
 512 
 513   FILELIST: for $Index (0 .. $#PDBFilesList) {
 514     $PDBFilesInfo{FileOkay}[$Index] = 0;
 515 
 516     $PDBFilesInfo{OutFileRoot}[$Index] = '';
 517     @{$PDBFilesInfo{OutFileNames}[$Index]} = ();
 518     @{$PDBFilesInfo{OutFileNames}[$Index]} = ();
 519 
 520     $PDBFile = $PDBFilesList[$Index];
 521     if (!(-e $PDBFile)) {
 522       warn "Warning: Ignoring file $PDBFile: It doesn't exist\n";
 523       next FILELIST;
 524     }
 525     if (!CheckFileType($PDBFile, "pdb")) {
 526       warn "Warning: Ignoring file $PDBFile: It's not a PDB file\n";
 527       next FILELIST;
 528     }
 529     if (! open PDBFILE, "$PDBFile") {
 530       warn "Warning: Ignoring file $PDBFile: Couldn't open it: $! \n";
 531       next FILELIST;
 532     }
 533     close PDBFILE;
 534 
 535     # Get PDB data...
 536     $PDBRecordLinesRef = ReadPDBFile($PDBFile);
 537     $ChainsAndResiduesInfoRef = GetChainsAndResidues($PDBRecordLinesRef);
 538     if (!scalar @{$ChainsAndResiduesInfoRef->{ChainIDs}}) {
 539       warn "Warning: Ignoring file $PDBFile: No chains found \n";
 540       next FILELIST;
 541     }
 542 
 543     # Setup output file names...
 544     @OutFileNames = ();
 545     $FileDir = ""; $FileName = ""; $FileExt = "";
 546     ($FileDir, $FileName, $FileExt) = ParseFileName($PDBFile);
 547     if ($OptionsInfo{OutFileRoot} && (@PDBFilesList == 1)) {
 548       my ($RootFileDir, $RootFileName, $RootFileExt) = ParseFileName($OptionsInfo{OutFileRoot});
 549       if ($RootFileName && $RootFileExt) {
 550 	$FileName = $RootFileName;
 551       }
 552       else {
 553 	$FileName = $OptionsInfo{OutFileRoot};
 554       }
 555       $OutFileRoot = $FileName;
 556     }
 557     else {
 558       $OutFileRoot = $FileName;
 559     }
 560     $Mode = $OptionsInfo{Mode};
 561     MODE: {
 562 	if ($Mode =~ /^RenumberAtoms$/i) { $OutFileMode = 'RenumberAtoms'; last MODE;}
 563 	if ($Mode =~ /^RenumberResidues$/i) { $OutFileMode = 'RenumberResidues'; last MODE;}
 564 	if ($Mode =~ /^RenumberWaters$/i) { $OutFileMode = 'RenumberWaters'; last MODE;}
 565 	if ($Mode =~ /^RenameChainIDs$/i) { $OutFileMode = 'RenameChainIDs'; last MODE;}
 566 	$OutFileMode = '';
 567     }
 568     $OutFileName = "${OutFileRoot}${OutFileMode}.pdb";
 569     push @OutFileNames, $OutFileName;
 570 
 571     $PDBFilesInfo{FileOkay}[$Index] = 1;
 572     $PDBFilesInfo{OutFileRoot}[$Index] = $OutFileRoot;
 573 
 574     push @{$PDBFilesInfo{OutFileNames}[$Index]}, @OutFileNames;
 575   }
 576 }
 577 
 578 # Setup script usage  and retrieve command line arguments specified using various options...
 579 sub SetupScriptUsage {
 580 
 581   # Retrieve all the options...
 582   %Options = ();
 583   $Options{atomnumberstart} = 1;
 584   $Options{chainidstart} = 'A';
 585   $Options{chainidrenameempty} = 'No';
 586   $Options{keepoldrecords} = 'no';
 587   $Options{mode} = 'RenumberResidues';
 588   $Options{modifyheader} = 'yes';
 589   $Options{residuenumbermode} = 'PerChain';
 590   $Options{residuenumberstart} = 1;
 591   $Options{residuenumberhetatmmode} = 'Automatic';
 592   $Options{residuenumberstarthetatm} = 6000;
 593   $Options{waterresiduenames} = 'Automatic';
 594   $Options{waterresiduestart} = 8000;
 595 
 596   if (!GetOptions(\%Options, "help|h", "atomnumberstart|a=i", "chainidstart|c=s", "chainidrenameempty=s", "keepoldrecords|k=s", "mode|m=s", "modifyheader=s", "overwrite|o", "residuenumbermode=s", "residuenumberstart=i", "residuenumberhetatmmode=s", "residuenumberstarthetatm=i", "root|r=s", "sequencelength=i", "waterresiduenames=s", "waterresiduestart=i", "workingdir|w=s")) {
 597     die "\nTo get a list of valid options and their values, use \"$ScriptName -h\" or\n\"perl -S $ScriptName -h\" command and try again...\n";
 598   }
 599   if ($Options{workingdir}) {
 600     if (! -d $Options{workingdir}) {
 601       die "Error: The value specified, $Options{workingdir}, for option \"-w --workingdir\" is not a directory name.\n";
 602     }
 603     chdir $Options{workingdir} or die "Error: Couldn't chdir $Options{workingdir}: $! \n";
 604   }
 605   if (!IsPositiveInteger($Options{atomnumberstart})) {
 606     die "Error: The value specified, $Options{atomnumberstart}, for option \"-a, --AtomNumberStart\" is not valid. Allowed values: >0\n";
 607   }
 608   if ((length($Options{chainidstart}) > 1) || ($Options{chainidstart} !~ /[A-Z]/i)) {
 609     die "Error: The value specified, $Options{chainidstart}, for option \"-c, --ChainIDStart\" is not valid. Allowed values: a single character from A to Z\n";
 610   }
 611   if ($Options{chainidrenameempty} !~ /^(yes|no)$/i) {
 612     die "Error: The value specified, $Options{chainidrenameempty}, for option \"--chainidrenameempty\" is not valid. Allowed values: yes or no\n";
 613   }
 614   if ($Options{keepoldrecords} !~ /^(yes|no)$/i) {
 615     die "Error: The value specified, $Options{keepoldrecords}, for option \"--KeepOldRecords\" is not valid. Allowed values: yes or no\n";
 616   }
 617   if ($Options{mode} !~ /^(RenumberAtoms|RenumberResidues|RenumberWaters|RenameChainIDs)$/i) {
 618     die "Error: The value specified, $Options{mode}, for option \"-m, --mode\" is not valid. Allowed values: RenumberAtoms, RenumberResidues, RenumberWaters or RenameChainIDs\n";
 619   }
 620   if ($Options{modifyheader} !~ /^(yes|no)$/i) {
 621     die "Error: The value specified, $Options{modifyheader}, for option \"--ModifyHeader\" is not valid. Allowed values: yes or no\n";
 622   }
 623   if ($Options{residuenumbermode} !~ /^(Sequential|PerChain)$/i) {
 624     die "Error: The value specified, $Options{residuenumbermode}, for option \"--ResidueNumberMode\" is not valid. Allowed values: Sequential or PerChain\n";
 625   }
 626   if (!IsPositiveInteger($Options{residuenumberstart})) {
 627     die "Error: The value specified, $Options{residuenumberstart}, for option \"--ResidueNumberStart\" is not valid. Allowed values: >0\n";
 628   }
 629   if ($Options{residuenumberhetatmmode} !~ /^(automatic|specify)$/i) {
 630     die "Error: The value specified, $Options{residuenumberhetatmmode}, for option \"--residuenumbermode\" is not valid. Allowed values: automatic or specify\n";
 631   }
 632   if (!IsPositiveInteger($Options{residuenumberstarthetatm})) {
 633     die "Error: The value specified, $Options{residuenumberstarthetatm}, for option \"--residuenumberstartHetatm\" is not valid. Allowed values: >0\n";
 634   }
 635   if (!IsPositiveInteger $Options{waterresiduestart}) {
 636     die "Error: The value specified, $Options{waterresiduestart}, for option \"--waterresiduestart\" is not valid. Allowed values: >0\n";
 637   }
 638 }
 639