#!/usr/bin/perl -w
#
# $RCSfile: ModifyPDBFiles.pl,v $
# $Date: 2008/01/30 21:44:49 $
# $Revision: 1.11 $
#
# Author: Manish Sud <msud@san.rr.com>
#
# Copyright (C) 2004-2008 Manish Sud. All rights reserved.
#
# This file is part of MayaChemTools.
#
# MayaChemTools is free software; you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation; either version 3 of the License, or (at your option) any
# later version.
#
# MayaChemTools is distributed in the hope that it will be useful, but without
# any warranty; without even the implied warranty of merchantability of fitness
# for a particular purpose. See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
# write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
# Boston, MA, 02111-1307, USA.
#

use 5.006;
use strict;
use FindBin; use lib "$FindBin::Bin/../lib";
use Getopt::Long;
use File::Basename;
use Text::ParseWords;
use Benchmark;
use FileUtil;
use TextUtil;
use PDBFileUtil;

# File-scoped state; the subroutines defined later in this file read and
# write these lexicals directly, so their names must not change.
my($ScriptName, %Options, $StartTime, $EndTime, $TotalTime);

# Autoflush STDOUT so that progress messages are not held back in a buffer...
$| = 1;

# Starting message and start of the timing measurement...
$ScriptName = basename($0);
print "\n$ScriptName: Starting...\n\n";
$StartTime = Benchmark->new;

# Get the options and setup script; print the usage text and stop when help
# is requested or no input file is given on the command line...
SetupScriptUsage();
die GetUsageFromPod("$FindBin::Bin/$ScriptName") if ($Options{help} || @ARGV < 1);

# Input file names after expansion by ExpandFileNames...
my(@PDBFilesList) = ExpandFileNames(\@ARGV, "pdb");

# Process options...
my(%OptionsInfo);
print "Processing options...\n";
ProcessOptions();

# Setup information about input files...
print "Checking input PDB file(s)...\n";
my(%PDBFilesInfo);
RetrievePDBFilesInfo();

# Process input files; with more than one input file a heading is printed
# and each per-file message is preceded by an empty line...
my($FileIndex, $PDBFile, $FileProcessingMsg);
$FileProcessingMsg = (@PDBFilesList > 1) ? "\nProcessing file" : "Processing file";
print "Processing PDB files...\n" if (@PDBFilesList > 1);

for $FileIndex (0 .. $#PDBFilesList) {
  # Skip files which RetrievePDBFilesInfo did not mark as usable...
  next unless $PDBFilesInfo{FileOkay}[$FileIndex];

  $PDBFile = $PDBFilesList[$FileIndex];
  print "$FileProcessingMsg $PDBFile...\n";
  ModifyPDBFiles($FileIndex);
}

print "$ScriptName:Done...\n\n";

$EndTime = Benchmark->new;
$TotalTime = timediff($EndTime, $StartTime);
print "Total time: ", timestr($TotalTime), "\n";

###############################################################################

# Read the PDB file at position $FileIndex of @PDBFilesList and hand its
# record lines to the subroutine which implements the selected mode. Nothing
# is done when the mode matches none of the known patterns.
sub ModifyPDBFiles {
  my($FileIndex) = @_;

  # Get PDB data...
  my $RecordLinesRef = ReadPDBFile($PDBFilesList[$FileIndex]);

  # Mode patterns, tried in order, paired with the subroutine to run...
  my @ModeHandlers = (
    [qr/^RenumberAtoms$/i, \&RenumberAtoms],
    [qr/^RenumberResidues$/i, \&RenumberResidues],
    [qr/^RenumberWaters$/i, \&RenumberWaters],
    [qr/^RenameChainIDs$/i, \&RenameChainsIDs],
  );

  for my $ModeHandler (@ModeHandlers) {
    my($ModePattern, $HandlerRef) = @{$ModeHandler};
    if ($OptionsInfo{Mode} =~ $ModePattern) {
      $HandlerRef->($FileIndex, $RecordLinesRef);
      last;
    }
  }
}

# Renumber atom and hetero atom numbers...
#
# RenumberAtoms($FileIndex, $PDBRecordLinesRef)
#
# Write the first output file registered for $FileIndex with ATOM/HETATM
# serial numbers reassigned sequentially from $OptionsInfo{StartingAtomNumber}.
# Numbering starts over after each ENDMDL record. CONECT records are written
# with atom numbers translated through the old-to-new map; numbers which are
# undefined, zero or not in the map are passed through unchanged.
#
# TER records take the next serial number in sequence: the counter already
# points past the last atom written, so it is used first and advanced
# afterwards. Advancing it first would skip one serial number and give the
# TER record and the following atom the same number.
#
# Dies when the output file can't be opened or closed.
#
sub RenumberAtoms {
  my($FileIndex, $PDBRecordLinesRef) = @_;
  my($PDBFileName, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge, $NewAtomNumber, $RecordType, %OldToNewAtomNumbersMap);

  $PDBFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][0];
  print "Generating PDBFileName file $PDBFileName...\n";
  open my $OutFile, ">", $PDBFileName or die "Error: Can't open $PDBFileName: $! \n";

  # Write out header and other older records...
  WriteHeaderAndOlderRecords($OutFile, $PDBRecordLinesRef);

  # Write out all ATOM records along with TER and model records to indicate
  # chains and multiple models..
  %OldToNewAtomNumbersMap = ();
  $NewAtomNumber = $OptionsInfo{StartingAtomNumber};
  for my $RecordLine (@{$PDBRecordLinesRef}) {
    if (IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine)) {
      $RecordType = GetPDBRecordType($RecordLine);

      ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseAtomOrHetatmRecordLine($RecordLine);

      print $OutFile GenerateAtomOrHetatmRecordLine($RecordType, $NewAtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge), "\n";

      $OldToNewAtomNumbersMap{$AtomNumber} = $NewAtomNumber;
      $NewAtomNumber++;
    }
    elsif (IsTerRecordType($RecordLine)) {
      # Residue information comes from the most recently parsed atom record...
      print $OutFile GenerateTerRecordLine($NewAtomNumber, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode), "\n";
      $NewAtomNumber++;
    }
    elsif (IsModelRecordType($RecordLine)) {
      print $OutFile "$RecordLine\n";
    }
    elsif (IsEndmdlRecordType($RecordLine)) {
      print $OutFile "$RecordLine\n";
      # Restart numbering...
      $NewAtomNumber = $OptionsInfo{StartingAtomNumber};
    }
  }

  # Write out modified CONECT records...
  for my $RecordLine (@{$PDBRecordLinesRef}) {
    next unless IsConectRecordType($RecordLine);

    my @ModifiedConectAtomNums = map {
      (defined($_) && $_ && exists $OldToNewAtomNumbersMap{$_}) ? $OldToNewAtomNumbersMap{$_} : $_
    } ParseConectRecordLine($RecordLine);

    print $OutFile GenerateConectRecordLine(@ModifiedConectAtomNums), "\n";
  }

  # Write out END record...
  print $OutFile GenerateEndRecordLine(), "\n";

  # Buffered write errors surface at close time...
  close $OutFile or die "Error: Can't close $PDBFileName: $! \n";
}

#
# RenumberResidues($FileIndex, $PDBRecordLinesRef)
#
# Write the first output file registered for $FileIndex with residue numbers
# reassigned. Numbers start at $OptionsInfo{StartingResidueNumber} and go up
# by one each time the original residue number of consecutive records
# changes. In PerChain mode numbering starts over after every TER record
# except the last one.
#
# HETATM records which appear after the last TER record are numbered
# separately, starting at $OptionsInfo{StartingHetatmResidueNumber}, unless
# the HETATM residue number mode is Automatic; in that case, and for HETATM
# records before the last TER record, they share the counter used for ATOM
# records.
#
# NOTE(review): an original residue number of 0 is indistinguishable from
# "no previous residue" in the change tests below. Also, in PerChain mode the
# first residue written after the last TER record reuses the number of the
# last chain residue, because the previous residue number is cleared but the
# counter is not advanced - confirm whether that is intended.
#
# Dies when the output file can't be opened or closed.
#
sub RenumberResidues {
  my($FileIndex, $PDBRecordLinesRef) = @_;
  my($PDBFileName, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge, $NewResidueNumber, $NewHetatmResidueNumber, $TERCount, $TotalTERCount, $PreviousResidueNumber, $PreviousHetatmResidueNumber, $RecordType);

  $PDBFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][0];
  print "Generating PDBFileName file $PDBFileName...\n";
  open my $OutFile, ">", $PDBFileName or die "Error: Can't open $PDBFileName: $! \n";

  # Write out header and other older records...
  WriteHeaderAndOlderRecords($OutFile, $PDBRecordLinesRef);

  # Do a quick count of all TER records...
  $TotalTERCount = scalar(grep { IsTerRecordType($_) } @{$PDBRecordLinesRef});

  # Write out all ATOM records along with TER and model records to indicate
  # chains and multiple models..
  $NewResidueNumber = $OptionsInfo{StartingResidueNumber};
  $NewHetatmResidueNumber = $OptionsInfo{StartingHetatmResidueNumber};

  $TERCount = 0;
  $PreviousResidueNumber = 0;
  $PreviousHetatmResidueNumber = 0;

  for my $RecordLine (@{$PDBRecordLinesRef}) {
    if (IsAtomRecordType($RecordLine) || (IsHetatmRecordType($RecordLine) && ($TERCount < $TotalTERCount || $OptionsInfo{HetatmResidueNumberMode} =~ /^Automatic$/i))) {
      $RecordType = GetPDBRecordType($RecordLine);
      ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseAtomOrHetatmRecordLine($RecordLine);

      # Move to the next number whenever the original residue number changes;
      # the first residue in a chain keeps the current number...
      if ($PreviousResidueNumber && $PreviousResidueNumber != $ResidueNumber) {
        $NewResidueNumber++;
      }
      $PreviousResidueNumber = $ResidueNumber;

      print $OutFile GenerateAtomOrHetatmRecordLine($RecordType, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $NewResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge), "\n";
    }
    elsif (IsHetatmRecordType($RecordLine)) {
      ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseHetatmRecordLine($RecordLine);

      # User specified HETATM residue numbers; the first HETATM residue
      # outside a chain keeps the starting number...
      if ($PreviousHetatmResidueNumber && $PreviousHetatmResidueNumber != $ResidueNumber) {
        $NewHetatmResidueNumber++;
      }
      $PreviousHetatmResidueNumber = $ResidueNumber;

      print $OutFile GenerateHetatmRecordLine($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $NewHetatmResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge), "\n";
    }
    elsif (IsTerRecordType($RecordLine)) {
      $TERCount++;
      # TER record is written with the serial number following the last
      # parsed atom record and with that record's residue information...
      $AtomNumber++;
      print $OutFile GenerateTerRecordLine($AtomNumber, $ResidueName, $ChainID, $NewResidueNumber, $InsertionCode), "\n";
      # For per chain numbering, start over again...
      if ($OptionsInfo{ResidueNumberMode} =~ /^PerChain$/i) {
        if ($TERCount < $TotalTERCount) {
          $NewResidueNumber = $OptionsInfo{StartingResidueNumber};
        }
        $PreviousResidueNumber = 0;
      }
    }
    elsif (IsModelRecordType($RecordLine) || IsEndmdlRecordType($RecordLine)) {
      print $OutFile "$RecordLine\n";
    }
  }

  # Write out CONECT records...
  for my $RecordLine (@{$PDBRecordLinesRef}) {
    if (IsConectRecordType($RecordLine)) {
      print $OutFile "$RecordLine\n";
    }
  }

  # Write out END record...
  print $OutFile GenerateEndRecordLine(), "\n";

  # Buffered write errors surface at close time...
  close $OutFile or die "Error: Can't close $PDBFileName: $! \n";
}

# Renumber water residues...
#
# RenumberWaters($FileIndex, $PDBRecordLinesRef)
#
# Write the first output file registered for $FileIndex with new residue
# numbers for ATOM/HETATM records whose residue name is a key of
# $OptionsInfo{SpecifiedWaterResiduesMap}. All other ATOM/HETATM records and
# the TER, MODEL, ENDMDL and CONECT records are copied unchanged.
#
# NOTE(review): the counter advances after every matching record, so a water
# residue made up of several atom records gets a different number for each
# atom - confirm that input waters always consist of a single record.
#
# Dies when the output file can't be opened or closed.
#
sub RenumberWaters {
  my($FileIndex, $PDBRecordLinesRef) = @_;
  my($PDBFileName, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge, $NewResidueNumber, $RecordType);

  $PDBFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][0];
  print "Generating PDBFileName file $PDBFileName...\n";
  open my $OutFile, ">", $PDBFileName or die "Error: Can't open $PDBFileName: $! \n";

  # Write out header and other older records...
  WriteHeaderAndOlderRecords($OutFile, $PDBRecordLinesRef);

  # Write out all ATOM records along with TER and model records to indicate
  # chains and multiple models..
  $NewResidueNumber = $OptionsInfo{StartingWaterResidueNumber};
  for my $RecordLine (@{$PDBRecordLinesRef}) {
    if (IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine)) {
      $RecordType = GetPDBRecordType($RecordLine);

      ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseAtomOrHetatmRecordLine($RecordLine);

      if (exists $OptionsInfo{SpecifiedWaterResiduesMap}{$ResidueName}) {
        $ResidueNumber = $NewResidueNumber;
        print $OutFile GenerateAtomOrHetatmRecordLine($RecordType, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge), "\n";
        $NewResidueNumber++;
      }
      else {
        print $OutFile "$RecordLine\n";
      }
    }
    elsif (IsTerRecordType($RecordLine) || IsModelRecordType($RecordLine) || IsEndmdlRecordType($RecordLine)) {
      print $OutFile "$RecordLine\n";
    }
  }

  # Write out CONECT records...
  for my $RecordLine (@{$PDBRecordLinesRef}) {
    if (IsConectRecordType($RecordLine)) {
      print $OutFile "$RecordLine\n";
    }
  }

  # Write out END record...
  print $OutFile GenerateEndRecordLine(), "\n";

  # Buffered write errors surface at close time...
  close $OutFile or die "Error: Can't close $PDBFileName: $! \n";
}

#
# RenameChainsIDs($FileIndex, $PDBRecordLinesRef)
#
# Write the first output file registered for $FileIndex with new chain IDs.
# The first chain to be renamed gets $OptionsInfo{StartingChainID}; every
# further old chain ID seen for the first time gets the next value produced
# by incrementing that ID. An old chain ID which shows up again later is
# mapped to the ID it got the first time. Empty chain IDs are renamed only
# when $OptionsInfo{RenameEmptyChainIDs} is set; otherwise they stay empty.
# Records for residues in $OptionsInfo{SpecifiedWaterResiduesMap} are copied
# unchanged.
#
# The chain ID of the previous record is tracked for every record, including
# records with an empty chain ID which is not renamed. Without that, a chain
# interrupted by such records would come out with an empty chain ID for its
# remaining records. The starting ID is consumed only when it is actually
# assigned, so a file whose first record has an empty chain ID still starts
# its first renamed chain at $OptionsInfo{StartingChainID}.
#
# NOTE(review): the ID is advanced with Perl's string increment, which
# produces 'AA' after 'Z' - confirm that inputs never have that many chains.
#
# Dies when the output file can't be opened or closed.
#
sub RenameChainsIDs {
  my($FileIndex, $PDBRecordLinesRef) = @_;
  my($PDBFileName, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge, $RecordType, $PreviousChainID, $FirstChainID, $NewChainID, $NextNewChainID, %OldToNewChainIDsMap);

  $PDBFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][0];
  print "Generating PDBFileName file $PDBFileName...\n";
  open my $OutFile, ">", $PDBFileName or die "Error: Can't open $PDBFileName: $! \n";

  # Write out header and other older records...
  WriteHeaderAndOlderRecords($OutFile, $PDBRecordLinesRef);

  # Write out all ATOM records along with TER and model records to indicate
  # chains and multiple models..
  %OldToNewChainIDsMap = ();
  $NextNewChainID = $OptionsInfo{StartingChainID};
  $FirstChainID = 1;
  $PreviousChainID = '';
  LINE: for my $RecordLine (@{$PDBRecordLinesRef}) {
    if (IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine)) {
      $RecordType = GetPDBRecordType($RecordLine);

      ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseAtomOrHetatmRecordLine($RecordLine);

      if (exists $OptionsInfo{SpecifiedWaterResiduesMap}{$ResidueName}) {
        # Chain IDs are not assigned to water residues...
        print $OutFile "$RecordLine\n";
        next LINE;
      }

      # Work out the new chain ID again whenever the old one changes...
      if ($FirstChainID || $PreviousChainID ne $ChainID) {
        $FirstChainID = 0;
        $PreviousChainID = $ChainID;
        if ($ChainID || $OptionsInfo{RenameEmptyChainIDs}) {
          if (!exists $OldToNewChainIDsMap{$ChainID}) {
            $OldToNewChainIDsMap{$ChainID} = $NextNewChainID;
            $NextNewChainID++;
          }
          $NewChainID = $OldToNewChainIDsMap{$ChainID};
        }
        else {
          $NewChainID = '';
        }
      }

      print $OutFile GenerateAtomOrHetatmRecordLine($RecordType, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $NewChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge), "\n";
    }
    elsif (IsTerRecordType($RecordLine)) {
      # TER record is written with the serial number following the last
      # parsed atom record and with that record's residue information...
      $AtomNumber++;
      print $OutFile GenerateTerRecordLine($AtomNumber, $ResidueName, $NewChainID, $ResidueNumber, $InsertionCode), "\n";
    }
    elsif (IsModelRecordType($RecordLine) || IsEndmdlRecordType($RecordLine)) {
      print $OutFile "$RecordLine\n";
    }
  }

  # Write out CONECT records...
  for my $RecordLine (@{$PDBRecordLinesRef}) {
    if (IsConectRecordType($RecordLine)) {
      print $OutFile "$RecordLine\n";
    }
  }

  # Write out END record...
  print $OutFile GenerateEndRecordLine(), "\n";

  # Buffered write errors surface at close time...
  close $OutFile or die "Error: Can't close $PDBFileName: $! \n";
}


#
# WriteHeaderAndOlderRecords($OutFileRef, $PDBRecordLinesRef)
#
# Write the leading part of an output file to the file handle $OutFileRef.
# With $OptionsInfo{ModifyHeaderRecord} set, a HEADER record is generated
# from the ID code of the input header and a fixed classification text;
# otherwise the first input record is copied as it is. With
# $OptionsInfo{KeepOldRecords} set, the input records after the first one are
# copied up to, but not including, the first MODEL, ATOM or HETATM record.
#
sub WriteHeaderAndOlderRecords {
  my($OutFileRef, $PDBRecordLinesRef) = @_;

  if ($OptionsInfo{ModifyHeaderRecord}) {
    # Write out modified HEADER record; only the ID code is taken over...
    my($Classification, $DepositionDate, $IDCode) = GetHeaderRecordInformation($PDBRecordLinesRef);
    $Classification = 'Data modified using MayaChemTools';
    print $OutFileRef GenerateHeaderRecordLine($IDCode, $Classification), "\n";
  }
  else {
    print $OutFileRef $PDBRecordLinesRef->[0], "\n";
  }

  # Write out any old records...
  return unless $OptionsInfo{KeepOldRecords};

  # Skip HEADER record and write out older records all the way up to first
  # MODEL/ATOM/HETATM records from input file...
  RECORDLINE: for my $RecordLineIndex (1 .. $#{$PDBRecordLinesRef}) {
    my $RecordLine = $PDBRecordLinesRef->[$RecordLineIndex];
    if (IsModelRecordType($RecordLine) || IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine)) {
      last RECORDLINE;
    }
    print $OutFileRef "$RecordLine\n";
  }
}

#
# GetHeaderRecordInformation($PDBRecordLinesRef)
#
# Return (Classification, DepositionDate, IDCode) parsed from the first
# record, assuming it's the HEADER record. Three empty strings are returned
# when the first record is not a HEADER record.
#
sub GetHeaderRecordInformation {
  my($PDBRecordLinesRef) = @_;
  my($Classification, $DepositionDate, $IDCode) = ('') x 3;

  my $HeaderRecordLine = $PDBRecordLinesRef->[0];
  if (IsHeaderRecordType($HeaderRecordLine)) {
    ($Classification, $DepositionDate, $IDCode) = ParseHeaderRecordLine($HeaderRecordLine);
  }
  return ($Classification, $DepositionDate, $IDCode);
}


#
# ProcessOptions()
#
# Copy the command line option values from %Options into %OptionsInfo,
# turning the yes/no options into 1/0 flags and setting up the list and the
# lookup map of water residue names. The names are HOH, WAT and H2O when the
# water residue names option contains "Automatic"; otherwise the option value
# is split at commas.
#
# NOTE(review): OverwriteFiles is stored here but no subroutine in this file
# consults it before writing output files - confirm whether that is intended.
#
sub ProcessOptions {
  %OptionsInfo = ();
  $OptionsInfo{Mode} = $Options{mode};

  $OptionsInfo{StartingAtomNumber} = $Options{atomnumberstart};
  $OptionsInfo{StartingChainID} = $Options{chainidstart};
  $OptionsInfo{RenameEmptyChainIDs} = ($Options{chainidrenameempty} =~ /^Yes$/i) ? 1 : 0;

  $OptionsInfo{KeepOldRecords} = ($Options{keepoldrecords} =~ /^Yes$/i) ? 1 : 0;
  $OptionsInfo{ModifyHeaderRecord} = ($Options{modifyheader} =~ /^Yes$/i) ? 1 : 0;

  $OptionsInfo{ResidueNumberMode} = $Options{residuenumbermode};
  $OptionsInfo{StartingResidueNumber} = $Options{residuenumberstart};

  $OptionsInfo{HetatmResidueNumberMode} = $Options{residuenumberhetatmmode};
  $OptionsInfo{StartingHetatmResidueNumber} = $Options{residuenumberstarthetatm};

  $OptionsInfo{OverwriteFiles} = $Options{overwrite} ? 1 : 0;
  $OptionsInfo{OutFileRoot} = $Options{root} ? $Options{root} : 0;

  $OptionsInfo{WaterResidueNames} = $Options{waterresiduenames};
  $OptionsInfo{StartingWaterResidueNumber} = $Options{waterresiduestart};

  my @SpecifiedWaterResiduesList = ($OptionsInfo{WaterResidueNames} =~ /Automatic/i)
    ? ('HOH', 'WAT', 'H2O')
    : split(/\,/, $Options{waterresiduenames});

  @{$OptionsInfo{SpecifiedWaterResiduesList}} = @SpecifiedWaterResiduesList;
  %{$OptionsInfo{SpecifiedWaterResiduesMap}} = map { ($_ => $_) } @SpecifiedWaterResiduesList;
}

#
# RetrievePDBFilesInfo()
#
# Fill %PDBFilesInfo with one entry per file in @PDBFilesList: a FileOkay
# flag, the output file root and the list of output file names. A file is
# skipped with a warning, and keeps FileOkay set to 0, when it doesn't exist,
# isn't a PDB file according to CheckFileType, can't be opened for reading or
# contains no chains according to GetChainsAndResidues.
#
# The output file name is the output root followed by the mode name and
# ".pdb". The root is the input file name without extension, unless an
# output root was specified and there is exactly one input file.
#
sub RetrievePDBFilesInfo {
  my($Index, $PDBFile, $PDBRecordLinesRef, $ChainsAndResiduesInfoRef, $FileDir, $FileName, $FileExt, $OutFileRoot, $Mode, $OutFileMode);

  %PDBFilesInfo = ();
  @{$PDBFilesInfo{FileOkay}} = ();
  @{$PDBFilesInfo{OutFileRoot}} = ();
  @{$PDBFilesInfo{OutFileNames}} = ();

  FILELIST: for $Index (0 .. $#PDBFilesList) {
    $PDBFilesInfo{FileOkay}[$Index] = 0;

    $PDBFilesInfo{OutFileRoot}[$Index] = '';
    @{$PDBFilesInfo{OutFileNames}[$Index]} = ();

    $PDBFile = $PDBFilesList[$Index];
    if (!(-e $PDBFile)) {
      warn "Warning: Ignoring file $PDBFile: It doesn't exist\n";
      next FILELIST;
    }
    if (!CheckFileType($PDBFile, "pdb")) {
      warn "Warning: Ignoring file $PDBFile: It's not a PDB file\n";
      next FILELIST;
    }

    # Make sure the file is readable; the explicit read mode keeps characters
    # in the file name from being taken as an open mode...
    my $PDBFileHandle;
    if (!open $PDBFileHandle, "<", $PDBFile) {
      warn "Warning: Ignoring file $PDBFile: Couldn't open it: $! \n";
      next FILELIST;
    }
    close $PDBFileHandle;

    # Get PDB data...
    $PDBRecordLinesRef = ReadPDBFile($PDBFile);
    $ChainsAndResiduesInfoRef = GetChainsAndResidues($PDBRecordLinesRef);
    if (!scalar @{$ChainsAndResiduesInfoRef->{ChainIDs}}) {
      warn "Warning: Ignoring file $PDBFile: No chains found \n";
      next FILELIST;
    }

    # Setup output file names...
    ($FileDir, $FileName, $FileExt) = ParseFileName($PDBFile);
    if ($OptionsInfo{OutFileRoot} && (@PDBFilesList == 1)) {
      # Drop the extension from the specified root when it has one...
      my($RootFileDir, $RootFileName, $RootFileExt) = ParseFileName($OptionsInfo{OutFileRoot});
      $FileName = ($RootFileName && $RootFileExt) ? $RootFileName : $OptionsInfo{OutFileRoot};
    }
    $OutFileRoot = $FileName;

    # Use the canonical spelling of the mode name in the file name...
    $Mode = $OptionsInfo{Mode};
    $OutFileMode = '';
    MODENAME: for my $ModeName ('RenumberAtoms', 'RenumberResidues', 'RenumberWaters', 'RenameChainIDs') {
      if ($Mode =~ /^$ModeName$/i) {
        $OutFileMode = $ModeName;
        last MODENAME;
      }
    }

    $PDBFilesInfo{FileOkay}[$Index] = 1;
    $PDBFilesInfo{OutFileRoot}[$Index] = $OutFileRoot;

    push @{$PDBFilesInfo{OutFileNames}[$Index]}, "${OutFileRoot}${OutFileMode}.pdb";
  }
}

#
# SetupScriptUsage()
#
# Set the default option values in %Options, retrieve the command line
# arguments specified using various options, change into the working
# directory when one is given and validate the option values. Dies with an
# error message on the first problem found.
#
sub SetupScriptUsage {

  # Retrieve all the options...
  %Options = ();
  $Options{atomnumberstart} = 1;
  $Options{chainidstart} = 'A';
  $Options{chainidrenameempty} = 'No';
  $Options{keepoldrecords} = 'no';
  $Options{mode} = 'RenumberResidues';
  $Options{modifyheader} = 'yes';
  $Options{residuenumbermode} = 'PerChain';
  $Options{residuenumberstart} = 1;
  $Options{residuenumberhetatmmode} = 'Automatic';
  $Options{residuenumberstarthetatm} = 6000;
  $Options{waterresiduenames} = 'Automatic';
  $Options{waterresiduestart} = 8000;

  if (!GetOptions(\%Options, "help|h", "atomnumberstart|a=i", "chainidstart|c=s", "chainidrenameempty=s", "keepoldrecords|k=s", "mode|m=s", "modifyheader=s", "overwrite|o", "residuenumbermode=s", "residuenumberstart=i", "residuenumberhetatmmode=s", "residuenumberstarthetatm=i", "root|r=s", "sequencelength=i", "waterresiduenames=s", "waterresiduestart=i", "workingdir|w=s")) {
    die "\nTo get a list of valid options and their values, use \"$ScriptName -h\" or\n\"perl -S $ScriptName -h\" command and try again...\n";
  }
  if ($Options{workingdir}) {
    if (! -d $Options{workingdir}) {
      die "Error: The value specified, $Options{workingdir}, for option \"-w --workingdir\" is not a directory name.\n";
    }
    chdir $Options{workingdir} or die "Error: Couldn't chdir $Options{workingdir}: $! \n";
  }
  if (!IsPositiveInteger($Options{atomnumberstart})) {
    die "Error: The value specified, $Options{atomnumberstart}, for option \"-a, --AtomNumberStart\" is not valid. Allowed values: >0\n";
  }
  if ((length($Options{chainidstart}) > 1) || ($Options{chainidstart} !~ /[A-Z]/i)) {
    die "Error: The value specified, $Options{chainidstart}, for option \"-c, --ChainIDStart\" is not valid. Allowed values: a single character from A to Z\n";
  }
  if ($Options{chainidrenameempty} !~ /^(yes|no)$/i) {
    die "Error: The value specified, $Options{chainidrenameempty}, for option \"--chainidrenameempty\" is not valid. Allowed values: yes or no\n";
  }
  if ($Options{keepoldrecords} !~ /^(yes|no)$/i) {
    die "Error: The value specified, $Options{keepoldrecords}, for option \"--KeepOldRecords\" is not valid. Allowed values: yes or no\n";
  }
  if ($Options{mode} !~ /^(RenumberAtoms|RenumberResidues|RenumberWaters|RenameChainIDs)$/i) {
    die "Error: The value specified, $Options{mode}, for option \"-m, --mode\" is not valid. Allowed values: RenumberAtoms, RenumberResidues, RenumberWaters or RenameChainIDs\n";
  }
  if ($Options{modifyheader} !~ /^(yes|no)$/i) {
    die "Error: The value specified, $Options{modifyheader}, for option \"--ModifyHeader\" is not valid. Allowed values: yes or no\n";
  }
  if ($Options{residuenumbermode} !~ /^(Sequential|PerChain)$/i) {
    die "Error: The value specified, $Options{residuenumbermode}, for option \"--ResidueNumberMode\" is not valid. Allowed values: Sequential or PerChain\n";
  }
  if (!IsPositiveInteger($Options{residuenumberstart})) {
    die "Error: The value specified, $Options{residuenumberstart}, for option \"--ResidueNumberStart\" is not valid. Allowed values: >0\n";
  }
  if ($Options{residuenumberhetatmmode} !~ /^(automatic|specify)$/i) {
    # Name the option which is actually being checked here...
    die "Error: The value specified, $Options{residuenumberhetatmmode}, for option \"--ResidueNumberHetatmMode\" is not valid. Allowed values: automatic or specify\n";
  }
  if (!IsPositiveInteger($Options{residuenumberstarthetatm})) {
    die "Error: The value specified, $Options{residuenumberstarthetatm}, for option \"--residuenumberstartHetatm\" is not valid. Allowed values: >0\n";
  }
  if (!IsPositiveInteger($Options{waterresiduestart})) {
    die "Error: The value specified, $Options{waterresiduestart}, for option \"--waterresiduestart\" is not valid. Allowed values: >0\n";
  }
}