MayaChemTools

   1 package PDBFileUtil;
   2 #
   3 # $RCSfile: PDBFileUtil.pm,v $
   4 # $Date: 2008/04/19 16:11:02 $
   5 # $Revision: 1.18 $
   6 #
   7 # Author: Manish Sud <msud@san.rr.com>
   8 #
   9 # Copyright (C) 2004-2008 Manish Sud. All rights reserved.
  10 #
  11 # This file is part of MayaChemTools.
  12 #
  13 # MayaChemTools is free software; you can redistribute it and/or modify it under
  14 # the terms of the GNU Lesser General Public License as published by the Free
  15 # Software Foundation; either version 3 of the License, or (at your option) any
  16 # later version.
  17 #
  18 # MayaChemTools is distributed in the hope that it will be useful, but without
  19 # any warranty; without even the implied warranty of merchantability of fitness
  20 # for a particular purpose.  See the GNU Lesser General Public License for more
  21 # details.
  22 #
  23 # You should have received a copy of the GNU Lesser General Public License
  24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  26 # Boston, MA, 02111-1307, USA.
  27 #
  28 use 5.006;
  29 use strict;
  30 use Exporter;
  31 use Text::ParseWords;
  32 use TextUtil;
  33 use FileUtil;
  34 
  35 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
  36 
  37 $VERSION = '1.00';
  38 @ISA = qw(Exporter);
  39 @EXPORT = qw(GetPDBRecordType GetRecordTypesCount GetAllResidues GetConectRecordLines GetChainsAndResidues GetMinMaxCoords IsPDBFile IsAtomRecordType IsConectRecordType IsHeaderRecordType IsHetatmRecordType IsSeqresRecordType IsModelRecordType IsEndmdlRecordType IsTerRecordType IsMasterRecordType ReadPDBFile ParseHeaderRecordLine GenerateHeaderRecordLine GenerateHeaderRecordTimeStamp ParseAtomRecordLine GenerateAtomRecordLine ParseAtomOrHetatmRecordLine GenerateAtomOrHetatmRecordLine GenerateHetatmRecordLine ParseHetatmRecordLine ParseConectRecordLine GenerateConectRecordLine ParseSeqresRecordLine ParseTerRecordLine GenerateTerRecordLine ParseMasterRecordLine GenerateEndRecordLine);
  40 @EXPORT_OK = qw();
  41 
  42 %EXPORT_TAGS = (all  => [@EXPORT, @EXPORT_OK]);
  43 
  44 # Get PDB record type...
  45 sub GetPDBRecordType {
  46   my($Line) = @_;
  47 
  48   return _GetRecordType($Line);
  49 }
  50 
  51 # Is it a PDB file?
  52 sub IsPDBFile {
  53   my($PDBFile) = @_;
  54   my($Line, $Status);
  55 
  56   $Status = 0;
  57   open PDBFILE, "$PDBFile" or die "Can't open $PDBFile: $!\n";
  58   $Line = GetTextLine(\*PDBFILE);
  59   $Status = ($Line =~ /^HEADER/i) ? 1 : 0;
  60   close PDBFILE;
  61 
  62   return $Status;
  63 }
  64 
  65 # Is it a atom record type?
  66 sub IsAtomRecordType {
  67   my($Line) = @_;
  68 
  69   return _IsRecordType($Line, 'ATOM');
  70 }
  71 
  72 # Is it a connect record type?
  73 sub IsConectRecordType {
  74   my($Line) = @_;
  75 
  76   return _IsRecordType($Line, 'CONECT');
  77 }
  78 
  79 # Is it a header atom record type?
  80 sub IsHeaderRecordType {
  81   my($Line) = @_;
  82 
  83   return _IsRecordType($Line, 'HEADER');
  84 }
  85 
  86 # Is it a hetro atom record type?
  87 sub IsHetatmRecordType {
  88   my($Line) = @_;
  89 
  90   return _IsRecordType($Line, 'HETATM');
  91 }
  92 
  93 # Is it a seqres record type?
  94 sub IsSeqresRecordType {
  95   my($Line) = @_;
  96 
  97   return _IsRecordType($Line, 'SEQRES');
  98 }
  99 
 100 # Is it a MODEL record type?
 101 sub IsModelRecordType {
 102   my($Line) = @_;
 103 
 104   return _IsRecordType($Line, 'MODEL');
 105 }
 106 
 107 # Is it a ENDMDL record type?
 108 sub IsEndmdlRecordType {
 109   my($Line) = @_;
 110 
 111   return _IsRecordType($Line, 'ENDMDL');
 112 }
 113 
 114 # Is it a TER record type?
 115 sub IsTerRecordType {
 116   my($Line) = @_;
 117 
 118   return _IsRecordType($Line, 'TER');
 119 }
 120 
 121 # Is it a MASTER record type?
 122 sub IsMasterRecordType {
 123   my($Line) = @_;
 124 
 125   return _IsRecordType($Line, 'MASTER');
 126 }
 127 
 128 # Count the number of each record type and a reference to data type with these key/value pairs:
 129 # {RecordTypes} - An array of unique record types in order of their presence in the file
 130 # {Count}{$RecordType} - Count of each record type
 131 # {Lines}{$RecordType} - Optional lines data for a specific record type.
 132 #
 133 sub GetRecordTypesCount {
 134   my($PDBRecordLinesRef, $SpecifiedRecordType, $GetRecordLinesFlag, $RecordType, $RecordLine, %RecordTypeDataMap);
 135 
 136   %RecordTypeDataMap = ();
 137   @{$RecordTypeDataMap{RecordTypes}} = ();
 138   %{$RecordTypeDataMap{Count}} = ();
 139   %{$RecordTypeDataMap{Lines}} = ();
 140 
 141   $SpecifiedRecordType = '';
 142   $GetRecordLinesFlag = 0;
 143   if (@_ == 3) {
 144     ($PDBRecordLinesRef, $SpecifiedRecordType, $GetRecordLinesFlag) = @_;
 145     $SpecifiedRecordType = uc $SpecifiedRecordType;
 146   }
 147   elsif (@_ == 2) {
 148     ($PDBRecordLinesRef, $SpecifiedRecordType) = @_;
 149     $SpecifiedRecordType = uc $SpecifiedRecordType;
 150   }
 151   else {
 152     ($PDBRecordLinesRef) = @_;
 153   }
 154   LINE: for $RecordLine (@{$PDBRecordLinesRef}) {
 155     $RecordType = _GetRecordType($RecordLine);
 156     if ($SpecifiedRecordType && ($SpecifiedRecordType ne $RecordType)) {
 157       next LINE;
 158     }
 159     if (exists $RecordTypeDataMap{Count}{$RecordType}) {
 160       # Update count...
 161       $RecordTypeDataMap{Count}{$RecordType} += 1;
 162 
 163       if ($GetRecordLinesFlag) {
 164 	push @{$RecordTypeDataMap{Lines}{$RecordType}}, $RecordLine;
 165       }
 166     }
 167     else {
 168       # New record type...
 169       push @{$RecordTypeDataMap{RecordTypes}}, $RecordType;
 170       $RecordTypeDataMap{Count}{$RecordType} = 1;
 171 
 172       if ($GetRecordLinesFlag) {
 173 	@{$RecordTypeDataMap{Lines}{$RecordType}} = ();
 174 	push @{$RecordTypeDataMap{Lines}{$RecordType}}, $RecordLine;
 175       }
 176     }
 177   }
 178   return (\%RecordTypeDataMap);
 179 }
 180 
 181 # Collect CONECT record lines for specific atom number, modified specified data to exclude any atom
 182 # number not present in the list of specified atom numbers and return a reference to list of
 183 # CONECT record lines.
 184 #
 185 sub GetConectRecordLines {
 186   my($PDBRecordLinesRef, $AtomNumbersMapRef) = @_;
 187   my($AtomNumber, $ConectAtomNumber, $RecordLine, @ConectRecordAtomNums, @ConectRecordLines);
 188 
 189   @ConectRecordLines = ();
 190   LINE: for $RecordLine (@{$PDBRecordLinesRef}) {
 191     if (!IsConectRecordType($RecordLine)) {
 192       next LINE;
 193     }
 194     @ConectRecordAtomNums = ();
 195     push @ConectRecordAtomNums, ParseConectRecordLine($RecordLine);
 196     ATOMNUMBER: for $ConectAtomNumber (@ConectRecordAtomNums) {
 197       if (defined $ConectAtomNumber) {
 198 	$AtomNumber = $ConectAtomNumber;
 199 	if ($AtomNumber) {
 200 	  if (! exists $AtomNumbersMapRef->{$AtomNumber}) {
 201 	    next LINE;
 202 	  }
 203 	}
 204       }
 205     }
 206     push @ConectRecordLines, $RecordLine;
 207   }
 208   return \@ConectRecordLines;
 209 }
 210 
 211 # Get chains and residue information using ATOM/HETATM or SEQRES records. And return a reference to a
 212 #  hash with these keys:
 213 #
 214 # @{$ChainsDataMap{ChainIDs}} - List of chain IDs with 'None' for no chain identification
 215 # @{$ChainsDataMap{Residues}{$ChainID}} - List of residues in order of their appearance in a chain
 216 # %{$ChainsDataMap{ResidueCount}{$ChainID}{$ResidueName}} - Count of specific residues in a chain
 217 #
 218 # Notes:
 219 #  . Chains and residue data can be extacted using either ATOM/HETATM records or SEQRES records.
 220 #  . In addition to a different chain ID in ATOM/HETATM a TER record also indicates end of an existing chain
 221 #    and start of a new one: ChainID in ATOM/HETATM records might still be emtpy.
 222 #   . ATOM/HETATM records after the first ENDMDL records are simply ingnored.
 223 #
 224 sub GetChainsAndResidues {
 225   my($PDBRecordLinesRef, $RecordsSource, $GetChainResiduesBeyondTERFlag, $GetRecordLinesFlag);
 226 
 227   $RecordsSource = 'AtomAndHetatm';
 228   $GetChainResiduesBeyondTERFlag = 0;
 229   $GetRecordLinesFlag = 0;
 230 
 231   if (@_ == 4) {
 232     ($PDBRecordLinesRef, $RecordsSource, $GetChainResiduesBeyondTERFlag, $GetRecordLinesFlag) = @_;
 233   }
 234   elsif (@_ == 3) {
 235     ($PDBRecordLinesRef, $RecordsSource, $GetChainResiduesBeyondTERFlag) = @_;
 236   }
 237   elsif (@_ == 2) {
 238     ($PDBRecordLinesRef, $RecordsSource) = @_;
 239   }
 240   else {
 241     ($PDBRecordLinesRef) = @_;
 242   }
 243 
 244   if ($RecordsSource =~ /^AtomAndHetatm$/i) {
 245     return _GetChainsAndResiduesFromAtomHetatmRecords($PDBRecordLinesRef, $GetChainResiduesBeyondTERFlag, $GetRecordLinesFlag);
 246   }
 247   elsif ($RecordsSource =~ /^Seqres$/i) {
 248     return _GetChainsAndResiduesFromSeqresRecords($PDBRecordLinesRef);
 249   }
 250   else {
 251     my(%ChainsDataMap);
 252     %ChainsDataMap = ();
 253     @{$ChainsDataMap{ChainIDs}} = ();
 254     %{$ChainsDataMap{Residues}} = ();
 255     %{$ChainsDataMap{ResidueCount}} = ();
 256 
 257     return \%ChainsDataMap;
 258   }
 259 }
 260 
 261 
 262 # Get residue information using ATOM/HETATM records and return a reference to a hash with
 263 # these keys:
 264 #
 265 # @{$ResiduesDataMap{ResidueNames}} - List of all the residues
 266 # %{$ResiduesDataMap{ResidueCount}{$ResidueName}} - Count of residues
 267 # @{$ResiduesDataMap{AtomResidueNames}} - List of all the residues
 268 # %{$ResiduesDataMap{AtomResidueCount}{$ResidueName}} - Count of residues in ATOM records
 269 # @{$ResiduesDataMap{HetatomResidueNames}} - List of all the residues
 270 # %{$ResiduesDataMap{HetatmResidueCount}{$ResidueName}} - Count of residues HETATM records
 271 #
 272 # Notes:
 273 #  . ATOM/HETATM records after the first ENDMDL records are simply ingnored.
 274 #
 275 sub GetAllResidues {
 276   my($PDBRecordLinesRef) = @_;
 277 
 278   my($PreviousChainID, $PreviousResidueNumber, $RecordLine, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge, %ResiduesDataMap);
 279 
 280   %ResiduesDataMap = ();
 281   @{$ResiduesDataMap{ResidueNames}} = ();
 282   %{$ResiduesDataMap{ResidueCount}} = ();
 283   @{$ResiduesDataMap{AtomResidueNames}} = ();
 284   %{$ResiduesDataMap{AtomResidueCount}} = ();
 285   @{$ResiduesDataMap{HetatmResidueNames}} = ();
 286   %{$ResiduesDataMap{HetatmResidueCount}} = ();
 287 
 288   $PreviousChainID = '';
 289   $PreviousResidueNumber = 0;
 290 
 291   LINE: for $RecordLine (@{$PDBRecordLinesRef}) {
 292     if (IsEndmdlRecordType($RecordLine)) {
 293       last LINE;
 294     }
 295     if (!(IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine))) {
 296       next LINE;
 297     }
 298     ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseAtomRecordLine($RecordLine);
 299 
 300     if ($PreviousChainID eq $ChainID) {
 301       if ($ResidueNumber == $PreviousResidueNumber) {
 302 	next LINE;
 303       }
 304       $PreviousResidueNumber = $ResidueNumber;
 305     }
 306     else {
 307       # New chain...
 308       $PreviousChainID = $ChainID;
 309       $PreviousResidueNumber = $ResidueNumber;
 310     }
 311 
 312     # Store the residue and update its count...
 313     push @{$ResiduesDataMap{ResidueNames}}, $ResidueName;
 314     if (exists $ResiduesDataMap{ResidueCount}{$ResidueName}) {
 315       $ResiduesDataMap{ResidueCount}{$ResidueName} += 1;
 316     }
 317     else {
 318       $ResiduesDataMap{ResidueCount}{$ResidueName} = 1;
 319     }
 320     # Update ATOM residue data...
 321     if (IsAtomRecordType($RecordLine)) {
 322       push @{$ResiduesDataMap{AtomResidueNames}}, $ResidueName;
 323       if (exists $ResiduesDataMap{AtomResidueCount}{$ResidueName}) {
 324 	$ResiduesDataMap{AtomResidueCount}{$ResidueName} += 1;
 325       }
 326       else {
 327 	$ResiduesDataMap{AtomResidueCount}{$ResidueName} = 1;
 328       }
 329     }
 330     # Update HETATM residue data...
 331     if (IsHetatmRecordType($RecordLine)) {
 332       push @{$ResiduesDataMap{HetatmResidueNames}}, $ResidueName;
 333       if (exists $ResiduesDataMap{HetatmResidueCount}{$ResidueName}) {
 334 	$ResiduesDataMap{HetatmResidueCount}{$ResidueName} += 1;
 335       }
 336       else {
 337 	$ResiduesDataMap{HetatmResidueCount}{$ResidueName} = 1;
 338       }
 339     }
 340   }
 341 
 342   return \%ResiduesDataMap;
 343 }
 344 
 345 # Return min/max XYZ coordinates for ATOM/HETATM records...
 346 sub GetMinMaxCoords {
 347   my($PDBRecordLinesRef) = @_;
 348 
 349   my($XMin, $YMin, $ZMin, $XMax, $YMax, $ZMax, $RecordLine, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge);
 350 
 351   ($XMin, $YMin, $ZMin) = (99999) x 3;
 352   ($XMax, $YMax, $ZMax) = (-99999) x 3;
 353 
 354   LINE: for $RecordLine (@{$PDBRecordLinesRef}) {
 355     if (!(IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine))) {
 356       next LINE;
 357     }
 358     ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseAtomRecordLine($RecordLine);
 359 
 360     $XMin = ($X < $XMin) ? $X : $XMin;
 361     $YMin = ($Y < $YMin) ? $Y : $YMin;
 362     $ZMin = ($Z < $ZMin) ? $Z : $ZMin;
 363 
 364     $XMax = ($X > $XMax) ? $X : $XMax;
 365     $YMax = ($Y > $YMax) ? $Y : $YMax;
 366     $ZMax = ($Z > $ZMax) ? $Z : $ZMax;
 367   }
 368 
 369   if ($XMin == 99999) { $XMin = undef; }
 370   if ($YMin == 99999) { $YMin = undef; }
 371   if ($ZMin == 99999) { $ZMin = undef; }
 372   if ($XMax == -99999) { $XMax = undef; }
 373   if ($YMax == -99999) { $YMax = undef; }
 374   if ($ZMax == -99999) { $ZMax = undef; }
 375 
 376   return ($XMin, $YMin, $ZMin, $XMax, $YMax, $ZMax);
 377 }
 378 
 379 # Read PDB file and return reference to record lines..
 380 sub ReadPDBFile {
 381   my($PDBFile) = @_;
 382 
 383   my($Line, @PDBRecordLines);
 384 
 385   @PDBRecordLines = ();
 386   open PDBFILE, "$PDBFile" or die "Can't open $PDBFile: $!\n";
 387   while ($Line = GetTextLine(\*PDBFILE)) {
 388     push @PDBRecordLines, $Line;
 389   }
 390 
 391   close PDBFILE;
 392 
 393   return (\@PDBRecordLines);
 394 }
 395 
 396 #
 397 # Parse HEADER record line...
 398 sub ParseHeaderRecordLine {
 399   my($Line) = @_;
 400   my($Classification, $DepositionDate, $IDCode) = (undef, undef, undef);
 401 
 402   if ($Line !~ /^HEADER/i) {
 403     return ($Classification, $DepositionDate, $IDCode);
 404   }
 405   ($Classification, $DepositionDate, $IDCode) = unpack("x10A40A9x3A4", $Line);
 406   $Classification = RemoveLeadingAndTrailingWhiteSpaces($Classification);
 407   $DepositionDate =~ s/ //g;
 408   $IDCode =~ s/ //g;
 409 
 410   return ($Classification, $DepositionDate, $IDCode);
 411 }
 412 
 413 #
 414 # Generate HEADER record line...
 415 sub GenerateHeaderRecordLine {
 416   my($Classification, $Date, $IDCode, $Line);
 417 
 418   $Classification = "Created using MayaChemTools";
 419   $Date = GenerateHeaderRecordTimeStamp();
 420   if (@_ == 3) {
 421     ($IDCode, $Classification, $Date) = @_;
 422   }
 423   elsif (@_ == 2) {
 424     ($IDCode, $Classification) = @_;
 425   }
 426   elsif (@_ == 1) {
 427     ($IDCode) = @_;
 428   }
 429 
 430   $Line = sprintf "HEADER    %-40.40s%9.9s   %4.4s", $Classification, $Date, $IDCode;
 431   return $Line;
 432 }
 433 
 434 # Generate PDB header time stamp...
 435 sub GenerateHeaderRecordTimeStamp {
 436   my($Date, $Sec, $Min, $Hour, $MDay, $Mon, $Year, $WDay, $YDay, $IsDst);
 437   my($MonthName, %MonthNameHash);
 438 
 439   %MonthNameHash = (1 => 'JAN', 2 => 'FEB', 3 => 'MAR', 4 => 'APR', 5 => 'MAY', 6 => 'JUN', 7 => 'JUL', 8 => 'AUG', 9 => 'SEP', 10 => 'OCT', 11 => 'NOV', 12 => 'DEC');
 440 
 441   ($Sec, $Min, $Hour, $MDay, $Mon, $Year, $WDay, $YDay, $IsDst) = localtime;
 442 
 443   $Mon += 1;
 444   $MonthName = $MonthNameHash{$Mon};
 445 
 446   $Year = substr($Year, -1, 2);
 447 
 448   $Date = sprintf "%2i-%3s-%2i", $MDay, $MonthName, $Year;
 449   $Date =~ s/ /0/g;
 450 
 451   return $Date;
 452 }
 453 
 454 #
 455 # Parse ATOM record line.
 456 #
 457 sub ParseAtomRecordLine {
 458   my($Line) = @_;
 459 
 460   return _ParseAtomOrHetatmRecordLine($Line);
 461 }
 462 
 463 # Generate ATOM record line...
 464 sub GenerateAtomRecordLine {
 465   my($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = @_;
 466 
 467   return _GenerateAtomOrHetatmRecordLine('ATOM', $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge);
 468 }
 469 
 470 #
 471 # Parse ATOM/HETATm record line.
 472 #
 473 sub ParseAtomOrHetatmRecordLine {
 474   my($Line) = @_;
 475 
 476   return _ParseAtomOrHetatmRecordLine($Line);
 477 }
 478 
 479 # Generate ATOM/HETATM record line...
 480 sub GenerateAtomOrHetatmRecordLine {
 481   my($RecordType, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = @_;
 482 
 483   return _GenerateAtomOrHetatmRecordLine($RecordType, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge);
 484 }
 485 #
 486 # Parse HETATM record line...
 487 #
 488 sub ParseHetatmRecordLine {
 489   my($Line) = @_;
 490 
 491   return _ParseAtomOrHetatmRecordLine($Line);
 492 }
 493 
 494 # Generate HETATM record line...
 495 sub GenerateHetatmRecordLine {
 496   my($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = @_;
 497 
 498   return _GenerateAtomOrHetatmRecordLine('HETATM', $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge);
 499 }
 500 #
 501 # Parse SEQRES record line...
 502 #
 503 # SEQRES format:
 504 #
 505 # 1 - 6 Record name "SEQRES"
 506 # 9 - 10 Serial number of the SEQRES record for the current chain. Starts at 1 and increments by one each line. Reset to 1 for each chain.
 507 # 12 - Chain identifier
 508 # 14 - 17 Integer numRes Number of residues in the chain
 509 # 20 - 22 24 -26 ... ... 68 - 70 Residue name resName Residue name.
 510 #
 511 sub ParseSeqresRecordLine {
 512   my($Line) = @_;
 513 
 514   if ($Line !~ /^SEQRES/i) {
 515     return ((undef) x 5);
 516   }
 517   my($RecordSerialNumber, $ChainID, $NumOfResidues, $ResidueNames) = unpack("x8A2x1A1x1A4x2A51" , $Line);
 518   $RecordSerialNumber =~ s/ //g;
 519   $ChainID =~ s/ //g;
 520   $NumOfResidues =~ s/ //g;
 521   $ResidueNames = RemoveLeadingAndTrailingWhiteSpaces($ResidueNames);
 522 
 523   return ($RecordSerialNumber, $ChainID, $NumOfResidues, $ResidueNames);
 524 }
 525 
 526 #
 527 # Parse CONECT record line...
 528 #
 529 # CONECT format:
 530 #
 531 # 1 - 6 Record name "CONECT"
 532 # 7 - 11 Atom number
 533 # 12 - 16, 17 - 21, 22 - 26, 27 - 31 Atom number of bonded atom
 534 #
 535 # 32 - 36, 37 - 41 Atom number of hydrogen bonded atom
 536 # 42 - 46 Atom number of salt bridged atom
 537 # 47 - 51, 52 -56 Atom number of hydrogen bonded atom
 538 # 57 - 61 Atom number of salt bridged atom
 539 #
 540 sub ParseConectRecordLine {
 541   my($Line) = @_;
 542 
 543   if ($Line !~ /^CONECT/i) {
 544     return ((undef) x 11);
 545   }
 546   my($AtomNum, $BondedAtomNum1, $BondedAtomNum2, $BondedAtomNum3, $BondedAtomNum4, $HBondedAtomNum1, $HBondedAtomNum2, $SaltBridgedAtomNum1, $HBondedAtomNum3, $HBondedAtomNum4, $SaltBridgedAtomNum2) = map {s/ //g; $_} unpack("x6A5A5A5A5A5A5A5A5A5A5A5", $Line);
 547 
 548   return ($AtomNum, $BondedAtomNum1, $BondedAtomNum2, $BondedAtomNum3, $BondedAtomNum4, $HBondedAtomNum1, $HBondedAtomNum2, $SaltBridgedAtomNum1, $HBondedAtomNum3, $HBondedAtomNum4, $SaltBridgedAtomNum2);
 549 }
 550 
 551 # Generate CONECT record line...
 552 sub GenerateConectRecordLine {
 553   my($AtomNum, $BondedAtomNum1, $BondedAtomNum2, $BondedAtomNum3, $BondedAtomNum4, $HBondedAtomNum1, $HBondedAtomNum2, $SaltBridgedAtomNum1, $HBondedAtomNum3, $HBondedAtomNum4, $SaltBridgedAtomNum2) = @_;
 554   my($Line);
 555 
 556   $Line = sprintf "CONECT%5.5s%5.5s%5.5s%5.5s%5.5s%5.5s%5.5s%5.5s%5.5s%5.5s%5.5s", $AtomNum, $BondedAtomNum1, $BondedAtomNum2, $BondedAtomNum3, $BondedAtomNum4, $HBondedAtomNum1, $HBondedAtomNum2, $SaltBridgedAtomNum1, $HBondedAtomNum3, $HBondedAtomNum4, $SaltBridgedAtomNum2;
 557 
 558   return $Line;
 559 }
 560 
 561 #
 562 # Parse TER record line...
 563 #
 564 # TER format:
 565 #
 566 #1 - 6 Record name "TER "
 567 # 7 - 11 Serial number
 568 # 18 - 20 Residue name
 569 # 22 Chain identifier
 570 # 23 - 26 Residue sequence number
 571 # 27 Insertion code
 572 #
 573 sub ParseTerRecordLine {
 574   my($Line) = @_;
 575 
 576   if ($Line !~ /^TER/i) {
 577     return ((undef) x 5);
 578   }
 579   my($SerialNumber, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode) = map {s/ //g; $_} unpack("x6A5x6A3xA1A4A1", $Line);
 580 
 581   return ($SerialNumber, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode);
 582 }
 583 
 584 # Generate TER record line...
 585 sub GenerateTerRecordLine {
 586   my($SerialNumber, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $Line) = ('') x 6;
 587 
 588   if (@_ == 5) {
 589     ($SerialNumber, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode) = @_;
 590   }
 591   elsif (@_ == 4) {
 592     ($SerialNumber, $ResidueName, $ChainID, $ResidueNumber) = @_;
 593   }
 594   elsif (@_ == 3) {
 595     ($SerialNumber, $ResidueName) = @_;
 596   }
 597   elsif (@_ == 2) {
 598     ($SerialNumber, $ResidueName) = @_;
 599   }
 600   elsif (@_ == 1) {
 601     ($SerialNumber) = @_;
 602   }
 603   $Line = sprintf "TER   %5.5s      %-3.3s %1.1s%4.4s%1.1s", $SerialNumber, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode;
 604 
 605   return $Line;
 606 }
 607 
 608 #
 609 # Parse MASTER record line...
 610 #
 611 # MASTER record format:
 612 #
 613 #1 - 6 Record name "MASTER"
 614 # 11 - 15 Number of REMARK records
 615 # 16 - 20 "0"
 616 # 21 - 25 Number of HET records
 617 # 26 - 30 Number of HELIX records
 618 # 31 - 35 Number of SHEET records
 619 # 36 - 40 Number of TURN records
 620 # 41 - 45 Number of SITE records
 621 # 46 - 50 Number of coordinate transformation records (ORIGXn+SCALEn+MTRIXn)
 622 # 51 - 55 Number of atomic coordinate records (ATOM+HETATM)
 623 # 56 - 60 Number of TER records
 624 # 61 - 65 Number of CONECT records
 625 # 66 - 70 Number of SEQRES records
 626 #
 627 sub ParseMasterRecordLine {
 628   my($Line) = @_;
 629 
 630   if ($Line !~ /^MASTER/i) {
 631     return ((undef) x 11);
 632   }
 633   my($NumOfRemarkRecords, $NumOfHetRecords, $NumOfHelixRecords, $NumOfSheetRecords, $NumOfTurnRecords, $NumOfSiteRecords, $NumOfTransformationsRecords, $NumOfAtomAndHetatmRecords, $NumOfTerRecords, $NumOfConectRecords, $NumOfSeqresRecords) = map {s/ //g; $_} unpack("x6x4A5x5A5A5A5A5A5A5A5A5A5A5", $Line);
 634 
 635   return ($NumOfRemarkRecords, $NumOfHetRecords, $NumOfHelixRecords, $NumOfSheetRecords, $NumOfTurnRecords, $NumOfSiteRecords, $NumOfTransformationsRecords, $NumOfAtomAndHetatmRecords, $NumOfTerRecords, $NumOfConectRecords, $NumOfSeqresRecords);
 636 }
 637 
 638 # End record...
 639 sub GenerateEndRecordLine {
 640   my($Line);
 641   $Line = 'END   ';
 642   return $Line;
 643 }
 644 
 645 # ATOM/HETATM record format:
 646 #
 647 # 1 - 6 Record name
 648 # 7 - 11  Atom serial number - right justified
 649 # 13 - 16 Atom name
 650 # 17 Alternate location indicator.
 651 # 18 - 20 Residue name - right justified
 652 # 22 Chain identifier.
 653 # 23 - 26 Residue sequence number - right justified
 654 # 27 Code for insertion of residues.
 655 # 31 - 38 Real(8.3), Orthogonal coordinates for X in Angstroms.
 656 # 39 - 46 Real(8.3), Orthogonal coordinates for Y in Angstroms.
 657 # 47 - 54 Real(8.3), Orthogonal coordinates for Z in Angstroms.
 658 # 55 - 60 Real(6.2), Occupancy
 659 # 61 - 66 Real(6.2), Temperature factor
 660 # 73 - 76 LString(4), Segment identifier, left-justified.
 661 # 77 - 78 LString(2), Element symbol, right-justified.
 662 #79 - 80 LString(2), Charge on the atom.
 663 #
 664 # Notes:
 665 #  . Atom names starting with C, N, O and S are left justified starting with column 14
 666 #    and others are left justified starting with column 13.
 667 #
 668 sub _ParseAtomOrHetatmRecordLine {
 669   my($Line) = @_;
 670 
 671   if ($Line !~ /^(ATOM|HETATM)/i) {
 672     return ((undef) x 15);
 673   }
 674   my($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = map {s/ //g; $_} unpack("x6A5xA4A1A3xA1A4A1x3A8A8A8A6A6x6A4A2A2", $Line);
 675 
 676   return($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge);
 677 }
 678 
 679 # Generate ATOM/HETATM record line...
 680 sub _GenerateAtomOrHetatmRecordLine {
 681   my($RecordType, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = @_;
 682   my($Line, $AtomNameFormat);
 683 
 684   if ($AtomName =~ /^(C|N|O|S)/i) {
 685     # Left justified starting at column 14...
 686     $AtomNameFormat = " %-3.3s";
 687   }
 688   else {
 689     $AtomNameFormat = "%-4.4s";
 690   }
 691   $Line = sprintf "%-6.6s%5.5s ${AtomNameFormat}%1.1s%3.3s %1.1s%4.4s%1.1s   %8.8s%8.8s%8.8s%6.6s%6.6s      %-4.4s%2.2s%2.2s", $RecordType, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge;
 692 
 693   return $Line;
 694 }
 695 
 696 # Check record type...
 697 sub _IsRecordType {
 698   my($Line, $SpecifiedType) = @_;
 699   my($Type, $Status);
 700 
 701   ($Type) = map {s/ //g; $_} unpack("A6", $Line);
 702 
 703   $Status = ($SpecifiedType eq $Type) ? 1 : 0;
 704 
 705   return $Status;
 706 }
 707 
 708 # Get record type...
 709 sub _GetRecordType {
 710   my($Line) = @_;
 711   my($Type);
 712 
 713   ($Type) = map {s/ //g; $_} unpack("A6", $Line);
 714 
 715   return $Type;
 716 }
 717 
 718 # Get chains and residues data using ATOM/HETATM records...
 719 #
 720 sub _GetChainsAndResiduesFromAtomHetatmRecords {
 721   my($PDBRecordLinesRef, $GetChainResiduesBeyondTERFlag, $GetRecordLinesFlag) = @_;
 722 
 723   my($LineCount, $TotalChainCount, $PreviousResidueNumber, $ChainCount, $DefaultChainID, $DefaultChainLabel, $RecordLine, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge, %ChainsDataMap);
 724 
 725   # Do a quick chain count using TER record...
 726   $TotalChainCount = 0;
 727   LINE: for $RecordLine (@{$PDBRecordLinesRef}) {
 728     if (IsEndmdlRecordType($RecordLine)) {
 729       last LINE;
 730     }
 731     if (IsTerRecordType($RecordLine)) {
 732       $TotalChainCount++;
 733     }
 734   }
 735 
 736   %ChainsDataMap = ();
 737   @{$ChainsDataMap{ChainIDs}} = ();
 738   %{$ChainsDataMap{Residues}} = ();
 739   %{$ChainsDataMap{Lines}} = ();
 740   %{$ChainsDataMap{ResidueCount}} = ();
 741 
 742   $LineCount = 0;
 743   $ChainCount = 0;
 744   $DefaultChainLabel = 'None';
 745   $DefaultChainID = $DefaultChainLabel . ($ChainCount + 1);
 746   $PreviousResidueNumber = 0;
 747 
 748   LINE: for $RecordLine (@{$PDBRecordLinesRef}) {
 749       $LineCount++;
 750       if (IsTerRecordType($RecordLine)) {
 751 	$DefaultChainID = $DefaultChainLabel . ($ChainCount + 1);
 752 	$ChainCount++;
 753 	if ($ChainCount == $TotalChainCount) {
 754 	  last LINE;
 755 	}
 756 	else {
 757 	  next LINE;
 758 	}
 759       }
 760       elsif (!(IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine))) {
 761 	next LINE;
 762       }
 763       ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseAtomRecordLine($RecordLine);
 764 
 765       if (IsEmpty($ChainID)) {
 766 	$ChainID = $DefaultChainID;
 767       }
 768       if (exists $ChainsDataMap{Residues}{$ChainID}) {
 769 	# Data for existing chain...
 770 	if ($GetRecordLinesFlag) {
 771 	  push @{$ChainsDataMap{Lines}{$ChainID}}, $RecordLine;
 772 	}
 773 
 774 	if ($ResidueNumber != $PreviousResidueNumber) {
 775 	  # Next residue with in the chain...
 776 	  push @{$ChainsDataMap{Residues}{$ChainID}}, $ResidueName;
 777 
 778 	  if (exists $ChainsDataMap{ResidueCount}{$ChainID}{$ResidueName}) {
 779 	    $ChainsDataMap{ResidueCount}{$ChainID}{$ResidueName} += 1;
 780 	  }
 781 	  else {
 782 	    $ChainsDataMap{ResidueCount}{$ChainID}{$ResidueName} = 1;
 783 	  }
 784 	  $PreviousResidueNumber = $ResidueNumber;
 785 	}
 786       }
 787       else {
 788 	# Data for new chain...
 789 	push @{$ChainsDataMap{ChainIDs}}, $ChainID;
 790 
 791 	@{$ChainsDataMap{Residues}{$ChainID}} = ();
 792 	push @{$ChainsDataMap{Residues}{$ChainID}}, $ResidueName;
 793 
 794 	@{$ChainsDataMap{Lines}{$ChainID}} = ();
 795 	if ($GetRecordLinesFlag) {
 796 	  push @{$ChainsDataMap{Lines}{$ChainID}}, $RecordLine;
 797 	}
 798 
 799 	%{$ChainsDataMap{ResidueCount}{$ChainID}} = ();
 800 	$ChainsDataMap{ResidueCount}{$ChainID}{$ResidueName} = 1;
 801 	$PreviousResidueNumber = $ResidueNumber;
 802       }
 803   }
 804   if (!$GetChainResiduesBeyondTERFlag) {
 805     return \%ChainsDataMap;
 806   }
 807   # Look for any HETATM residues specified outside TER records which could belong to an existing chain...
 808   my($LineIndex, $PreviousChainID);
 809   $PreviousChainID = '';
 810   $PreviousResidueNumber = 0;
 811   LINE: for $LineIndex (($LineCount - 1) .. $#{$PDBRecordLinesRef}) {
 812     $RecordLine = $PDBRecordLinesRef->[$LineIndex];
 813     if (IsEndmdlRecordType($RecordLine)) {
 814       last LINE;
 815     }
 816     if (!(IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine))) {
 817       next LINE;
 818     }
 819     ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseAtomRecordLine($RecordLine);
 820     if (IsEmpty($ChainID)) {
 821       # Ignore the chains with no ids...
 822       next LINE;
 823     }
 824     if (! exists($ChainsDataMap{Residues}{$ChainID})) {
 825       # Don't collect any new chains after TER record...
 826       next LINE;
 827     }
 828     if ($GetRecordLinesFlag) {
 829       push @{$ChainsDataMap{Lines}{$ChainID}}, $RecordLine;
 830     }
 831     if ($ResidueNumber != $PreviousResidueNumber || $ChainID ne $PreviousChainID) {
 832 
 833       push @{$ChainsDataMap{Residues}{$ChainID}}, $ResidueName;
 834 
 835       if (exists $ChainsDataMap{ResidueCount}{$ChainID}{$ResidueName}) {
 836 	$ChainsDataMap{ResidueCount}{$ChainID}{$ResidueName} += 1;
 837       }
 838       else {
 839 	$ChainsDataMap{ResidueCount}{$ChainID}{$ResidueName} = 1;
 840       }
 841       $PreviousChainID = $ChainID;
 842       $PreviousResidueNumber = $ResidueNumber;
 843     }
 844   }
 845   return \%ChainsDataMap;
 846 }
 847 
 848 # Get chains and residues data using SEQRES records...
 849 #
 850 sub _GetChainsAndResiduesFromSeqresRecords {
 851   my($PDBRecordLinesRef) = @_;
 852 
 853   my($ChainCount, $DefaultChainLabel, $DefaultChainID, $RecordLine, $RecordSerialNumber, $ChainID, $NumOfResidues, $ResidueName, $ResidueNamesString, @ResidueNamesList, %ChainsDataMap);
 854 
 855   %ChainsDataMap = ();
 856   @{$ChainsDataMap{ChainIDs}} = ();
 857   %{$ChainsDataMap{Residues}} = ();
 858   %{$ChainsDataMap{ResidueCount}} = ();
 859 
 860   $ChainCount = 0;
 861   $DefaultChainLabel = 'None';
 862   $DefaultChainID = $DefaultChainLabel . ($ChainCount + 1);
 863 
 864   LINE: for $RecordLine (@{$PDBRecordLinesRef}) {
 865       if (!IsSeqresRecordType($RecordLine)) {
 866 	next LINE;
 867       }
 868       ($RecordSerialNumber, $ChainID, $NumOfResidues, $ResidueNamesString) = ParseSeqresRecordLine($RecordLine);
 869       if ($RecordSerialNumber == 1) {
 870 	# Indicates start of a new chain...
 871 	$DefaultChainID = $DefaultChainLabel . ($ChainCount + 1);
 872 	$ChainCount++;
 873       }
 874       if (IsEmpty($ChainID)) {
 875 	$ChainID = $DefaultChainID;
 876       }
 877       # Process the residues...
 878       @ResidueNamesList = split /[ ]+/, $ResidueNamesString;
 879 
 880       if (exists $ChainsDataMap{Residues}{$ChainID}) {
 881 	# Data for existing chain...
 882 	push @{$ChainsDataMap{Residues}{$ChainID}}, @ResidueNamesList;
 883       }
 884       else {
 885 	# Data for new chain...
 886 	push @{$ChainsDataMap{ChainIDs}}, $ChainID;
 887 	@{$ChainsDataMap{Residues}{$ChainID}} = ();
 888 	push @{$ChainsDataMap{Residues}{$ChainID}}, @ResidueNamesList;
 889       }
 890 
 891       # Setup residue count...
 892       for $ResidueName (@ResidueNamesList) {
 893 	if (exists $ChainsDataMap{ResidueCount}{$ChainID}{$ResidueName}) {
 894 	  $ChainsDataMap{ResidueCount}{$ChainID}{$ResidueName} += 1;
 895 	}
 896 	else {
 897 	  $ChainsDataMap{ResidueCount}{$ChainID}{$ResidueName} = 1;
 898 	}
 899       }
 900   }
 901   return \%ChainsDataMap;
 902 }
 903