#!/usr/bin/perl -w
#
# $RCSfile: InfoPDBFiles.pl,v $
# $Date: 2008/02/24 18:13:55 $
# $Revision: 1.21 $
#
# Author: Manish Sud <msud@san.rr.com>
#
# Copyright (C) 2004-2008 Manish Sud. All rights reserved.
#
# This file is part of MayaChemTools.
#
# MayaChemTools is free software; you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation; either version 3 of the License, or (at your option) any
# later version.
#
# MayaChemTools is distributed in the hope that it will be useful, but without
# any warranty; without even the implied warranty of merchantability of fitness
# for a particular purpose. See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
# write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
# Boston, MA, 02111-1307, USA.
#

use 5.006;
use strict;
use FindBin; use lib "$FindBin::Bin/../lib";
use Getopt::Long;
use File::Basename;
use Text::ParseWords;
use Benchmark;
use FileUtil;
use TextUtil;
use PDBFileUtil;

my($ScriptName, %Options, $StartTime, $EndTime, $TotalTime);

# Autoflush STDOUT
$| = 1;

# Starting message...
$ScriptName = basename($0);
print "\n$ScriptName: Starting...\n\n";
$StartTime = Benchmark->new;

# Get the options and setup script...
SetupScriptUsage();
if ($Options{help} || @ARGV < 1) {
  die GetUsageFromPod("$FindBin::Bin/$ScriptName");
}

my(@PDBFilesList);
@PDBFilesList = ExpandFileNames(\@ARGV, "pdb");

# Process options...
my(%OptionsInfo);
ProcessOptions();

# Setup information about input files...
my(%PDBFilesInfo);
RetrievePDBFilesInfo();

# Process input files..
my($FileIndex, $PDBFile, $FileProcessingMsg);
$FileProcessingMsg = "Processing file";
if (@PDBFilesList > 1) {
  print "Processing PDB files...\n";
  $FileProcessingMsg = "\n$FileProcessingMsg";
}

# Only files flagged as okay by RetrievePDBFilesInfo are reported on...
for $FileIndex (0 .. $#PDBFilesList) {
  if ($PDBFilesInfo{FileOkay}[$FileIndex]) {
    $PDBFile = $PDBFilesList[$FileIndex];
    print "$FileProcessingMsg $PDBFile...\n";
    ListPDBFileInfo($FileIndex);
  }
}

ListTotalSizeOfFiles();

print "$ScriptName:Done...\n\n";

$EndTime = Benchmark->new;
$TotalTime = timediff ($EndTime, $StartTime);
print "Total time: ", timestr($TotalTime), "\n";

###############################################################################

# List appropriate information...
#
# Reads the PDB file at position $Index in @PDBFilesList and prints, in order:
# optional header information, the total number of records, record type
# counts, whichever chain, residue and bounding box reports are switched on in
# %OptionsInfo, and finally the file size and modification time cached in
# %PDBFilesInfo for the same index.
sub ListPDBFileInfo {
  my($Index) = @_;
  my($PDBFile, $PDBRecordLinesRef);

  $PDBFile = $PDBFilesList[$Index];
  $PDBRecordLinesRef = ReadPDBFile($PDBFile);

  # Header informaton...
  if ($OptionsInfo{ListHeaderInfo}) {
    ListHeaderInfo($PDBRecordLinesRef);
  }

  # Total number of records...
  my($TotalRecordsCount) = scalar @{$PDBRecordLinesRef};
  print "\nTotal number of records: $TotalRecordsCount\n";

  # List record type count information...
  ListRecordTypesInfo($PDBRecordLinesRef);

  if ($OptionsInfo{CountChains} || $OptionsInfo{CountResiduesInChains} || $OptionsInfo{ResiduesFrequencyInChains}) {
    ListChainsAndResiduesInfo($PDBRecordLinesRef);
  }
  if ($OptionsInfo{CountResiduesAll} || $OptionsInfo{ResiduesFrequencyAll}) {
    ListAllResiduesInfo($PDBRecordLinesRef);
  }
  if ($OptionsInfo{CalculateBoundingBox}) {
    ListBoundingBox($PDBRecordLinesRef);
  }

  # File size and modification information...
  print "\nFile size: ", FormatFileSize($PDBFilesInfo{FileSize}[$Index]), " \n";
  print "Last modified: ", $PDBFilesInfo{FileLastModified}[$Index], " \n";
}

# List classification, ID code and deposition date from the header record...
#
# Only the first record line is examined. Any value which is not available,
# including the case of a file without any records, is reported as
# 'Not available'.
sub ListHeaderInfo {
  my($PDBRecordLinesRef) = @_;
  my($HeaderRecordLine, $Classification, $DepositionDate, $IDCode);

  ($Classification, $DepositionDate, $IDCode) = (undef) x 3;
  $HeaderRecordLine = $PDBRecordLinesRef->[0];

  # An empty file has no first record; don't hand undef to the record type check...
  if (defined($HeaderRecordLine) && IsHeaderRecordType($HeaderRecordLine)) {
    ($Classification, $DepositionDate, $IDCode) = ParseHeaderRecordLine($HeaderRecordLine);
  }

  $Classification = IsEmpty($Classification) ? 'Not available' : $Classification;
  $DepositionDate = IsEmpty($DepositionDate) ? 'Not available' : $DepositionDate;
  $IDCode = IsEmpty($IDCode) ? 'Not available' : $IDCode;

  print "\nClassification: $Classification\nID: $IDCode\nDeposition date: $DepositionDate\n";
}

# List record type info...
#
# Prints "<RecordType> - <Count>" pairs either for all record types found in
# the file or for the record types specified on the command line; a specified
# record type missing from the file is listed with a count of 0. MASTER record
# validation is performed afterwards when it has been requested.
sub ListRecordTypesInfo {
  my($PDBRecordLinesRef) = @_;
  my($RecordType, $RecordCount, $RecordTypesCountRef, @RecordTypeCountInfo);

  $RecordTypesCountRef = GetRecordTypesCount($PDBRecordLinesRef);

  @RecordTypeCountInfo = ();
  if ($OptionsInfo{CountRecordType} =~ /^All$/i) {
    for $RecordType (@{$RecordTypesCountRef->{RecordTypes}}) {
      $RecordCount = $RecordTypesCountRef->{Count}{$RecordType};
      push @RecordTypeCountInfo, "$RecordType - $RecordCount";
    }
  }
  else {
    for $RecordType (@{$OptionsInfo{SpecifiedRecordTypes}}) {
      $RecordCount = (exists $RecordTypesCountRef->{Count}{$RecordType}) ? ($RecordTypesCountRef->{Count}{$RecordType}) : 0;
      push @RecordTypeCountInfo, "$RecordType - $RecordCount";
    }
  }
  print "Number of individual records: ", JoinWords(\@RecordTypeCountInfo, '; ', 0), "\n";

  if ($OptionsInfo{CheckMasterRecord}) {
    CheckMasterRecord($RecordTypesCountRef, $PDBRecordLinesRef);
  }
}

# List information about residues and chains...
sub ListChainsAndResiduesInfo {
  # Prints, depending on the flags set in %OptionsInfo: the number and IDs of
  # chains, the number (and at detail level 3 or higher, the names) of residues
  # in each chain, and the distribution of residue names per chain and across
  # all chains.
  #
  # Chain IDs are always passed through CleanupChainID before being printed.
  my($PDBRecordLinesRef) = @_;
  my($ChainCount, $CollectChainResiduesBeyondTER, $ChainsAndResiduesInfoRef);

  $CollectChainResiduesBeyondTER = 1;
  $ChainsAndResiduesInfoRef = GetChainsAndResidues($PDBRecordLinesRef, 'AtomAndHetatm', $CollectChainResiduesBeyondTER);
  $ChainCount = @{$ChainsAndResiduesInfoRef->{ChainIDs}};

  if ($OptionsInfo{CountChains}) {
    my(@ChainIDsList) = map { CleanupChainID($_) } @{$ChainsAndResiduesInfoRef->{ChainIDs}};
    print "\nNumber of chains: $ChainCount \n";
    print "Chain IDs: ", JoinWords(\@ChainIDsList, ', ', 0),"\n";
  }
  if ($OptionsInfo{CountResiduesInChains}) {
    _ListResiduesCountInChains($ChainsAndResiduesInfoRef, $ChainCount);
  }
  if ($OptionsInfo{ResiduesFrequencyInChains}) {
    _ListResiduesFrequencyInChains($ChainsAndResiduesInfoRef, $ChainCount);
  }
}

# Print number of residues in each chain and, at detail level 3 or higher, the
# list of residues in each chain...
sub _ListResiduesCountInChains {
  my($ChainsAndResiduesInfoRef, $ChainCount) = @_;
  my($ChainID, $ResidueCount, $TotalResiduesCount, @ResiduesCountInfo);

  @ResiduesCountInfo = ();
  $TotalResiduesCount = 0;
  for $ChainID (@{$ChainsAndResiduesInfoRef->{ChainIDs}}) {
    $ResidueCount = @{$ChainsAndResiduesInfoRef->{Residues}{$ChainID}};
    $TotalResiduesCount += $ResidueCount;
    # Use the display form of the chain ID, like all the other chain labels...
    push @ResiduesCountInfo, "Chain " . CleanupChainID($ChainID) . " - ${ResidueCount}";
  }
  print "\nNumber of residues in chain(s): ";
  if ($ChainCount > 1) {
    print "Total - $TotalResiduesCount; ", JoinWords(\@ResiduesCountInfo, '; ', 0),"\n";
  }
  else {
    print "$TotalResiduesCount\n";
  }

  # List of residues in each chain...
  if ($OptionsInfo{DetailLevel} >= 3) {
    print "List of residues in chain(s): \n";
    for $ChainID (@{$ChainsAndResiduesInfoRef->{ChainIDs}}) {
      if ($ChainCount > 1) {
        print "Chain ", CleanupChainID($ChainID), ": ";
      }
      print JoinWords(\@{$ChainsAndResiduesInfoRef->{Residues}{$ChainID}}, ', ', 0),"\n";
    }
  }
}

# Print distribution of residue names for each chain and, for multiple chains,
# across all the chains. Percent values are listed at detail level 2 or higher...
sub _ListResiduesFrequencyInChains {
  my($ChainsAndResiduesInfoRef, $ChainCount) = @_;
  my($ChainID, $ResidueName, $ChainResidueCount, $TotalResidueCount, %AllResiduesNameToCountMap, %FrequencyMap, %PercentFrequencyMap);

  %AllResiduesNameToCountMap = ();
  %FrequencyMap = ();
  %PercentFrequencyMap = ();
  $TotalResidueCount = 0;

  for $ChainID (@{$ChainsAndResiduesInfoRef->{ChainIDs}}) {
    my($ResidueCountRef) = $ChainsAndResiduesInfoRef->{ResidueCount}{$ChainID} || {};

    # Percent values are relative to the number of residues in the chain...
    $ChainResidueCount = @{$ChainsAndResiduesInfoRef->{Residues}{$ChainID}};
    $TotalResidueCount += $ChainResidueCount;

    ($FrequencyMap{$ChainID}, $PercentFrequencyMap{$ChainID}) = _FormatChainResiduesFrequency($ResidueCountRef, $ChainResidueCount);

    # Accumulate data for all the residues in all the chains...
    for $ResidueName (keys %{$ResidueCountRef}) {
      $AllResiduesNameToCountMap{$ResidueName} += $ResidueCountRef->{$ResidueName};
    }
  }

  # List distribution of residues
  print "\nDistribution of residues in chain(s): \n";
  for $ChainID (@{$ChainsAndResiduesInfoRef->{ChainIDs}}) {
    if ($ChainCount > 1) {
      print "Chain ", CleanupChainID($ChainID), ": ";
    }
    print JoinWords($FrequencyMap{$ChainID}, '; ', 0), "\n";
  }
  if ($OptionsInfo{DetailLevel} >= 2) {
    print "\nPercent distribution of residues in chain(s): \n";
    for $ChainID (@{$ChainsAndResiduesInfoRef->{ChainIDs}}) {
      if ($ChainCount > 1) {
        print "Chain ", CleanupChainID($ChainID), ": ";
      }
      print JoinWords($PercentFrequencyMap{$ChainID}, '; ', 0), "\n";
    }
  }

  # Grand total is only of interest for multiple chains...
  if ($ChainCount > 1) {
    my($AllChainsFrequencyRef, $AllChainsPercentFrequencyRef);
    ($AllChainsFrequencyRef, $AllChainsPercentFrequencyRef) = _FormatChainResiduesFrequency(\%AllResiduesNameToCountMap, $TotalResidueCount);

    print "\nDistribution of residues across all chains: \n";
    print JoinWords($AllChainsFrequencyRef, '; ', 0), "\n";

    if ($OptionsInfo{DetailLevel} >= 2) {
      print "\nPercent distribution of residues across all chains: \n";
      print JoinWords($AllChainsPercentFrequencyRef, '; ', 0), "\n";
    }
  }
}

# Turn a residue name to count hash into references to two lists containing
# "<Name> - <Count>" and "<Name> - <Percent>%" entries. Entries are ordered by
# decreasing count; residues with identical counts are ordered by name in order
# to make the output independent of the hash ordering...
sub _FormatChainResiduesFrequency {
  my($ResidueCountRef, $TotalCount) = @_;
  my($ResidueName, $ResidueCount, $PercentResidueCount, @Frequency, @PercentFrequency);

  @Frequency = ();
  @PercentFrequency = ();
  for $ResidueName (sort { $ResidueCountRef->{$b} <=> $ResidueCountRef->{$a} || $a cmp $b } keys %{$ResidueCountRef}) {
    $ResidueCount = $ResidueCountRef->{$ResidueName};
    # Adding 0 drops trailing zeros from the formatted value: 50.0 becomes 50...
    $PercentResidueCount = sprintf("%.1f", (($ResidueCount/$TotalCount)*100)) + 0;
    push @Frequency, "${ResidueName} - ${ResidueCount}";
    push @PercentFrequency, "${ResidueName} - ${PercentResidueCount}%";
  }
  return (\@Frequency, \@PercentFrequency);
}

# List information about all the residues...
sub ListAllResiduesInfo {
  # Prints, depending on the flags set in %OptionsInfo: the total number of
  # residues along with ATOM and HETATM residue counts, the list of residues
  # at detail level 3 or higher, and the distribution of residue names for all,
  # ATOM and HETATM residues. Percent values are listed at detail level 2 or
  # higher and are always relative to the total number of residues.
  my($PDBRecordLinesRef) = @_;
  my($TotalResidueCount, $AtomResiduesCount, $HetatmResiduesCount, $ResiduesInfoRef);

  $ResiduesInfoRef = GetAllResidues($PDBRecordLinesRef);
  $TotalResidueCount = @{$ResiduesInfoRef->{ResidueNames}};
  $AtomResiduesCount = @{$ResiduesInfoRef->{AtomResidueNames}};
  $HetatmResiduesCount = @{$ResiduesInfoRef->{HetatmResidueNames}};

  if ($OptionsInfo{CountResiduesAll}) {
    print "\nTotal number of residues: Total - $TotalResidueCount; ATOM residues - $AtomResiduesCount; HETATM residues - $HetatmResiduesCount\n";

    if ($OptionsInfo{DetailLevel} >= 3) {
      print "List of residues: \n";
      if ($AtomResiduesCount) {
        print "ATOM residues: ", JoinWords(\@{$ResiduesInfoRef->{AtomResidueNames}}, ', ', 0), "\n";
      }
      if ($HetatmResiduesCount) {
        print "HETATM residues: ", JoinWords(\@{$ResiduesInfoRef->{HetatmResidueNames}}, ', ', 0), "\n";
      }
    }
  }

  if ($OptionsInfo{ResiduesFrequencyAll}) {
    my($FrequencyRef, $PercentFrequencyRef);

    # List distribution of residues
    ($FrequencyRef, $PercentFrequencyRef) = _FormatAllResiduesFrequency($ResiduesInfoRef->{ResidueCount}, $TotalResidueCount);
    print "\nDistribution of residues: ", JoinWords($FrequencyRef,'; ', 0), "\n";
    if ($OptionsInfo{DetailLevel} >= 2) {
      print "\nPercent distribution of residues: ", JoinWords($PercentFrequencyRef,'; ', 0), "\n";
    }

    if ($OptionsInfo{DetailLevel} >= 1) {
      ($FrequencyRef, $PercentFrequencyRef) = _FormatAllResiduesFrequency($ResiduesInfoRef->{AtomResidueCount}, $TotalResidueCount);
      print "\nDistribution of ATOM residues: ", JoinWords($FrequencyRef,'; ', 0), "\n";
      if ($OptionsInfo{DetailLevel} >= 2) {
        print "\nPercent distribution of ATOM residues: ", JoinWords($PercentFrequencyRef,'; ', 0), "\n";
      }

      ($FrequencyRef, $PercentFrequencyRef) = _FormatAllResiduesFrequency($ResiduesInfoRef->{HetatmResidueCount}, $TotalResidueCount);
      print "\nDistribution of HETATM residues: ", JoinWords($FrequencyRef,'; ', 0), "\n";
      if ($OptionsInfo{DetailLevel} >= 2) {
        print "\nPercent distribution of HETATM residues: ", JoinWords($PercentFrequencyRef,'; ', 0), "\n";
      }
    }
  }
}

# Turn a residue name to count hash into references to two lists containing
# "<Name> - <Count>" and "<Name> - <Percent>%" entries. Entries are ordered by
# decreasing count; residues with identical counts are ordered by name in order
# to make the output independent of the hash ordering. The percent entries
# carry a % suffix to match the format used for the distribution in chains...
sub _FormatAllResiduesFrequency {
  my($ResidueCountRef, $TotalCount) = @_;
  my($ResidueName, $ResidueCount, $PercentResidueCount, @Frequency, @PercentFrequency);

  # A missing count hash simply leads to empty lists...
  $ResidueCountRef = {} if !defined $ResidueCountRef;

  @Frequency = ();
  @PercentFrequency = ();
  for $ResidueName (sort { $ResidueCountRef->{$b} <=> $ResidueCountRef->{$a} || $a cmp $b } keys %{$ResidueCountRef}) {
    $ResidueCount = $ResidueCountRef->{$ResidueName};
    # Adding 0 drops trailing zeros from the formatted value: 50.0 becomes 50...
    $PercentResidueCount = sprintf("%.1f", (($ResidueCount/$TotalCount)*100)) + 0;
    push @Frequency, "${ResidueName} - ${ResidueCount}";
    push @PercentFrequency, "${ResidueName} - ${PercentResidueCount}%";
  }
  return (\@Frequency, \@PercentFrequency);
}

# List min/max XYZ coordinates for ATOM/HETATM records...
sub ListBoundingBox {
  # Prints the minimum and maximum X, Y and Z coordinates returned by
  # GetMinMaxCoords along with the size of the box along each axis. All the
  # values are rounded to three decimal places for display.
  my($PDBRecordLinesRef) = @_;
  my($XMin, $YMin, $ZMin, $XMax, $YMax, $ZMax, $XSize, $YSize, $ZSize);

  ($XMin, $YMin, $ZMin, $XMax, $YMax, $ZMax) = GetMinMaxCoords($PDBRecordLinesRef);

  # Sizes are calculated from unrounded coordinates...
  $XSize = abs($XMax - $XMin);
  $YSize = abs($YMax - $YMin);
  $ZSize = abs($ZMax - $ZMin);

  # Adding 0 drops trailing zeros from the formatted values...
  ($XMin, $YMin, $ZMin, $XMax, $YMax, $ZMax, $XSize, $YSize, $ZSize) = map { sprintf("%.3f", $_) + 0 } ($XMin, $YMin, $ZMin, $XMax, $YMax, $ZMax, $XSize, $YSize, $ZSize);

  print "\nBounding box coordinates: <XMin, XMax> - <$XMin, $XMax>; <YMin, YMax> - <$YMin, $YMax>; <ZMin, ZMax> - <$ZMin, $ZMax>;\n";
  print "Bounding box size in angstroms: XSize - $XSize; YSize - $YSize; ZSize - $ZSize\n";

}

# Check master record counts against actual record counts...
#
# $RecordTypesCountRef->{Count} maps record types to the number of records
# found in the file; $PDBRecordLinesRef is searched for the first MASTER record.
# A warning is printed when no MASTER record is present. Otherwise, each count
# specified in the MASTER record is compared against the explicit count:
#
#   . REMARK, HET, HELIX, SHEET, TURN, SITE, TER, CONECT and SEQRES counts are
#     compared only when records of that type are present in the file.
#   . Transformation (ORIGXn + SCALEn + MTRIXn) and coordinate (ATOM + HETATM)
#     counts are always compared; missing record types count as 0.
#
# The wording of the mismatch messages is kept as it has always been printed.
sub CheckMasterRecord {
  my($RecordTypesCountRef, $PDBRecordLinesRef) = @_;

  # Get master record information...
  my($RecordLine, $MasterRecordFound, $RecordType, %MasterCount);
  %MasterCount = ();
  $MasterRecordFound = 0;

  LINE: for $RecordLine (@{$PDBRecordLinesRef}) {
    if (IsMasterRecordType($RecordLine)) {
      # Values are returned in the order of the fields in MASTER record...
      @MasterCount{qw(REMARK HET HELIX SHEET TURN SITE Transformations AtomAndHetatm TER CONECT SEQRES)} = ParseMasterRecordLine($RecordLine);
      $MasterRecordFound = 1;
      last LINE;
    }
  }
  if (!$MasterRecordFound) {
    print "\nWarning: MASTER record is missing.\n";
    return;
  }

  # Make sure all the values are numbers before comparing them...
  for $RecordType (keys %MasterCount) {
    $MasterCount{$RecordType} += 0;
  }

  my(@MasterRecordValidationInfo);
  @MasterRecordValidationInfo = ();

  for $RecordType ('REMARK', 'HET', 'HELIX', 'SHEET', 'TURN', 'SITE') {
    if (exists($RecordTypesCountRef->{Count}{$RecordType}) && $MasterCount{$RecordType} != $RecordTypesCountRef->{Count}{$RecordType}) {
      push @MasterRecordValidationInfo, _MasterCountMismatchMessage("$RecordType records", $MasterCount{$RecordType}, $RecordTypesCountRef->{Count}{$RecordType});
    }
  }

  my($RecordsCount, $ID, $RecordID, $RecordLabel);
  $RecordsCount = 0;
  for $RecordLabel ('ORIGX', 'SCALE', 'MTRIX') {
    for $ID (1 .. 3) {
      $RecordID = "${RecordLabel}${ID}";
      if (exists $RecordTypesCountRef->{Count}{$RecordID}) {
        $RecordsCount += $RecordTypesCountRef->{Count}{$RecordID};
      }
    }
  }
  if ($MasterCount{Transformations} != $RecordsCount) {
    push @MasterRecordValidationInfo, _MasterCountMismatchMessage("transformation records (ORIGXn+SCALEn+MTRIXn)", $MasterCount{Transformations}, $RecordsCount);
  }

  $RecordsCount = 0;
  for $RecordLabel ('ATOM', 'HETATM') {
    if (exists $RecordTypesCountRef->{Count}{$RecordLabel}) {
      $RecordsCount += $RecordTypesCountRef->{Count}{$RecordLabel};
    }
  }
  if ($MasterCount{AtomAndHetatm} != $RecordsCount) {
    push @MasterRecordValidationInfo, _MasterCountMismatchMessage("ATOM + HETATM records", $MasterCount{AtomAndHetatm}, $RecordsCount);
  }

  for $RecordType ('TER', 'CONECT', 'SEQRES') {
    if (exists($RecordTypesCountRef->{Count}{$RecordType}) && $MasterCount{$RecordType} != $RecordTypesCountRef->{Count}{$RecordType}) {
      push @MasterRecordValidationInfo, _MasterCountMismatchMessage("$RecordType records", $MasterCount{$RecordType}, $RecordTypesCountRef->{Count}{$RecordType});
    }
  }

  if (@MasterRecordValidationInfo) {
    print "\nMASTER record validation: Count mismatches found:\n";
    print JoinWords(\@MasterRecordValidationInfo, "\n", 0), "\n";
  }
  else {
    print "\nMASTER record validation: Count values match with the explicit count of the corresponding records.\n";
  }
}

# Setup message for a count in MASTER record which differs from explicit count...
sub _MasterCountMismatchMessage {
  my($RecordsLabel, $SpecifiedCount, $ExplicitCount) = @_;

  return "Number of $RecordsLabel, $SpecifiedCount, specified in MASTER record doen't match its explict count, $ExplicitCount.";
}

# Total size of all the files...
#
# Nothing is printed unless more than one file has been flagged as okay.
sub ListTotalSizeOfFiles {
  my($FileOkayCount, $TotalSize, $Index);

  $FileOkayCount = 0;
  $TotalSize = 0;

  for $Index (0 .. $#PDBFilesList) {
    if ($PDBFilesInfo{FileOkay}[$Index]) {
      $FileOkayCount++;
      $TotalSize += $PDBFilesInfo{FileSize}[$Index];
    }
  }
  if ($FileOkayCount > 1) {
    print "\nTotal size of $FileOkayCount files: ", FormatFileSize($TotalSize), "\n";
  }

}

# Empty chain IDs are replaced with "None[1-9]". But for displaying purposes, take out any
# numbers from label...
sub CleanupChainID {
  my($ChainID) = @_;

  if ($ChainID =~ /^None/i) {
    return 'None';
  }
  return $ChainID;
}

# Process option values...
#
# Translates the command line values collected in %Options into the flags and
# values in %OptionsInfo used by the rest of the script.
sub ProcessOptions {
  %OptionsInfo = ();

  # Setup record types to count...
  if ($Options{count}) {
    $OptionsInfo{CountRecordType} = $Options{count};
  }
  else {
    $OptionsInfo{CountRecordType} = $Options{all} ? 'All' : 'ATOM,HETATM';
  }
  @{$OptionsInfo{SpecifiedRecordTypes}} = ();
  if ($OptionsInfo{CountRecordType} !~ /^All$/i) {
    push @{$OptionsInfo{SpecifiedRecordTypes}}, split(/\,/, $OptionsInfo{CountRecordType});
  }
  $OptionsInfo{CountChains} = ($Options{chains} || $Options{all}) ? 1 : 0;
  $OptionsInfo{CheckMasterRecord} = ($Options{mastercheck} || $Options{all}) ? 1 : 0;

  # Residues mode decides whether residues are reported in chains, for the
  # whole file, or both...
  my($InChainsMode, $AllMode);
  $InChainsMode = ($Options{residuesmode} =~ /^(InChains|Both)$/i) ? 1 : 0;
  $AllMode = ($Options{residuesmode} =~ /^(All|Both)$/i) ? 1 : 0;

  # Residue count is the default. So $Options{residues} is simply ignored.
  $OptionsInfo{CountResiduesInChains} = $InChainsMode;
  $OptionsInfo{CountResiduesAll} = $AllMode;

  my($ListFrequency) = ($Options{frequency} || $Options{all}) ? 1 : 0;
  $OptionsInfo{ResiduesFrequencyInChains} = ($ListFrequency && $InChainsMode) ? 1 : 0;
  $OptionsInfo{ResiduesFrequencyAll} = ($ListFrequency && $AllMode) ? 1 : 0;

  $OptionsInfo{CalculateBoundingBox} = ($Options{boundingbox} || $Options{all}) ? 1 : 0;

  $OptionsInfo{ListHeaderInfo} = ($Options{header} || $Options{all}) ? 1 : 0;
  $OptionsInfo{DetailLevel} = $Options{detail};

}

# Retrieve information about PDB files...
#
# Fills %PDBFilesInfo with one entry per file in @PDBFilesList: FileOkay is set
# to 1 only for existing PDB files which can be opened for reading; FileSize
# and FileLastModified are set for those files. A warning is issued for each
# file which is ignored.
sub RetrievePDBFilesInfo {
  my($Index, $PDBFile, $PDBFileHandle, $ModifiedTimeString, $ModifiedDateString);

  %PDBFilesInfo = ();
  @{$PDBFilesInfo{FileOkay}} = ();
  @{$PDBFilesInfo{FileSize}} = ();
  @{$PDBFilesInfo{FileLastModified}} = ();

  FILELIST: for $Index (0 .. $#PDBFilesList) {
    $PDBFilesInfo{FileOkay}[$Index] = 0;
    $PDBFilesInfo{FileSize}[$Index] = 0;
    $PDBFilesInfo{FileLastModified}[$Index] = '';

    $PDBFile = $PDBFilesList[$Index];
    if (!(-e $PDBFile)) {
      warn "Warning: Ignoring file $PDBFile: It doesn't exist\n";
      next FILELIST;
    }
    if (!CheckFileType($PDBFile, "pdb")) {
      warn "Warning: Ignoring file $PDBFile: It's not a PDB file\n";
      next FILELIST;
    }
    # Explicit read mode keeps characters such as > or | in a file name from
    # being interpreted as an open mode...
    if (!open($PDBFileHandle, '<', $PDBFile)) {
      warn "Warning: Ignoring file $PDBFile: Couldn't open it: $! \n";
      next FILELIST;
    }
    close $PDBFileHandle;

    $PDBFilesInfo{FileOkay}[$Index] = 1;
    $PDBFilesInfo{FileSize}[$Index] = FileSize($PDBFile);
    ($ModifiedTimeString, $ModifiedDateString) = FormattedFileModificationTimeAndDate($PDBFile);
    $PDBFilesInfo{FileLastModified}[$Index] = "$ModifiedTimeString; $ModifiedDateString";
  }
}


# Setup script usage and retrieve command line arguments specified using various options...
sub SetupScriptUsage {
  # Collects command line options into %Options, starting from the default
  # values, and validates the working directory, detail level and residues
  # mode. The script dies with an explanatory message for any invalid value.
  my(@OptionSpecs);

  # Default values for the options which always need a value...
  %Options = (
    count => '',
    detail => 1,
    residuesmode => 'Both',
  );

  # Retrieve all the options...
  @OptionSpecs = ("all|a", "boundingbox|b", "count|c=s", "chains", "detail|d=i", "frequency|f", "mastercheck|m", "header", "help|h", "residues", "residuesmode=s", "workingdir|w=s");
  GetOptions(\%Options, @OptionSpecs)
    or die "\nTo get a list of valid options and their values, use \"$ScriptName -h\" or\n\"perl -S $ScriptName -h\" command and try again...\n";

  # Switch to the working directory before anything else looks at files...
  if ($Options{workingdir}) {
    -d $Options{workingdir}
      or die "Error: The value specified, $Options{workingdir}, for option \"-w --workingdir\" is not a directory name.\n";
    chdir $Options{workingdir} or die "Error: Couldn't chdir $Options{workingdir}: $! \n";
  }

  IsPositiveInteger($Options{detail})
    or die "Error: The value specified, $Options{detail}, for option \"-d --detail\" is not valid. Allowed values: > 0\n";

  $Options{residuesmode} =~ /^(InChains|All|Both)$/i
    or die "Error: The value specified, $Options{residuesmode}, for option \"--ResiduesMode\" is not valid. Allowed values: InChains, All, or Both\n";
}