#!/usr/local/bin/perl

# Example program 8.  Given the name of a PDB file, read the file and
# locate all ATOM locations.  For each atom, count the number of
# neighboring atoms within a threshold distance.  As PDB files can
# contain thousands of atoms, we want to do this efficiently.


# Define some constants that will be needed later.  Define the threshold
# as the square of the value we want (in angstroms).  This way, when we
# compute Euclidean distance, we don't have to use the square root
# function on EVERY pair of distances, cutting down on computation time.

$threshold     = 3.6**2;
$atom_keyword  = "ATOM";
$atom_start    = 0;
$label_start   = 6;
$label_length  = 5;
$x_start       = 30;
$x_length      = 8;
$y_start       = 38;
$y_length      = 8;
$z_start       = 46;
$z_length      = 8;


# Retreive and check the command line parameter.

$inputfile = @ARGV[0];

if ( @ARGV == 0 )
{
  printf( "$0: Place the PDB filename on the commmand line.\n\n" );
  exit( 1 );
} # if

printf( "Input file: $inputfile\n\n" );

unless( open( PDB, "$inputfile" ) )
{
  printf( "$0: Cannot open PDB file $inputfile.\n\n" );
  exit( 2 );
} # if

while ( <PDB> )
{
  $input = $_;
  chop( $input );

  if ( index( $input, $atom_keyword ) == $atom_start )
  {
    $label[$records] = substr( $input, $label_start, $label_length );
    $x[$records] = substr( $input, $x_start, $x_length );
    $y[$records] = substr( $input, $y_start, $y_length );
    $z[$records] = substr( $input, $z_start, $z_length );
    $records++;
  } # if
} # while

for ( $i = 0; $i < $records; $i++ )
{
  for ( $j = $i+1; $j < $records; $j++ )
  {
    $distance = ( $x[$i] - $x[$j] ) ** 2
              + ( $y[$i] - $y[$j] ) ** 2
              + ( $z[$i] - $z[$j] ) ** 2;
    if ( $distance < $threshold )
    {
      $count[$i]++;
      $count[$j]++;
    } # if
  } # for j
} # for i

# Sort the labels and counts in order by count, from highest to lowest.
# Note that we cannot simply use 'sort' here, since the information
# resides in two different arrays.  The method presented here is a
# simple bubble sort: check each pair of counts, swapping them (and
# the labels) if the later one has a larger value.

printf( "Atom #  Count\n" );
printf( "------  -----\n" );

for ( $i = 0; $i < $records-1; $i++ )
{
  for ( $j = $i+1; $j < $records; $j++ )
  {
    if ( $count[$i] < $count[$j] )
    {
      ( $count[$i], $count[$j] ) = ( $count[$j], $count[$i] );
      ( $label[$i], $label[$j] ) = ( $label[$j], $label[$i] );
    } # if
  } # for j

  # Print out each record as it becomes sorted.
  printf( "%6s  %5d\n", $label[$i], $count[$i] );
} # for i

# Print out the final record, which was skipped by the for i loop going
# only up to $records-1.
printf( "%6s  %5d\n", $label[$i], $count[$i] );

