#!/usr/local/bin/perl

# Example program 7.  Perform a 2DHP (2-Dimensional Hydrophobic/
# Hydrophilic) analysis of a given input sequence.  The input starts
# with either H (hydrophobic) or P (hydrophilic), and is then followed
# by pairs of either U, D, L, R (up, down, left, right), then H or P.


# Tunable constants, gathered in one place so that the accepted input
# alphabet can be changed without touching the logic further down.

$minlength = 3;    # Shortest accepted input: H/P, direction, H/P.

( $hydrophobic, $hydrophilic ) = ( 'H', 'P' );
( $up, $down, $left, $right )  = ( 'U', 'D', 'L', 'R' );


# The walk over the grid begins at the origin (0,0).

$xpos = $ypos = 0;


# Retrieve and check the command line parameter.  A usable sequence has
# at least $minlength characters and an odd length: it alternates H/P
# characters with direction characters, and both begins and ends with an
# H/P character.

$input = $ARGV[0];

if (    !defined( $input )
     || ( length( $input ) < $minlength )
     || ( ( length( $input ) % 2 ) != 1 )
   )
{
  # Use print, not printf: the text is a finished message, not a format
  # string, so any '%' in it must be shown literally.

  print( "$0: Place the input string on the command line.\n\n" );
  exit( 1 );
} # if

# The sequence comes straight from the user and has not been validated
# character by character yet, so it must never be used as a printf format.

print( "Input sequence: $input\n\n" );

# Run through each input character.  If it is invalid, print an error
# and exit.  Otherwise process the character according to position:
# even positions hold H/P characters, odd positions hold directions.

# State that the scoring code after the loop reads.

%grid              = ();  # "x,y" => H/P character placed at that location
$previous          = '';  # Most recent H/P character seen
$input_adjacencies = 0;   # Hydrophobic pairs that are consecutive in the input

# The (x, y) step caused by each direction character.  A character that
# is not a key of this table is not a legal direction.

my %step_for = (
  $up    => [  0,  1 ],
  $down  => [  0, -1 ],
  $left  => [ -1,  0 ],
  $right => [  1,  0 ],
);

for my $i ( 0 .. length( $input ) - 1 )
{
  my $char = substr( $input, $i, 1 );

  if ( ( $i % 2 ) == 0 )
  {
    # H or P character.  Messages are written with print rather than
    # printf so that a '%' in the offending character is shown literally.

    if (    ( $char ne $hydrophobic )
         && ( $char ne $hydrophilic )
       )
    {
      print( "$0: Input character $i ($char) must be $hydrophobic or " .
             "$hydrophilic.\n\n" );
      exit( 2 );
    } # if

    # Check to see if the new grid position already exists in the hash.
    # If it does, then the protein has folded back on itself, and is invalid.

    my $location = "$xpos,$ypos";

    if ( exists( $grid{$location} ) )
    {
      print( "$0: Input sequence has a collision at character $i.\n\n" );
      exit( 3 );
    } # if

    # Save this grid position.  If we've seen two hydrophobics in a row
    # in the input sequence, count them for later use in the scoring.

    $grid{$location} = $char;

    if (    ( $char eq $hydrophobic )
         && ( $previous eq $hydrophobic )
       )
    {
      $input_adjacencies++;
    } # if

    $previous = $char;

  } # if
  else
  {
    # U/D/L/R character.  Look up the step it causes and exit if the
    # character is unrecognized.  (A table lookup cannot silently turn
    # into an assignment the way a mistyped 'eq' comparison can.)

    my $step = $step_for{$char};

    if ( !defined( $step ) )
    {
      print( "$0: Input character $i ($char) must be $up, $down, $left, " .
             "or $right.\n\n" );
      exit( 2 );
    } # if

    $xpos += $step->[0];
    $ypos += $step->[1];
  } # else

} # for i


# Run through all PAIRS of hydrophobic locations in the grid, and count
# the pairs that are adjacent in the grid for scoring.  Hydrophilic
# locations can never contribute, so they are filtered out before the
# pairwise scan rather than being tested inside it.

my @hydrophobic_locations = grep { $grid{$_} eq $hydrophobic } keys( %grid );
my $adjacencies           = 0;

for my $i ( 0 .. $#hydrophobic_locations )
{
  for my $j ( $i + 1 .. $#hydrophobic_locations )
  {
    if ( adjacent( $hydrophobic_locations[$i], $hydrophobic_locations[$j] ) )
    {
      $adjacencies++;
    } # if
  } # for j
} # for i


# Compute the score.  Note that when we counted hydrophobic positions
# that were adjacent in the grid, we also counted those that MUST be
# adjacent, because they are adjacent in the input sequence.  Thus we
# subtract these from the score.  Also, make the final score a negative
# value, to be consistent with the energy minimization concept.

# $input_adjacencies is filled in by the input loop; treat it as zero if
# that loop never counted anything.

my $score = $adjacencies - ( $input_adjacencies || 0 );
$score = - $score;
print( "Score: $score\n\n" );


# Function to determine if two locations are adjacent in a grid.
# Note that only adjacencies along the grid axes are considered, so
# diagonal neighbours are not adjacent, and a location is not adjacent
# to itself.
#
# Parameters: two location strings of the form "x,y".
# Returns 1 if they are adjacent, 0 otherwise.

sub adjacent
{
  my( $first, $second ) = @_;

  # Everything before the first comma is 'x', everything after it is 'y'.

  my( $first_x,  $first_y  ) = split( /,/, $first,  2 );
  my( $second_x, $second_y ) = split( /,/, $second, 2 );

  # Squared Euclidean distance; the square root is not needed because
  # two grid locations are axis-adjacent exactly when this value is 1.
  # (0 means the same location, 2 means diagonal neighbours.)

  my $distance_squared =   ( $first_x - $second_x )**2
                         + ( $first_y - $second_y )**2;

  # Return an explicit 1 or 0; a bare false comparison would yield the
  # empty string instead of 0.

  return ( $distance_squared == 1 ) ? 1 : 0;
} # adjacent

