#!/usr/local/bin/perl

# Example program 6.  Find all possible splice junctions in
# a nucleotide sequence, determine the probability of each,
# and print them out in order from most to least likely.

# Define some constants that we'll need later.

$minlength     =  2;

@offsets     = ( -2,   -1,   2,    3,    4,    5    );
@nucleotides   = ( "A",  "G",  "A",  "A",  "G",  "T"  );
@probabilities = ( 0.64, 0.75, 0.62, 0.68, 0.84, 0.63 );


# Retreive and check the command line parameter.

$input = @ARGV[0];

if ( length( $input ) < $minlength )
{
  die( "$0: Place the nucleotide string on the commmand"
        . " line.\n\n" );
} # if

printf( "Nucleotide sequence: $input\n\n" );
printf( "Potential splice junctions and associated"
      . " probabilities:\n\n" );
printf( "Index \t Probability\n" );
printf( "----- \t -----------\n" );


# Find all positions where "GT" occurs, and build a list of
# these indices.

$pos = index( $input, "GT" );

while ( $pos != -1 )
{
  push( @indices, $pos );

  # Note that we need to start searching at $pos+1 to make
  # sure that we skip the "GT" we just found; otherwise
  # we'd find the first occurrence over and over again.
  $pos = index( $input, "GT", $pos+1 );
} # while


# For each of the indices where "GT" occurs, compute the
# probabilty of a splice junction occurring at that point.

foreach $index ( @indices )
{
  $prob = 1.0;

  # Run through all the offsets where important nucleotides
  # occur.

  for ( $i = 0; $i < @offsets; $i++ )
  {
    # Check to see if the nucleotide gives the maximum
    # probability, or the reciprocal thereof.  Note: we are
    # counting on PERL's treatment of strings: if we step
    # off the left or right end of the string, the "eq"
    # test will fail, as the nucleotide at that position is
    # "", the empty string.

    if (    $nucleotides[$i]
         eq substr( $input, $index + $offsets[$i], 1 ) 
       )
    {
      $prob *= $probabilities[$i];
    } # if
    else
    {
      $prob *= 1.0 - $probabilities[$i];
    } # else

  } # for i

  # Add an entry to a hash table, where the probability is
  # the key and the index where it occurs is the associated
  # value.  We do it in this order, since we will later
  # sort on the key values.

  @hash{$prob} = $index;
} # foreach

# Sort the list of probabilities in descending order, then
# print out that sorted list, along with the index into the
# string where the potential splice junction occurs.

@sortedprobs = sort descending_numerical keys( %hash );

foreach $prob ( @sortedprobs )
{
  $printindex = $hash{$prob} + 1;
  printf( "$printindex \t $prob\n" );
}

printf( "\n" );


# Subroutine used for sorting a list in descending
# numerical order.

sub descending_numerical
{
  if ( $a < $b )
  {
    return 1;
  } # if
  elsif ( $a == $b )
  {
    return 0;
  } # elsif
  else
  {
    return -1;
  } # else
} # reverse_numerical

