#!/usr/local/bin/perl

# Example program 1.  Perform conceptual translation from nucleotides
# to amino acids.  Do this for three reading frames, skipping the
# first 0-2 nucleotides to produce the reading frames.

# Settings shared by the rest of the script.  They are declared with
# "our" so that they remain ordinary package variables.

our ( $minlength, $readingframes, $unknown ) = (
  3,      # shortest input accepted (one 3-letter group)
  3,      # number of reading frames to print (offsets 0, 1 and 2)
  "UNK",  # printed for a 3-letter group missing from the lookup table
);

# Lookup table from a 3-letter RNA codon (letters U, C, A, G) to the
# three-letter code that is printed for it, e.g. $nucleohash{ "UUU" }
# yields "Phe".  The entries with the value "STP" are UAA, UAG and UGA.
# Rows are grouped by the first two letters of the codon.

our %nucleohash = (
  UUU => "Phe", UUC => "Phe", UUA => "Leu", UUG => "Leu",
  UCU => "Ser", UCC => "Ser", UCA => "Ser", UCG => "Ser",
  UAU => "Tyr", UAC => "Tyr", UAA => "STP", UAG => "STP",
  UGU => "Cys", UGC => "Cys", UGA => "STP", UGG => "Trp",

  CUU => "Leu", CUC => "Leu", CUA => "Leu", CUG => "Leu",
  CCU => "Pro", CCC => "Pro", CCA => "Pro", CCG => "Pro",
  CAU => "His", CAC => "His", CAA => "Gln", CAG => "Gln",
  CGU => "Arg", CGC => "Arg", CGA => "Arg", CGG => "Arg",

  AUU => "Ile", AUC => "Ile", AUA => "Ile", AUG => "Met",
  ACU => "Thr", ACC => "Thr", ACA => "Thr", ACG => "Thr",
  AAU => "Asn", AAC => "Asn", AAA => "Lys", AAG => "Lys",
  AGU => "Ser", AGC => "Ser", AGA => "Arg", AGG => "Arg",

  GUU => "Val", GUC => "Val", GUA => "Val", GUG => "Val",
  GCU => "Ala", GCC => "Ala", GCA => "Ala", GCG => "Ala",
  GAU => "Asp", GAC => "Asp", GAA => "Glu", GAG => "Glu",
  GGU => "Gly", GGC => "Gly", GGA => "Gly", GGG => "Gly",
);

# Retrieve the nucleotide string from the first command-line argument
# and check that it is at least $minlength characters long.

# A single element is wanted, so use $ARGV[0] rather than the
# one-element slice @ARGV[0].
our $input = $ARGV[0];

# With no argument $input is undefined; reject that explicitly instead
# of relying on how length() treats an undefined value.
if ( !defined( $input ) || length( $input ) < $minlength )
{
  # print rather than printf: $0 is arbitrary text (the program path)
  # and must not be interpreted as a format string.
  print "$0: Place the nucleotide string on the commmand line.\n\n";
  exit( 1 );
} # if

# Echo the input verbatim.  With printf, a "%" inside the sequence
# would be parsed as a conversion specifier and garble the output.
print "Nucleotide sequence: $input\n\n";

# Translate the sequence once per reading frame.  Frame N skips the
# first N letters of the input before the remainder is cut into
# 3-letter groups.

for my $frame ( 0 .. $readingframes - 1 )
{
  print "Reading frame $frame: ";

  # Collect every complete 3-letter group that follows the skipped
  # prefix; a leftover of one or two letters at the end is ignored.
  my @triplets = substr( $input, $frame ) =~ /(.{3})/gs;

  foreach my $triplet ( @triplets )
  {
    # A group that is not a key of the lookup table is reported with
    # the "unknown" marker.
    my $residue = defined( $nucleohash{ $triplet } )
                ? $nucleohash{ $triplet }
                : $unknown;

    # Each code is followed by a single space, including the last one.
    print "$residue ";
  } # foreach triplet

  print "\n";
} # for frame

print "\n";

