#!/usr/local/bin/perl

# Example program 4.  Perform a UPGMA (Unweighted Pair Growth
# Arithmatic Mean) analysis of a set of data.  The input consists
# of label/value pairs, all inputs separated by spaces.  Therefore
# a label cannot have blank spaces inside of it.


# Make sure there are an even number of inputs (matched value/label pairs).

if ( ( @ARGV % 2 ) != 0 )
{
  printf( "$0: Place the label/value pairs on the commmand line.\n\n" );
  exit( 1 );
} # if

printf( "Input sequence: @ARGV\n\n" );

# Store the arguments in a hash, all at once.  Recall that the first
# element of ARGV is a key, the second is a value, etc.

%labelhash = @ARGV;

@labelkeys = keys( %labelhash );

# Loop through all pairs of nodes, at each cycle finding and combining
# the two nodes which are the closest to each other.  Halt when there is
# only one label remaining; that label will be the final tree structure.

while ( @labelkeys > 1 )
{
  $smallest = Infinity;

  for ( $i = 0; $i < @labelkeys; $i++ )
  {
    for ( $j = $i+1; $j < @labelkeys; $j++ )
    {
      $a = $labelkeys[$i];
      $b = $labelkeys[$j];
      $distance = abs( $labelhash{$a} - $labelhash{$b} );

      if ( $distance < $smallest )
      {
        $smallest = $distance;
        $smallesta = $a;
        $smallestb = $b;
      } # if
    } # for j
  } # for i

  # Merge $smallesta and $smallestb into one, placing the label in the
  # following format: ($smallesta,$smallestb).  Note that when we compute
  # the new value, we can NOT simply add the values at $smallesta and
  # $smallestb and divide by two: $smallesta might represent several
  # nodes that have already been merged.  Thus the number of nodes
  # represented by $smallesta or $smallestb is equal to the number of
  # commas in the string, plus one.

  $counta = ( $smallesta =~ tr/,// ) + 1;
  $countb = ( $smallestb =~ tr/,// ) + 1;
  $newval = ( $labelhash{$smallesta} * $counta
            + $labelhash{$smallestb} * $countb ) / ( $counta + $countb );

  delete $labelhash{$smallesta};
  delete $labelhash{$smallestb};
  $labelhash{"($smallesta,$smallestb)"} = $newval;

  # Recompute the list of the keys in the label hash.
  @labelkeys = keys( %labelhash );
} # while

printf( "Final structure: @labelkeys\n\n" );

