#!/usr/local/bin/perl

# Example program B.  Perform a linear regression on the
# input data.  The input consists of x/y pairs of numerical
# data, all inputs separated by spaces.  The output values
# are m and b, such that y = mx + b is the form of the best
# fit, as well as the x- and y-intercepts of this line,
# along with the correlation coefficient.


# Turn on strictures so that misspelled variable names and
# arithmetic on non-numeric values are reported instead of
# silently producing wrong results.

use strict;
use warnings;

use Scalar::Util qw( looks_like_number );


# Define some constants that we'll need later.

use constant MIN_INPUTS => 6;   # With fewer than 3 input
                                # pairs, linear regression is
                                # either unnecessary or
                                # inappropriate.


# Print a diagnostic prefixed with the program name and stop
# with exit status 1.  The text goes to standard output, which
# is where this program has always written its usage message.
# print (not printf) is used so that a '%' in the program name
# or in the message is never taken as a format directive.

sub bail
{
  my ( $message ) = @_;

  print( "$0: $message\n\n" );
  exit( 1 );
} # sub bail


# Return true when every value in the argument list is
# numerically equal to the first one (an empty list counts as
# "all equal").

sub all_equal
{
  my ( $first, @rest ) = @_;

  my $different = grep { $_ != $first } @rest;

  return $different == 0;
} # sub all_equal


# Split a flat list ( x0, y0, x1, y1, ... ) into two parallel
# arrays.  Returns a reference to the array of x values
# followed by a reference to the array of y values.

sub split_pairs
{
  my @values = @_;
  my ( @x, @y );

  while ( @values )
  {
    push( @x, shift( @values ) );
    push( @y, shift( @values ) );
  } # while

  return ( \@x, \@y );
} # sub split_pairs


# Compute the least-squares line y = mx + b through the given
# points.  Takes a reference to the x values and a reference
# to the y values.  Returns the list ( m, b ), or an empty
# list when no such line exists because all x values are
# equal (the best fit is then vertical and has no slope).

sub fit_line
{
  my ( $x, $y ) = @_;
  my $n = @$x;

  # Compare the x values directly as well as testing the
  # denominator below: rounding can leave the denominator
  # slightly off 0 even though all x values are identical.

  return if ( all_equal( @$x ) );

  my ( $sumx, $sumy, $sumxsq, $sumxy ) = ( 0, 0, 0, 0 );

  for my $i ( 0 .. $n - 1 )
  {
    $sumx   += $x->[$i];
    $sumy   += $y->[$i];
    $sumxsq += $x->[$i] ** 2;
    $sumxy  += $x->[$i] * $y->[$i];
  } # for i

  my $denom = $n * $sumxsq - $sumx ** 2;

  # Cancellation between the two large terms can also yield
  # exactly 0 for nearly equal x values; never divide by it.

  return if ( $denom == 0 );

  my $slope     = ( $n * $sumxy - $sumx * $sumy ) / $denom;
  my $intercept = ( $sumxsq * $sumy - $sumxy * $sumx ) / $denom;

  return ( $slope, $intercept );
} # sub fit_line


# Return the x-intercept of y = mx + b given ( m, b ), or
# nothing (undef in scalar context) when the slope is 0, in
# which case there is no x-intercept.

sub x_intercept
{
  my ( $slope, $intercept ) = @_;

  return if ( $slope == 0 );

  return - $intercept / $slope;
} # sub x_intercept


# Compute the correlation coefficient of the given points.
# Takes a reference to the x values and a reference to the y
# values.  Returns nothing (undef in scalar context) when the
# coefficient does not exist, i.e. when all x values or all y
# values are equal, since that would require dividing by 0.

sub correlation
{
  my ( $x, $y ) = @_;
  my $n = @$x;

  return if ( all_equal( @$x ) || all_equal( @$y ) );

  my ( $sumx, $sumy ) = ( 0, 0 );

  for my $i ( 0 .. $n - 1 )
  {
    $sumx += $x->[$i];
    $sumy += $y->[$i];
  } # for i

  my $avgx = $sumx / $n;
  my $avgy = $sumy / $n;

  my ( $sumdxdy, $sumdxsq, $sumdysq ) = ( 0, 0, 0 );

  for my $i ( 0 .. $n - 1 )
  {
    my $xminusavgx = $x->[$i] - $avgx;
    my $yminusavgy = $y->[$i] - $avgy;

    $sumdxdy += $xminusavgx * $yminusavgy;
    $sumdxsq += $xminusavgx ** 2;
    $sumdysq += $yminusavgy ** 2;
  } # for i

  my $devx = sqrt( $sumdxsq );
  my $devy = sqrt( $sumdysq );

  return if ( $devx == 0 || $devy == 0 );

  return $sumdxdy / $devx / $devy;
} # sub correlation


# Format a result for printing: two decimal places for a
# number, or "DNE" (does not exist) for an undefined value.
# Formatting the text "DNE" with %.2f would print 0.00.

sub format_value
{
  my ( $value ) = @_;

  return defined( $value ) ? sprintf( "%.2f", $value ) : "DNE";
} # sub format_value


# Check the command-line arguments, then print m, b, the x-
# and y-intercepts and the correlation coefficient.  Takes the
# list of command-line arguments.

sub main
{
  my @args = @_;

  # Make sure there are a sufficient number of evenly matched
  # x/y inputs.

  if (    ( scalar( @args ) < MIN_INPUTS )
       || ( ( @args % 2 ) != 0 )
     )
  {
    bail( "Place the x/y pairs on the command line." );
  } # if

  # Reject anything that is not a number; Perl would otherwise
  # quietly treat such an argument as 0.

  my @bad = grep { !looks_like_number( $_ ) } @args;

  if ( @bad )
  {
    bail( "Not a number: @bad" );
  } # if

  print( "Input coordinates: @args\n\n" );

  # Store the input values in two arrays.

  my ( $x, $y ) = split_pairs( @args );

  # Compute and print m and b.

  my ( $slope, $intercept ) = fit_line( $x, $y );

  if ( !defined( $slope ) )
  {
    bail( "All x values are equal, so the best fit is a"
        . " vertical line and m and b do not exist." );
  } # if

  printf( "m = %.2f, b = %.2f\n\n", $slope, $intercept );

  # Print the intercepts; there is no x-intercept if the slope
  # is 0.

  my $xintcp = x_intercept( $slope, $intercept );
  my $yintcp = $intercept;

  printf( "X-intercept = %s\n", format_value( $xintcp ) );
  printf( "Y-intercept = %.2f\n\n", $yintcp );

  # Compute and print the correlation coefficient.

  my $r = correlation( $x, $y );

  printf( "Correlation coefficient: %s\n\n", format_value( $r ) );

  return;
} # sub main


# Run the program only when this file is executed directly, so
# that the subroutines above can also be loaded with require
# (for example by a test) without any side effects.

main( @ARGV ) unless caller;

