package ScatterPlot;

#use 5.008001;
use strict;
use warnings;

require Exporter;

our @ISA = qw(Exporter);

# Items to export into callers namespace by default. Note: do not export
# names by default without a very good reason. Use EXPORT_OK instead.
# Do not simply export all your public functions/methods/constants.

# This allows declaration       use SCatterPlot ':all';
# If you do not need this, moving things directly into @EXPORT or @EXPORT_OK
# will save memory.
our %EXPORT_TAGS = ( 'all' => [ qw(

) ] );

our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );

our @EXPORT = qw(

);

our $VERSION = '0.01';


# constructor
sub new {
    # declare the class name and assign it the input parameter
    my ($class_name) = @_;

    # create the new variable, its a hash
    my ($self) = [];

    # bless it to be an object within class $class_name
    bless ($self, $class_name);

    # return the hash and exit
    return $self;
}


# draw an ASCII plot
sub draw {
    # declare local copy of self and sport, assigning with input paramters
    my ($self, $xy_points, $x_size, $y_size, $x_label, $y_label, $char, $type, $reg_calc) = @_;

    # default variables
    unless ($type) {
        $type = 'text';
    }
    unless ($char) {
        $char = 'o';
    }
    unless ($y_label) {
        $y_label = '';
    }
    unless ($x_label) {
        $x_label = '';
    }
    unless ($y_size) {
        $y_size = 26;
    } else {
        $y_size--;
    }
    unless ($x_size) {
        $x_size = 60;
    }
    unless ($xy_points) {
        my @xy_points = ();
        my $i_max = 20;
        for (my $i=0; $i<$i_max; $i++) {
            $xy_points[$i][0] = ($i - ($i_max - 1) / 2 ) * 6 / $i_max;
            $xy_points[$i][1] = ($xy_points[$i][0] + 2) * ($xy_points[$i][0] - 2) * $xy_points[$i][0];
        }
        $xy_points = \@xy_points;
    }
    unless ($reg_calc) {
        $reg_calc = 0;
    }

    # find the number of points to plot
    my $num_points = @$xy_points;

    # loop thru the points and find min/max values
    my $x_min = $$xy_points[0][0];
    my $x_max = $$xy_points[0][0];
    my $y_min = $$xy_points[0][1];
    my $y_max = $$xy_points[0][1];
    for (my $j=1; $j<$num_points; $j++) {
        if (($$xy_points[$j][0]||$$xy_points[$j][0]==0) and ($$xy_points[$j][1]||$$xy_points[$j][1]==0)) {
            if ($$xy_points[$j][0] < $x_min) {
                $x_min = $$xy_points[$j][0];
            }
            if ($$xy_points[$j][0] > $x_max) {
                $x_max = $$xy_points[$j][0];
            }
            if ($$xy_points[$j][1] < $y_min) {
                $y_min = $$xy_points[$j][1];
            }
            if ($$xy_points[$j][1] > $y_max) {
                $y_max = $$xy_points[$j][1];
            }
        }
    }

    # calculate the scale and offset value
    my $slopish=($y_max-$y_min)/($x_max-$x_min+.0000000001);
    my $x_scale = 0;
    if ($x_max - $x_min != 0) {
        $x_scale = $x_size / ($x_max - $x_min);
    }
    my $x_offset = -$x_min;
    my $y_scale = 0;
    if ($y_max - $y_min != 0) {
        $y_scale = $y_size / ($y_max - $y_min);
    }
    #$y_scale*=int($slopish);
    my $y_offset = -$y_min;

    # clear the graph
    for (my $x=0; $x<$x_size; $x++) {
        for (my $y=0; $y<=$y_size; $y++) {
            $$self[$x][$y] = ' ';
        }
    }

    # draw the axes
    my $x_axis = int($x_scale * $x_offset);
    my $y_axis = int($y_scale * $y_offset);
    if (($y_axis >= 0) and ($y_axis < $y_size)) {
        for (my $x=0; $x<$x_size; $x++) {
            $$self[$x][$y_axis] = '-';
        }
    }
    if (($x_axis >= 0) and ($x_axis < $x_size)) {
        for (my $y=0; $y<$y_size; $y++) {
            $$self[$x_axis][$y] = '|';
        }
    }
    if (($x_axis >= 0) and ($x_axis < $x_size) and($y_axis >= 0) and ($y_axis < $y_size)) {
        $$self[$x_axis][$y_axis] = '+';
    }

    # plot the points
    for (my $i=0; $i<$num_points; $i++) {
        my $x_pos = 0;
        if ($$xy_points[$i][0]||$$xy_points[$i][0]==0) {
            $x_pos = int($x_scale * ($$xy_points[$i][0] + $x_offset));
        }
        if ($x_pos < 0) {
            $x_pos = 0;
        } elsif ($x_pos > $x_size - 1) {
            $x_pos = $x_size - 1;
        }
        my $y_pos = 0;
        if ($$xy_points[$i][1]||$$xy_points[$i][1]==0) {
            $y_pos = int($y_scale * ($$xy_points[$i][1] + $y_offset));
        }
        if ($y_pos < 0) {
            $y_pos = 0;
        } elsif ($y_pos > $y_size - 1) {
            $y_pos = $y_size - 1;
        }
        if (($$xy_points[$i][0]||$$xy_points[$i][0]==0) and ($$xy_points[$i][1]||$$xy_points[$i][1]==0)) {
            $$self[$x_pos][$y_pos] = $char;
        }
    }

############################################### Louis
    my @xvalues;                                #Values from column 1 of the file
    my @yvalues;                                #Values from column 2 of the file
    my $correlation;                            #The "r" value
    my $slope;                                  #Indicates rate of y/x
    my $intercept;                              #Regressed value of y at x=0
    my $sign;                                   #For printing the f(x)=mx+b equation
    my $countx=0;                               #For iterating through the file
    my @scatter_array;

    #Populate the two arrays containing the x and y values
    for (my $i=0; $i<$num_points; $i++) {
        if (($$xy_points[$i][0])||($$xy_points[$i][0] == 0)) {
            $xvalues[$i] = $$xy_points[$i][0];
        }
        if (($$xy_points[$i][1])||($$xy_points[$i][1] == 0)) {
            $yvalues[$i] = $$xy_points[$i][1];
        }
    }

    #Return the total from one array
    sub total () {
        my ($z_ref) = @_;
        my $total=0;
        my $value=0;
        foreach $value (@$z_ref) {
            $total+=$value;
        }
        return $total;
    }

    #Return the mean from one array [requires &total]
    sub mean () {
        my ($z_ref) = @_;
        my $total=&total($z_ref);
        return $total/(@$z_ref);
    }

    #Return the variance from one array [requires &mean]
    sub variance () {
        my ($z_ref) = @_;
        my $numerator=0;
        my $mean=&mean($z_ref);
        foreach my $value (@$z_ref) {
            $numerator+=($value-$mean)**2;
        }
        return $numerator/($#$z_ref);
    }

    #Return the standard deviation from one array [requires &variance]
    sub stdDeviation () {
        my ($z_ref) = @_;
        return sqrt(&variance($z_ref))+.000000001;  #Avoiding divide by zero with the result
    }

    #Return the correlation between two arrays [requires &mean, &stdDeviation]
    sub correlation () {
        my ($x_ref, $y_ref) = @_;
        my $corrsub=0;
        my $meanx=&mean($x_ref);
        my $meany=&mean($y_ref);
        my $devx=&stdDeviation($x_ref);
        my $devy=&stdDeviation($y_ref);
        for (my $i=0; $i<@$x_ref; $i++) {
            $corrsub+=(($x_ref->[$i]-$meanx)/$devx)*(($y_ref->[$i]-$meany)/$devy);
        }
        if ($devy<.0001) {
            return 1;
        } else {
            return $corrsub/($#$x_ref);
        }
    }

    #Return the regressed slope of two arrays [requires &correlation, &stdDeviation]
    sub getSlope () {
        my ($x_ref, $y_ref) = @_;
        return &correlation($x_ref, $y_ref)*(&stdDeviation($y_ref)/&stdDeviation($x_ref));
    }

    #Return the regressed y intercept of two arrays [requires &getSlope, &mean]
    sub getIntercept () {
        my ($x_ref, $y_ref) = @_;
        return &mean($y_ref)-(&getSlope($x_ref, $y_ref) * &mean($x_ref));
    }

    $slope=&getSlope(\@xvalues, \@yvalues);
    $intercept=&getIntercept(\@xvalues, \@yvalues);
    $correlation=&correlation(\@xvalues, \@yvalues);
    my $correlation_type;
    my $correlation_strength;
    if ($correlation<0) {
        $correlation_type="negative";
    } elsif ($correlation>0) {
        $correlation_type="positive";
    } else {
        $correlation_type="nil";
    }
    if (abs($correlation)>.99999) {
        $correlation_strength=" and perfectly correlated";
    } elsif (abs($correlation)>0.99) {
        $correlation_strength=" and almost perfectly correlated";
    } elsif (abs($correlation)>0.90) {
        $correlation_strength=" and very strongly correlated";
    } elsif (abs($correlation)>0.70) {
        $correlation_strength=" and strongly correlated";
    } elsif (abs($correlation)>0.50) {
        $correlation_strength=" and moderately correlated";
    } elsif (abs($correlation)>0.30) {
        $correlation_strength=" and weakly correlated";
    } elsif (abs($correlation)>0.20) {
        $correlation_strength=" and neglibily correlated";
    } else {
        $correlation_type="";
        $correlation_strength="not correlated";
    }

    if ($intercept<0) {
        $sign="-";
    }else{
        $sign="+";
    }

    # plot the regression line
    if ($reg_calc) {
        for (my $i=$x_min; $i< $x_max; $i+=(1/$x_scale)) {
            my $x_pos;
            $x_pos = int($x_scale * ($i + $x_offset));
            if ($x_pos < 0) {
                $x_pos = 0;
            } elsif ($x_pos > $x_size - 1) {
                $x_pos = $x_size - 1;
            }
            my $y_pos = 0;
            $y_pos = int($y_scale * ($i*$slope +$intercept + $y_offset));
            if ($y_pos < 0) {
                $y_pos = 0;
            } elsif ($y_pos > $y_size - 1) {
                $y_pos = $y_size - 1;
            }
            if ($$self[$x_pos][$y_pos] eq '.') {
                $$self[$x_pos][$y_pos] = '.';
            } elsif ($$self[$x_pos][$y_pos] eq '*') {
                $$self[$x_pos][$y_pos] = '*';
            } elsif ($$self[$x_pos][$y_pos] eq $char) {
                $$self[$x_pos][$y_pos] = '*';
            } else {
                $$self[$x_pos][$y_pos] = '.';
            }
        }
    }

############################################### Louis

    # add the axes limits
    # left label
    my $y_pos = 1;
    if ($y_axis < 1) {
        $y_pos = 1;
    } elsif ($y_axis > $y_size) {
        $y_pos = $y_size;
    } else {
        $y_pos = $y_axis;
    }
    my $label = sprintf("%0.1f ", $x_min);
    my $l = length($label);
    for (my $i=0; $i<$l; $i++) {
        $$self[$i][$y_pos] = substr($label, $i, 1);
    }
    # right label
    $label = sprintf(" %0.1f", $x_max);
    $l = length($label);
    my $x_label_pos = $x_size - $l;
    for (my $i=0; $i<$l; $i++) {
        $$self[$x_label_pos+$i][$y_pos] = substr($label, $i, 1);
    }
    # bottom label
    $label = sprintf("%0.1f", $y_min);
    $l = length($label);
    my $y_label_pos = $x_axis - int($l/2);
    if ($y_label_pos < 0) {
        $y_label_pos = 0;
    } elsif ($y_label_pos + $l > $x_size) {
        $y_label_pos = $x_size - $l;
    }
    for (my $i=0; $i<$l; $i++) {
        $$self[$y_label_pos+$i][0] = substr($label, $i, 1);
    }
    # top label
    $label = sprintf("%0.1f", $y_max);
    $l = length($label);
    $y_label_pos = $x_axis - int($l/2);
    if ($y_label_pos < 0) {
        $y_label_pos = 0;
    } elsif ($y_label_pos + $l > $x_size) {
        $y_label_pos = $x_size - $l;
    }
    for (my $i=0; $i<$l; $i++) {
        $$self[$y_label_pos+$i][$y_size-1] = substr($label, $i, 1);
    }

    # add the labels
    # x label
    $l = length($x_label);
    $x_label_pos = $x_size - $l;
    for (my $i=0; $i<$l; $i++) {
        $$self[$x_label_pos+$i][$y_pos+1] = substr($x_label, $i, 1);
    }
    # y label
    $l = length($y_label);
    $y_label_pos = $x_axis - int($l/2);
    if ($y_label_pos < 0) {
        $y_label_pos = 0;
    } elsif ($y_label_pos + $l > $x_size) {
        $y_label_pos = $x_size - $l;
    }
    for (my $i=0; $i<$l; $i++) {
        $$self[$y_label_pos+$i][$y_size] = substr($y_label, $i, 1);
    }

    # print the $self
    my $prefix = "\n";
    my $newline = "\n";
    my $postfix = "\n";
    if (($type eq 'html') or ($type eq 'HTML')) {
        $prefix = "<pre>\n";
        $newline = "\n";
        $postfix = "</pre>\n";
    }
    print $prefix;
    for (my $y=$y_size; $y>=0; $y--) {
        for (my $x=0; $x<$x_size; $x++) {
            print $$self[$x][$y];
        }
        print $newline;
    }
    print $postfix;
    if ($reg_calc) {
        printf ("Linear regression forumula: f(x)=%gx +%.4f\n", $slope, $intercept);
        printf ("                   R-value: %g\n", $correlation);
        printf ("                 R^2-value: %g\n", $correlation**2);
	printf ("\n\nSummary:\n");
        printf("The function f(x)=mx+b represented by this data is: f(x)=%gx %s%.4f\n", $slope, $sign, abs($intercept));
        printf("The x intercept is: %g\n", (-1)*(abs($intercept))/$slope);
        printf("The correlation between the %g records in this set is: %g\n", scalar @xvalues, $correlation);
        printf("This correlation is defined as %s%s.\n\n", $correlation_type, $correlation_strength);
        printf("The coefficient of determination (r^2 value) of these records is: %g\n", $correlation**2);
        printf("This means that %.1f%% of these records can be explained by the above equation.\n\n", 100*($correlation**2));
    }

    return 1;
}


1;
__END__

=head1 NAME

ScatterPlot - Perl extension for drawing ASCII scatter plots

=head1 SYNOPSIS

  use ScatterPlot;

=head1 DESCRIPTION

This module will draw a quick and easy ASCII scatter plot.  It has only two functions, new() and draw().  new() takes no arguments and creates a new ScatterPlot object.  draw() can be called with no arguments to draw a sample test plot.  You can call draw like this:

    draw($xy_points);

where $xy_points is a reference to an array of (x,y) pairs.  See the file ScatterPlot.pl for an example.  The full call to draw is:

    draw($xy_points, $x_size, $y_size, $x_label, $y_label, $char, $type, $reg_line);

where $xy_points is a reference to an array of (x,y) pairs, $x_size is an integer describing the width of the plot in characters, $y_size is an integer describing the height of the plot in characters, $x_label is a string for the horizontal axis label, $y_label is a string for the vertical axis lable, $char is the plot character, and $type is either 'text', 'html', or 'HTML'.  If you are using CGI or sending the plot output to a web page, then use $type='html' or $type='HTML'.

The method draw() will automatically scale the plot to fit your data and draw the axes labels accordingly.  The size of the output text will be $y_size lines of text, each of which is $x_size long in characters (plus line terminator).  In text mode the plot begins with "\n" and ends with "\n", while in html mode the plot begins with "<pre>" and ends with "<\pre>".

=head2 EXPORT

none


=head1 SEE ALSO

The example file ScatterPlot.pl contains an example of how to use the ScatterPlot module.

=head1 AUTHORS

Les Hall, E<lt>inventor-66@comcast.netE<gt>
Louis Romero, E<lt>louis_romero@hotmail.comE<gt>

=head1 COPYRIGHT AND LICENSE

Copyright 2007 by Les Hall

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.

=cut