#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
# This file is part of G-language Genome Analysis Environment package
#
#     Copyright (C) 2001-2009 Keio University
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
# 
# G-language GAE is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
# 
# G-language GAE is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public
# License along with G-language GAE -- see the file COPYING.
# If not, write to the Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# 
#END_HEADER
#
# written by Kazuharu Arakawa <gaou@sfc.keio.ac.jp> at
# G-language Project, Institute for Advanced Biosciences, Keio University.
#

package G::Tools::Statistics;

use SubOpt;
use G::Messenger;

use strict;
use base qw(Exporter);
use SelfLoader;

our @EXPORT = qw(
		 cumulative
		 mean
		 sum
		 variance
		 standard_deviation
		 min
		 mindex
		 max
		 maxdex
		 median
		 least_squares_fit
		 cor
		 ttest
		 );

__DATA__

#::::::::::::::::::::::::::::::
#          PerlDoc
#::::::::::::::::::::::::::::::

=head1 NAME

G::Tools::Statistics - Statistical Methods

=head1 SYNOPSIS

    use G::Tools::Statistics;
    $mean = mean(@values);

=head1 DESCRIPTION

This module contains statistical analysis methods, which is 
mostly a simple wrapper around CPAN Statistics:: modules.

=head1 AUTHOR

Kazuharu Arakawa, E<lt>gaou@sfc.keio.ac.jpE<gt>

=head1 COPYRIGHT AND LICENSE

Copyright 2007 by Kazuharu Arakawa

This library is a part of G-language GAE. 

=cut

#::::::::::::::::::::::::::::::
#          Variables
#::::::::::::::::::::::::::::::

# none.

#::::::::::::::::::::::::::::::
#    Let the code begin...
#::::::::::::::::::::::::::::::


=head2 cumulative

  Name: cumulative   -   returns cumulative array of the given array

  Description:
    Returns the cumulative array of the data.

  Usage:
    @cumulative = cumulative(@array_of_values);

  Options:
    None.

  Author: 
    Kazuharu Arakawa (gaou@sfc.keio.ac.jp)

  History:
   20081124-01 initial posting

=cut

sub cumulative {
    my @array;
    my $cum;
    foreach my $val (@_){
	$cum += $val;
	push(@array, $cum);
    }

    return @array;
}



=head2 mean

  Name: mean   -   calculate the mean of the given array of data

  Description:
    Returns the mean of the data.
    This is a wrapper around Statistics::Descriptive module.

  Usage:
    $mean = mean(@array_of_values);

  Options:
    None.

  Author: 
    Kazuharu Arakawa (gaou@sfc.keio.ac.jp)

  History:
   20070607-01 initial posting

=cut

sub mean{
    require Statistics::Descriptive;
    my $stat = Statistics::Descriptive::Full->new();
    $stat->add_data(@_); 
    return $stat->mean();
}



=head2 sum

  Name: sum   -   calculate the sum of the given array of data

  Description:
    Returns the sum of the data.
    This is a wrapper around Statistics::Descriptive module.

  Usage:
    $sum = sum(@array_of_values);

  Options:
    None.

  Author: 
    Kazuharu Arakawa (gaou@sfc.keio.ac.jp)

  History:
   20070607-01 initial posting

=cut

sub sum{
    require Statistics::Descriptive;
    my $stat = Statistics::Descriptive::Full->new();
    $stat->add_data(@_); 
    return $stat->sum();
}



=head2 variance

  Name: variance   -   calculate the variance of the given array of data

  Description:
    Returns the variance of the data. Division by n-1 is used.
    This is a wrapper around Statistics::Descriptive module.

  Usage:
    $variance = variance(@array_of_values);

  Options:
    None.

  Author: 
    Kazuharu Arakawa (gaou@sfc.keio.ac.jp)

  History:
   20070607-01 initial posting

=cut

sub variance{
    require Statistics::Descriptive;
    my $stat = Statistics::Descriptive::Full->new();
    $stat->add_data(@_); 
    return $stat->variance();
}



=head2 standard_deviation

  Name: standard_deviation   -   calculate the standard deviation of the given array of data

  Description:
    Returns the standard deviation of the data. Division by n-1 is used.
    This is a wrapper around Statistics::Descriptive module.

  Usage:
    $standard_deviation = standard_deviation(@array_of_values);

  Options:
    None.

  Author: 
    Kazuharu Arakawa (gaou@sfc.keio.ac.jp)

  History:
   20070607-01 initial posting

=cut

sub standard_deviation{
    require Statistics::Descriptive;
    my $stat = Statistics::Descriptive::Full->new();
    $stat->add_data(@_); 
    return $stat->standard_deviation();
}



=head2 min

  Name: min   -   get the minimum value of the given array of data

  Description:
    Returns the minimum value of the data.
    This is a wrapper around Statistics::Descriptive module.

  Usage:
    $min = min(@array_of_values);

  Options:
    None.

  Author: 
    Kazuharu Arakawa (gaou@sfc.keio.ac.jp)

  History:
   20070607-01 initial posting

=cut

sub min{
    require Statistics::Descriptive;
    my $stat = Statistics::Descriptive::Full->new();
    $stat->add_data(@_); 
    return $stat->min();
}



=head2 max

  Name: max   -   get the maximum value of the given array of data

  Description:
    Returns the maximum value of the data.
    This is a wrapper around Statistics::Descriptive module.

  Usage:
    $max = max(@array_of_values);

  Options:
    None.

  Author: 
    Kazuharu Arakawa (gaou@sfc.keio.ac.jp)

  History:
   20070607-01 initial posting

=cut

sub max{
    require Statistics::Descriptive;
    my $stat = Statistics::Descriptive::Full->new();
    $stat->add_data(@_); 
    return $stat->max();
}



=head2 mindex

  Name: mindex   -   get the index of minimum value of the given array of data

  Description:
    Returns the index of the minimum value of the data.
    This is a wrapper around Statistics::Descriptive module.

  Usage:
    $minimum_index = mindex(@array_of_values);

  Options:
    None.

  Author: 
    Kazuharu Arakawa (gaou@sfc.keio.ac.jp)

  History:
   20070607-01 initial posting

=cut

sub mindex{
    require Statistics::Descriptive;
    my $stat = Statistics::Descriptive::Full->new();
    $stat->add_data(@_); 
    return $stat->mindex();
}



=head2 maxdex

  Name: maxdex   -   get the index of maximum value of the given array of data

  Description:
    Returns the index of the maximum value of the data.
    This is a wrapper around Statistics::Descriptive module.

  Usage:
    $maximum_index = maxdex(@array_of_values);

  Options:
    None.

  Author: 
    Kazuharu Arakawa (gaou@sfc.keio.ac.jp)

  History:
   20070607-01 initial posting

=cut

sub maxdex{
    require Statistics::Descriptive;
    my $stat = Statistics::Descriptive::Full->new();
    $stat->add_data(@_); 
    return $stat->maxdex();
}



=head2 median

  Name: median   -   calculate the median of the given array of data

  Description:
    Returns the median of the data.
    This is a wrapper around Statistics::Descriptive module.

  Usage:
    $median = median(@array_of_values);

  Options:
    None.

  Author: 
    Kazuharu Arakawa (gaou@sfc.keio.ac.jp)

  History:
   20070607-01 initial posting

=cut

sub median{
    require Statistics::Descriptive;
    my $stat = Statistics::Descriptive::Full->new();
    $stat->add_data(@_); 
    return $stat->median();
}



=head2 least_squares_fit

  Name: least_squares_fit   -   calculate the least squares fit of the given array of data

  Description:
    Performs least squares fit on the data. 

    When called in array context, this returns an array of four
    values ($intercept, $slope, $r, $error), where the linear fit is expressed by 
    y = $slope * x + $intercept, and $r is the Pearson's linear correlation coefficient,
    $error is the root-mean-square error.

    When called in scalar context, only $r is returned.

    This is a wrapper around Statistics::Descriptive module.

  Usage:
    ($intercept, $slope, $r, $error) = least_squares_fit(@array_of_values);
      or
    $r = least_squares_fit(@array_of_values);
      or
    $r = scalar least_squares_fit(@array_of_values);

  Options:
    None.

  Author: 
    Kazuharu Arakawa (gaou@sfc.keio.ac.jp)

  History:
   20070607-01 initial posting

=cut

sub least_squares_fit{
    require Statistics::Descriptive;
    my $stat = Statistics::Descriptive::Full->new();
    $stat->add_data(@_); 

    my @result = $stat->least_squares_fit();

    if(wantarray()){
	return @result;
    }else{
	return $result[2];
    }
}




=head2 cor

  Name: cor   -   calculate the correlation of given two array references

  Description:
    Calculates correlation coefficient from two array references, using
    Pearson's, Spearman's or Kendall's methods.

    For the rank correlation methods (spearman and kendall), and for
    pearson in scalar context, only $r is returned.

    For pearson called in array context, this returns an array of four
    values ($intercept, $slope, $r, $error), where the linear fit is expressed by 
    y = $slope * x + $intercept, and $r is the Pearson's R^2 correlation coefficient,
    $error is the mean square error.

    This is a wrapper around Statistics::LineFit and Statistics::RankCorrelation modules.

  Usage:
    ($intercept, $slope, $r, $error) = cor([@array1], [@array2]); # for pearson only.
      or
    $r = cor([@array1], [@array2]); 
      or
    $r = scalar cor([@array1], [@array2]); 

  Options:
    -method    "pearson", "spearman", or "kendall" (default:pearson)
               Method used to calculate the correlation coefficient
    -sorted    sorted rank coefficient when 1 (default:0)

  Author: 
    Kazuharu Arakawa (gaou@sfc.keio.ac.jp)

  History:
   20070607-01 initial posting

=cut

sub cor{
    opt_default(method=>"pearson", sorted=>0);
    my @args = opt_get(@_);
    my $method = opt_val("method");
    my $sorted = opt_val("sorted");

    if($method eq 'spearman' || $method eq 'kendall'){
	require Statistics::RankCorrelation;
	my $stat = Statistics::RankCorrelation->new(@args, sorted=>$sorted);

	return $stat->spearman if($method eq 'spearman');
	return $stat->kendall_tau if($method eq 'kendall');

    }else{
	require Statistics::LineFit;
	my $stat = Statistics::LineFit->new();
	$stat->setData(@args) or die("G::Tools::Statistics::cor  -  Invalid data"); 
	my ($intercept, $slope) = $stat->coefficients();
	my $r = $stat->rSquared();
	my $error = $stat->meanSqError();
	
	if(wantarray()){
	    return ($intercept, $slope, $r, $error);
	}else{
	    return $r;
	}
    }
}




=head2 ttest

  Name: ttest   -   performs Student's t-test on given two array references

  Description:
    Performs Student's t-test (independent or pair-wise) on two array references.

    Only the p-value is returned in scalar context. If called in array context,
    an array of 3 values ($t_value, $p_value, $degree_of_freedom) are returned.

    This is a wrapper around Statistics::TTest and Statistics::DependantTTest modules.

  Usage:
    ($t_value, $p_value, $df) = ttest([@array1], [@array2]);
      or
    $p_value = ttest([@array1], [@array2]); 
      or
    $p_value = scalar ttest([@array1], [@array2]); 

  Options:
    -paired    1 for dependent (paired t-test), 0 for independent (default:0)

  Author: 
    Kazuharu Arakawa (gaou@sfc.keio.ac.jp)

  History:
   20070607-01 initial posting

=cut

sub ttest{
    opt_default(paired=>0);

    my @args = opt_get(@_);
    my $paired = opt_val("paired");

    my ($t_value, $p_value, $df);

    if($paired){
	require Statistics::DependantTTest;
	require Statistics::Distributions;
	my $stat = new Statistics::DependantTTest;
	$stat->load_data('x',@{$args[0]});
	$stat->load_data('y',@{$args[1]});
	($t_value,$df) = $stat->perform_t_test('x','y');
	$p_value = Statistics::Distributions::tprob ($df, $t_value);
    }else{
	require Statistics::TTest;
	my $stat = new Statistics::TTest;  
	$stat->load_data(@args);  

	$t_value = $stat->t_statistic;
	$p_value = $stat->{t_prob};
	$df = $stat->df;
    }

    if(wantarray()){
	return ($t_value, $p_value, $df);
    }else{
	return ($p_value);
    }
}






1;


