MayaChemTools

   1 package StatisticsUtil;
   2 #
   3 # $RCSfile: StatisticsUtil.pm,v $
   4 # $Date: 2008/04/19 16:11:03 $
   5 # $Revision: 1.25 $
   6 #
   7 # Author: Manish Sud <msud@san.rr.com>
   8 #
   9 # Copyright (C) 2004-2008 Manish Sud. All rights reserved.
  10 #
  11 # This file is part of MayaChemTools.
  12 #
  13 # MayaChemTools is free software; you can redistribute it and/or modify it under
  14 # the terms of the GNU Lesser General Public License as published by the Free
  15 # Software Foundation; either version 3 of the License, or (at your option) any
  16 # later version.
  17 #
  18 # MayaChemTools is distributed in the hope that it will be useful, but without
  19 # any warranty; without even the implied warranty of merchantability of fitness
  20 # for a particular purpose.  See the GNU Lesser General Public License for more
  21 # details.
  22 #
  23 # You should have received a copy of the GNU Lesser General Public License
  24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  26 # Boston, MA, 02111-1307, USA.
  27 #
  28 use 5.006;
  29 use strict;
  30 use Exporter;
  31 
  32 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
  33 
  34 $VERSION = '1.00';
  35 @ISA = qw(Exporter);
  36 @EXPORT = qw(Average AverageDeviation Covariance Correlation Factorial FactorialDivison GeometricMean Frequency HarmonicMean KLargest KSmallest Kurtosis Maximum Minimum Mean Median Mode PearsonCorrelation Product Range RSquare Skewness Sum SumOfSquares StandardDeviation StandardDeviationN  StandardError Standardize StandardScores StandardScoresN TrimMean Variance VarianceN);
  37 @EXPORT_OK = qw();
  38 %EXPORT_TAGS = (all  => [@EXPORT, @EXPORT_OK]);
  39 
  40 # Compute the mean of an array of numbers
  41 sub Average {
  42   my($XArrayRef) = @_;
  43   return Mean($XArrayRef);
  44 }
  45 
  46 # Compute the average of the absolute deviation of an array of numbers: SUM( ABS(x[i] - Xmean) ) / n
  47 sub AverageDeviation {
  48   my($XArrayRef) = @_;
  49 
  50   if (!@$XArrayRef) {
  51     return undef;
  52   }
  53   my($AverageDeviation, $Mean, $Value, $SumOfDeviations);
  54 
  55   $AverageDeviation = 0;
  56   $Mean = Mean($XArrayRef);
  57   foreach $Value (@$XArrayRef) {
  58     $SumOfDeviations += abs($Value - $Mean);
  59   }
  60   $AverageDeviation = $SumOfDeviations / @$XArrayRef;
  61 
  62   return $AverageDeviation;
  63 }
  64 
  65 # Compute correlation coefficient between two arrays of numbers
  66 sub Correlation {
  67   my($XArrayRef, $YArrayRef) = @_;
  68   return PearsonCorrelation($XArrayRef, $YArrayRef);
  69 }
  70 
  71 # Compute the covariance between two arrays of numbers: SUM( (x[i] - Xmean) (y[i] - Ymean) ) / n
  72 sub Covariance {
  73   my($XArrayRef, $YArrayRef) = @_;
  74 
  75   if (!(@$XArrayRef && @$YArrayRef && (@$XArrayRef == @$YArrayRef))) {
  76     return undef;
  77   }
  78   my($Covariance, $XMean, $YMean, $Index, $ProductOfDeviations);
  79 
  80   $Covariance = 0;
  81   $XMean = Mean($XArrayRef);
  82   $YMean = Mean($YArrayRef);
  83   $ProductOfDeviations = 0;
  84   for $Index (0 .. $#{@$XArrayRef}) {
  85     $ProductOfDeviations += (($XArrayRef->[$Index] - $XMean) * ($YArrayRef->[$Index] - $YMean));
  86   }
  87   $Covariance = $ProductOfDeviations / @$XArrayRef;
  88   return $Covariance;
  89 }
  90 
  91 # Compute factorial of a number...
  92 sub Factorial {
  93   my($Num) = @_;
  94 
  95   return _Factorial($Num, 1);
  96 }
  97 
  98 # Perform factorial division of two numbers...
  99 sub FactorialDivison {
 100   my($Numerator, $Denominator) = @_;
 101 
 102   # Only works for integer numbers...
 103   if ($Numerator <= 0 || ($Numerator != int($Numerator)) ||
 104      $Denominator <= 0 || ($Denominator != int($Denominator)) ) {
 105     return undef;
 106   }
 107   my($LargerNum, $SmallerNum, $Result);
 108   $LargerNum = ($Numerator > $Denominator) ? $Numerator : $Denominator;
 109   $SmallerNum = ($Numerator < $Denominator) ? $Numerator : $Denominator;
 110 
 111   $Result = _Factorial($LargerNum, $SmallerNum);
 112   if ($Numerator < $Denominator) {
 113     $Result = 1/$Result;
 114   }
 115   return $Result;
 116 }
 117 
 118 # Calculate factorial of a number upto a specific limit...
 119 sub _Factorial {
 120   my($Num, $Limit) = @_;
 121 
 122   # Only works for integer numbers...
 123   if ($Num <= 0 || ($Num != int($Num)) || $Limit < 1) {
 124     return undef;
 125   }
 126 
 127   my($Result) = 1;
 128 
 129   while ($Num > $Limit) {
 130     $Result *= $Num;
 131     $Num--;
 132   }
 133   return $Result;
 134 }
 135 
 136 
 137 # Compute the frequency of occurance of values in an array of numbers. Three different
 138 # invocation methods are supported:
 139 #
 140 # Frequency(\@ArrayRef) : Using the smallest and largest values, group the numbers into
 141 # 10 bins.
 142 #
 143 # Frequency(\@ArrayRef, $NumOfBins) : Using the smallest and largest values, group the
 144 # numbers into specified bins.
 145 #
 146 # Frequency(\@ArrayRef, \@BinRange): Use bin range to goup the values into different bins.
 147 #
 148 # A hash array is returned with keys and values representing range and frequency values respectively.
 149 # The frequency value for a specific key corresponds to all the values which are greater than
 150 # the previous key and less than or equal to the current key. A key value representing maximum value is
 151 # added for generating frequency distribution for specific number of bins, and whenever the maximum
 152 # array value is greater than the maximum specified in bin range, it is also added to bin range.
 153 #
 154 sub Frequency {
 155   my($XArrayRef) = @_;
 156 
 157   if (!@$XArrayRef) {
 158     return undef;
 159   }
 160 
 161   my($BinRange, $NumOfBins, $BinRangeSpecified);
 162 
 163   $BinRangeSpecified = 0;
 164   $NumOfBins = 10;
 165   if (@_ == 2) {
 166     if (ref($_[1]) eq 'ARRAY') {
 167       $BinRange = $_[1];
 168       if (!(@$BinRange && (@$BinRange > 1))) {
 169 	return undef;
 170       }
 171       # Make sure the bin range contains values in increasing order...
 172       my($Index1, $Index2);
 173       for $Index1 (0 .. $#{@$BinRange}) {
 174 	for $Index2 (($Index1 + 1) .. $#{@$BinRange}) {
 175 	  if ($BinRange->[$Index1] >= $BinRange->[$Index2]) {
 176 	    return undef;
 177 	  }
 178 	}
 179       }
 180       $BinRangeSpecified = 1;
 181     }
 182     else {
 183       $NumOfBins = $_[1];
 184       if ($NumOfBins <= 1) {
 185 	return undef;
 186       }
 187     }
 188   }
 189 
 190   # Setup range keys...
 191   my(@RangeKeys);
 192   @RangeKeys = ();
 193 
 194   my($MinValue, $MaxValue) = Range($XArrayRef);
 195   if ($BinRangeSpecified) {
 196     push @RangeKeys, @$BinRange;
 197     if ($MaxValue > $RangeKeys[$#RangeKeys]) {
 198       push @RangeKeys, $MaxValue;
 199     }
 200   }
 201   else {
 202     my($MinValue, $MaxValue) = Range($XArrayRef);
 203     my($Interval) = ($MaxValue - $MinValue)/$NumOfBins;
 204     my($KeyValue) = $MinValue + $Interval;
 205     while ($KeyValue  < $MaxValue) {
 206       push @RangeKeys, $KeyValue;
 207       $KeyValue += $Interval;
 208     }
 209     push @RangeKeys, $MaxValue;
 210   }
 211 
 212   #Setup frequency hash array...
 213   my(%FrequencyMap);
 214   %FrequencyMap = ();
 215 
 216   %FrequencyMap = map { $_ => 0 } @RangeKeys;
 217 
 218   # Count values...
 219   my($Key, $Value);
 220 
 221   VALUE: for $Value (@$XArrayRef) {
 222       for $Key (@RangeKeys) {
 223 	if ($Value <= $Key) {
 224 	  $FrequencyMap{$Key} += 1;
 225 	  next VALUE;
 226 	}
 227       }
 228   }
 229   return (%FrequencyMap);
 230 }
 231 
 232 # Compute the geometric mean of an array of numbers: NthROOT( PRODUCT(x[i]) )
 233 sub GeometricMean {
 234   my($XArrayRef) = @_;
 235 
 236   if (!@$XArrayRef) {
 237     return undef;
 238   }
 239   my($Mean, $Product, $Value);
 240   $Product = 1;
 241   foreach $Value (@$XArrayRef) {
 242     if ($Value <= 0 ) {
 243       return undef;
 244     }
 245     $Product *= $Value;
 246   }
 247   $Mean = $Product ** (1 / @$XArrayRef);
 248   return $Mean;
 249 }
 250 
 251 # Compute the harmonic mean of an array of numbers: 1 / ( SUM(1/x[i]) / n )
 252 sub HarmonicMean {
 253   my($XArrayRef) = @_;
 254 
 255   if (!@$XArrayRef) {
 256     return undef;
 257   }
 258   my($Mean, $Sum, $Value);
 259   $Sum = 0;
 260   foreach $Value (@$XArrayRef) {
 261     if ($Value <= 0 ) {
 262       return undef;
 263     }
 264     $Sum += 1/$Value;
 265   }
 266   $Mean = 1/($Sum/@$XArrayRef);
 267   return $Mean;
 268 }
 269 
 270 # Return the k-largest value from an array of numbers
 271 sub KLargest {
 272   my($XArrayRef, $K) = @_;
 273 
 274   if (!(@$XArrayRef && ($K > 0) && ($K <= @$XArrayRef))) {
 275     return undef;
 276   }
 277   my($KLargest, @SortedXArray);
 278   @SortedXArray = sort { $b <=> $a } @$XArrayRef;
 279   $KLargest = $SortedXArray[$K - 1];
 280   return $KLargest;
 281 }
 282 
 283 # Return the k-smallest value from an array of numbers
 284 sub KSmallest {
 285   my($XArrayRef, $K) = @_;
 286 
 287   if (!(@$XArrayRef && ($K > 0) && ($K <= @$XArrayRef))) {
 288     return undef;
 289   }
 290   my($KSmallest, @SortedXArray);
 291   @SortedXArray = sort { $a <=> $b } @$XArrayRef;
 292   $KSmallest = $SortedXArray[$K - 1];
 293   return $KSmallest;
 294 }
 295 
 296 # Compute the kurtosis of an array of numbers:
 297 # [ {n(n + 1)/(n - 1)(n - 2)(n - 3)}  SUM{ ((x[i] - Xmean)/STDDEV)^4 } ] - {3((n - 1)^2)}/{(n - 2)(n-3)}
 298 #
 299 sub Kurtosis {
 300   my($XArrayRef) = @_;
 301 
 302   if (!@$XArrayRef || ((@$XArrayRef - 3) <= 0)) {
 303     return undef;
 304   }
 305   my($Kurtosis, $Mean, $StandardDeviation, $Value);
 306   $Mean = Mean($XArrayRef);
 307   if (!defined $Mean) {
 308     return undef;
 309   }
 310   $StandardDeviation = StandardDeviation($XArrayRef);
 311   if (!(defined $StandardDeviation &&  $StandardDeviation != 0)) {
 312     return undef;
 313   }
 314 
 315   my($SumOfScores, $SampleSize);
 316   $SumOfScores = 0;
 317   for $Value (@$XArrayRef) {
 318     $SumOfScores += (($Value - $Mean)/$StandardDeviation) ** 4;
 319   }
 320   $SampleSize = @$XArrayRef;
 321   $Kurtosis = ((($SampleSize * ($SampleSize + 1))/(($SampleSize - 1) * ($SampleSize - 2) * ($SampleSize - 3))) * $SumOfScores) - ((3 * (($SampleSize - 1) ** 2))/(($SampleSize - 2) * ($SampleSize - 3)));
 322   return $Kurtosis;
 323 }
 324 
 325 # Return the smallest value from an array of numbers
 326 sub Minimum {
 327   my($XArrayRef) = @_;
 328   return KSmallest($XArrayRef, 1);
 329 }
 330 
 331 # Return the largest value from an array of numbers
 332 sub Maximum {
 333   my($XArrayRef) = @_;
 334   return KLargest($XArrayRef, 1);
 335 }
 336 
 337 # Compute the mean of an array of numbers: SUM( x[i] ) / n
 338 sub Mean {
 339   my($XArrayRef) = @_;
 340 
 341   if (!@$XArrayRef) {
 342     return undef;
 343   }
 344   my($Mean, $Sum, $Value);
 345   $Sum = 0;
 346   foreach $Value (@$XArrayRef) {
 347     $Sum += $Value;
 348   }
 349   $Mean = $Sum / @$XArrayRef;
 350   return $Mean;
 351 }
 352 
 353 # Compute the median value of an array of numbers. For an even number array, it's
 354 # the average of two middle values.
 355 #
 356 # For even values of n: Xsorted[(n - 1)/2 + 1]
 357 # For odd values of n: (Xsorted[n/2] + Xsorted[n/2 + 1])/2
 358 #
 359 sub Median {
 360   my($XArrayRef) = @_;
 361 
 362   if (!@$XArrayRef) {
 363     return undef;
 364   }
 365   my($Median, @SortedXArray);
 366   $Median = 0;
 367   @SortedXArray = sort { $a <=> $b } @$XArrayRef;
 368   if (@$XArrayRef % 2) {
 369     my($MidIndex);
 370     $MidIndex = int(@SortedXArray - 1)/2;
 371     $Median = $SortedXArray[$MidIndex];
 372   }
 373   else {
 374     # Even number array...
 375     my($MidPosition);
 376     $MidPosition = int(@SortedXArray / 2);
 377     $Median = ($SortedXArray[$MidPosition - 1] + $SortedXArray[$MidPosition]) / 2;
 378   }
 379   return $Median;
 380 }
 381 
 382 # Return the most frequently occuring value in an array of numbers
 383 sub Mode {
 384   my($XArrayRef) = @_;
 385 
 386   if (!@$XArrayRef) {
 387     return undef;
 388   }
 389   my($Value, %ValueToCountMap, @CountList, @SortedCountList);
 390   %ValueToCountMap = ();
 391   @CountList = ();
 392   @SortedCountList = ();
 393   for $Value (@$XArrayRef) {
 394     if (exists $ValueToCountMap{$Value}) {
 395       $ValueToCountMap{$Value} += 1;
 396     }
 397     else {
 398       $ValueToCountMap{$Value} = 1;
 399     }
 400   }
 401   for $Value (keys %ValueToCountMap) {
 402     push @CountList, $ValueToCountMap{$Value};
 403   }
 404   @SortedCountList = sort { $b <=> $a } @CountList;
 405 
 406   # Make sure the frequency of mode value is greater than one and check for
 407   # multiple modes as well...
 408   #
 409   my($ModeCount, $ModeValue);
 410   $ModeCount = $SortedCountList[0];
 411   if ($ModeCount <= 1) {
 412     return undef;
 413   }
 414   # Get the first mode value...
 415   VALUE: for $Value (keys %ValueToCountMap) {
 416     if ($ValueToCountMap{$Value} == $ModeCount) {
 417       $ModeValue = $Value;
 418       # Set it to zero to skip it next time...
 419       $ValueToCountMap{$Value} = 0;
 420       last VALUE;
 421     }
 422   }
 423 
 424   if (wantarray) {
 425     # Retrieve all the modes...
 426     my(@Modes, $Count);
 427     @Modes = ();
 428     push @Modes, $ModeValue;
 429     for $Count (@SortedCountList) {
 430       if ($Count == $ModeCount) {
 431       VALUE: for $Value (keys %ValueToCountMap) {
 432 	  if ($ValueToCountMap{$Value} == $ModeCount) {
 433 	    push @Modes, $Value;
 434 	    # Set it to zero to skip it next time...
 435 	    $ValueToCountMap{$Value} = 0;
 436 	    last VALUE;
 437 	  }
 438 	}
 439       }
 440     }
 441     return sort {$b <=> $a} @Modes;
 442   }
 443   else {
 444     return $ModeValue;
 445   }
 446 }
 447 
 448 
 449 # Compute the Pearson correlation coefficient between two arrays of numbers:
 450 #
 451 #  SUM( (x[i] - Xmean)(y[i] - Ymean) ) / SQRT( SUM( (x[i] - Xmean)^2 )(SUM( (y[i] - Ymean)^2 ))   )
 452 #
 453 # It returns values in the range from -1.0 to 1.0
 454 sub PearsonCorrelation {
 455   my($XArrayRef, $YArrayRef) = @_;
 456 
 457   if (!(@$XArrayRef && @$YArrayRef && (@$XArrayRef == @$YArrayRef))) {
 458     return undef;
 459   }
 460   my($Correlation, $XMean, $YMean, $Index, $XValueDeviation, $YValueDeviation, $SquareOfXDeviations, $SquareOfYDeviations, $ProductOfDeviations);
 461 
 462   $Correlation = 0;
 463   $XMean = Mean($XArrayRef);
 464   $YMean = Mean($YArrayRef);
 465   $ProductOfDeviations = 0; $SquareOfXDeviations = 0; $SquareOfYDeviations = 0;
 466   for $Index (0 .. $#{@$XArrayRef}) {
 467     $XValueDeviation = $XArrayRef->[$Index] - $XMean;
 468     $YValueDeviation = $YArrayRef->[$Index] - $YMean;
 469     $ProductOfDeviations += ($XValueDeviation * $YValueDeviation);
 470     $SquareOfXDeviations += $XValueDeviation ** 2;
 471     $SquareOfYDeviations += $YValueDeviation ** 2;
 472   }
 473   $Correlation = $ProductOfDeviations / sqrt($SquareOfXDeviations *  $SquareOfYDeviations);
 474   return $Correlation;
 475 }
 476 
 477 # Return the smallest and largest values from an array of numbers
 478 sub Range {
 479   my($XArrayRef) = @_;
 480 
 481   if (!@$XArrayRef) {
 482     return (undef, undef);
 483   }
 484   my($Smallest, $Largest, @SortedXArray);
 485   @SortedXArray = sort { $a <=> $b } @$XArrayRef;
 486   $Smallest = $SortedXArray[0];
 487   $Largest = $SortedXArray[$#SortedXArray];
 488   return ($Smallest, $Largest);
 489 }
 490 
 491 # Compute square of the Pearson correlation coefficient between two arrays of numbers.
 492 #
 493 sub RSquare {
 494   my($XArrayRef, $YArrayRef) = @_;
 495   my($RSquare, $Correlation);
 496 
 497   $RSquare = undef;
 498   $Correlation = PearsonCorrelation($XArrayRef, $YArrayRef);
 499   if (defined $Correlation) {
 500     $RSquare = $Correlation ** 2;
 501   }
 502   return $RSquare;
 503 }
 504 
 505 # Compute the skewness of an array of numbers:
 506 #  {n/(n - 1)(n - 2)} SUM{ ((x[i] - Xmean)/STDDEV)^3 }
 507 #
 508 sub Skewness {
 509   my($XArrayRef) = @_;
 510 
 511   if (!@$XArrayRef || ((@$XArrayRef - 2) <= 0)) {
 512     return undef;
 513   }
 514   my($Skewness, $Mean, $StandardDeviation, $Value);
 515   $Mean = Mean($XArrayRef);
 516   if (!defined $Mean) {
 517     return undef;
 518   }
 519   $StandardDeviation = StandardDeviation($XArrayRef);
 520   if (!(defined $StandardDeviation &&  $StandardDeviation != 0)) {
 521     return undef;
 522   }
 523 
 524   my($SumOfScores, $SampleSize);
 525   $SumOfScores = 0;
 526   for $Value (@$XArrayRef) {
 527     $SumOfScores += (($Value - $Mean)/$StandardDeviation) ** 3;
 528   }
 529   $SampleSize = @$XArrayRef;
 530   $Skewness = ($SampleSize/(($SampleSize - 1) * ($SampleSize - 2) )) * $SumOfScores;
 531   return $Skewness;
 532 }
 533 
 534 # Compute the standard deviation of an array of numbers
 535 sub StandardDeviation {
 536   my($XArrayRef) = @_;
 537   return CalculateStandardDeviation($XArrayRef, 2);
 538 }
 539 
 540 # Compute the standard deviation of an array of numbers representing entire population
 541 sub StandardDeviationN {
 542   my($XArrayRef) = @_;
 543   return CalculateStandardDeviation($XArrayRef, 1);
 544 }
 545 
 546 # Compute the standard deviation of an array of numbers.
 547 # Mode 1: SQRT ( SUM( (x[i] - mean)^2 ) / n )
 548 # Mode 2: SQRT ( SUM( (x[i] - mean)^2 ) / (n - 1) )
 549 #
 550 sub CalculateStandardDeviation {
 551   my($XArrayRef, $Mode) = @_;
 552 
 553   if (!@$XArrayRef) {
 554     return undef;
 555   }
 556   my($StandardDeviation, $Value, $SquareOfDeviations, $Mean, $N);
 557 
 558   $StandardDeviation = 0;
 559   $Mean = Mean($XArrayRef);
 560   $SquareOfDeviations = 0;
 561   foreach $Value (@$XArrayRef) {
 562     $SquareOfDeviations += ($Value - $Mean) ** 2;
 563   }
 564   $N = ($Mode == 1) ? @$XArrayRef : (@$XArrayRef - 1);
 565   $StandardDeviation =  sqrt($SquareOfDeviations / $N);
 566 
 567   return $StandardDeviation;
 568 }
 569 
 570 # Compute the standard error using standard deviation and sample size
 571 sub StandardError {
 572   my($StandardDeviation, $Count) = @_;
 573 
 574   if ($Count <= 0) {
 575     return undef;
 576   }
 577   my($StandardError);
 578   $StandardError = $StandardDeviation / sqrt($Count);
 579 
 580   return $StandardError;
 581 }
 582 
 583 # Standardize the value using mean and standard deviation
 584 sub Standardize {
 585   my($Value, $Mean, $StandardDeviation) = @_;
 586 
 587   if ($StandardDeviation <= 0) {
 588     return undef;
 589   }
 590   my($StandardizedValue);
 591   $StandardizedValue = ($Value - $Mean)/$StandardDeviation;
 592 
 593   return $StandardizedValue;
 594 }
 595 
 596 # Compute the standard deviation above the mean for an array of numbers.
 597 sub StandardScores {
 598   my($XArrayRef) = @_;
 599   return CalculateStandardScores($XArrayRef, 2);
 600 }
 601 
 602 # Compute the standard deviation above the mean for an array of numbers representing entire population
 603 sub StandardScoresN {
 604   my($XArrayRef) = @_;
 605   return CalculateStandardScores($XArrayRef, 1);
 606 }
 607 
 608 # Compute the standard deviation above the mean for an array of numbers.
 609 # Mode 1:  (x[i] - mean) / n
 610 # Mode 2:  (x[i] - mean) / (n - 1)
 611 #
 612 sub CalculateStandardScores {
 613   my($XArrayRef, $Mode) = @_;
 614 
 615   if (!@$XArrayRef) {
 616     return undef;
 617   }
 618   my(@StandardScores, $Mean, $StandardDeviation, $Value);
 619 
 620   $Mean = Mean($XArrayRef);
 621   $StandardDeviation = CalculateStandardDeviation($XArrayRef, $Mode);
 622   if (!(defined($StandardDeviation) && $StandardDeviation > 0)) {
 623     return undef;
 624   }
 625   @StandardScores = ();
 626   for $Value (@$XArrayRef) {
 627     push @StandardScores, ($Value - $Mean)/$StandardDeviation;
 628   }
 629 
 630   return @StandardScores;
 631 }
 632 
 633 # Compute the product of an array of numbers
 634 sub Product {
 635   my($XArrayRef) = @_;
 636 
 637   if (!@$XArrayRef) {
 638     return undef;
 639   }
 640   my($Product, $Value);
 641   $Product = 1;
 642   foreach $Value (@$XArrayRef) {
 643     $Product *= $Value;
 644   }
 645   return $Product;
 646 }
 647 
 648 # Compute the sum of an array of numbers
 649 sub Sum {
 650   my($XArrayRef) = @_;
 651 
 652   if (!@$XArrayRef) {
 653     return undef;
 654   }
 655   my($Sum, $Value);
 656   $Sum = 0;
 657   foreach $Value (@$XArrayRef) {
 658     $Sum += $Value;
 659   }
 660   return $Sum;
 661 }
 662 
 663 # Compute the sum of squares of an array of numbers
 664 sub SumOfSquares {
 665   my($XArrayRef) = @_;
 666 
 667   if (!@$XArrayRef) {
 668     return undef;
 669   }
 670   my($SumOfSquares, $Value);
 671   $SumOfSquares = 0;
 672   foreach $Value (@$XArrayRef) {
 673     $SumOfSquares += $Value ** 2;
 674   }
 675   return $SumOfSquares;
 676 }
 677 
 678 # Compute the mean of an array of numbers by excluding a fraction of
 679 # numbers from the top and bottom of the data set.
 680 sub TrimMean {
 681   my($XArrayRef, $FractionToExclude) = @_;
 682 
 683   if (!(@$XArrayRef && $FractionToExclude > 0 && $FractionToExclude <= 1)) {
 684     return undef;
 685   }
 686   my($NumberToExclude);
 687   $NumberToExclude = int(@$XArrayRef * $FractionToExclude);
 688   $NumberToExclude = ($NumberToExclude % 2) ? ($NumberToExclude - 1) : $NumberToExclude;
 689   if ($NumberToExclude == @$XArrayRef) {
 690     return undef;
 691   }
 692   my($Mean, $Sum, $Index, $FirstIndex, $LastIndex);
 693   $FirstIndex = $NumberToExclude/2;
 694   $LastIndex = @$XArrayRef - ($NumberToExclude/2) - 1;
 695   $Sum = 0;
 696   my(@SortedXArray);
 697   @SortedXArray = sort { $a <=> $b } @$XArrayRef;
 698   for $Index ($FirstIndex .. $LastIndex) {
 699     $Sum += $SortedXArray[$Index];
 700   }
 701   $Mean = $Sum/(@SortedXArray - $NumberToExclude);
 702   return $Mean;
 703 }
 704 
 705 # Compute the variance of an array of numbers
 706 sub Variance {
 707   my($XArrayRef) = @_;
 708   return CalculateVariance($XArrayRef, 2);
 709 }
 710 
 711 # Compute the variance of an array of numbers representing entire population
 712 sub VarianceN {
 713   my($XArrayRef) = @_;
 714   return CalculateVariance($XArrayRef, 1);
 715 }
 716 
 717 # Compute the variance of an array of numbers:
 718 # Mode 1: SUM( (x[i] - Xmean)^2  / n )
 719 # Mode 2: SUM( (x[i] - Xmean)^2  / (n - 1) )
 720 #
 721 sub CalculateVariance {
 722   my($XArrayRef, $Mode) = @_;
 723 
 724   if (!@$XArrayRef) {
 725     return undef;
 726   }
 727   my($Variance, $Value, $SquareOfDeviations, $Mean, $N);
 728 
 729   $Variance = 0;
 730   $Mean = Mean($XArrayRef);
 731   $SquareOfDeviations = 0;
 732   foreach $Value (@$XArrayRef) {
 733     $SquareOfDeviations += ($Value - $Mean) ** 2;
 734   }
 735   $N = ($Mode == 1) ? @$XArrayRef : (@$XArrayRef - 1);
 736   $Variance =  $SquareOfDeviations / $N;
 737 
 738   return $Variance;
 739 }
 740