MayaChemTools

   1 package StatisticsUtil;
   2 #
   3 # $RCSfile: StatisticsUtil.pm,v $
   4 # $Date: 2011/12/16 00:04:12 $
   5 # $Revision: 1.34 $
   6 #
   7 # Author: Manish Sud <msud@san.rr.com>
   8 #
   9 # Copyright (C) 2004-2012 Manish Sud. All rights reserved.
  10 #
  11 # This file is part of MayaChemTools.
  12 #
  13 # MayaChemTools is free software; you can redistribute it and/or modify it under
  14 # the terms of the GNU Lesser General Public License as published by the Free
  15 # Software Foundation; either version 3 of the License, or (at your option) any
  16 # later version.
  17 #
  18 # MayaChemTools is distributed in the hope that it will be useful, but without
  19 # any warranty; without even the implied warranty of merchantability of fitness
  20 # for a particular purpose.  See the GNU Lesser General Public License for more
  21 # details.
  22 #
  23 # You should have received a copy of the GNU Lesser General Public License
  24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  26 # Boston, MA, 02111-1307, USA.
  27 #
  28 
  29 use strict;
  30 use Exporter;
  31 
  32 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
  33 
  34 @ISA = qw(Exporter);
  35 @EXPORT = qw(Average AverageDeviation Covariance Correlation Euclidean Factorial FactorialDivison GeometricMean Frequency HarmonicMean KLargest KSmallest Kurtosis Maximum Minimum Mean Median Mode PearsonCorrelation Product Range RSquare Skewness Sum SumOfSquares StandardDeviation StandardDeviationN  StandardError Standardize StandardScores StandardScoresN TrimMean Variance VarianceN);
  36 @EXPORT_OK = qw();
  37 %EXPORT_TAGS = (all  => [@EXPORT, @EXPORT_OK]);
  38 
  39 # Compute the mean of an array of numbers
  40 sub Average {
  41   my($XArrayRef) = @_;
  42   return Mean($XArrayRef);
  43 }
  44 
  45 # Compute the average of the absolute deviation of an array of numbers: SUM( ABS(x[i] - Xmean) ) / n
  46 sub AverageDeviation {
  47   my($XArrayRef) = @_;
  48 
  49   if (!@$XArrayRef) {
  50     return undef;
  51   }
  52   my($AverageDeviation, $Mean, $Value, $SumOfDeviations);
  53 
  54   $AverageDeviation = 0;
  55   $Mean = Mean($XArrayRef);
  56   foreach $Value (@$XArrayRef) {
  57     $SumOfDeviations += abs($Value - $Mean);
  58   }
  59   $AverageDeviation = $SumOfDeviations / @$XArrayRef;
  60 
  61   return $AverageDeviation;
  62 }
  63 
  64 # Compute correlation coefficient between two arrays of numbers
  65 sub Correlation {
  66   my($XArrayRef, $YArrayRef) = @_;
  67   return PearsonCorrelation($XArrayRef, $YArrayRef);
  68 }
  69 
  70 # Compute the covariance between two arrays of numbers: SUM( (x[i] - Xmean) (y[i] - Ymean) ) / n
  71 sub Covariance {
  72   my($XArrayRef, $YArrayRef) = @_;
  73 
  74   if (!(@$XArrayRef && @$YArrayRef && (@$XArrayRef == @$YArrayRef))) {
  75     return undef;
  76   }
  77   my($Covariance, $XMean, $YMean, $Index, $ProductOfDeviations);
  78 
  79   $Covariance = 0;
  80   $XMean = Mean($XArrayRef);
  81   $YMean = Mean($YArrayRef);
  82   $ProductOfDeviations = 0;
  83   for $Index (0 .. $#{@$XArrayRef}) {
  84     $ProductOfDeviations += (($XArrayRef->[$Index] - $XMean) * ($YArrayRef->[$Index] - $YMean));
  85   }
  86   $Covariance = $ProductOfDeviations / @$XArrayRef;
  87   return $Covariance;
  88 }
  89 
  90 # Compute the euclidean distance of an array of numbers: SQRT( SUM( x[i] ** 2) )
  91 sub Euclidean {
  92   my($XArrayRef) = @_;
  93 
  94   if (!@$XArrayRef) {
  95     return undef;
  96   }
  97   my($SumOfSquares);
  98 
  99   $SumOfSquares = SumOfSquares($XArrayRef);
 100 
 101   return sqrt $SumOfSquares;
 102 }
 103 
 104 # Compute factorial of a number...
 105 sub Factorial {
 106   my($Num) = @_;
 107 
 108   return _Factorial($Num, 1);
 109 }
 110 
 111 # Perform factorial division of two numbers...
 112 sub FactorialDivison {
 113   my($Numerator, $Denominator) = @_;
 114 
 115   # Only works for integer numbers...
 116   if ($Numerator <= 0 || ($Numerator != int($Numerator)) ||
 117      $Denominator <= 0 || ($Denominator != int($Denominator)) ) {
 118     return undef;
 119   }
 120   my($LargerNum, $SmallerNum, $Result);
 121   $LargerNum = ($Numerator > $Denominator) ? $Numerator : $Denominator;
 122   $SmallerNum = ($Numerator < $Denominator) ? $Numerator : $Denominator;
 123 
 124   $Result = _Factorial($LargerNum, $SmallerNum);
 125   if ($Numerator < $Denominator) {
 126     $Result = 1/$Result;
 127   }
 128   return $Result;
 129 }
 130 
 131 # Calculate factorial of a number upto a specific limit...
 132 sub _Factorial {
 133   my($Num, $Limit) = @_;
 134 
 135   # Only works for integer numbers...
 136   if ($Num <= 0 || ($Num != int($Num)) || $Limit < 1) {
 137     return undef;
 138   }
 139 
 140   my($Result) = 1;
 141 
 142   while ($Num > $Limit) {
 143     $Result *= $Num;
 144     $Num--;
 145   }
 146   return $Result;
 147 }
 148 
 149 
 150 # Compute the frequency of occurance of values in an array of numbers. Three different
 151 # invocation methods are supported:
 152 #
 153 # Frequency(\@ArrayRef) : Using the smallest and largest values, group the numbers into
 154 # 10 bins.
 155 #
 156 # Frequency(\@ArrayRef, $NumOfBins) : Using the smallest and largest values, group the
 157 # numbers into specified bins.
 158 #
 159 # Frequency(\@ArrayRef, \@BinRange): Use bin range to goup the values into different bins.
 160 #
 161 # A hash array is returned with keys and values representing range and frequency values respectively.
 162 # The frequency value for a specific key corresponds to all the values which are greater than
 163 # the previous key and less than or equal to the current key. A key value representing maximum value is
 164 # added for generating frequency distribution for specific number of bins, and whenever the maximum
 165 # array value is greater than the maximum specified in bin range, it is also added to bin range.
 166 #
 167 sub Frequency {
 168   my($XArrayRef) = @_;
 169 
 170   if (!@$XArrayRef) {
 171     return undef;
 172   }
 173 
 174   my($BinRange, $NumOfBins, $BinRangeSpecified);
 175 
 176   $BinRangeSpecified = 0;
 177   $NumOfBins = 10;
 178   if (@_ == 2) {
 179     if (ref($_[1]) eq 'ARRAY') {
 180       $BinRange = $_[1];
 181       if (!(@$BinRange && (@$BinRange > 1))) {
 182         return undef;
 183       }
 184       # Make sure the bin range contains values in increasing order...
 185       my($Index1, $Index2);
 186       for $Index1 (0 .. $#{@$BinRange}) {
 187         for $Index2 (($Index1 + 1) .. $#{@$BinRange}) {
 188           if ($BinRange->[$Index1] >= $BinRange->[$Index2]) {
 189             return undef;
 190           }
 191         }
 192       }
 193       $BinRangeSpecified = 1;
 194     }
 195     else {
 196       $NumOfBins = $_[1];
 197       if ($NumOfBins <= 1) {
 198         return undef;
 199       }
 200     }
 201   }
 202 
 203   # Setup range keys...
 204   my(@RangeKeys);
 205   @RangeKeys = ();
 206 
 207   my($MinValue, $MaxValue) = Range($XArrayRef);
 208   if ($BinRangeSpecified) {
 209     push @RangeKeys, @$BinRange;
 210     if ($MaxValue > $RangeKeys[$#RangeKeys]) {
 211       push @RangeKeys, $MaxValue;
 212     }
 213   }
 214   else {
 215     my($MinValue, $MaxValue) = Range($XArrayRef);
 216     my($Interval) = ($MaxValue - $MinValue)/$NumOfBins;
 217     my($KeyValue) = $MinValue + $Interval;
 218     while ($KeyValue  < $MaxValue) {
 219       push @RangeKeys, $KeyValue;
 220       $KeyValue += $Interval;
 221     }
 222     push @RangeKeys, $MaxValue;
 223   }
 224 
 225   #Setup frequency hash array...
 226   my(%FrequencyMap);
 227   %FrequencyMap = ();
 228 
 229   %FrequencyMap = map { $_ => 0 } @RangeKeys;
 230 
 231   # Count values...
 232   my($Key, $Value);
 233 
 234   VALUE: for $Value (@$XArrayRef) {
 235       for $Key (@RangeKeys) {
 236         if ($Value <= $Key) {
 237           $FrequencyMap{$Key} += 1;
 238           next VALUE;
 239         }
 240       }
 241   }
 242   return (%FrequencyMap);
 243 }
 244 
 245 # Compute the geometric mean of an array of numbers: NthROOT( PRODUCT(x[i]) )
 246 sub GeometricMean {
 247   my($XArrayRef) = @_;
 248 
 249   if (!@$XArrayRef) {
 250     return undef;
 251   }
 252   my($Mean, $Product, $Value);
 253   $Product = 1;
 254   foreach $Value (@$XArrayRef) {
 255     if ($Value <= 0 ) {
 256       return undef;
 257     }
 258     $Product *= $Value;
 259   }
 260   $Mean = $Product ** (1 / @$XArrayRef);
 261   return $Mean;
 262 }
 263 
 264 # Compute the harmonic mean of an array of numbers: 1 / ( SUM(1/x[i]) / n )
 265 sub HarmonicMean {
 266   my($XArrayRef) = @_;
 267 
 268   if (!@$XArrayRef) {
 269     return undef;
 270   }
 271   my($Mean, $Sum, $Value);
 272   $Sum = 0;
 273   foreach $Value (@$XArrayRef) {
 274     if ($Value <= 0 ) {
 275       return undef;
 276     }
 277     $Sum += 1/$Value;
 278   }
 279   $Mean = 1/($Sum/@$XArrayRef);
 280   return $Mean;
 281 }
 282 
 283 # Return the k-largest value from an array of numbers
 284 sub KLargest {
 285   my($XArrayRef, $K) = @_;
 286 
 287   if (!(@$XArrayRef && ($K > 0) && ($K <= @$XArrayRef))) {
 288     return undef;
 289   }
 290   my($KLargest, @SortedXArray);
 291   @SortedXArray = sort { $b <=> $a } @$XArrayRef;
 292   $KLargest = $SortedXArray[$K - 1];
 293   return $KLargest;
 294 }
 295 
 296 # Return the k-smallest value from an array of numbers
 297 sub KSmallest {
 298   my($XArrayRef, $K) = @_;
 299 
 300   if (!(@$XArrayRef && ($K > 0) && ($K <= @$XArrayRef))) {
 301     return undef;
 302   }
 303   my($KSmallest, @SortedXArray);
 304   @SortedXArray = sort { $a <=> $b } @$XArrayRef;
 305   $KSmallest = $SortedXArray[$K - 1];
 306   return $KSmallest;
 307 }
 308 
 309 # Compute the kurtosis of an array of numbers:
 310 # [ {n(n + 1)/(n - 1)(n - 2)(n - 3)}  SUM{ ((x[i] - Xmean)/STDDEV)^4 } ] - {3((n - 1)^2)}/{(n - 2)(n-3)}
 311 #
 312 sub Kurtosis {
 313   my($XArrayRef) = @_;
 314 
 315   if (!@$XArrayRef || ((@$XArrayRef - 3) <= 0)) {
 316     return undef;
 317   }
 318   my($Kurtosis, $Mean, $StandardDeviation, $Value);
 319   $Mean = Mean($XArrayRef);
 320   if (!defined $Mean) {
 321     return undef;
 322   }
 323   $StandardDeviation = StandardDeviation($XArrayRef);
 324   if (!(defined $StandardDeviation &&  $StandardDeviation != 0)) {
 325     return undef;
 326   }
 327 
 328   my($SumOfScores, $SampleSize);
 329   $SumOfScores = 0;
 330   for $Value (@$XArrayRef) {
 331     $SumOfScores += (($Value - $Mean)/$StandardDeviation) ** 4;
 332   }
 333   $SampleSize = @$XArrayRef;
 334   $Kurtosis = ((($SampleSize * ($SampleSize + 1))/(($SampleSize - 1) * ($SampleSize - 2) * ($SampleSize - 3))) * $SumOfScores) - ((3 * (($SampleSize - 1) ** 2))/(($SampleSize - 2) * ($SampleSize - 3)));
 335   return $Kurtosis;
 336 }
 337 
 338 # Return the smallest value from an array of numbers
 339 sub Minimum {
 340   my($XArrayRef) = @_;
 341   return KSmallest($XArrayRef, 1);
 342 }
 343 
 344 # Return the largest value from an array of numbers
 345 sub Maximum {
 346   my($XArrayRef) = @_;
 347   return KLargest($XArrayRef, 1);
 348 }
 349 
 350 # Compute the mean of an array of numbers: SUM( x[i] ) / n
 351 sub Mean {
 352   my($XArrayRef) = @_;
 353 
 354   if (!@$XArrayRef) {
 355     return undef;
 356   }
 357   my($Mean, $Sum, $Value);
 358   $Sum = 0;
 359   foreach $Value (@$XArrayRef) {
 360     $Sum += $Value;
 361   }
 362   $Mean = $Sum / @$XArrayRef;
 363   return $Mean;
 364 }
 365 
 366 # Compute the median value of an array of numbers. For an even number array, it's
 367 # the average of two middle values.
 368 #
 369 # For even values of n: Xsorted[(n - 1)/2 + 1]
 370 # For odd values of n: (Xsorted[n/2] + Xsorted[n/2 + 1])/2
 371 #
 372 sub Median {
 373   my($XArrayRef) = @_;
 374 
 375   if (!@$XArrayRef) {
 376     return undef;
 377   }
 378   my($Median, @SortedXArray);
 379   $Median = 0;
 380   @SortedXArray = sort { $a <=> $b } @$XArrayRef;
 381   if (@$XArrayRef % 2) {
 382     my($MidIndex);
 383     $MidIndex = int(@SortedXArray - 1)/2;
 384     $Median = $SortedXArray[$MidIndex];
 385   }
 386   else {
 387     # Even number array...
 388     my($MidPosition);
 389     $MidPosition = int(@SortedXArray / 2);
 390     $Median = ($SortedXArray[$MidPosition - 1] + $SortedXArray[$MidPosition]) / 2;
 391   }
 392   return $Median;
 393 }
 394 
 395 # Return the most frequently occuring value in an array of numbers
 396 sub Mode {
 397   my($XArrayRef) = @_;
 398 
 399   if (!@$XArrayRef) {
 400     return undef;
 401   }
 402   my($Value, %ValueToCountMap, @CountList, @SortedCountList);
 403   %ValueToCountMap = ();
 404   @CountList = ();
 405   @SortedCountList = ();
 406   for $Value (@$XArrayRef) {
 407     if (exists $ValueToCountMap{$Value}) {
 408       $ValueToCountMap{$Value} += 1;
 409     }
 410     else {
 411       $ValueToCountMap{$Value} = 1;
 412     }
 413   }
 414   for $Value (keys %ValueToCountMap) {
 415     push @CountList, $ValueToCountMap{$Value};
 416   }
 417   @SortedCountList = sort { $b <=> $a } @CountList;
 418 
 419   # Make sure the frequency of mode value is greater than one and check for
 420   # multiple modes as well...
 421   #
 422   my($ModeCount, $ModeValue);
 423   $ModeCount = $SortedCountList[0];
 424   if ($ModeCount <= 1) {
 425     return undef;
 426   }
 427   # Get the first mode value...
 428   VALUE: for $Value (keys %ValueToCountMap) {
 429     if ($ValueToCountMap{$Value} == $ModeCount) {
 430       $ModeValue = $Value;
 431       # Set it to zero to skip it next time...
 432       $ValueToCountMap{$Value} = 0;
 433       last VALUE;
 434     }
 435   }
 436 
 437   if (wantarray) {
 438     # Retrieve all the modes...
 439     my(@Modes, $Count);
 440     @Modes = ();
 441     push @Modes, $ModeValue;
 442     for $Count (@SortedCountList) {
 443       if ($Count == $ModeCount) {
 444       VALUE: for $Value (keys %ValueToCountMap) {
 445           if ($ValueToCountMap{$Value} == $ModeCount) {
 446             push @Modes, $Value;
 447             # Set it to zero to skip it next time...
 448             $ValueToCountMap{$Value} = 0;
 449             last VALUE;
 450           }
 451         }
 452       }
 453     }
 454     return sort {$b <=> $a} @Modes;
 455   }
 456   else {
 457     return $ModeValue;
 458   }
 459 }
 460 
 461 
 462 # Compute the Pearson correlation coefficient between two arrays of numbers:
 463 #
 464 #  SUM( (x[i] - Xmean)(y[i] - Ymean) ) / SQRT( SUM( (x[i] - Xmean)^2 )(SUM( (y[i] - Ymean)^2 ))   )
 465 #
 466 # It returns values in the range from -1.0 to 1.0
 467 sub PearsonCorrelation {
 468   my($XArrayRef, $YArrayRef) = @_;
 469 
 470   if (!(@$XArrayRef && @$YArrayRef && (@$XArrayRef == @$YArrayRef))) {
 471     return undef;
 472   }
 473   my($Correlation, $XMean, $YMean, $Index, $XValueDeviation, $YValueDeviation, $SquareOfXDeviations, $SquareOfYDeviations, $ProductOfDeviations);
 474 
 475   $Correlation = 0;
 476   $XMean = Mean($XArrayRef);
 477   $YMean = Mean($YArrayRef);
 478   $ProductOfDeviations = 0; $SquareOfXDeviations = 0; $SquareOfYDeviations = 0;
 479   for $Index (0 .. $#{@$XArrayRef}) {
 480     $XValueDeviation = $XArrayRef->[$Index] - $XMean;
 481     $YValueDeviation = $YArrayRef->[$Index] - $YMean;
 482     $ProductOfDeviations += ($XValueDeviation * $YValueDeviation);
 483     $SquareOfXDeviations += $XValueDeviation ** 2;
 484     $SquareOfYDeviations += $YValueDeviation ** 2;
 485   }
 486   $Correlation = $ProductOfDeviations / sqrt($SquareOfXDeviations *  $SquareOfYDeviations);
 487   return $Correlation;
 488 }
 489 
 490 # Return the smallest and largest values from an array of numbers
 491 sub Range {
 492   my($XArrayRef) = @_;
 493 
 494   if (!@$XArrayRef) {
 495     return (undef, undef);
 496   }
 497   my($Smallest, $Largest, @SortedXArray);
 498   @SortedXArray = sort { $a <=> $b } @$XArrayRef;
 499   $Smallest = $SortedXArray[0];
 500   $Largest = $SortedXArray[$#SortedXArray];
 501   return ($Smallest, $Largest);
 502 }
 503 
 504 # Compute square of the Pearson correlation coefficient between two arrays of numbers.
 505 #
 506 sub RSquare {
 507   my($XArrayRef, $YArrayRef) = @_;
 508   my($RSquare, $Correlation);
 509 
 510   $RSquare = undef;
 511   $Correlation = PearsonCorrelation($XArrayRef, $YArrayRef);
 512   if (defined $Correlation) {
 513     $RSquare = $Correlation ** 2;
 514   }
 515   return $RSquare;
 516 }
 517 
 518 # Compute the skewness of an array of numbers:
 519 #  {n/(n - 1)(n - 2)} SUM{ ((x[i] - Xmean)/STDDEV)^3 }
 520 #
 521 sub Skewness {
 522   my($XArrayRef) = @_;
 523 
 524   if (!@$XArrayRef || ((@$XArrayRef - 2) <= 0)) {
 525     return undef;
 526   }
 527   my($Skewness, $Mean, $StandardDeviation, $Value);
 528   $Mean = Mean($XArrayRef);
 529   if (!defined $Mean) {
 530     return undef;
 531   }
 532   $StandardDeviation = StandardDeviation($XArrayRef);
 533   if (!(defined $StandardDeviation &&  $StandardDeviation != 0)) {
 534     return undef;
 535   }
 536 
 537   my($SumOfScores, $SampleSize);
 538   $SumOfScores = 0;
 539   for $Value (@$XArrayRef) {
 540     $SumOfScores += (($Value - $Mean)/$StandardDeviation) ** 3;
 541   }
 542   $SampleSize = @$XArrayRef;
 543   $Skewness = ($SampleSize/(($SampleSize - 1) * ($SampleSize - 2) )) * $SumOfScores;
 544   return $Skewness;
 545 }
 546 
 547 # Compute the standard deviation of an array of numbers
 548 sub StandardDeviation {
 549   my($XArrayRef) = @_;
 550   return _CalculateStandardDeviation($XArrayRef, 2);
 551 }
 552 
 553 # Compute the standard deviation of an array of numbers representing entire population
 554 sub StandardDeviationN {
 555   my($XArrayRef) = @_;
 556   return _CalculateStandardDeviation($XArrayRef, 1);
 557 }
 558 
 559 # Compute the standard deviation of an array of numbers.
 560 # Mode 1: SQRT ( SUM( (x[i] - mean)^2 ) / n )
 561 # Mode 2: SQRT ( SUM( (x[i] - mean)^2 ) / (n - 1) )
 562 #
 563 sub _CalculateStandardDeviation {
 564   my($XArrayRef, $Mode) = @_;
 565 
 566   if (!@$XArrayRef) {
 567     return undef;
 568   }
 569   my($StandardDeviation, $Value, $SquareOfDeviations, $Mean, $N);
 570 
 571   $StandardDeviation = 0;
 572   $Mean = Mean($XArrayRef);
 573   $SquareOfDeviations = 0;
 574   foreach $Value (@$XArrayRef) {
 575     $SquareOfDeviations += ($Value - $Mean) ** 2;
 576   }
 577   $N = ($Mode == 1) ? @$XArrayRef : (@$XArrayRef - 1);
 578   $StandardDeviation =  sqrt($SquareOfDeviations / $N);
 579 
 580   return $StandardDeviation;
 581 }
 582 
 583 # Compute the standard error using standard deviation and sample size
 584 sub StandardError {
 585   my($StandardDeviation, $Count) = @_;
 586 
 587   if ($Count <= 0) {
 588     return undef;
 589   }
 590   my($StandardError);
 591   $StandardError = $StandardDeviation / sqrt($Count);
 592 
 593   return $StandardError;
 594 }
 595 
 596 # Standardize the value using mean and standard deviation
 597 sub Standardize {
 598   my($Value, $Mean, $StandardDeviation) = @_;
 599 
 600   if ($StandardDeviation <= 0) {
 601     return undef;
 602   }
 603   my($StandardizedValue);
 604   $StandardizedValue = ($Value - $Mean)/$StandardDeviation;
 605 
 606   return $StandardizedValue;
 607 }
 608 
 609 # Compute the standard deviation above the mean for an array of numbers.
 610 sub StandardScores {
 611   my($XArrayRef) = @_;
 612   return _CalculateStandardScores($XArrayRef, 2);
 613 }
 614 
 615 # Compute the standard deviation above the mean for an array of numbers representing entire population
 616 sub StandardScoresN {
 617   my($XArrayRef) = @_;
 618   return _CalculateStandardScores($XArrayRef, 1);
 619 }
 620 
 621 # Compute the standard deviation above the mean for an array of numbers.
 622 # Mode 1:  (x[i] - mean) / n
 623 # Mode 2:  (x[i] - mean) / (n - 1)
 624 #
 625 sub _CalculateStandardScores {
 626   my($XArrayRef, $Mode) = @_;
 627 
 628   if (!@$XArrayRef) {
 629     return undef;
 630   }
 631   my(@StandardScores, $Mean, $StandardDeviation, $Value);
 632 
 633   $Mean = Mean($XArrayRef);
 634   $StandardDeviation = _CalculateStandardDeviation($XArrayRef, $Mode);
 635   if (!(defined($StandardDeviation) && $StandardDeviation > 0)) {
 636     return undef;
 637   }
 638   @StandardScores = ();
 639   for $Value (@$XArrayRef) {
 640     push @StandardScores, ($Value - $Mean)/$StandardDeviation;
 641   }
 642 
 643   return @StandardScores;
 644 }
 645 
 646 # Compute the product of an array of numbers
 647 sub Product {
 648   my($XArrayRef) = @_;
 649 
 650   if (!@$XArrayRef) {
 651     return undef;
 652   }
 653   my($Product, $Value);
 654   $Product = 1;
 655   foreach $Value (@$XArrayRef) {
 656     $Product *= $Value;
 657   }
 658   return $Product;
 659 }
 660 
 661 # Compute the sum of an array of numbers
 662 sub Sum {
 663   my($XArrayRef) = @_;
 664 
 665   if (!@$XArrayRef) {
 666     return undef;
 667   }
 668   my($Sum, $Value);
 669   $Sum = 0;
 670   foreach $Value (@$XArrayRef) {
 671     $Sum += $Value;
 672   }
 673   return $Sum;
 674 }
 675 
 676 # Compute the sum of squares of an array of numbers
 677 sub SumOfSquares {
 678   my($XArrayRef) = @_;
 679 
 680   if (!@$XArrayRef) {
 681     return undef;
 682   }
 683   my($SumOfSquares, $Value);
 684   $SumOfSquares = 0;
 685   foreach $Value (@$XArrayRef) {
 686     $SumOfSquares += $Value ** 2;
 687   }
 688   return $SumOfSquares;
 689 }
 690 
 691 # Compute the mean of an array of numbers by excluding a fraction of
 692 # numbers from the top and bottom of the data set.
 693 sub TrimMean {
 694   my($XArrayRef, $FractionToExclude) = @_;
 695 
 696   if (!(@$XArrayRef && $FractionToExclude > 0 && $FractionToExclude <= 1)) {
 697     return undef;
 698   }
 699   my($NumberToExclude);
 700   $NumberToExclude = int(@$XArrayRef * $FractionToExclude);
 701   $NumberToExclude = ($NumberToExclude % 2) ? ($NumberToExclude - 1) : $NumberToExclude;
 702   if ($NumberToExclude == @$XArrayRef) {
 703     return undef;
 704   }
 705   my($Mean, $Sum, $Index, $FirstIndex, $LastIndex);
 706   $FirstIndex = $NumberToExclude/2;
 707   $LastIndex = @$XArrayRef - ($NumberToExclude/2) - 1;
 708   $Sum = 0;
 709   my(@SortedXArray);
 710   @SortedXArray = sort { $a <=> $b } @$XArrayRef;
 711   for $Index ($FirstIndex .. $LastIndex) {
 712     $Sum += $SortedXArray[$Index];
 713   }
 714   $Mean = $Sum/(@SortedXArray - $NumberToExclude);
 715   return $Mean;
 716 }
 717 
 718 # Compute the variance of an array of numbers
 719 sub Variance {
 720   my($XArrayRef) = @_;
 721   return _CalculateVariance($XArrayRef, 2);
 722 }
 723 
 724 # Compute the variance of an array of numbers representing entire population
 725 sub VarianceN {
 726   my($XArrayRef) = @_;
 727   return _CalculateVariance($XArrayRef, 1);
 728 }
 729 
 730 # Compute the variance of an array of numbers:
 731 # Mode 1: SUM( (x[i] - Xmean)^2  / n )
 732 # Mode 2: SUM( (x[i] - Xmean)^2  / (n - 1) )
 733 #
 734 sub _CalculateVariance {
 735   my($XArrayRef, $Mode) = @_;
 736 
 737   if (!@$XArrayRef) {
 738     return undef;
 739   }
 740   my($Variance, $Value, $SquareOfDeviations, $Mean, $N);
 741 
 742   $Variance = 0;
 743   $Mean = Mean($XArrayRef);
 744   $SquareOfDeviations = 0;
 745   foreach $Value (@$XArrayRef) {
 746     $SquareOfDeviations += ($Value - $Mean) ** 2;
 747   }
 748   $N = ($Mode == 1) ? @$XArrayRef : (@$XArrayRef - 1);
 749   $Variance =  $SquareOfDeviations / $N;
 750 
 751   return $Variance;
 752 }
 753