1 package StatisticsUtil; 2 # 3 # $RCSfile: StatisticsUtil.pm,v $ 4 # $Date: 2008/04/19 16:11:03 $ 5 # $Revision: 1.25 $ 6 # 7 # Author: Manish Sud <msud@san.rr.com> 8 # 9 # Copyright (C) 2004-2008 Manish Sud. All rights reserved. 10 # 11 # This file is part of MayaChemTools. 12 # 13 # MayaChemTools is free software; you can redistribute it and/or modify it under 14 # the terms of the GNU Lesser General Public License as published by the Free 15 # Software Foundation; either version 3 of the License, or (at your option) any 16 # later version. 17 # 18 # MayaChemTools is distributed in the hope that it will be useful, but without 19 # any warranty; without even the implied warranty of merchantability of fitness 20 # for a particular purpose. See the GNU Lesser General Public License for more 21 # details. 22 # 23 # You should have received a copy of the GNU Lesser General Public License 24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or 25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, 26 # Boston, MA, 02111-1307, USA. 27 # 28 use 5.006; 29 use strict; 30 use Exporter; 31 32 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); 33 34 $VERSION = '1.00'; 35 @ISA = qw(Exporter); 36 @EXPORT = qw(Average AverageDeviation Covariance Correlation Factorial FactorialDivison GeometricMean Frequency HarmonicMean KLargest KSmallest Kurtosis Maximum Minimum Mean Median Mode PearsonCorrelation Product Range RSquare Skewness Sum SumOfSquares StandardDeviation StandardDeviationN StandardError Standardize StandardScores StandardScoresN TrimMean Variance VarianceN); 37 @EXPORT_OK = qw(); 38 %EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]); 39 40 # Compute the mean of an array of numbers 41 sub Average { 42 my($XArrayRef) = @_; 43 return Mean($XArrayRef); 44 } 45 46 # Compute the average of the absolute deviation of an array of numbers: SUM( ABS(x[i] - Xmean) ) / n 47 sub AverageDeviation { 48 my($XArrayRef) = @_; 49 50 if (!@$XArrayRef) { 51 return undef; 52 } 53 my($AverageDeviation, $Mean, $Value, $SumOfDeviations); 54 55 $AverageDeviation = 0; 56 $Mean = Mean($XArrayRef); 57 foreach $Value (@$XArrayRef) { 58 $SumOfDeviations += abs($Value - $Mean); 59 } 60 $AverageDeviation = $SumOfDeviations / @$XArrayRef; 61 62 return $AverageDeviation; 63 } 64 65 # Compute correlation coefficient between two arrays of numbers 66 sub Correlation { 67 my($XArrayRef, $YArrayRef) = @_; 68 return PearsonCorrelation($XArrayRef, $YArrayRef); 69 } 70 71 # Compute the covariance between two arrays of numbers: SUM( (x[i] - Xmean) (y[i] - Ymean) ) / n 72 sub Covariance { 73 my($XArrayRef, $YArrayRef) = @_; 74 75 if (!(@$XArrayRef && @$YArrayRef && (@$XArrayRef == @$YArrayRef))) { 76 return undef; 77 } 78 my($Covariance, $XMean, $YMean, $Index, $ProductOfDeviations); 79 80 $Covariance = 0; 81 $XMean = Mean($XArrayRef); 82 $YMean = Mean($YArrayRef); 83 $ProductOfDeviations = 0; 84 for $Index (0 .. $#{@$XArrayRef}) { 85 $ProductOfDeviations += (($XArrayRef->[$Index] - $XMean) * ($YArrayRef->[$Index] - $YMean)); 86 } 87 $Covariance = $ProductOfDeviations / @$XArrayRef; 88 return $Covariance; 89 } 90 91 # Compute factorial of a number... 92 sub Factorial { 93 my($Num) = @_; 94 95 return _Factorial($Num, 1); 96 } 97 98 # Perform factorial division of two numbers... 99 sub FactorialDivison { 100 my($Numerator, $Denominator) = @_; 101 102 # Only works for integer numbers... 103 if ($Numerator <= 0 || ($Numerator != int($Numerator)) || 104 $Denominator <= 0 || ($Denominator != int($Denominator)) ) { 105 return undef; 106 } 107 my($LargerNum, $SmallerNum, $Result); 108 $LargerNum = ($Numerator > $Denominator) ? $Numerator : $Denominator; 109 $SmallerNum = ($Numerator < $Denominator) ? $Numerator : $Denominator; 110 111 $Result = _Factorial($LargerNum, $SmallerNum); 112 if ($Numerator < $Denominator) { 113 $Result = 1/$Result; 114 } 115 return $Result; 116 } 117 118 # Calculate factorial of a number upto a specific limit... 119 sub _Factorial { 120 my($Num, $Limit) = @_; 121 122 # Only works for integer numbers... 123 if ($Num <= 0 || ($Num != int($Num)) || $Limit < 1) { 124 return undef; 125 } 126 127 my($Result) = 1; 128 129 while ($Num > $Limit) { 130 $Result *= $Num; 131 $Num--; 132 } 133 return $Result; 134 } 135 136 137 # Compute the frequency of occurance of values in an array of numbers. Three different 138 # invocation methods are supported: 139 # 140 # Frequency(\@ArrayRef) : Using the smallest and largest values, group the numbers into 141 # 10 bins. 142 # 143 # Frequency(\@ArrayRef, $NumOfBins) : Using the smallest and largest values, group the 144 # numbers into specified bins. 145 # 146 # Frequency(\@ArrayRef, \@BinRange): Use bin range to goup the values into different bins. 147 # 148 # A hash array is returned with keys and values representing range and frequency values respectively. 149 # The frequency value for a specific key corresponds to all the values which are greater than 150 # the previous key and less than or equal to the current key. A key value representing maximum value is 151 # added for generating frequency distribution for specific number of bins, and whenever the maximum 152 # array value is greater than the maximum specified in bin range, it is also added to bin range. 153 # 154 sub Frequency { 155 my($XArrayRef) = @_; 156 157 if (!@$XArrayRef) { 158 return undef; 159 } 160 161 my($BinRange, $NumOfBins, $BinRangeSpecified); 162 163 $BinRangeSpecified = 0; 164 $NumOfBins = 10; 165 if (@_ == 2) { 166 if (ref($_[1]) eq 'ARRAY') { 167 $BinRange = $_[1]; 168 if (!(@$BinRange && (@$BinRange > 1))) { 169 return undef; 170 } 171 # Make sure the bin range contains values in increasing order... 172 my($Index1, $Index2); 173 for $Index1 (0 .. $#{@$BinRange}) { 174 for $Index2 (($Index1 + 1) .. $#{@$BinRange}) { 175 if ($BinRange->[$Index1] >= $BinRange->[$Index2]) { 176 return undef; 177 } 178 } 179 } 180 $BinRangeSpecified = 1; 181 } 182 else { 183 $NumOfBins = $_[1]; 184 if ($NumOfBins <= 1) { 185 return undef; 186 } 187 } 188 } 189 190 # Setup range keys... 191 my(@RangeKeys); 192 @RangeKeys = (); 193 194 my($MinValue, $MaxValue) = Range($XArrayRef); 195 if ($BinRangeSpecified) { 196 push @RangeKeys, @$BinRange; 197 if ($MaxValue > $RangeKeys[$#RangeKeys]) { 198 push @RangeKeys, $MaxValue; 199 } 200 } 201 else { 202 my($MinValue, $MaxValue) = Range($XArrayRef); 203 my($Interval) = ($MaxValue - $MinValue)/$NumOfBins; 204 my($KeyValue) = $MinValue + $Interval; 205 while ($KeyValue < $MaxValue) { 206 push @RangeKeys, $KeyValue; 207 $KeyValue += $Interval; 208 } 209 push @RangeKeys, $MaxValue; 210 } 211 212 #Setup frequency hash array... 213 my(%FrequencyMap); 214 %FrequencyMap = (); 215 216 %FrequencyMap = map { $_ => 0 } @RangeKeys; 217 218 # Count values... 219 my($Key, $Value); 220 221 VALUE: for $Value (@$XArrayRef) { 222 for $Key (@RangeKeys) { 223 if ($Value <= $Key) { 224 $FrequencyMap{$Key} += 1; 225 next VALUE; 226 } 227 } 228 } 229 return (%FrequencyMap); 230 } 231 232 # Compute the geometric mean of an array of numbers: NthROOT( PRODUCT(x[i]) ) 233 sub GeometricMean { 234 my($XArrayRef) = @_; 235 236 if (!@$XArrayRef) { 237 return undef; 238 } 239 my($Mean, $Product, $Value); 240 $Product = 1; 241 foreach $Value (@$XArrayRef) { 242 if ($Value <= 0 ) { 243 return undef; 244 } 245 $Product *= $Value; 246 } 247 $Mean = $Product ** (1 / @$XArrayRef); 248 return $Mean; 249 } 250 251 # Compute the harmonic mean of an array of numbers: 1 / ( SUM(1/x[i]) / n ) 252 sub HarmonicMean { 253 my($XArrayRef) = @_; 254 255 if (!@$XArrayRef) { 256 return undef; 257 } 258 my($Mean, $Sum, $Value); 259 $Sum = 0; 260 foreach $Value (@$XArrayRef) { 261 if ($Value <= 0 ) { 262 return undef; 263 } 264 $Sum += 1/$Value; 265 } 266 $Mean = 1/($Sum/@$XArrayRef); 267 return $Mean; 268 } 269 270 # Return the k-largest value from an array of numbers 271 sub KLargest { 272 my($XArrayRef, $K) = @_; 273 274 if (!(@$XArrayRef && ($K > 0) && ($K <= @$XArrayRef))) { 275 return undef; 276 } 277 my($KLargest, @SortedXArray); 278 @SortedXArray = sort { $b <=> $a } @$XArrayRef; 279 $KLargest = $SortedXArray[$K - 1]; 280 return $KLargest; 281 } 282 283 # Return the k-smallest value from an array of numbers 284 sub KSmallest { 285 my($XArrayRef, $K) = @_; 286 287 if (!(@$XArrayRef && ($K > 0) && ($K <= @$XArrayRef))) { 288 return undef; 289 } 290 my($KSmallest, @SortedXArray); 291 @SortedXArray = sort { $a <=> $b } @$XArrayRef; 292 $KSmallest = $SortedXArray[$K - 1]; 293 return $KSmallest; 294 } 295 296 # Compute the kurtosis of an array of numbers: 297 # [ {n(n + 1)/(n - 1)(n - 2)(n - 3)} SUM{ ((x[i] - Xmean)/STDDEV)^4 } ] - {3((n - 1)^2)}/{(n - 2)(n-3)} 298 # 299 sub Kurtosis { 300 my($XArrayRef) = @_; 301 302 if (!@$XArrayRef || ((@$XArrayRef - 3) <= 0)) { 303 return undef; 304 } 305 my($Kurtosis, $Mean, $StandardDeviation, $Value); 306 $Mean = Mean($XArrayRef); 307 if (!defined $Mean) { 308 return undef; 309 } 310 $StandardDeviation = StandardDeviation($XArrayRef); 311 if (!(defined $StandardDeviation && $StandardDeviation != 0)) { 312 return undef; 313 } 314 315 my($SumOfScores, $SampleSize); 316 $SumOfScores = 0; 317 for $Value (@$XArrayRef) { 318 $SumOfScores += (($Value - $Mean)/$StandardDeviation) ** 4; 319 } 320 $SampleSize = @$XArrayRef; 321 $Kurtosis = ((($SampleSize * ($SampleSize + 1))/(($SampleSize - 1) * ($SampleSize - 2) * ($SampleSize - 3))) * $SumOfScores) - ((3 * (($SampleSize - 1) ** 2))/(($SampleSize - 2) * ($SampleSize - 3))); 322 return $Kurtosis; 323 } 324 325 # Return the smallest value from an array of numbers 326 sub Minimum { 327 my($XArrayRef) = @_; 328 return KSmallest($XArrayRef, 1); 329 } 330 331 # Return the largest value from an array of numbers 332 sub Maximum { 333 my($XArrayRef) = @_; 334 return KLargest($XArrayRef, 1); 335 } 336 337 # Compute the mean of an array of numbers: SUM( x[i] ) / n 338 sub Mean { 339 my($XArrayRef) = @_; 340 341 if (!@$XArrayRef) { 342 return undef; 343 } 344 my($Mean, $Sum, $Value); 345 $Sum = 0; 346 foreach $Value (@$XArrayRef) { 347 $Sum += $Value; 348 } 349 $Mean = $Sum / @$XArrayRef; 350 return $Mean; 351 } 352 353 # Compute the median value of an array of numbers. For an even number array, it's 354 # the average of two middle values. 355 # 356 # For even values of n: Xsorted[(n - 1)/2 + 1] 357 # For odd values of n: (Xsorted[n/2] + Xsorted[n/2 + 1])/2 358 # 359 sub Median { 360 my($XArrayRef) = @_; 361 362 if (!@$XArrayRef) { 363 return undef; 364 } 365 my($Median, @SortedXArray); 366 $Median = 0; 367 @SortedXArray = sort { $a <=> $b } @$XArrayRef; 368 if (@$XArrayRef % 2) { 369 my($MidIndex); 370 $MidIndex = int(@SortedXArray - 1)/2; 371 $Median = $SortedXArray[$MidIndex]; 372 } 373 else { 374 # Even number array... 375 my($MidPosition); 376 $MidPosition = int(@SortedXArray / 2); 377 $Median = ($SortedXArray[$MidPosition - 1] + $SortedXArray[$MidPosition]) / 2; 378 } 379 return $Median; 380 } 381 382 # Return the most frequently occuring value in an array of numbers 383 sub Mode { 384 my($XArrayRef) = @_; 385 386 if (!@$XArrayRef) { 387 return undef; 388 } 389 my($Value, %ValueToCountMap, @CountList, @SortedCountList); 390 %ValueToCountMap = (); 391 @CountList = (); 392 @SortedCountList = (); 393 for $Value (@$XArrayRef) { 394 if (exists $ValueToCountMap{$Value}) { 395 $ValueToCountMap{$Value} += 1; 396 } 397 else { 398 $ValueToCountMap{$Value} = 1; 399 } 400 } 401 for $Value (keys %ValueToCountMap) { 402 push @CountList, $ValueToCountMap{$Value}; 403 } 404 @SortedCountList = sort { $b <=> $a } @CountList; 405 406 # Make sure the frequency of mode value is greater than one and check for 407 # multiple modes as well... 408 # 409 my($ModeCount, $ModeValue); 410 $ModeCount = $SortedCountList[0]; 411 if ($ModeCount <= 1) { 412 return undef; 413 } 414 # Get the first mode value... 415 VALUE: for $Value (keys %ValueToCountMap) { 416 if ($ValueToCountMap{$Value} == $ModeCount) { 417 $ModeValue = $Value; 418 # Set it to zero to skip it next time... 419 $ValueToCountMap{$Value} = 0; 420 last VALUE; 421 } 422 } 423 424 if (wantarray) { 425 # Retrieve all the modes... 426 my(@Modes, $Count); 427 @Modes = (); 428 push @Modes, $ModeValue; 429 for $Count (@SortedCountList) { 430 if ($Count == $ModeCount) { 431 VALUE: for $Value (keys %ValueToCountMap) { 432 if ($ValueToCountMap{$Value} == $ModeCount) { 433 push @Modes, $Value; 434 # Set it to zero to skip it next time... 435 $ValueToCountMap{$Value} = 0; 436 last VALUE; 437 } 438 } 439 } 440 } 441 return sort {$b <=> $a} @Modes; 442 } 443 else { 444 return $ModeValue; 445 } 446 } 447 448 449 # Compute the Pearson correlation coefficient between two arrays of numbers: 450 # 451 # SUM( (x[i] - Xmean)(y[i] - Ymean) ) / SQRT( SUM( (x[i] - Xmean)^2 )(SUM( (y[i] - Ymean)^2 )) ) 452 # 453 # It returns values in the range from -1.0 to 1.0 454 sub PearsonCorrelation { 455 my($XArrayRef, $YArrayRef) = @_; 456 457 if (!(@$XArrayRef && @$YArrayRef && (@$XArrayRef == @$YArrayRef))) { 458 return undef; 459 } 460 my($Correlation, $XMean, $YMean, $Index, $XValueDeviation, $YValueDeviation, $SquareOfXDeviations, $SquareOfYDeviations, $ProductOfDeviations); 461 462 $Correlation = 0; 463 $XMean = Mean($XArrayRef); 464 $YMean = Mean($YArrayRef); 465 $ProductOfDeviations = 0; $SquareOfXDeviations = 0; $SquareOfYDeviations = 0; 466 for $Index (0 .. $#{@$XArrayRef}) { 467 $XValueDeviation = $XArrayRef->[$Index] - $XMean; 468 $YValueDeviation = $YArrayRef->[$Index] - $YMean; 469 $ProductOfDeviations += ($XValueDeviation * $YValueDeviation); 470 $SquareOfXDeviations += $XValueDeviation ** 2; 471 $SquareOfYDeviations += $YValueDeviation ** 2; 472 } 473 $Correlation = $ProductOfDeviations / sqrt($SquareOfXDeviations * $SquareOfYDeviations); 474 return $Correlation; 475 } 476 477 # Return the smallest and largest values from an array of numbers 478 sub Range { 479 my($XArrayRef) = @_; 480 481 if (!@$XArrayRef) { 482 return (undef, undef); 483 } 484 my($Smallest, $Largest, @SortedXArray); 485 @SortedXArray = sort { $a <=> $b } @$XArrayRef; 486 $Smallest = $SortedXArray[0]; 487 $Largest = $SortedXArray[$#SortedXArray]; 488 return ($Smallest, $Largest); 489 } 490 491 # Compute square of the Pearson correlation coefficient between two arrays of numbers. 492 # 493 sub RSquare { 494 my($XArrayRef, $YArrayRef) = @_; 495 my($RSquare, $Correlation); 496 497 $RSquare = undef; 498 $Correlation = PearsonCorrelation($XArrayRef, $YArrayRef); 499 if (defined $Correlation) { 500 $RSquare = $Correlation ** 2; 501 } 502 return $RSquare; 503 } 504 505 # Compute the skewness of an array of numbers: 506 # {n/(n - 1)(n - 2)} SUM{ ((x[i] - Xmean)/STDDEV)^3 } 507 # 508 sub Skewness { 509 my($XArrayRef) = @_; 510 511 if (!@$XArrayRef || ((@$XArrayRef - 2) <= 0)) { 512 return undef; 513 } 514 my($Skewness, $Mean, $StandardDeviation, $Value); 515 $Mean = Mean($XArrayRef); 516 if (!defined $Mean) { 517 return undef; 518 } 519 $StandardDeviation = StandardDeviation($XArrayRef); 520 if (!(defined $StandardDeviation && $StandardDeviation != 0)) { 521 return undef; 522 } 523 524 my($SumOfScores, $SampleSize); 525 $SumOfScores = 0; 526 for $Value (@$XArrayRef) { 527 $SumOfScores += (($Value - $Mean)/$StandardDeviation) ** 3; 528 } 529 $SampleSize = @$XArrayRef; 530 $Skewness = ($SampleSize/(($SampleSize - 1) * ($SampleSize - 2) )) * $SumOfScores; 531 return $Skewness; 532 } 533 534 # Compute the standard deviation of an array of numbers 535 sub StandardDeviation { 536 my($XArrayRef) = @_; 537 return CalculateStandardDeviation($XArrayRef, 2); 538 } 539 540 # Compute the standard deviation of an array of numbers representing entire population 541 sub StandardDeviationN { 542 my($XArrayRef) = @_; 543 return CalculateStandardDeviation($XArrayRef, 1); 544 } 545 546 # Compute the standard deviation of an array of numbers. 547 # Mode 1: SQRT ( SUM( (x[i] - mean)^2 ) / n ) 548 # Mode 2: SQRT ( SUM( (x[i] - mean)^2 ) / (n - 1) ) 549 # 550 sub CalculateStandardDeviation { 551 my($XArrayRef, $Mode) = @_; 552 553 if (!@$XArrayRef) { 554 return undef; 555 } 556 my($StandardDeviation, $Value, $SquareOfDeviations, $Mean, $N); 557 558 $StandardDeviation = 0; 559 $Mean = Mean($XArrayRef); 560 $SquareOfDeviations = 0; 561 foreach $Value (@$XArrayRef) { 562 $SquareOfDeviations += ($Value - $Mean) ** 2; 563 } 564 $N = ($Mode == 1) ? @$XArrayRef : (@$XArrayRef - 1); 565 $StandardDeviation = sqrt($SquareOfDeviations / $N); 566 567 return $StandardDeviation; 568 } 569 570 # Compute the standard error using standard deviation and sample size 571 sub StandardError { 572 my($StandardDeviation, $Count) = @_; 573 574 if ($Count <= 0) { 575 return undef; 576 } 577 my($StandardError); 578 $StandardError = $StandardDeviation / sqrt($Count); 579 580 return $StandardError; 581 } 582 583 # Standardize the value using mean and standard deviation 584 sub Standardize { 585 my($Value, $Mean, $StandardDeviation) = @_; 586 587 if ($StandardDeviation <= 0) { 588 return undef; 589 } 590 my($StandardizedValue); 591 $StandardizedValue = ($Value - $Mean)/$StandardDeviation; 592 593 return $StandardizedValue; 594 } 595 596 # Compute the standard deviation above the mean for an array of numbers. 597 sub StandardScores { 598 my($XArrayRef) = @_; 599 return CalculateStandardScores($XArrayRef, 2); 600 } 601 602 # Compute the standard deviation above the mean for an array of numbers representing entire population 603 sub StandardScoresN { 604 my($XArrayRef) = @_; 605 return CalculateStandardScores($XArrayRef, 1); 606 } 607 608 # Compute the standard deviation above the mean for an array of numbers. 609 # Mode 1: (x[i] - mean) / n 610 # Mode 2: (x[i] - mean) / (n - 1) 611 # 612 sub CalculateStandardScores { 613 my($XArrayRef, $Mode) = @_; 614 615 if (!@$XArrayRef) { 616 return undef; 617 } 618 my(@StandardScores, $Mean, $StandardDeviation, $Value); 619 620 $Mean = Mean($XArrayRef); 621 $StandardDeviation = CalculateStandardDeviation($XArrayRef, $Mode); 622 if (!(defined($StandardDeviation) && $StandardDeviation > 0)) { 623 return undef; 624 } 625 @StandardScores = (); 626 for $Value (@$XArrayRef) { 627 push @StandardScores, ($Value - $Mean)/$StandardDeviation; 628 } 629 630 return @StandardScores; 631 } 632 633 # Compute the product of an array of numbers 634 sub Product { 635 my($XArrayRef) = @_; 636 637 if (!@$XArrayRef) { 638 return undef; 639 } 640 my($Product, $Value); 641 $Product = 1; 642 foreach $Value (@$XArrayRef) { 643 $Product *= $Value; 644 } 645 return $Product; 646 } 647 648 # Compute the sum of an array of numbers 649 sub Sum { 650 my($XArrayRef) = @_; 651 652 if (!@$XArrayRef) { 653 return undef; 654 } 655 my($Sum, $Value); 656 $Sum = 0; 657 foreach $Value (@$XArrayRef) { 658 $Sum += $Value; 659 } 660 return $Sum; 661 } 662 663 # Compute the sum of squares of an array of numbers 664 sub SumOfSquares { 665 my($XArrayRef) = @_; 666 667 if (!@$XArrayRef) { 668 return undef; 669 } 670 my($SumOfSquares, $Value); 671 $SumOfSquares = 0; 672 foreach $Value (@$XArrayRef) { 673 $SumOfSquares += $Value ** 2; 674 } 675 return $SumOfSquares; 676 } 677 678 # Compute the mean of an array of numbers by excluding a fraction of 679 # numbers from the top and bottom of the data set. 680 sub TrimMean { 681 my($XArrayRef, $FractionToExclude) = @_; 682 683 if (!(@$XArrayRef && $FractionToExclude > 0 && $FractionToExclude <= 1)) { 684 return undef; 685 } 686 my($NumberToExclude); 687 $NumberToExclude = int(@$XArrayRef * $FractionToExclude); 688 $NumberToExclude = ($NumberToExclude % 2) ? ($NumberToExclude - 1) : $NumberToExclude; 689 if ($NumberToExclude == @$XArrayRef) { 690 return undef; 691 } 692 my($Mean, $Sum, $Index, $FirstIndex, $LastIndex); 693 $FirstIndex = $NumberToExclude/2; 694 $LastIndex = @$XArrayRef - ($NumberToExclude/2) - 1; 695 $Sum = 0; 696 my(@SortedXArray); 697 @SortedXArray = sort { $a <=> $b } @$XArrayRef; 698 for $Index ($FirstIndex .. $LastIndex) { 699 $Sum += $SortedXArray[$Index]; 700 } 701 $Mean = $Sum/(@SortedXArray - $NumberToExclude); 702 return $Mean; 703 } 704 705 # Compute the variance of an array of numbers 706 sub Variance { 707 my($XArrayRef) = @_; 708 return CalculateVariance($XArrayRef, 2); 709 } 710 711 # Compute the variance of an array of numbers representing entire population 712 sub VarianceN { 713 my($XArrayRef) = @_; 714 return CalculateVariance($XArrayRef, 1); 715 } 716 717 # Compute the variance of an array of numbers: 718 # Mode 1: SUM( (x[i] - Xmean)^2 / n ) 719 # Mode 2: SUM( (x[i] - Xmean)^2 / (n - 1) ) 720 # 721 sub CalculateVariance { 722 my($XArrayRef, $Mode) = @_; 723 724 if (!@$XArrayRef) { 725 return undef; 726 } 727 my($Variance, $Value, $SquareOfDeviations, $Mean, $N); 728 729 $Variance = 0; 730 $Mean = Mean($XArrayRef); 731 $SquareOfDeviations = 0; 732 foreach $Value (@$XArrayRef) { 733 $SquareOfDeviations += ($Value - $Mean) ** 2; 734 } 735 $N = ($Mode == 1) ? @$XArrayRef : (@$XArrayRef - 1); 736 $Variance = $SquareOfDeviations / $N; 737 738 return $Variance; 739 } 740