1 package StatisticsUtil; 2 # 3 # $RCSfile: StatisticsUtil.pm,v $ 4 # $Date: 2011/12/16 00:04:12 $ 5 # $Revision: 1.34 $ 6 # 7 # Author: Manish Sud <msud@san.rr.com> 8 # 9 # Copyright (C) 2004-2012 Manish Sud. All rights reserved. 10 # 11 # This file is part of MayaChemTools. 12 # 13 # MayaChemTools is free software; you can redistribute it and/or modify it under 14 # the terms of the GNU Lesser General Public License as published by the Free 15 # Software Foundation; either version 3 of the License, or (at your option) any 16 # later version. 17 # 18 # MayaChemTools is distributed in the hope that it will be useful, but without 19 # any warranty; without even the implied warranty of merchantability of fitness 20 # for a particular purpose. See the GNU Lesser General Public License for more 21 # details. 22 # 23 # You should have received a copy of the GNU Lesser General Public License 24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or 25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, 26 # Boston, MA, 02111-1307, USA. 27 # 28 29 use strict; 30 use Exporter; 31 32 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); 33 34 @ISA = qw(Exporter); 35 @EXPORT = qw(Average AverageDeviation Covariance Correlation Euclidean Factorial FactorialDivison GeometricMean Frequency HarmonicMean KLargest KSmallest Kurtosis Maximum Minimum Mean Median Mode PearsonCorrelation Product Range RSquare Skewness Sum SumOfSquares StandardDeviation StandardDeviationN StandardError Standardize StandardScores StandardScoresN TrimMean Variance VarianceN); 36 @EXPORT_OK = qw(); 37 %EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]); 38 39 # Compute the mean of an array of numbers 40 sub Average { 41 my($XArrayRef) = @_; 42 return Mean($XArrayRef); 43 } 44 45 # Compute the average of the absolute deviation of an array of numbers: SUM( ABS(x[i] - Xmean) ) / n 46 sub AverageDeviation { 47 my($XArrayRef) = @_; 48 49 if (!@$XArrayRef) { 50 return undef; 51 } 52 my($AverageDeviation, $Mean, $Value, $SumOfDeviations); 53 54 $AverageDeviation = 0; 55 $Mean = Mean($XArrayRef); 56 foreach $Value (@$XArrayRef) { 57 $SumOfDeviations += abs($Value - $Mean); 58 } 59 $AverageDeviation = $SumOfDeviations / @$XArrayRef; 60 61 return $AverageDeviation; 62 } 63 64 # Compute correlation coefficient between two arrays of numbers 65 sub Correlation { 66 my($XArrayRef, $YArrayRef) = @_; 67 return PearsonCorrelation($XArrayRef, $YArrayRef); 68 } 69 70 # Compute the covariance between two arrays of numbers: SUM( (x[i] - Xmean) (y[i] - Ymean) ) / n 71 sub Covariance { 72 my($XArrayRef, $YArrayRef) = @_; 73 74 if (!(@$XArrayRef && @$YArrayRef && (@$XArrayRef == @$YArrayRef))) { 75 return undef; 76 } 77 my($Covariance, $XMean, $YMean, $Index, $ProductOfDeviations); 78 79 $Covariance = 0; 80 $XMean = Mean($XArrayRef); 81 $YMean = Mean($YArrayRef); 82 $ProductOfDeviations = 0; 83 for $Index (0 .. $#{@$XArrayRef}) { 84 $ProductOfDeviations += (($XArrayRef->[$Index] - $XMean) * ($YArrayRef->[$Index] - $YMean)); 85 } 86 $Covariance = $ProductOfDeviations / @$XArrayRef; 87 return $Covariance; 88 } 89 90 # Compute the euclidean distance of an array of numbers: SQRT( SUM( x[i] ** 2) ) 91 sub Euclidean { 92 my($XArrayRef) = @_; 93 94 if (!@$XArrayRef) { 95 return undef; 96 } 97 my($SumOfSquares); 98 99 $SumOfSquares = SumOfSquares($XArrayRef); 100 101 return sqrt $SumOfSquares; 102 } 103 104 # Compute factorial of a number... 105 sub Factorial { 106 my($Num) = @_; 107 108 return _Factorial($Num, 1); 109 } 110 111 # Perform factorial division of two numbers... 112 sub FactorialDivison { 113 my($Numerator, $Denominator) = @_; 114 115 # Only works for integer numbers... 116 if ($Numerator <= 0 || ($Numerator != int($Numerator)) || 117 $Denominator <= 0 || ($Denominator != int($Denominator)) ) { 118 return undef; 119 } 120 my($LargerNum, $SmallerNum, $Result); 121 $LargerNum = ($Numerator > $Denominator) ? $Numerator : $Denominator; 122 $SmallerNum = ($Numerator < $Denominator) ? $Numerator : $Denominator; 123 124 $Result = _Factorial($LargerNum, $SmallerNum); 125 if ($Numerator < $Denominator) { 126 $Result = 1/$Result; 127 } 128 return $Result; 129 } 130 131 # Calculate factorial of a number upto a specific limit... 132 sub _Factorial { 133 my($Num, $Limit) = @_; 134 135 # Only works for integer numbers... 136 if ($Num <= 0 || ($Num != int($Num)) || $Limit < 1) { 137 return undef; 138 } 139 140 my($Result) = 1; 141 142 while ($Num > $Limit) { 143 $Result *= $Num; 144 $Num--; 145 } 146 return $Result; 147 } 148 149 150 # Compute the frequency of occurance of values in an array of numbers. Three different 151 # invocation methods are supported: 152 # 153 # Frequency(\@ArrayRef) : Using the smallest and largest values, group the numbers into 154 # 10 bins. 155 # 156 # Frequency(\@ArrayRef, $NumOfBins) : Using the smallest and largest values, group the 157 # numbers into specified bins. 158 # 159 # Frequency(\@ArrayRef, \@BinRange): Use bin range to goup the values into different bins. 160 # 161 # A hash array is returned with keys and values representing range and frequency values respectively. 162 # The frequency value for a specific key corresponds to all the values which are greater than 163 # the previous key and less than or equal to the current key. A key value representing maximum value is 164 # added for generating frequency distribution for specific number of bins, and whenever the maximum 165 # array value is greater than the maximum specified in bin range, it is also added to bin range. 166 # 167 sub Frequency { 168 my($XArrayRef) = @_; 169 170 if (!@$XArrayRef) { 171 return undef; 172 } 173 174 my($BinRange, $NumOfBins, $BinRangeSpecified); 175 176 $BinRangeSpecified = 0; 177 $NumOfBins = 10; 178 if (@_ == 2) { 179 if (ref($_[1]) eq 'ARRAY') { 180 $BinRange = $_[1]; 181 if (!(@$BinRange && (@$BinRange > 1))) { 182 return undef; 183 } 184 # Make sure the bin range contains values in increasing order... 185 my($Index1, $Index2); 186 for $Index1 (0 .. $#{@$BinRange}) { 187 for $Index2 (($Index1 + 1) .. $#{@$BinRange}) { 188 if ($BinRange->[$Index1] >= $BinRange->[$Index2]) { 189 return undef; 190 } 191 } 192 } 193 $BinRangeSpecified = 1; 194 } 195 else { 196 $NumOfBins = $_[1]; 197 if ($NumOfBins <= 1) { 198 return undef; 199 } 200 } 201 } 202 203 # Setup range keys... 204 my(@RangeKeys); 205 @RangeKeys = (); 206 207 my($MinValue, $MaxValue) = Range($XArrayRef); 208 if ($BinRangeSpecified) { 209 push @RangeKeys, @$BinRange; 210 if ($MaxValue > $RangeKeys[$#RangeKeys]) { 211 push @RangeKeys, $MaxValue; 212 } 213 } 214 else { 215 my($MinValue, $MaxValue) = Range($XArrayRef); 216 my($Interval) = ($MaxValue - $MinValue)/$NumOfBins; 217 my($KeyValue) = $MinValue + $Interval; 218 while ($KeyValue < $MaxValue) { 219 push @RangeKeys, $KeyValue; 220 $KeyValue += $Interval; 221 } 222 push @RangeKeys, $MaxValue; 223 } 224 225 #Setup frequency hash array... 226 my(%FrequencyMap); 227 %FrequencyMap = (); 228 229 %FrequencyMap = map { $_ => 0 } @RangeKeys; 230 231 # Count values... 232 my($Key, $Value); 233 234 VALUE: for $Value (@$XArrayRef) { 235 for $Key (@RangeKeys) { 236 if ($Value <= $Key) { 237 $FrequencyMap{$Key} += 1; 238 next VALUE; 239 } 240 } 241 } 242 return (%FrequencyMap); 243 } 244 245 # Compute the geometric mean of an array of numbers: NthROOT( PRODUCT(x[i]) ) 246 sub GeometricMean { 247 my($XArrayRef) = @_; 248 249 if (!@$XArrayRef) { 250 return undef; 251 } 252 my($Mean, $Product, $Value); 253 $Product = 1; 254 foreach $Value (@$XArrayRef) { 255 if ($Value <= 0 ) { 256 return undef; 257 } 258 $Product *= $Value; 259 } 260 $Mean = $Product ** (1 / @$XArrayRef); 261 return $Mean; 262 } 263 264 # Compute the harmonic mean of an array of numbers: 1 / ( SUM(1/x[i]) / n ) 265 sub HarmonicMean { 266 my($XArrayRef) = @_; 267 268 if (!@$XArrayRef) { 269 return undef; 270 } 271 my($Mean, $Sum, $Value); 272 $Sum = 0; 273 foreach $Value (@$XArrayRef) { 274 if ($Value <= 0 ) { 275 return undef; 276 } 277 $Sum += 1/$Value; 278 } 279 $Mean = 1/($Sum/@$XArrayRef); 280 return $Mean; 281 } 282 283 # Return the k-largest value from an array of numbers 284 sub KLargest { 285 my($XArrayRef, $K) = @_; 286 287 if (!(@$XArrayRef && ($K > 0) && ($K <= @$XArrayRef))) { 288 return undef; 289 } 290 my($KLargest, @SortedXArray); 291 @SortedXArray = sort { $b <=> $a } @$XArrayRef; 292 $KLargest = $SortedXArray[$K - 1]; 293 return $KLargest; 294 } 295 296 # Return the k-smallest value from an array of numbers 297 sub KSmallest { 298 my($XArrayRef, $K) = @_; 299 300 if (!(@$XArrayRef && ($K > 0) && ($K <= @$XArrayRef))) { 301 return undef; 302 } 303 my($KSmallest, @SortedXArray); 304 @SortedXArray = sort { $a <=> $b } @$XArrayRef; 305 $KSmallest = $SortedXArray[$K - 1]; 306 return $KSmallest; 307 } 308 309 # Compute the kurtosis of an array of numbers: 310 # [ {n(n + 1)/(n - 1)(n - 2)(n - 3)} SUM{ ((x[i] - Xmean)/STDDEV)^4 } ] - {3((n - 1)^2)}/{(n - 2)(n-3)} 311 # 312 sub Kurtosis { 313 my($XArrayRef) = @_; 314 315 if (!@$XArrayRef || ((@$XArrayRef - 3) <= 0)) { 316 return undef; 317 } 318 my($Kurtosis, $Mean, $StandardDeviation, $Value); 319 $Mean = Mean($XArrayRef); 320 if (!defined $Mean) { 321 return undef; 322 } 323 $StandardDeviation = StandardDeviation($XArrayRef); 324 if (!(defined $StandardDeviation && $StandardDeviation != 0)) { 325 return undef; 326 } 327 328 my($SumOfScores, $SampleSize); 329 $SumOfScores = 0; 330 for $Value (@$XArrayRef) { 331 $SumOfScores += (($Value - $Mean)/$StandardDeviation) ** 4; 332 } 333 $SampleSize = @$XArrayRef; 334 $Kurtosis = ((($SampleSize * ($SampleSize + 1))/(($SampleSize - 1) * ($SampleSize - 2) * ($SampleSize - 3))) * $SumOfScores) - ((3 * (($SampleSize - 1) ** 2))/(($SampleSize - 2) * ($SampleSize - 3))); 335 return $Kurtosis; 336 } 337 338 # Return the smallest value from an array of numbers 339 sub Minimum { 340 my($XArrayRef) = @_; 341 return KSmallest($XArrayRef, 1); 342 } 343 344 # Return the largest value from an array of numbers 345 sub Maximum { 346 my($XArrayRef) = @_; 347 return KLargest($XArrayRef, 1); 348 } 349 350 # Compute the mean of an array of numbers: SUM( x[i] ) / n 351 sub Mean { 352 my($XArrayRef) = @_; 353 354 if (!@$XArrayRef) { 355 return undef; 356 } 357 my($Mean, $Sum, $Value); 358 $Sum = 0; 359 foreach $Value (@$XArrayRef) { 360 $Sum += $Value; 361 } 362 $Mean = $Sum / @$XArrayRef; 363 return $Mean; 364 } 365 366 # Compute the median value of an array of numbers. For an even number array, it's 367 # the average of two middle values. 368 # 369 # For even values of n: Xsorted[(n - 1)/2 + 1] 370 # For odd values of n: (Xsorted[n/2] + Xsorted[n/2 + 1])/2 371 # 372 sub Median { 373 my($XArrayRef) = @_; 374 375 if (!@$XArrayRef) { 376 return undef; 377 } 378 my($Median, @SortedXArray); 379 $Median = 0; 380 @SortedXArray = sort { $a <=> $b } @$XArrayRef; 381 if (@$XArrayRef % 2) { 382 my($MidIndex); 383 $MidIndex = int(@SortedXArray - 1)/2; 384 $Median = $SortedXArray[$MidIndex]; 385 } 386 else { 387 # Even number array... 388 my($MidPosition); 389 $MidPosition = int(@SortedXArray / 2); 390 $Median = ($SortedXArray[$MidPosition - 1] + $SortedXArray[$MidPosition]) / 2; 391 } 392 return $Median; 393 } 394 395 # Return the most frequently occuring value in an array of numbers 396 sub Mode { 397 my($XArrayRef) = @_; 398 399 if (!@$XArrayRef) { 400 return undef; 401 } 402 my($Value, %ValueToCountMap, @CountList, @SortedCountList); 403 %ValueToCountMap = (); 404 @CountList = (); 405 @SortedCountList = (); 406 for $Value (@$XArrayRef) { 407 if (exists $ValueToCountMap{$Value}) { 408 $ValueToCountMap{$Value} += 1; 409 } 410 else { 411 $ValueToCountMap{$Value} = 1; 412 } 413 } 414 for $Value (keys %ValueToCountMap) { 415 push @CountList, $ValueToCountMap{$Value}; 416 } 417 @SortedCountList = sort { $b <=> $a } @CountList; 418 419 # Make sure the frequency of mode value is greater than one and check for 420 # multiple modes as well... 421 # 422 my($ModeCount, $ModeValue); 423 $ModeCount = $SortedCountList[0]; 424 if ($ModeCount <= 1) { 425 return undef; 426 } 427 # Get the first mode value... 428 VALUE: for $Value (keys %ValueToCountMap) { 429 if ($ValueToCountMap{$Value} == $ModeCount) { 430 $ModeValue = $Value; 431 # Set it to zero to skip it next time... 432 $ValueToCountMap{$Value} = 0; 433 last VALUE; 434 } 435 } 436 437 if (wantarray) { 438 # Retrieve all the modes... 439 my(@Modes, $Count); 440 @Modes = (); 441 push @Modes, $ModeValue; 442 for $Count (@SortedCountList) { 443 if ($Count == $ModeCount) { 444 VALUE: for $Value (keys %ValueToCountMap) { 445 if ($ValueToCountMap{$Value} == $ModeCount) { 446 push @Modes, $Value; 447 # Set it to zero to skip it next time... 448 $ValueToCountMap{$Value} = 0; 449 last VALUE; 450 } 451 } 452 } 453 } 454 return sort {$b <=> $a} @Modes; 455 } 456 else { 457 return $ModeValue; 458 } 459 } 460 461 462 # Compute the Pearson correlation coefficient between two arrays of numbers: 463 # 464 # SUM( (x[i] - Xmean)(y[i] - Ymean) ) / SQRT( SUM( (x[i] - Xmean)^2 )(SUM( (y[i] - Ymean)^2 )) ) 465 # 466 # It returns values in the range from -1.0 to 1.0 467 sub PearsonCorrelation { 468 my($XArrayRef, $YArrayRef) = @_; 469 470 if (!(@$XArrayRef && @$YArrayRef && (@$XArrayRef == @$YArrayRef))) { 471 return undef; 472 } 473 my($Correlation, $XMean, $YMean, $Index, $XValueDeviation, $YValueDeviation, $SquareOfXDeviations, $SquareOfYDeviations, $ProductOfDeviations); 474 475 $Correlation = 0; 476 $XMean = Mean($XArrayRef); 477 $YMean = Mean($YArrayRef); 478 $ProductOfDeviations = 0; $SquareOfXDeviations = 0; $SquareOfYDeviations = 0; 479 for $Index (0 .. $#{@$XArrayRef}) { 480 $XValueDeviation = $XArrayRef->[$Index] - $XMean; 481 $YValueDeviation = $YArrayRef->[$Index] - $YMean; 482 $ProductOfDeviations += ($XValueDeviation * $YValueDeviation); 483 $SquareOfXDeviations += $XValueDeviation ** 2; 484 $SquareOfYDeviations += $YValueDeviation ** 2; 485 } 486 $Correlation = $ProductOfDeviations / sqrt($SquareOfXDeviations * $SquareOfYDeviations); 487 return $Correlation; 488 } 489 490 # Return the smallest and largest values from an array of numbers 491 sub Range { 492 my($XArrayRef) = @_; 493 494 if (!@$XArrayRef) { 495 return (undef, undef); 496 } 497 my($Smallest, $Largest, @SortedXArray); 498 @SortedXArray = sort { $a <=> $b } @$XArrayRef; 499 $Smallest = $SortedXArray[0]; 500 $Largest = $SortedXArray[$#SortedXArray]; 501 return ($Smallest, $Largest); 502 } 503 504 # Compute square of the Pearson correlation coefficient between two arrays of numbers. 505 # 506 sub RSquare { 507 my($XArrayRef, $YArrayRef) = @_; 508 my($RSquare, $Correlation); 509 510 $RSquare = undef; 511 $Correlation = PearsonCorrelation($XArrayRef, $YArrayRef); 512 if (defined $Correlation) { 513 $RSquare = $Correlation ** 2; 514 } 515 return $RSquare; 516 } 517 518 # Compute the skewness of an array of numbers: 519 # {n/(n - 1)(n - 2)} SUM{ ((x[i] - Xmean)/STDDEV)^3 } 520 # 521 sub Skewness { 522 my($XArrayRef) = @_; 523 524 if (!@$XArrayRef || ((@$XArrayRef - 2) <= 0)) { 525 return undef; 526 } 527 my($Skewness, $Mean, $StandardDeviation, $Value); 528 $Mean = Mean($XArrayRef); 529 if (!defined $Mean) { 530 return undef; 531 } 532 $StandardDeviation = StandardDeviation($XArrayRef); 533 if (!(defined $StandardDeviation && $StandardDeviation != 0)) { 534 return undef; 535 } 536 537 my($SumOfScores, $SampleSize); 538 $SumOfScores = 0; 539 for $Value (@$XArrayRef) { 540 $SumOfScores += (($Value - $Mean)/$StandardDeviation) ** 3; 541 } 542 $SampleSize = @$XArrayRef; 543 $Skewness = ($SampleSize/(($SampleSize - 1) * ($SampleSize - 2) )) * $SumOfScores; 544 return $Skewness; 545 } 546 547 # Compute the standard deviation of an array of numbers 548 sub StandardDeviation { 549 my($XArrayRef) = @_; 550 return _CalculateStandardDeviation($XArrayRef, 2); 551 } 552 553 # Compute the standard deviation of an array of numbers representing entire population 554 sub StandardDeviationN { 555 my($XArrayRef) = @_; 556 return _CalculateStandardDeviation($XArrayRef, 1); 557 } 558 559 # Compute the standard deviation of an array of numbers. 560 # Mode 1: SQRT ( SUM( (x[i] - mean)^2 ) / n ) 561 # Mode 2: SQRT ( SUM( (x[i] - mean)^2 ) / (n - 1) ) 562 # 563 sub _CalculateStandardDeviation { 564 my($XArrayRef, $Mode) = @_; 565 566 if (!@$XArrayRef) { 567 return undef; 568 } 569 my($StandardDeviation, $Value, $SquareOfDeviations, $Mean, $N); 570 571 $StandardDeviation = 0; 572 $Mean = Mean($XArrayRef); 573 $SquareOfDeviations = 0; 574 foreach $Value (@$XArrayRef) { 575 $SquareOfDeviations += ($Value - $Mean) ** 2; 576 } 577 $N = ($Mode == 1) ? @$XArrayRef : (@$XArrayRef - 1); 578 $StandardDeviation = sqrt($SquareOfDeviations / $N); 579 580 return $StandardDeviation; 581 } 582 583 # Compute the standard error using standard deviation and sample size 584 sub StandardError { 585 my($StandardDeviation, $Count) = @_; 586 587 if ($Count <= 0) { 588 return undef; 589 } 590 my($StandardError); 591 $StandardError = $StandardDeviation / sqrt($Count); 592 593 return $StandardError; 594 } 595 596 # Standardize the value using mean and standard deviation 597 sub Standardize { 598 my($Value, $Mean, $StandardDeviation) = @_; 599 600 if ($StandardDeviation <= 0) { 601 return undef; 602 } 603 my($StandardizedValue); 604 $StandardizedValue = ($Value - $Mean)/$StandardDeviation; 605 606 return $StandardizedValue; 607 } 608 609 # Compute the standard deviation above the mean for an array of numbers. 610 sub StandardScores { 611 my($XArrayRef) = @_; 612 return _CalculateStandardScores($XArrayRef, 2); 613 } 614 615 # Compute the standard deviation above the mean for an array of numbers representing entire population 616 sub StandardScoresN { 617 my($XArrayRef) = @_; 618 return _CalculateStandardScores($XArrayRef, 1); 619 } 620 621 # Compute the standard deviation above the mean for an array of numbers. 622 # Mode 1: (x[i] - mean) / n 623 # Mode 2: (x[i] - mean) / (n - 1) 624 # 625 sub _CalculateStandardScores { 626 my($XArrayRef, $Mode) = @_; 627 628 if (!@$XArrayRef) { 629 return undef; 630 } 631 my(@StandardScores, $Mean, $StandardDeviation, $Value); 632 633 $Mean = Mean($XArrayRef); 634 $StandardDeviation = _CalculateStandardDeviation($XArrayRef, $Mode); 635 if (!(defined($StandardDeviation) && $StandardDeviation > 0)) { 636 return undef; 637 } 638 @StandardScores = (); 639 for $Value (@$XArrayRef) { 640 push @StandardScores, ($Value - $Mean)/$StandardDeviation; 641 } 642 643 return @StandardScores; 644 } 645 646 # Compute the product of an array of numbers 647 sub Product { 648 my($XArrayRef) = @_; 649 650 if (!@$XArrayRef) { 651 return undef; 652 } 653 my($Product, $Value); 654 $Product = 1; 655 foreach $Value (@$XArrayRef) { 656 $Product *= $Value; 657 } 658 return $Product; 659 } 660 661 # Compute the sum of an array of numbers 662 sub Sum { 663 my($XArrayRef) = @_; 664 665 if (!@$XArrayRef) { 666 return undef; 667 } 668 my($Sum, $Value); 669 $Sum = 0; 670 foreach $Value (@$XArrayRef) { 671 $Sum += $Value; 672 } 673 return $Sum; 674 } 675 676 # Compute the sum of squares of an array of numbers 677 sub SumOfSquares { 678 my($XArrayRef) = @_; 679 680 if (!@$XArrayRef) { 681 return undef; 682 } 683 my($SumOfSquares, $Value); 684 $SumOfSquares = 0; 685 foreach $Value (@$XArrayRef) { 686 $SumOfSquares += $Value ** 2; 687 } 688 return $SumOfSquares; 689 } 690 691 # Compute the mean of an array of numbers by excluding a fraction of 692 # numbers from the top and bottom of the data set. 693 sub TrimMean { 694 my($XArrayRef, $FractionToExclude) = @_; 695 696 if (!(@$XArrayRef && $FractionToExclude > 0 && $FractionToExclude <= 1)) { 697 return undef; 698 } 699 my($NumberToExclude); 700 $NumberToExclude = int(@$XArrayRef * $FractionToExclude); 701 $NumberToExclude = ($NumberToExclude % 2) ? ($NumberToExclude - 1) : $NumberToExclude; 702 if ($NumberToExclude == @$XArrayRef) { 703 return undef; 704 } 705 my($Mean, $Sum, $Index, $FirstIndex, $LastIndex); 706 $FirstIndex = $NumberToExclude/2; 707 $LastIndex = @$XArrayRef - ($NumberToExclude/2) - 1; 708 $Sum = 0; 709 my(@SortedXArray); 710 @SortedXArray = sort { $a <=> $b } @$XArrayRef; 711 for $Index ($FirstIndex .. $LastIndex) { 712 $Sum += $SortedXArray[$Index]; 713 } 714 $Mean = $Sum/(@SortedXArray - $NumberToExclude); 715 return $Mean; 716 } 717 718 # Compute the variance of an array of numbers 719 sub Variance { 720 my($XArrayRef) = @_; 721 return _CalculateVariance($XArrayRef, 2); 722 } 723 724 # Compute the variance of an array of numbers representing entire population 725 sub VarianceN { 726 my($XArrayRef) = @_; 727 return _CalculateVariance($XArrayRef, 1); 728 } 729 730 # Compute the variance of an array of numbers: 731 # Mode 1: SUM( (x[i] - Xmean)^2 / n ) 732 # Mode 2: SUM( (x[i] - Xmean)^2 / (n - 1) ) 733 # 734 sub _CalculateVariance { 735 my($XArrayRef, $Mode) = @_; 736 737 if (!@$XArrayRef) { 738 return undef; 739 } 740 my($Variance, $Value, $SquareOfDeviations, $Mean, $N); 741 742 $Variance = 0; 743 $Mean = Mean($XArrayRef); 744 $SquareOfDeviations = 0; 745 foreach $Value (@$XArrayRef) { 746 $SquareOfDeviations += ($Value - $Mean) ** 2; 747 } 748 $N = ($Mode == 1) ? @$XArrayRef : (@$XArrayRef - 1); 749 $Variance = $SquareOfDeviations / $N; 750 751 return $Variance; 752 } 753