1 /* 2 * Copyright 2003-2004 The Apache Software Foundation. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 package org.apache.commons.math.stat; 17 18 import org.apache.commons.math.stat.descriptive.UnivariateStatistic; 19 import org.apache.commons.math.stat.descriptive.moment.GeometricMean; 20 import org.apache.commons.math.stat.descriptive.moment.Mean; 21 import org.apache.commons.math.stat.descriptive.moment.Variance; 22 import org.apache.commons.math.stat.descriptive.rank.Max; 23 import org.apache.commons.math.stat.descriptive.rank.Min; 24 import org.apache.commons.math.stat.descriptive.rank.Percentile; 25 import org.apache.commons.math.stat.descriptive.summary.Product; 26 import org.apache.commons.math.stat.descriptive.summary.Sum; 27 import org.apache.commons.math.stat.descriptive.summary.SumOfLogs; 28 import org.apache.commons.math.stat.descriptive.summary.SumOfSquares; 29 30 /** 31 * StatUtils provides static methods for computing statistics based on data 32 * stored in double[] arrays. 33 * 34 * @version $Revision: 233996 $ $Date: 2005-08-19 21:26:27 -0700 (Fri, 19 Aug 2005) $ 35 */ 36 public final class StatUtils { 37 38 /** sum */ 39 private static UnivariateStatistic sum = new Sum(); 40 41 /** sumSq */ 42 private static UnivariateStatistic sumSq = new SumOfSquares(); 43 44 /** prod */ 45 private static UnivariateStatistic prod = new Product(); 46 47 /** sumLog */ 48 private static UnivariateStatistic sumLog = new SumOfLogs(); 49 50 /** min */ 51 private static UnivariateStatistic min = new Min(); 52 53 /** max */ 54 private static UnivariateStatistic max = new Max(); 55 56 /** mean */ 57 private static UnivariateStatistic mean = new Mean(); 58 59 /** variance */ 60 private static Variance variance = new Variance(); 61 62 /** percentile */ 63 private static Percentile percentile = new Percentile(); 64 65 /** geometric mean */ 66 private static GeometricMean geometricMean = new GeometricMean(); 67 68 /** 69 * Private Constructor 70 */ 71 private StatUtils() { 72 } 73 74 /** 75 * Returns the sum of the values in the input array, or 76 * <code>Double.NaN</code> if the array is empty. 77 * <p> 78 * Throws <code>IllegalArgumentException</code> if the input array 79 * is null. 80 * 81 * @param values array of values to sum 82 * @return the sum of the values or <code>Double.NaN</code> if the array 83 * is empty 84 * @throws IllegalArgumentException if the array is null 85 */ 86 public static double sum(final double[] values) { 87 return sum.evaluate(values); 88 } 89 90 /** 91 * Returns the sum of the entries in the specified portion of 92 * the input array, or <code>Double.NaN</code> if the designated subarray 93 * is empty. 94 * <p> 95 * Throws <code>IllegalArgumentException</code> if the array is null. 96 * 97 * @param values the input array 98 * @param begin index of the first array element to include 99 * @param length the number of elements to include 100 * @return the sum of the values or Double.NaN if length = 0 101 * @throws IllegalArgumentException if the array is null or the array index 102 * parameters are not valid 103 */ 104 public static double sum(final double[] values, final int begin, 105 final int length) { 106 return sum.evaluate(values, begin, length); 107 } 108 109 /** 110 * Returns the sum of the squares of the entries in the input array, or 111 * <code>Double.NaN</code> if the array is empty. 112 * <p> 113 * Throws <code>IllegalArgumentException</code> if the array is null. 114 * 115 * @param values input array 116 * @return the sum of the squared values or <code>Double.NaN</code> if the 117 * array is empty 118 * @throws IllegalArgumentException if the array is null 119 */ 120 public static double sumSq(final double[] values) { 121 return sumSq.evaluate(values); 122 } 123 124 /** 125 * Returns the sum of the squares of the entries in the specified portion of 126 * the input array, or <code>Double.NaN</code> if the designated subarray 127 * is empty. 128 * <p> 129 * Throws <code>IllegalArgumentException</code> if the array is null. 130 * 131 * @param values the input array 132 * @param begin index of the first array element to include 133 * @param length the number of elements to include 134 * @return the sum of the squares of the values or Double.NaN if length = 0 135 * @throws IllegalArgumentException if the array is null or the array index 136 * parameters are not valid 137 */ 138 public static double sumSq(final double[] values, final int begin, 139 final int length) { 140 return sumSq.evaluate(values, begin, length); 141 } 142 143 /** 144 * Returns the product of the entries in the input array, or 145 * <code>Double.NaN</code> if the array is empty. 146 * <p> 147 * Throws <code>IllegalArgumentException</code> if the array is null. 148 * 149 * @param values the input array 150 * @return the product of the values or Double.NaN if the array is empty 151 * @throws IllegalArgumentException if the array is null 152 */ 153 public static double product(final double[] values) { 154 return prod.evaluate(values); 155 } 156 157 /** 158 * Returns the product of the entries in the specified portion of 159 * the input array, or <code>Double.NaN</code> if the designated subarray 160 * is empty. 161 * <p> 162 * Throws <code>IllegalArgumentException</code> if the array is null. 163 * 164 * @param values the input array 165 * @param begin index of the first array element to include 166 * @param length the number of elements to include 167 * @return the product of the values or Double.NaN if length = 0 168 * @throws IllegalArgumentException if the array is null or the array index 169 * parameters are not valid 170 */ 171 public static double product(final double[] values, final int begin, 172 final int length) { 173 return prod.evaluate(values, begin, length); 174 } 175 176 /** 177 * Returns the sum of the natural logs of the entries in the input array, or 178 * <code>Double.NaN</code> if the array is empty. 179 * <p> 180 * Throws <code>IllegalArgumentException</code> if the array is null. 181 * <p> 182 * See {@link org.apache.commons.math.stat.descriptive.summary.SumOfLogs}. 183 * 184 * @param values the input array 185 * @return the sum of the natural logs of the values or Double.NaN if 186 * the array is empty 187 * @throws IllegalArgumentException if the array is null 188 */ 189 public static double sumLog(final double[] values) { 190 return sumLog.evaluate(values); 191 } 192 193 /** 194 * Returns the sum of the natural logs of the entries in the specified portion of 195 * the input array, or <code>Double.NaN</code> if the designated subarray 196 * is empty. 197 * <p> 198 * Throws <code>IllegalArgumentException</code> if the array is null. 199 * <p> 200 * See {@link org.apache.commons.math.stat.descriptive.summary.SumOfLogs}. 201 * 202 * @param values the input array 203 * @param begin index of the first array element to include 204 * @param length the number of elements to include 205 * @return the sum of the natural logs of the values or Double.NaN if 206 * length = 0 207 * @throws IllegalArgumentException if the array is null or the array index 208 * parameters are not valid 209 */ 210 public static double sumLog(final double[] values, final int begin, 211 final int length) { 212 return sumLog.evaluate(values, begin, length); 213 } 214 215 /** 216 * Returns the arithmetic mean of the entries in the input array, or 217 * <code>Double.NaN</code> if the array is empty. 218 * <p> 219 * Throws <code>IllegalArgumentException</code> if the array is null. 220 * <p> 221 * See {@link org.apache.commons.math.stat.descriptive.moment.Mean} for 222 * details on the computing algorithm. 223 * 224 * @param values the input array 225 * @return the mean of the values or Double.NaN if the array is empty 226 * @throws IllegalArgumentException if the array is null 227 */ 228 public static double mean(final double[] values) { 229 return mean.evaluate(values); 230 } 231 232 /** 233 * Returns the arithmetic mean of the entries in the specified portion of 234 * the input array, or <code>Double.NaN</code> if the designated subarray 235 * is empty. 236 * <p> 237 * Throws <code>IllegalArgumentException</code> if the array is null. 238 * <p> 239 * See {@link org.apache.commons.math.stat.descriptive.moment.Mean} for 240 * details on the computing algorithm. 241 * 242 * @param values the input array 243 * @param begin index of the first array element to include 244 * @param length the number of elements to include 245 * @return the mean of the values or Double.NaN if length = 0 246 * @throws IllegalArgumentException if the array is null or the array index 247 * parameters are not valid 248 */ 249 public static double mean(final double[] values, final int begin, 250 final int length) { 251 return mean.evaluate(values, begin, length); 252 } 253 254 /** 255 * Returns the geometric mean of the entries in the input array, or 256 * <code>Double.NaN</code> if the array is empty. 257 * <p> 258 * Throws <code>IllegalArgumentException</code> if the array is null. 259 * <p> 260 * See {@link org.apache.commons.math.stat.descriptive.moment.GeometricMean} 261 * for details on the computing algorithm. 262 * 263 * @param values the input array 264 * @return the geometric mean of the values or Double.NaN if the array is empty 265 * @throws IllegalArgumentException if the array is null 266 */ 267 public static double geometricMean(final double[] values) { 268 return geometricMean.evaluate(values); 269 } 270 271 /** 272 * Returns the geometric mean of the entries in the specified portion of 273 * the input array, or <code>Double.NaN</code> if the designated subarray 274 * is empty. 275 * <p> 276 * Throws <code>IllegalArgumentException</code> if the array is null. 277 * <p> 278 * See {@link org.apache.commons.math.stat.descriptive.moment.GeometricMean} 279 * for details on the computing algorithm. 280 * 281 * @param values the input array 282 * @param begin index of the first array element to include 283 * @param length the number of elements to include 284 * @return the geometric mean of the values or Double.NaN if length = 0 285 * @throws IllegalArgumentException if the array is null or the array index 286 * parameters are not valid 287 */ 288 public static double geometricMean(final double[] values, final int begin, 289 final int length) { 290 return geometricMean.evaluate(values, begin, length); 291 } 292 293 294 /** 295 * Returns the variance of the entries in the input array, or 296 * <code>Double.NaN</code> if the array is empty. 297 * <p> 298 * See {@link org.apache.commons.math.stat.descriptive.moment.Variance} for 299 * details on the computing algorithm. 300 * <p> 301 * Returns 0 for a single-value (i.e. length = 1) sample. 302 * <p> 303 * Throws <code>IllegalArgumentException</code> if the array is null. 304 * 305 * @param values the input array 306 * @return the variance of the values or Double.NaN if the array is empty 307 * @throws IllegalArgumentException if the array is null 308 */ 309 public static double variance(final double[] values) { 310 return variance.evaluate(values); 311 } 312 313 /** 314 * Returns the variance of the entries in the specified portion of 315 * the input array, or <code>Double.NaN</code> if the designated subarray 316 * is empty. 317 * <p> 318 * See {@link org.apache.commons.math.stat.descriptive.moment.Variance} for 319 * details on the computing algorithm. 320 * <p> 321 * Returns 0 for a single-value (i.e. length = 1) sample. 322 * <p> 323 * Throws <code>IllegalArgumentException</code> if the array is null or the 324 * array index parameters are not valid. 325 * 326 * @param values the input array 327 * @param begin index of the first array element to include 328 * @param length the number of elements to include 329 * @return the variance of the values or Double.NaN if length = 0 330 * @throws IllegalArgumentException if the array is null or the array index 331 * parameters are not valid 332 */ 333 public static double variance(final double[] values, final int begin, 334 final int length) { 335 return variance.evaluate(values, begin, length); 336 } 337 338 /** 339 * Returns the variance of the entries in the specified portion of 340 * the input array, using the precomputed mean value. Returns 341 * <code>Double.NaN</code> if the designated subarray is empty. 342 * <p> 343 * See {@link org.apache.commons.math.stat.descriptive.moment.Variance} for 344 * details on the computing algorithm. 345 * <p> 346 * The formula used assumes that the supplied mean value is the arithmetic 347 * mean of the sample data, not a known population parameter. This method 348 * is supplied only to save computation when the mean has already been 349 * computed. 350 * <p> 351 * Returns 0 for a single-value (i.e. length = 1) sample. 352 * <p> 353 * Throws <code>IllegalArgumentException</code> if the array is null or the 354 * array index parameters are not valid. 355 * 356 * @param values the input array 357 * @param mean the precomputed mean value 358 * @param begin index of the first array element to include 359 * @param length the number of elements to include 360 * @return the variance of the values or Double.NaN if length = 0 361 * @throws IllegalArgumentException if the array is null or the array index 362 * parameters are not valid 363 */ 364 public static double variance(final double[] values, final double mean, 365 final int begin, final int length) { 366 return variance.evaluate(values, mean, begin, length); 367 } 368 369 /** 370 * Returns the variance of the entries in the input array, using the 371 * precomputed mean value. Returns <code>Double.NaN</code> if the array 372 * is empty. 373 * <p> 374 * See {@link org.apache.commons.math.stat.descriptive.moment.Variance} for 375 * details on the computing algorithm. 376 * <p> 377 * The formula used assumes that the supplied mean value is the arithmetic 378 * mean of the sample data, not a known population parameter. This method 379 * is supplied only to save computation when the mean has already been 380 * computed. 381 * <p> 382 * Returns 0 for a single-value (i.e. length = 1) sample. 383 * <p> 384 * Throws <code>IllegalArgumentException</code> if the array is null. 385 * 386 * @param values the input array 387 * @param mean the precomputed mean value 388 * @return the variance of the values or Double.NaN if the array is empty 389 * @throws IllegalArgumentException if the array is null 390 */ 391 public static double variance(final double[] values, final double mean) { 392 return variance.evaluate(values, mean); 393 } 394 395 /** 396 * Returns the maximum of the entries in the input array, or 397 * <code>Double.NaN</code> if the array is empty. 398 * <p> 399 * Throws <code>IllegalArgumentException</code> if the array is null. 400 * <p> 401 * <ul> 402 * <li>The result is <code>NaN</code> iff all values are <code>NaN</code> 403 * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li> 404 * <li>If any of the values equals <code>Double.POSITIVE_INFINITY</code>, 405 * the result is <code>Double.POSITIVE_INFINITY.</code></li> 406 * </ul> 407 * 408 * @param values the input array 409 * @return the maximum of the values or Double.NaN if the array is empty 410 * @throws IllegalArgumentException if the array is null 411 */ 412 public static double max(final double[] values) { 413 return max.evaluate(values); 414 } 415 416 /** 417 * Returns the maximum of the entries in the specified portion of 418 * the input array, or <code>Double.NaN</code> if the designated subarray 419 * is empty. 420 * <p> 421 * Throws <code>IllegalArgumentException</code> if the array is null or 422 * the array index parameters are not valid. 423 * <p> 424 * <ul> 425 * <li>The result is <code>NaN</code> iff all values are <code>NaN</code> 426 * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li> 427 * <li>If any of the values equals <code>Double.POSITIVE_INFINITY</code>, 428 * the result is <code>Double.POSITIVE_INFINITY.</code></li> 429 * </ul> 430 * 431 * @param values the input array 432 * @param begin index of the first array element to include 433 * @param length the number of elements to include 434 * @return the maximum of the values or Double.NaN if length = 0 435 * @throws IllegalArgumentException if the array is null or the array index 436 * parameters are not valid 437 */ 438 public static double max(final double[] values, final int begin, 439 final int length) { 440 return max.evaluate(values, begin, length); 441 } 442 443 /** 444 * Returns the minimum of the entries in the input array, or 445 * <code>Double.NaN</code> if the array is empty. 446 * <p> 447 * Throws <code>IllegalArgumentException</code> if the array is null. 448 * <p> 449 * <ul> 450 * <li>The result is <code>NaN</code> iff all values are <code>NaN</code> 451 * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li> 452 * <li>If any of the values equals <code>Double.NEGATIVE_INFINITY</code>, 453 * the result is <code>Double.NEGATIVE_INFINITY.</code></li> 454 * </ul> 455 * 456 * @param values the input array 457 * @return the minimum of the values or Double.NaN if the array is empty 458 * @throws IllegalArgumentException if the array is null 459 */ 460 public static double min(final double[] values) { 461 return min.evaluate(values); 462 } 463 464 /** 465 * Returns the minimum of the entries in the specified portion of 466 * the input array, or <code>Double.NaN</code> if the designated subarray 467 * is empty. 468 * <p> 469 * Throws <code>IllegalArgumentException</code> if the array is null or 470 * the array index parameters are not valid. 471 * <p> 472 * <ul> 473 * <li>The result is <code>NaN</code> iff all values are <code>NaN</code> 474 * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li> 475 * <li>If any of the values equals <code>Double.NEGATIVE_INFINITY</code>, 476 * the result is <code>Double.NEGATIVE_INFINITY.</code></li> 477 * </ul> 478 * 479 * @param values the input array 480 * @param begin index of the first array element to include 481 * @param length the number of elements to include 482 * @return the minimum of the values or Double.NaN if length = 0 483 * @throws IllegalArgumentException if the array is null or the array index 484 * parameters are not valid 485 */ 486 public static double min(final double[] values, final int begin, 487 final int length) { 488 return min.evaluate(values, begin, length); 489 } 490 491 /** 492 * Returns an estimate of the <code>p</code>th percentile of the values 493 * in the <code>values</code> array. 494 * <p> 495 * <ul> 496 * <li>Returns <code>Double.NaN</code> if <code>values</code> has length 497 * <code>0</code></li> 498 * <li>Returns (for any value of <code>p</code>) <code>values[0]</code> 499 * if <code>values</code> has length <code>1</code></li> 500 * <li>Throws <code>IllegalArgumentException</code> if <code>values</code> 501 * is null or p is not a valid quantile value (p must be greater than 0 502 * and less than or equal to 100)</li> 503 * </ul> 504 * <p> 505 * See {@link org.apache.commons.math.stat.descriptive.rank.Percentile} for 506 * a description of the percentile estimation algorithm used. 507 * 508 * @param values input array of values 509 * @param p the percentile value to compute 510 * @return the percentile value or Double.NaN if the array is empty 511 * @throws IllegalArgumentException if <code>values</code> is null 512 * or p is invalid 513 */ 514 public static double percentile(final double[] values, final double p) { 515 return percentile.evaluate(values,p); 516 } 517 518 /** 519 * Returns an estimate of the <code>p</code>th percentile of the values 520 * in the <code>values</code> array, starting with the element in (0-based) 521 * position <code>begin</code> in the array and including <code>length</code> 522 * values. 523 * <p> 524 * <ul> 525 * <li>Returns <code>Double.NaN</code> if <code>length = 0</code></li> 526 * <li>Returns (for any value of <code>p</code>) <code>values[begin]</code> 527 * if <code>length = 1 </code></li> 528 * <li>Throws <code>IllegalArgumentException</code> if <code>values</code> 529 * is null , <code>begin</code> or <code>length</code> is invalid, or 530 * <code>p</code> is not a valid quantile value (p must be greater than 0 531 * and less than or equal to 100)</li> 532 * </ul> 533 * <p> 534 * See {@link org.apache.commons.math.stat.descriptive.rank.Percentile} for 535 * a description of the percentile estimation algorithm used. 536 * 537 * @param values array of input values 538 * @param p the percentile to compute 539 * @param begin the first (0-based) element to include in the computation 540 * @param length the number of array elements to include 541 * @return the percentile value 542 * @throws IllegalArgumentException if the parameters are not valid or the 543 * input array is null 544 */ 545 public static double percentile(final double[] values, final int begin, 546 final int length, final double p) { 547 return percentile.evaluate(values, begin, length, p); 548 } 549 550 /** 551 * Returns the sum of the (signed) differences between corresponding elements of the 552 * input arrays -- i.e., sum(sample1[i] - sample2[i]). 553 * 554 * @param sample1 the first array 555 * @param sample2 the second array 556 * @return sum of paired differences 557 * @throws IllegalArgumentException if the arrays do not have the same 558 * (positive) length 559 */ 560 public static double sumDifference(final double[] sample1, final double[] sample2) 561 throws IllegalArgumentException { 562 int n = sample1.length; 563 if (n != sample2.length || n < 1) { 564 throw new IllegalArgumentException 565 ("Input arrays must have the same (positive) length."); 566 } 567 double result = 0; 568 for (int i = 0; i < n; i++) { 569 result += sample1[i] - sample2[i]; 570 } 571 return result; 572 } 573 574 /** 575 * Returns the mean of the (signed) differences between corresponding elements of the 576 * input arrays -- i.e., sum(sample1[i] - sample2[i]) / sample1.length. 577 * 578 * @param sample1 the first array 579 * @param sample2 the second array 580 * @return mean of paired differences 581 * @throws IllegalArgumentException if the arrays do not have the same 582 * (positive) length 583 */ 584 public static double meanDifference(final double[] sample1, final double[] sample2) 585 throws IllegalArgumentException { 586 return sumDifference(sample1, sample2) / (double) sample1.length; 587 } 588 589 /** 590 * Returns the variance of the (signed) differences between corresponding elements of the 591 * input arrays -- i.e., var(sample1[i] - sample2[i]). 592 * 593 * @param sample1 the first array 594 * @param sample2 the second array 595 * @param meanDifference the mean difference between corresponding entries 596 * @see #meanDifference(double[],double[]) 597 * @return variance of paired differences 598 * @throws IllegalArgumentException if the arrays do not have the same 599 * length or their common length is less than 2. 600 */ 601 public static double varianceDifference(final double[] sample1, final double[] sample2, 602 double meanDifference) throws IllegalArgumentException { 603 double sum1 = 0d; 604 double sum2 = 0d; 605 double diff = 0d; 606 int n = sample1.length; 607 if (n < 2 || n != sample2.length) { 608 throw new IllegalArgumentException("Input array lengths must be equal and at least 2."); 609 } 610 for (int i = 0; i < n; i++) { 611 diff = sample1[i] - sample2[i]; 612 sum1 += (diff - meanDifference) *(diff - meanDifference); 613 sum2 += diff - meanDifference; 614 } 615 return (sum1 - (sum2 * sum2 / (double) n)) / (double) (n - 1); 616 } 617 618 }