View Javadoc

1   /*
2    * Copyright 2003-2004 The Apache Software Foundation.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *      http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  package org.apache.commons.math.stat;
17  
18  import org.apache.commons.math.stat.descriptive.UnivariateStatistic;
19  import org.apache.commons.math.stat.descriptive.moment.GeometricMean;
20  import org.apache.commons.math.stat.descriptive.moment.Mean;
21  import org.apache.commons.math.stat.descriptive.moment.Variance;
22  import org.apache.commons.math.stat.descriptive.rank.Max;
23  import org.apache.commons.math.stat.descriptive.rank.Min;
24  import org.apache.commons.math.stat.descriptive.rank.Percentile;
25  import org.apache.commons.math.stat.descriptive.summary.Product;
26  import org.apache.commons.math.stat.descriptive.summary.Sum;
27  import org.apache.commons.math.stat.descriptive.summary.SumOfLogs;
28  import org.apache.commons.math.stat.descriptive.summary.SumOfSquares;
29  
30  /**
31   * StatUtils provides static methods for computing statistics based on data
32   * stored in double[] arrays. 
33   * 
34   * @version $Revision: 233996 $ $Date: 2005-08-19 21:26:27 -0700 (Fri, 19 Aug 2005) $
35   */
36  public final class StatUtils {
37  
38      /** sum */
39      private static UnivariateStatistic sum = new Sum();
40  
41      /** sumSq */
42      private static UnivariateStatistic sumSq = new SumOfSquares();
43  
44      /** prod */
45      private static UnivariateStatistic prod = new Product();
46  
47      /** sumLog */
48      private static UnivariateStatistic sumLog = new SumOfLogs();
49  
50      /** min */
51      private static UnivariateStatistic min = new Min();
52  
53      /** max */
54      private static UnivariateStatistic max = new Max();
55  
56      /** mean */
57      private static UnivariateStatistic mean = new Mean();
58  
59      /** variance */
60      private static Variance variance = new Variance();
61  
62      /** percentile */
63      private static Percentile percentile = new Percentile();
64      
65      /** geometric mean */
66      private static GeometricMean geometricMean = new GeometricMean();
67  
68      /**
69       * Private Constructor
70       */
71      private StatUtils() {
            // Intentionally empty: every member of this class is static, so no instance is ever needed.
72      }
73  
74      /**
75       * Returns the sum of the values in the input array, or
76       * <code>Double.NaN</code> if the array is empty.
77       * <p>
78       * Throws <code>IllegalArgumentException</code> if the input array
79       * is null.
80       * 
81       * @param values  array of values to sum
82       * @return the sum of the values or <code>Double.NaN</code> if the array
83       * is empty
84       * @throws IllegalArgumentException if the array is null
85       */
86      public static double sum(final double[] values) {
            // Whole-array overload: forwards to the shared Sum instance held in the static field 'sum'.
87          return sum.evaluate(values);
88      }
89  
90      /**
91       * Returns the sum of the entries in the specified portion of
92       * the input array, or <code>Double.NaN</code> if the designated subarray
93       * is empty.
94       * <p>
95       * Throws <code>IllegalArgumentException</code> if the array is null.
96       * 
97       * @param values the input array
98       * @param begin index of the first array element to include
99       * @param length the number of elements to include
100      * @return the sum of the values or Double.NaN if length = 0
101      * @throws IllegalArgumentException if the array is null or the array index
102      *  parameters are not valid
103      */
104     public static double sum(final double[] values, final int begin, 
105             final int length) {
            // begin/length are passed through untouched; nothing is validated in this method itself.
106         return sum.evaluate(values, begin, length);
107     }
108 
109     /**
110      * Returns the sum of the squares of the entries in the input array, or 
111      * <code>Double.NaN</code> if the array is empty.
112      * <p>
113      * Throws <code>IllegalArgumentException</code> if the array is null.
114      * 
115      * @param values  input array
116      * @return the sum of the squared values or <code>Double.NaN</code> if the
117      * array is empty
118      * @throws IllegalArgumentException if the array is null
119      */
120     public static double sumSq(final double[] values) {
            // Whole-array overload: forwards to the shared SumOfSquares instance in the static field 'sumSq'.
121         return sumSq.evaluate(values);
122     }
123 
124     /**
125      * Returns the sum of the squares of the entries in the specified portion of
126      * the input array, or <code>Double.NaN</code> if the designated subarray
127      * is empty.
128      * <p>
129      * Throws <code>IllegalArgumentException</code> if the array is null.
130      * 
131      * @param values the input array
132      * @param begin index of the first array element to include
133      * @param length the number of elements to include
134      * @return the sum of the squares of the values or Double.NaN if length = 0
135      * @throws IllegalArgumentException if the array is null or the array index
136      * parameters are not valid
137      */
138     public static double sumSq(final double[] values, final int begin,
139             final int length) {
            // Sub-array overload: begin/length go straight to the shared SumOfSquares instance, unchecked here.
140         return sumSq.evaluate(values, begin, length);
141     }
142 
143     /**
144      * Returns the product of the entries in the input array, or 
145      * <code>Double.NaN</code> if the array is empty.
146      * <p>
147      * Throws <code>IllegalArgumentException</code> if the array is null.
148      * 
149      * @param values the input array
150      * @return the product of the values or Double.NaN if the array is empty
151      * @throws IllegalArgumentException if the array is null
152      */
153     public static double product(final double[] values) {
            // The backing field is named 'prod' (a Product instance), unlike most fields, which match their method name.
154         return prod.evaluate(values);
155     }
156 
157     /**
158      * Returns the product of the entries in the specified portion of
159      * the input array, or <code>Double.NaN</code> if the designated subarray
160      * is empty.
161      * <p>
162      * Throws <code>IllegalArgumentException</code> if the array is null.
163      * 
164      * @param values the input array
165      * @param begin index of the first array element to include
166      * @param length the number of elements to include
167      * @return the product of the values or Double.NaN if length = 0
168      * @throws IllegalArgumentException if the array is null or the array index
169      * parameters are not valid
170      */
171     public static double product(final double[] values, final int begin,
172             final int length) {
            // Sub-array overload: begin/length go straight to the shared Product instance ('prod'), unchecked here.
173         return prod.evaluate(values, begin, length);
174     }
175 
176     /**
177      * Returns the sum of the natural logs of the entries in the input array, or 
178      * <code>Double.NaN</code> if the array is empty.
179      * <p>
180      * Throws <code>IllegalArgumentException</code> if the array is null.
181      * <p>
182      * See {@link org.apache.commons.math.stat.descriptive.summary.SumOfLogs}.
183      * 
184      * @param values the input array
185      * @return the sum of the natural logs of the values or Double.NaN if 
186      * the array is empty
187      * @throws IllegalArgumentException if the array is null
188      */
189     public static double sumLog(final double[] values) {
            // Whole-array overload: forwards to the shared SumOfLogs instance in the static field 'sumLog'.
190         return sumLog.evaluate(values);
191     }
192 
193     /**
194      * Returns the sum of the natural logs of the entries in the specified portion of
195      * the input array, or <code>Double.NaN</code> if the designated subarray
196      * is empty.
197      * <p>
198      * Throws <code>IllegalArgumentException</code> if the array is null.
199      * <p>
200       * See {@link org.apache.commons.math.stat.descriptive.summary.SumOfLogs}.
201      * 
202      * @param values the input array
203      * @param begin index of the first array element to include
204      * @param length the number of elements to include
205      * @return the sum of the natural logs of the values or Double.NaN if 
206      * length = 0
207      * @throws IllegalArgumentException if the array is null or the array index
208      * parameters are not valid
209      */
210     public static double sumLog(final double[] values, final int begin,
211             final int length) {
            // Sub-array overload: begin/length go straight to the shared SumOfLogs instance, unchecked here.
212         return sumLog.evaluate(values, begin, length);
213     }
214 
215     /**
216      * Returns the arithmetic mean of the entries in the input array, or 
217      * <code>Double.NaN</code> if the array is empty.
218      * <p>
219      * Throws <code>IllegalArgumentException</code> if the array is null.
220      * <p>
221      * See {@link org.apache.commons.math.stat.descriptive.moment.Mean} for
222      * details on the computing algorithm.
223      * 
224      * @param values the input array
225      * @return the mean of the values or Double.NaN if the array is empty
226      * @throws IllegalArgumentException if the array is null
227      */
228     public static double mean(final double[] values) {
            // Here 'mean' resolves to the static Mean field; the method and the field share one name.
229         return mean.evaluate(values);
230     }
231 
232     /**
233      * Returns the arithmetic mean of the entries in the specified portion of
234      * the input array, or <code>Double.NaN</code> if the designated subarray
235      * is empty.
236      * <p>
237      * Throws <code>IllegalArgumentException</code> if the array is null.
238      * <p>
239      * See {@link org.apache.commons.math.stat.descriptive.moment.Mean} for
240      * details on the computing algorithm.
241      * 
242      * @param values the input array
243      * @param begin index of the first array element to include
244      * @param length the number of elements to include
245      * @return the mean of the values or Double.NaN if length = 0
246      * @throws IllegalArgumentException if the array is null or the array index
247      * parameters are not valid
248      */
249     public static double mean(final double[] values, final int begin,
250             final int length) {
            // Sub-array overload: begin/length go straight to the shared Mean instance, unchecked here.
251         return mean.evaluate(values, begin, length);
252     }
253     
254     /**
255      * Returns the geometric mean of the entries in the input array, or 
256      * <code>Double.NaN</code> if the array is empty.
257      * <p>
258      * Throws <code>IllegalArgumentException</code> if the array is null.
259      * <p>
260      * See {@link org.apache.commons.math.stat.descriptive.moment.GeometricMean}
261      * for details on the computing algorithm.
262      * 
263      * @param values the input array
264      * @return the geometric mean of the values or Double.NaN if the array is empty
265      * @throws IllegalArgumentException if the array is null
266      */
267     public static double geometricMean(final double[] values) {
            // Whole-array overload: forwards to the shared GeometricMean instance in the static field 'geometricMean'.
268         return geometricMean.evaluate(values);
269     }
270 
271     /**
272      * Returns the geometric mean of the entries in the specified portion of
273      * the input array, or <code>Double.NaN</code> if the designated subarray
274      * is empty.
275      * <p>
276      * Throws <code>IllegalArgumentException</code> if the array is null.
277      * <p>
278      * See {@link org.apache.commons.math.stat.descriptive.moment.GeometricMean}
279      * for details on the computing algorithm.
280      * 
281      * @param values the input array
282      * @param begin index of the first array element to include
283      * @param length the number of elements to include
284      * @return the geometric mean of the values or Double.NaN if length = 0
285      * @throws IllegalArgumentException if the array is null or the array index
286      * parameters are not valid
287      */
288     public static double geometricMean(final double[] values, final int begin,
289             final int length) {
            // Sub-array overload: begin/length go straight to the shared GeometricMean instance, unchecked here.
290         return geometricMean.evaluate(values, begin, length);
291     }
292     
293 
294     /**
295      * Returns the variance of the entries in the input array, or 
296      * <code>Double.NaN</code> if the array is empty.
297      * <p>
298      * See {@link org.apache.commons.math.stat.descriptive.moment.Variance} for
299      * details on the computing algorithm.
300      * <p>
301      * Returns 0 for a single-value (i.e. length = 1) sample.
302      * <p>
303      * Throws <code>IllegalArgumentException</code> if the array is null.
304      * 
305      * @param values the input array
306      * @return the variance of the values or Double.NaN if the array is empty
307      * @throws IllegalArgumentException if the array is null
308      */
309     public static double variance(final double[] values) {
            // No mean is supplied by the caller; the shared Variance instance receives only the array.
310         return variance.evaluate(values);
311     }
312 
313     /**
314      * Returns the variance of the entries in the specified portion of
315      * the input array, or <code>Double.NaN</code> if the designated subarray
316      * is empty.
317      * <p>
318      * See {@link org.apache.commons.math.stat.descriptive.moment.Variance} for
319      * details on the computing algorithm.
320      * <p>
321      * Returns 0 for a single-value (i.e. length = 1) sample.
322      * <p>
323      * Throws <code>IllegalArgumentException</code> if the array is null or the
324      * array index parameters are not valid.
325      * 
326      * @param values the input array
327      * @param begin index of the first array element to include
328      * @param length the number of elements to include
329      * @return the variance of the values or Double.NaN if length = 0
330      * @throws IllegalArgumentException if the array is null or the array index
331      *  parameters are not valid
332      */
333     public static double variance(final double[] values, final int begin,
334             final int length) {
            // Sub-array overload without a precomputed mean: begin/length are forwarded unchecked.
335         return variance.evaluate(values, begin, length);
336     }
337     
338     /**
339      * Returns the variance of the entries in the specified portion of
340      * the input array, using the precomputed mean value.  Returns 
341      * <code>Double.NaN</code> if the designated subarray is empty.
342      * <p>
343      * See {@link org.apache.commons.math.stat.descriptive.moment.Variance} for
344      * details on the computing algorithm.
345      * <p>
346      * The formula used assumes that the supplied mean value is the arithmetic
347      * mean of the sample data, not a known population parameter.  This method
348      * is supplied only to save computation when the mean has already been
349      * computed.
350      * <p>
351      * Returns 0 for a single-value (i.e. length = 1) sample.
352      * <p>
353      * Throws <code>IllegalArgumentException</code> if the array is null or the
354      * array index parameters are not valid.
355      * 
356      * @param values the input array
357      * @param mean the precomputed mean value
358      * @param begin index of the first array element to include
359      * @param length the number of elements to include
360      * @return the variance of the values or Double.NaN if length = 0
361      * @throws IllegalArgumentException if the array is null or the array index
362      *  parameters are not valid
363      */
364     public static double variance(final double[] values, final double mean, 
365             final int begin, final int length) {
            // Inside this method 'mean' is the double parameter; it shadows the static Mean field of the same name.
            // Note the argument order: the precomputed mean comes before begin/length.
366         return variance.evaluate(values, mean, begin, length);    
367     }
368     
369     /**
370      * Returns the variance of the entries in the input array, using the
371      * precomputed mean value.  Returns <code>Double.NaN</code> if the array
372      * is empty.  
373      * <p>
374      * See {@link org.apache.commons.math.stat.descriptive.moment.Variance} for
375      * details on the computing algorithm.   
376      * <p>
377      * The formula used assumes that the supplied mean value is the arithmetic
378      * mean of the sample data, not a known population parameter.  This method
379      * is supplied only to save computation when the mean has already been
380      * computed.
381      * <p>
382      * Returns 0 for a single-value (i.e. length = 1) sample.
383      * <p>
384      * Throws <code>IllegalArgumentException</code> if the array is null.
385      * 
386      * @param values the input array
387      * @param mean the precomputed mean value
388      * @return the variance of the values or Double.NaN if the array is empty
389      * @throws IllegalArgumentException if the array is null
390      */
391     public static double variance(final double[] values, final double mean) {
            // Inside this method 'mean' is the double parameter; it shadows the static Mean field of the same name.
392         return variance.evaluate(values, mean);    
393     }
394 
395     /**
396      * Returns the maximum of the entries in the input array, or 
397      * <code>Double.NaN</code> if the array is empty.
398      * <p>
399      * Throws <code>IllegalArgumentException</code> if the array is null.
400      * <p>
401      * <ul>
402      * <li>The result is <code>NaN</code> iff all values are <code>NaN</code> 
403      * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li>
404      * <li>If any of the values equals <code>Double.POSITIVE_INFINITY</code>, 
405      * the result is <code>Double.POSITIVE_INFINITY.</code></li>
406      * </ul>
407      * 
408      * @param values the input array
409      * @return the maximum of the values or Double.NaN if the array is empty
410      * @throws IllegalArgumentException if the array is null
411      */
412     public static double max(final double[] values) {
            // Whole-array overload: forwards to the shared Max instance in the static field 'max'.
413         return max.evaluate(values);
414     }
415 
416     /**
417      * Returns the maximum of the entries in the specified portion of
418      * the input array, or <code>Double.NaN</code> if the designated subarray
419      * is empty.
420      * <p>
421      * Throws <code>IllegalArgumentException</code> if the array is null or
422      * the array index parameters are not valid.
423      * <p>
424      * <ul>
425      * <li>The result is <code>NaN</code> iff all values are <code>NaN</code> 
426      * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li>
427      * <li>If any of the values equals <code>Double.POSITIVE_INFINITY</code>, 
428      * the result is <code>Double.POSITIVE_INFINITY.</code></li>
429      * </ul>
430      * 
431      * @param values the input array
432      * @param begin index of the first array element to include
433      * @param length the number of elements to include
434      * @return the maximum of the values or Double.NaN if length = 0
435      * @throws IllegalArgumentException if the array is null or the array index
436      * parameters are not valid
437      */
438     public static double max(final double[] values, final int begin,
439             final int length) {
            // Sub-array overload: begin/length go straight to the shared Max instance, unchecked here.
440         return max.evaluate(values, begin, length);
441     }
442 
443      /**
444      * Returns the minimum of the entries in the input array, or 
445      * <code>Double.NaN</code> if the array is empty.
446      * <p>
447      * Throws <code>IllegalArgumentException</code> if the array is null.
448      * <p>
449      * <ul>
450      * <li>The result is <code>NaN</code> iff all values are <code>NaN</code> 
451      * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li>
452      * <li>If any of the values equals <code>Double.NEGATIVE_INFINITY</code>, 
453      * the result is <code>Double.NEGATIVE_INFINITY.</code></li>
454      * </ul> 
455      * 
456      * @param values the input array
457      * @return the minimum of the values or Double.NaN if the array is empty
458      * @throws IllegalArgumentException if the array is null
459      */
460     public static double min(final double[] values) {
            // Whole-array overload: forwards to the shared Min instance in the static field 'min'.
461         return min.evaluate(values);
462     }
463 
464      /**
465      * Returns the minimum of the entries in the specified portion of
466      * the input array, or <code>Double.NaN</code> if the designated subarray
467      * is empty.
468      * <p>
469      * Throws <code>IllegalArgumentException</code> if the array is null or
470      * the array index parameters are not valid.
471      * <p>
472      * <ul>
473      * <li>The result is <code>NaN</code> iff all values are <code>NaN</code> 
474      * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li>
475      * <li>If any of the values equals <code>Double.NEGATIVE_INFINITY</code>, 
476      * the result is <code>Double.NEGATIVE_INFINITY.</code></li>
477      * </ul> 
478      * 
479      * @param values the input array
480      * @param begin index of the first array element to include
481      * @param length the number of elements to include
482      * @return the minimum of the values or Double.NaN if length = 0
483      * @throws IllegalArgumentException if the array is null or the array index
484      * parameters are not valid
485      */
486     public static double min(final double[] values, final int begin,
487             final int length) {
            // Sub-array overload: begin/length go straight to the shared Min instance, unchecked here.
488         return min.evaluate(values, begin, length);
489     }
490     
491     /**
492      * Returns an estimate of the <code>p</code>th percentile of the values
493      * in the <code>values</code> array.
494      * <p>
495      * <ul>
496      * <li>Returns <code>Double.NaN</code> if <code>values</code> has length 
497      * <code>0</code></li>
498      * <li>Returns (for any value of <code>p</code>) <code>values[0]</code>
499      *  if <code>values</code> has length <code>1</code></li>
500      * <li>Throws <code>IllegalArgumentException</code> if <code>values</code>
501      * is null or p is not a valid quantile value (p must be greater than 0
502      * and less than or equal to 100)</li>
503      * </ul>
504      * <p>
505      * See {@link org.apache.commons.math.stat.descriptive.rank.Percentile} for
506      * a description of the percentile estimation algorithm used.
507      * 
508      * @param values input array of values
509      * @param p the percentile value to compute
510      * @return the percentile value or Double.NaN if the array is empty
511      * @throws IllegalArgumentException if <code>values</code> is null 
512      * or p is invalid
513      */
514     public static double percentile(final double[] values, final double p) {
                // p is handed to evaluate() on each call; nothing is configured on the shared Percentile instance here.
515             return percentile.evaluate(values,p);
516     }
517 
518      /**
519      * Returns an estimate of the <code>p</code>th percentile of the values
520      * in the <code>values</code> array, starting with the element in (0-based)
521      * position <code>begin</code> in the array and including <code>length</code>
522      * values.
523      * <p>
524      * <ul>
525      * <li>Returns <code>Double.NaN</code> if <code>length = 0</code></li>
526      * <li>Returns (for any value of <code>p</code>) <code>values[begin]</code>
527      *  if <code>length = 1 </code></li>
528      * <li>Throws <code>IllegalArgumentException</code> if <code>values</code>
529      *  is null, <code>begin</code> or <code>length</code> is invalid, or 
530      * <code>p</code> is not a valid quantile value (p must be greater than 0
531      * and less than or equal to 100)</li>
532      * </ul>
533      * <p>
534       * See {@link org.apache.commons.math.stat.descriptive.rank.Percentile} for
535       * a description of the percentile estimation algorithm used.
536      * 
537      * @param values array of input values
538      * @param p  the percentile to compute
539      * @param begin  the first (0-based) element to include in the computation
540      * @param length  the number of array elements to include
541      * @return  the percentile value or Double.NaN if length = 0
542      * @throws IllegalArgumentException if the parameters are not valid or the
543      * input array is null
544      */
545     public static double percentile(final double[] values, final int begin, 
546             final int length, final double p) {
            // Argument order is (values, begin, length, p): p comes last, both here and in the evaluate() call.
547         return percentile.evaluate(values, begin, length, p);
548     }   
549     
550     /**
551      * Returns the sum of the (signed) differences between corresponding elements of the
552      * input arrays -- i.e., sum(sample1[i] - sample2[i]).
553      * 
554      * @param sample1  the first array
555      * @param sample2  the second array
556      * @return sum of paired differences
557      * @throws IllegalArgumentException if the arrays do not have the same
558      * (positive) length
559      */
560     public static double sumDifference(final double[] sample1, final double[] sample2)
561         throws IllegalArgumentException {
562         int n = sample1.length;
563         if (n  != sample2.length || n < 1) {
564             throw new IllegalArgumentException 
565                 ("Input arrays must have the same (positive) length.");
566         }
567         double result = 0;
568         for (int i = 0; i < n; i++) {
569             result += sample1[i] - sample2[i];
570         }
571         return result;
572     }
573     
574     /**
575      * Returns the mean of the (signed) differences between corresponding elements of the
576      * input arrays -- i.e., sum(sample1[i] - sample2[i]) / sample1.length.
577      * 
578      * @param sample1  the first array
579      * @param sample2  the second array
580      * @return mean of paired differences
581      * @throws IllegalArgumentException if the arrays do not have the same
582      * (positive) length
583      */
584     public static double meanDifference(final double[] sample1, final double[] sample2)
585     throws IllegalArgumentException {
586         return sumDifference(sample1, sample2) / (double) sample1.length;
587     }
588     
589     /**
590      * Returns the variance of the (signed) differences between corresponding elements of the
591      * input arrays -- i.e., var(sample1[i] - sample2[i]).
592      * 
593      * @param sample1  the first array
594      * @param sample2  the second array
595      * @param meanDifference   the mean difference between corresponding entries 
596      * @see #meanDifference(double[],double[])
597      * @return variance of paired differences
598      * @throws IllegalArgumentException if the arrays do not have the same
599      * length or their common length is less than 2.
600      */
601     public static double varianceDifference(final double[] sample1, final double[] sample2, 
602             double meanDifference)  throws IllegalArgumentException {
603         double sum1 = 0d;
604         double sum2 = 0d;
605         double diff = 0d;
606         int n = sample1.length;
607         if (n < 2 || n != sample2.length) {
608             throw new IllegalArgumentException("Input array lengths must be equal and at least 2.");
609         }
610         for (int i = 0; i < n; i++) {
611             diff = sample1[i] - sample2[i];
612             sum1 += (diff - meanDifference) *(diff - meanDifference);
613             sum2 += diff - meanDifference;
614         }
615         return (sum1 - (sum2 * sum2 / (double) n)) / (double) (n - 1);
616     }      
617     
618 }