View Javadoc

1   /*
2    * Copyright 2003-2004 The Apache Software Foundation.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *      http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  package org.apache.commons.math.stat.descriptive.rank;
17  
18  import java.io.Serializable;
19  import java.util.Arrays;
20  import org.apache.commons.math.stat.descriptive.AbstractUnivariateStatistic;
21  
22  /**
23   * Provides percentile computation.
24   * <p>
25   * There are several commonly used methods for estimating percentiles (a.k.a. 
26   * quantiles) based on sample data.  For large samples, the different methods 
27   * agree closely, but when sample sizes are small, different methods will give
28   * significantly different results.  The algorithm implemented here works as follows:
29   * <ol>
30   * <li>Let <code>n</code> be the length of the (sorted) array and 
31   * <code>0 < p <= 100</code> be the desired percentile.</li>
32   * <li>If <code> n = 1 </code> return the unique array element (regardless of 
33   * the value of <code>p</code>); otherwise </li>
34   * <li>Compute the estimated percentile position  
35   * <code> pos = p * (n + 1) / 100</code> and the difference, <code>d</code>
36   * between <code>pos</code> and <code>floor(pos)</code> (i.e. the fractional
37   * part of <code>pos</code>).  If <code>pos >= n</code> return the largest
38   * element in the array; otherwise</li>
39   * <li>Let <code>lower</code> be the element in position 
40   * <code>floor(pos)</code> in the array and let <code>upper</code> be the
41   * next element in the array.  Return <code>lower + d * (upper - lower)</code>
42   * </li>
43   * </ol>
44   * <p>
45   * To compute percentiles, the data must be (totally) ordered.  Input arrays
46   * are copied and then sorted using  {@link java.util.Arrays#sort(double[])}.
47   * The ordering used by <code>Arrays.sort(double[])</code> is the one determined
48   * by {@link java.lang.Double#compareTo(Double)}.  This ordering makes 
49   * <code>Double.NaN</code> larger than any other value (including 
50   * <code>Double.POSITIVE_INFINITY</code>).  Therefore, for example, the median
51   * (50th percentile) of  
52   * <code>{0, 1, 2, 3, 4, Double.NaN}</code> evaluates to <code>2.5.</code>  
53   * <p>
54   * Since percentile estimation usually involves interpolation between array 
55   * elements, arrays containing  <code>NaN</code> or infinite values will often
56   * result in <code>NaN<code> or infinite values returned.
57   * <p>
58   * <strong>Note that this implementation is not synchronized.</strong> If 
59   * multiple threads access an instance of this class concurrently, and at least
60   * one of the threads invokes the <code>increment()</code> or 
61   * <code>clear()</code> method, it must be synchronized externally.
62   * 
63   * @version $Revision: 348519 $ $Date: 2005-11-23 12:12:18 -0700 (Wed, 23 Nov 2005) $
64   */
65  public class Percentile extends AbstractUnivariateStatistic implements Serializable {
66  
67      /** Serializable version identifier */
68      private static final long serialVersionUID = -8091216485095130416L; 
69         
70      /** Determines what percentile is computed when evaluate() is activated 
71       * with no quantile argument */
72      private double quantile = 0.0;
73  
74      /**
75       * Constructs a Percentile with a default quantile
76       * value of 50.0.
77       */
78      public Percentile() {
79          this(50.0);
80      }
81  
82      /**
83       * Constructs a Percentile with the specific quantile value.
84       * @param p the quantile
85       * @throws IllegalArgumentException  if p is not greater than 0 and less
86       * than or equal to 100
87       */
88      public Percentile(final double p) {
89          setQuantile(p);
90      }
91  
92      /**
93       * Returns an estimate of the <code>p</code>th percentile of the values
94       * in the <code>values</code> array.
95       * <p>
96       * Calls to this method do not modify the internal <code>quantile</code>
97       * state of this statistic.
98       * <p>
99       * <ul>
100      * <li>Returns <code>Double.NaN</code> if <code>values</code> has length 
101      * <code>0</code></li>
102      * <li>Returns (for any value of <code>p</code>) <code>values[0]</code>
103      *  if <code>values</code> has length <code>1</code></li>
104      * <li>Throws <code>IllegalArgumentException</code> if <code>values</code>
105      * is null or p is not a valid quantile value (p must be greater than 0
106      * and less than or equal to 100) </li>
107      * </ul>
108      * <p>
109      * See {@link Percentile} for a description of the percentile estimation
110      * algorithm used.
111      * 
112      * @param values input array of values
113      * @param p the percentile value to compute
114      * @return the percentile value or Double.NaN if the array is empty
115      * @throws IllegalArgumentException if <code>values</code> is null 
116      *     or p is invalid
117      */
118     public double evaluate(final double[] values, final double p) {
119         test(values, 0, 0);
120         return evaluate(values, 0, values.length, p);
121     }
122 
123     /**
124      * Returns an estimate of the <code>quantile</code>th percentile of the
125      * designated values in the <code>values</code> array.  The quantile
126      * estimated is determined by the <code>quantile</code> property.
127      * <p>
128      * <ul>
129      * <li>Returns <code>Double.NaN</code> if <code>length = 0</code></li>
130      * <li>Returns (for any value of <code>quantile</code>) 
131      * <code>values[begin]</code> if <code>length = 1 </code></li>
132      * <li>Throws <code>IllegalArgumentException</code> if <code>values</code>
133      * is null,  or <code>start</code> or <code>length</code> 
134      * is invalid</li>
135      * </ul>
136      * <p>
137      * See {@link Percentile} for a description of the percentile estimation
138      * algorithm used.
139      * 
140      * @param values the input array
141      * @param start index of the first array element to include
142      * @param length the number of elements to include
143      * @return the percentile value
144      * @throws IllegalArgumentException if the parameters are not valid
145      * 
146      */
147     public double evaluate( final double[] values, final int start, final int length) {
148         return evaluate(values, start, length, quantile);
149     }
150 
151      /**
152      * Returns an estimate of the <code>p</code>th percentile of the values
153      * in the <code>values</code> array, starting with the element in (0-based)
154      * position <code>begin</code> in the array and including <code>length</code>
155      * values.
156      * <p>
157      * Calls to this method do not modify the internal <code>quantile</code>
158      * state of this statistic.
159      * <p>
160      * <ul>
161      * <li>Returns <code>Double.NaN</code> if <code>length = 0</code></li>
162      * <li>Returns (for any value of <code>p</code>) <code>values[begin]</code>
163      *  if <code>length = 1 </code></li>
164      * <li>Throws <code>IllegalArgumentException</code> if <code>values</code>
165      *  is null , <code>begin</code> or <code>length</code> is invalid, or 
166      * <code>p</code> is not a valid quantile value (p must be greater than 0
167      * and less than or equal to 100)</li>
168      * </ul>
169      * <p>
170       * See {@link Percentile} for a description of the percentile estimation
171       * algorithm used.
172      * 
173      * @param values array of input values
174      * @param p  the percentile to compute
175      * @param begin  the first (0-based) element to include in the computation
176      * @param length  the number of array elements to include
177      * @return  the percentile value
178      * @throws IllegalArgumentException if the parameters are not valid or the
179      * input array is null
180      */
181     public double evaluate(final double[] values, final int begin, 
182             final int length, final double p) {
183 
184         test(values, begin, length);
185 
186         if ((p > 100) || (p <= 0)) {
187             throw new IllegalArgumentException("invalid quantile value: " + p);
188         }
189         if (length == 0) {
190             return Double.NaN;
191         }
192         if (length == 1) {
193             return values[begin]; // always return single value for n = 1
194         }
195         double n = (double) length;
196         double pos = p * (n + 1) / 100;
197         double fpos = Math.floor(pos);
198         int intPos = (int) fpos;
199         double dif = pos - fpos;
200         double[] sorted = new double[length];
201         System.arraycopy(values, begin, sorted, 0, length);
202         Arrays.sort(sorted);
203 
204         if (pos < 1) {
205             return sorted[0];
206         }
207         if (pos >= n) {
208             return sorted[length - 1];
209         }
210         double lower = sorted[intPos - 1];
211         double upper = sorted[intPos];
212         return lower + dif * (upper - lower);
213     }
214 
215     /**
216      * Returns the value of the quantile field (determines what percentile is
217      * computed when evaluate() is called with no quantile argument).
218      * 
219      * @return quantile
220      */
221     public double getQuantile() {
222         return quantile;
223     }
224 
225     /**
226      * Sets the value of the quantile field (determines what percentile is 
227      * computed when evaluate() is called with no quantile argument).
228      * 
229      * @param p a value between 0 < p <= 100 
230      * @throws IllegalArgumentException  if p is not greater than 0 and less
231      * than or equal to 100
232      */
233     public void setQuantile(final double p) {
234         if (p <= 0 || p > 100) {
235             throw new IllegalArgumentException("Illegal quantile value: " + p);
236         }
237         quantile = p;
238     }
239 
240 }