1 /* 2 * Copyright 2003-2004 The Apache Software Foundation. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 package org.apache.commons.math.stat.descriptive.rank; 17 18 import java.io.Serializable; 19 import java.util.Arrays; 20 import org.apache.commons.math.stat.descriptive.AbstractUnivariateStatistic; 21 22 /** 23 * Provides percentile computation. 24 * <p> 25 * There are several commonly used methods for estimating percentiles (a.k.a. 26 * quantiles) based on sample data. For large samples, the different methods 27 * agree closely, but when sample sizes are small, different methods will give 28 * significantly different results. The algorithm implemented here works as follows: 29 * <ol> 30 * <li>Let <code>n</code> be the length of the (sorted) array and 31 * <code>0 < p <= 100</code> be the desired percentile.</li> 32 * <li>If <code> n = 1 </code> return the unique array element (regardless of 33 * the value of <code>p</code>); otherwise </li> 34 * <li>Compute the estimated percentile position 35 * <code> pos = p * (n + 1) / 100</code> and the difference, <code>d</code> 36 * between <code>pos</code> and <code>floor(pos)</code> (i.e. the fractional 37 * part of <code>pos</code>). If <code>pos >= n</code> return the largest 38 * element in the array; otherwise</li> 39 * <li>Let <code>lower</code> be the element in position 40 * <code>floor(pos)</code> in the array and let <code>upper</code> be the 41 * next element in the array. Return <code>lower + d * (upper - lower)</code> 42 * </li> 43 * </ol> 44 * <p> 45 * To compute percentiles, the data must be (totally) ordered. Input arrays 46 * are copied and then sorted using {@link java.util.Arrays#sort(double[])}. 47 * The ordering used by <code>Arrays.sort(double[])</code> is the one determined 48 * by {@link java.lang.Double#compareTo(Double)}. This ordering makes 49 * <code>Double.NaN</code> larger than any other value (including 50 * <code>Double.POSITIVE_INFINITY</code>). Therefore, for example, the median 51 * (50th percentile) of 52 * <code>{0, 1, 2, 3, 4, Double.NaN}</code> evaluates to <code>2.5.</code> 53 * <p> 54 * Since percentile estimation usually involves interpolation between array 55 * elements, arrays containing <code>NaN</code> or infinite values will often 56 * result in <code>NaN<code> or infinite values returned. 57 * <p> 58 * <strong>Note that this implementation is not synchronized.</strong> If 59 * multiple threads access an instance of this class concurrently, and at least 60 * one of the threads invokes the <code>increment()</code> or 61 * <code>clear()</code> method, it must be synchronized externally. 62 * 63 * @version $Revision: 348519 $ $Date: 2005-11-23 12:12:18 -0700 (Wed, 23 Nov 2005) $ 64 */ 65 public class Percentile extends AbstractUnivariateStatistic implements Serializable { 66 67 /** Serializable version identifier */ 68 private static final long serialVersionUID = -8091216485095130416L; 69 70 /** Determines what percentile is computed when evaluate() is activated 71 * with no quantile argument */ 72 private double quantile = 0.0; 73 74 /** 75 * Constructs a Percentile with a default quantile 76 * value of 50.0. 77 */ 78 public Percentile() { 79 this(50.0); 80 } 81 82 /** 83 * Constructs a Percentile with the specific quantile value. 84 * @param p the quantile 85 * @throws IllegalArgumentException if p is not greater than 0 and less 86 * than or equal to 100 87 */ 88 public Percentile(final double p) { 89 setQuantile(p); 90 } 91 92 /** 93 * Returns an estimate of the <code>p</code>th percentile of the values 94 * in the <code>values</code> array. 95 * <p> 96 * Calls to this method do not modify the internal <code>quantile</code> 97 * state of this statistic. 98 * <p> 99 * <ul> 100 * <li>Returns <code>Double.NaN</code> if <code>values</code> has length 101 * <code>0</code></li> 102 * <li>Returns (for any value of <code>p</code>) <code>values[0]</code> 103 * if <code>values</code> has length <code>1</code></li> 104 * <li>Throws <code>IllegalArgumentException</code> if <code>values</code> 105 * is null or p is not a valid quantile value (p must be greater than 0 106 * and less than or equal to 100) </li> 107 * </ul> 108 * <p> 109 * See {@link Percentile} for a description of the percentile estimation 110 * algorithm used. 111 * 112 * @param values input array of values 113 * @param p the percentile value to compute 114 * @return the percentile value or Double.NaN if the array is empty 115 * @throws IllegalArgumentException if <code>values</code> is null 116 * or p is invalid 117 */ 118 public double evaluate(final double[] values, final double p) { 119 test(values, 0, 0); 120 return evaluate(values, 0, values.length, p); 121 } 122 123 /** 124 * Returns an estimate of the <code>quantile</code>th percentile of the 125 * designated values in the <code>values</code> array. The quantile 126 * estimated is determined by the <code>quantile</code> property. 127 * <p> 128 * <ul> 129 * <li>Returns <code>Double.NaN</code> if <code>length = 0</code></li> 130 * <li>Returns (for any value of <code>quantile</code>) 131 * <code>values[begin]</code> if <code>length = 1 </code></li> 132 * <li>Throws <code>IllegalArgumentException</code> if <code>values</code> 133 * is null, or <code>start</code> or <code>length</code> 134 * is invalid</li> 135 * </ul> 136 * <p> 137 * See {@link Percentile} for a description of the percentile estimation 138 * algorithm used. 139 * 140 * @param values the input array 141 * @param start index of the first array element to include 142 * @param length the number of elements to include 143 * @return the percentile value 144 * @throws IllegalArgumentException if the parameters are not valid 145 * 146 */ 147 public double evaluate( final double[] values, final int start, final int length) { 148 return evaluate(values, start, length, quantile); 149 } 150 151 /** 152 * Returns an estimate of the <code>p</code>th percentile of the values 153 * in the <code>values</code> array, starting with the element in (0-based) 154 * position <code>begin</code> in the array and including <code>length</code> 155 * values. 156 * <p> 157 * Calls to this method do not modify the internal <code>quantile</code> 158 * state of this statistic. 159 * <p> 160 * <ul> 161 * <li>Returns <code>Double.NaN</code> if <code>length = 0</code></li> 162 * <li>Returns (for any value of <code>p</code>) <code>values[begin]</code> 163 * if <code>length = 1 </code></li> 164 * <li>Throws <code>IllegalArgumentException</code> if <code>values</code> 165 * is null , <code>begin</code> or <code>length</code> is invalid, or 166 * <code>p</code> is not a valid quantile value (p must be greater than 0 167 * and less than or equal to 100)</li> 168 * </ul> 169 * <p> 170 * See {@link Percentile} for a description of the percentile estimation 171 * algorithm used. 172 * 173 * @param values array of input values 174 * @param p the percentile to compute 175 * @param begin the first (0-based) element to include in the computation 176 * @param length the number of array elements to include 177 * @return the percentile value 178 * @throws IllegalArgumentException if the parameters are not valid or the 179 * input array is null 180 */ 181 public double evaluate(final double[] values, final int begin, 182 final int length, final double p) { 183 184 test(values, begin, length); 185 186 if ((p > 100) || (p <= 0)) { 187 throw new IllegalArgumentException("invalid quantile value: " + p); 188 } 189 if (length == 0) { 190 return Double.NaN; 191 } 192 if (length == 1) { 193 return values[begin]; // always return single value for n = 1 194 } 195 double n = (double) length; 196 double pos = p * (n + 1) / 100; 197 double fpos = Math.floor(pos); 198 int intPos = (int) fpos; 199 double dif = pos - fpos; 200 double[] sorted = new double[length]; 201 System.arraycopy(values, begin, sorted, 0, length); 202 Arrays.sort(sorted); 203 204 if (pos < 1) { 205 return sorted[0]; 206 } 207 if (pos >= n) { 208 return sorted[length - 1]; 209 } 210 double lower = sorted[intPos - 1]; 211 double upper = sorted[intPos]; 212 return lower + dif * (upper - lower); 213 } 214 215 /** 216 * Returns the value of the quantile field (determines what percentile is 217 * computed when evaluate() is called with no quantile argument). 218 * 219 * @return quantile 220 */ 221 public double getQuantile() { 222 return quantile; 223 } 224 225 /** 226 * Sets the value of the quantile field (determines what percentile is 227 * computed when evaluate() is called with no quantile argument). 228 * 229 * @param p a value between 0 < p <= 100 230 * @throws IllegalArgumentException if p is not greater than 0 and less 231 * than or equal to 100 232 */ 233 public void setQuantile(final double p) { 234 if (p <= 0 || p > 100) { 235 throw new IllegalArgumentException("Illegal quantile value: " + p); 236 } 237 quantile = p; 238 } 239 240 }