/*
 * Copyright 2003-2004 The Apache Software Foundation.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.math.stat.descriptive;

import java.io.Serializable;
import java.util.Arrays;

import org.apache.commons.discovery.tools.DiscoverClass;
import org.apache.commons.math.stat.descriptive.moment.GeometricMean;
import org.apache.commons.math.stat.descriptive.moment.Kurtosis;
import org.apache.commons.math.stat.descriptive.moment.Mean;
import org.apache.commons.math.stat.descriptive.moment.Skewness;
import org.apache.commons.math.stat.descriptive.moment.Variance;
import org.apache.commons.math.stat.descriptive.rank.Max;
import org.apache.commons.math.stat.descriptive.rank.Min;
import org.apache.commons.math.stat.descriptive.rank.Percentile;
import org.apache.commons.math.stat.descriptive.summary.Sum;
import org.apache.commons.math.stat.descriptive.summary.SumOfSquares;


/**
 * Abstract base and factory for univariate statistical summaries.
 * <p>
 * Concrete subclasses supply the storage of values ({@link #addValue(double)},
 * {@link #getValues()}, {@link #getElement(int)}, windowing) and the
 * {@link #apply(UnivariateStatistic)} hook; every statistic accessor defined
 * here is computed by handing a fresh statistic object to that hook.
 *
 * @version $Revision: 348519 $ $Date: 2005-11-23 12:12:18 -0700 (Wed, 23 Nov 2005) $
 */
public abstract class DescriptiveStatistics implements StatisticalSummary, Serializable {

    /** Serialization UID */
    private static final long serialVersionUID = 5188298269533339922L;

    /**
     * Window size value meaning "no window": an implementation reporting
     * this value from {@link #getWindowSize()} takes the contributions of
     * an unbounded number of elements into account.
     */
    public static final int INFINITE_WINDOW = -1;

    /**
     * Creates a <code>DescriptiveStatistics</code> of the requested type by
     * invoking that type's default constructor.
     *
     * @param cls the type of <code>DescriptiveStatistics</code> object to
     * create.
     * @return a new instance of <code>cls</code>.
     * @throws InstantiationException is thrown if the object can not be
     * created.
     * @throws IllegalAccessException is thrown if the type's default
     * constructor is not accessible.
     */
    public static DescriptiveStatistics newInstance(Class cls)
        throws InstantiationException, IllegalAccessException {
        final Object created = cls.newInstance();
        return (DescriptiveStatistics) created;
    }

    /**
     * Creates a <code>DescriptiveStatistics</code> using class discovery,
     * with <code>DescriptiveStatisticsImpl</code> as the default
     * implementation name.
     *
     * @return a new instance.
     */
    public static DescriptiveStatistics newInstance() {
        try {
            final DiscoverClass discoverer = new DiscoverClass();
            return (DescriptiveStatistics) discoverer.newInstance(
                    DescriptiveStatistics.class,
                    "org.apache.commons.math.stat.descriptive.DescriptiveStatisticsImpl");
        } catch (Throwable ignored) {
            // Best-effort lookup: whatever goes wrong during discovery
            // (including the cast), fall back to the built-in implementation.
            return new DescriptiveStatisticsImpl();
        }
    }

    /**
     * Adds the value to the set of numbers.
     *
     * @param v the value to be added
     */
    public abstract void addValue(double v);

    /**
     * Returns the <a href="http://www.xycoon.com/arithmetic_mean.htm">
     * arithmetic mean </a> of the available values.
     *
     * @return The mean or Double.NaN if no values have been added.
     */
    public double getMean() {
        return apply(new Mean());
    }

    /**
     * Returns the <a href="http://www.xycoon.com/geometric_mean.htm">
     * geometric mean </a> of the available values.
     *
     * @return The geometric mean, Double.NaN if no values have been added,
     * or if the product of the available values is less than or equal to 0.
     */
    public double getGeometricMean() {
        return apply(new GeometricMean());
    }

    /**
     * Returns the variance of the available values.
     *
     * @return The variance, Double.NaN if no values have been added
     * or 0.0 for a single value set.
     */
    public double getVariance() {
        return apply(new Variance());
    }

    /**
     * Returns the standard deviation of the available values, computed as
     * the square root of {@link #getVariance()} when more than one value
     * is available.
     *
     * @return The standard deviation, Double.NaN if no values have been added
     * or 0.0 for a single value set.
     */
    public double getStandardDeviation() {
        if (getN() <= 0) {
            return Double.NaN;
        }
        if (getN() <= 1) {
            return 0.0;
        }
        return Math.sqrt(getVariance());
    }

    /**
     * Returns the skewness of the available values. Skewness is a
     * measure of the asymmetry of a given distribution.
     *
     * @return The skewness, Double.NaN if no values have been added
     * or 0.0 for a value set &lt;=2.
     */
    public double getSkewness() {
        return apply(new Skewness());
    }

    /**
     * Returns the kurtosis of the available values. Kurtosis is a
     * measure of the "peakedness" of a distribution.
     *
     * @return The kurtosis, Double.NaN if no values have been added, or 0.0
     * for a value set &lt;=3.
     */
    public double getKurtosis() {
        return apply(new Kurtosis());
    }

    /**
     * Returns the maximum of the available values.
     *
     * @return The max or Double.NaN if no values have been added.
     */
    public double getMax() {
        return apply(new Max());
    }

    /**
     * Returns the minimum of the available values.
     *
     * @return The min or Double.NaN if no values have been added.
     */
    public double getMin() {
        return apply(new Min());
    }

    /**
     * Returns the number of available values.
     *
     * @return The number of available values
     */
    public abstract long getN();

    /**
     * Returns the sum of the values that have been added to this instance.
     *
     * @return The sum or Double.NaN if no values have been added
     */
    public double getSum() {
        return apply(new Sum());
    }

    /**
     * Returns the sum of the squares of the available values.
     *
     * @return The sum of the squares or Double.NaN if no
     * values have been added.
     */
    public double getSumsq() {
        return apply(new SumOfSquares());
    }

    /**
     * Resets all statistics and storage.
     */
    public abstract void clear();

    /**
     * Returns the window size: an instance can be limited to reporting
     * measures for only the last N elements added to the set of values.
     *
     * @return The current window size or -1 ({@link #INFINITE_WINDOW})
     * if it is infinite.
     */
    public abstract int getWindowSize();

    /**
     * Sets the window size, which controls the number of values that
     * contribute to the reported statistics. For example, if
     * windowSize is set to 3 and the values {1,2,3,4,5}
     * have been added <strong> in that order</strong>
     * then the <i>available values</i> are {3,4,5} and all
     * reported statistics will be based on these values.
     *
     * @param windowSize sets the size of the window.
     */
    public abstract void setWindowSize(int windowSize);

    /**
     * Returns the current set of values in an array of double primitives.
     * The order of addition is preserved. The returned array is a fresh
     * copy of the underlying data -- i.e., it is not a reference to the
     * stored data.
     *
     * @return returns the current set of numbers in the order in which they
     * were added to this set
     */
    public abstract double[] getValues();

    /**
     * Returns the current set of values in an array of double primitives,
     * sorted in ascending order. The array obtained from
     * {@link #getValues()} is sorted in place and returned, so it is a fresh
     * copy of the underlying data -- i.e., it is not a reference to the
     * stored data.
     *
     * @return returns the current set of
     * numbers sorted in ascending order
     */
    public double[] getSortedValues() {
        final double[] ascending = getValues();
        Arrays.sort(ascending);
        return ascending;
    }

    /**
     * Returns the element at the specified index.
     *
     * @param index The Index of the element
     * @return return the element at the specified index
     */
    public abstract double getElement(int index);

    /**
     * Returns an estimate for the pth percentile of the stored values.
     * <p>
     * The implementation provided here follows the first estimation procedure presented
     * <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc252.htm">here.</a>
     * <p>
     * <strong>Preconditions</strong>:<ul>
     * <li><code>0 &lt; p &lt; 100</code> (otherwise an
     * <code>IllegalArgumentException</code> is thrown)</li>
     * <li>at least one value must be stored (returns <code>Double.NaN
     *     </code> otherwise)</li>
     * </ul>
     *
     * @param p the requested percentile (scaled from 0 - 100)
     * @return An estimate for the pth percentile of the stored data
     * values
     */
    public double getPercentile(double p) {
        return apply(new Percentile(p));
    }

    /**
     * Generates a text report displaying univariate statistics from values
     * that have been added. Each statistic is displayed on a separate
     * line; the median line is the 50th percentile.
     *
     * @return String with line feeds displaying statistics
     */
    public String toString() {
        final StringBuffer report = new StringBuffer();
        report.append("DescriptiveStatistics:\n");
        report.append("n: ").append(getN()).append("\n");
        report.append("min: ").append(getMin()).append("\n");
        report.append("max: ").append(getMax()).append("\n");
        report.append("mean: ").append(getMean()).append("\n");
        report.append("std dev: ").append(getStandardDeviation()).append("\n");
        report.append("median: ").append(getPercentile(50)).append("\n");
        report.append("skewness: ").append(getSkewness()).append("\n");
        report.append("kurtosis: ").append(getKurtosis()).append("\n");
        return report.toString();
    }

    /**
     * Apply the given statistic to the data associated with this set of statistics.
     *
     * @param stat the statistic to apply
     * @return the computed value of the statistic.
     */
    public abstract double apply(UnivariateStatistic stat);

}