1 /*
2 * Copyright 2003-2004 The Apache Software Foundation.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 package org.apache.commons.math.stat.descriptive.moment;
17
18 import java.io.Serializable;
19
20 import org.apache.commons.math.stat.descriptive.AbstractStorelessUnivariateStatistic;
21
22 /**
23 * Computes the variance of the available values. By default, the unbiased
24 * "sample variance" definitional formula is used:
25 * <p>
26 * variance = sum((x_i - mean)^2) / (n - 1)
27 * <p>
28 * where mean is the {@link Mean} and <code>n</code> is the number
29 * of sample observations.
30 * <p>
31 * The definitional formula does not have good numerical properties, so
32 * this implementation uses updating formulas based on West's algorithm
33 * as described in <a href="http://doi.acm.org/10.1145/359146.359152">
34 * Chan, T. F. andJ. G. Lewis 1979, <i>Communications of the ACM</i>,
35 * vol. 22 no. 9, pp. 526-531.</a>.
36 * <p>
37 * The "population variance" ( sum((x_i - mean)^2) / n ) can also
38 * be computed using this statistic. The <code>isBiasCorrected</code>
39 * property determines whether the "population" or "sample" value is
40 * returned by the <code>evaluate</code> and <code>getResult</code> methods.
41 * To compute population variances, set this property to <code>false.</code>
42 *
43 * <strong>Note that this implementation is not synchronized.</strong> If
44 * multiple threads access an instance of this class concurrently, and at least
45 * one of the threads invokes the <code>increment()</code> or
46 * <code>clear()</code> method, it must be synchronized externally.
47 *
48 * @version $Revision: 348519 $ $Date: 2005-11-23 12:12:18 -0700 (Wed, 23 Nov 2005) $
49 */
50 public class Variance extends AbstractStorelessUnivariateStatistic implements Serializable {
51
52 /** Serializable version identifier */
53 private static final long serialVersionUID = -9111962718267217978L;
54
55 /** SecondMoment is used in incremental calculation of Variance*/
56 protected SecondMoment moment = null;
57
58 /**
59 * Boolean test to determine if this Variance should also increment
60 * the second moment, this evaluates to false when this Variance is
61 * constructed with an external SecondMoment as a parameter.
62 */
63 protected boolean incMoment = true;
64
65 /**
66 * Determines whether or not bias correction is applied when computing the
67 * value of the statisic. True means that bias is corrected. See
68 * {@link Variance} for details on the formula.
69 */
70 private boolean isBiasCorrected = true;
71
72 /**
73 * Constructs a Variance with default (true) <code>isBiasCorrected</code>
74 * property.
75 */
76 public Variance() {
77 moment = new SecondMoment();
78 }
79
80 /**
81 * Constructs a Variance based on an external second moment.
82 *
83 * @param m2 the SecondMoment (Thrid or Fourth moments work
84 * here as well.)
85 */
86 public Variance(final SecondMoment m2) {
87 incMoment = false;
88 this.moment = m2;
89 }
90
91 /**
92 * Constructs a Variance with the specified <code>isBiasCorrected</code>
93 * property
94 *
95 * @param isBiasCorrected setting for bias correction - true means
96 * bias will be corrected and is equivalent to using the argumentless
97 * constructor
98 */
99 public Variance(boolean isBiasCorrected) {
100 moment = new SecondMoment();
101 this.isBiasCorrected = isBiasCorrected;
102 }
103
104 /**
105 * Constructs a Variance with the specified <code>isBiasCorrected</code>
106 * property and the supplied external second moment.
107 *
108 * @param isBiasCorrected setting for bias correction - true means
109 * bias will be corrected
110 * @param m2 the SecondMoment (Thrid or Fourth moments work
111 * here as well.)
112 */
113 public Variance(boolean isBiasCorrected, SecondMoment m2) {
114 incMoment = false;
115 this.moment = m2;
116 this.isBiasCorrected = isBiasCorrected;
117 }
118
119 /**
120 * @see org.apache.commons.math.stat.descriptive.StorelessUnivariateStatistic#increment(double)
121 */
122 public void increment(final double d) {
123 if (incMoment) {
124 moment.increment(d);
125 }
126 }
127
128 /**
129 * @see org.apache.commons.math.stat.descriptive.StorelessUnivariateStatistic#getResult()
130 */
131 public double getResult() {
132 if (moment.n == 0) {
133 return Double.NaN;
134 } else if (moment.n == 1) {
135 return 0d;
136 } else {
137 if (isBiasCorrected) {
138 return moment.m2 / ((double) moment.n - 1d);
139 } else {
140 return moment.m2 / ((double) moment.n);
141 }
142 }
143 }
144
145 /**
146 * @see org.apache.commons.math.stat.descriptive.StorelessUnivariateStatistic#getN()
147 */
148 public long getN() {
149 return moment.getN();
150 }
151
152 /**
153 * @see org.apache.commons.math.stat.descriptive.StorelessUnivariateStatistic#clear()
154 */
155 public void clear() {
156 if (incMoment) {
157 moment.clear();
158 }
159 }
160
161 /**
162 * Returns the variance of the entries in the input array, or
163 * <code>Double.NaN</code> if the array is empty.
164 * <p>
165 * See {@link Variance} for details on the computing algorithm.
166 * <p>
167 * Returns 0 for a single-value (i.e. length = 1) sample.
168 * <p>
169 * Throws <code>IllegalArgumentException</code> if the array is null.
170 * <p>
171 * Does not change the internal state of the statistic.
172 *
173 * @param values the input array
174 * @return the variance of the values or Double.NaN if length = 0
175 * @throws IllegalArgumentException if the array is null
176 */
177 public double evaluate(final double[] values) {
178 if (values == null) {
179 throw new IllegalArgumentException("input values array is null");
180 }
181 return evaluate(values, 0, values.length);
182 }
183
184 /**
185 * Returns the variance of the entries in the specified portion of
186 * the input array, or <code>Double.NaN</code> if the designated subarray
187 * is empty.
188 * <p>
189 * See {@link Variance} for details on the computing algorithm.
190 * <p>
191 * Returns 0 for a single-value (i.e. length = 1) sample.
192 * <p>
193 * Does not change the internal state of the statistic.
194 * <p>
195 * Throws <code>IllegalArgumentException</code> if the array is null.
196 *
197 * @param values the input array
198 * @param begin index of the first array element to include
199 * @param length the number of elements to include
200 * @return the variance of the values or Double.NaN if length = 0
201 * @throws IllegalArgumentException if the array is null or the array index
202 * parameters are not valid
203 */
204 public double evaluate(final double[] values, final int begin, final int length) {
205
206 double var = Double.NaN;
207
208 if (test(values, begin, length)) {
209 clear();
210 if (length == 1) {
211 var = 0.0;
212 } else if (length > 1) {
213 Mean mean = new Mean();
214 double m = mean.evaluate(values, begin, length);
215 var = evaluate(values, m, begin, length);
216 }
217 }
218 return var;
219 }
220
221 /**
222 * Returns the variance of the entries in the specified portion of
223 * the input array, using the precomputed mean value. Returns
224 * <code>Double.NaN</code> if the designated subarray is empty.
225 * <p>
226 * See {@link Variance} for details on the computing algorithm.
227 * <p>
228 * The formula used assumes that the supplied mean value is the arithmetic
229 * mean of the sample data, not a known population parameter. This method
230 * is supplied only to save computation when the mean has already been
231 * computed.
232 * <p>
233 * Returns 0 for a single-value (i.e. length = 1) sample.
234 * <p>
235 * Throws <code>IllegalArgumentException</code> if the array is null.
236 * <p>
237 * Does not change the internal state of the statistic.
238 *
239 * @param values the input array
240 * @param mean the precomputed mean value
241 * @param begin index of the first array element to include
242 * @param length the number of elements to include
243 * @return the variance of the values or Double.NaN if length = 0
244 * @throws IllegalArgumentException if the array is null or the array index
245 * parameters are not valid
246 */
247 public double evaluate(final double[] values, final double mean,
248 final int begin, final int length) {
249
250 double var = Double.NaN;
251
252 if (test(values, begin, length)) {
253 if (length == 1) {
254 var = 0.0;
255 } else if (length > 1) {
256 double accum = 0.0;
257 double accum2 = 0.0;
258 for (int i = begin; i < begin + length; i++) {
259 accum += Math.pow((values[i] - mean), 2.0);
260 accum2 += (values[i] - mean);
261 }
262 if (isBiasCorrected) {
263 var = (accum - (Math.pow(accum2, 2) / ((double) length))) /
264 (double) (length - 1);
265 } else {
266 var = (accum - (Math.pow(accum2, 2) / ((double) length))) /
267 (double) length;
268 }
269 }
270 }
271 return var;
272 }
273
274 /**
275 * Returns the variance of the entries in the input array, using the
276 * precomputed mean value. Returns <code>Double.NaN</code> if the array
277 * is empty.
278 * <p>
279 * See {@link Variance} for details on the computing algorithm.
280 * <p>
281 * If <code>isBiasCorrected</code> is <code>true</code> the formula used
282 * assumes that the supplied mean value is the arithmetic mean of the
283 * sample data, not a known population parameter. If the mean is a known
284 * population parameter, or if the "population" version of the variance is
285 * desired, set <code>isBiasCorrected</code> to <code>false</code> before
286 * invoking this method.
287 * <p>
288 * Returns 0 for a single-value (i.e. length = 1) sample.
289 * <p>
290 * Throws <code>IllegalArgumentException</code> if the array is null.
291 * <p>
292 * Does not change the internal state of the statistic.
293 *
294 * @param values the input array
295 * @param mean the precomputed mean value
296 * @return the variance of the values or Double.NaN if the array is empty
297 * @throws IllegalArgumentException if the array is null
298 */
299 public double evaluate(final double[] values, final double mean) {
300 return evaluate(values, mean, 0, values.length);
301 }
302
303 /**
304 * @return Returns the isBiasCorrected.
305 */
306 public boolean isBiasCorrected() {
307 return isBiasCorrected;
308 }
309
310 /**
311 * @param isBiasCorrected The isBiasCorrected to set.
312 */
313 public void setBiasCorrected(boolean isBiasCorrected) {
314 this.isBiasCorrected = isBiasCorrected;
315 }
316
317 }