1 /* 2 * Copyright 2003-2004 The Apache Software Foundation. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package org.apache.commons.math.random; 18 19 import java.io.IOException; 20 import java.io.File; 21 import java.net.URL; 22 import java.util.List; 23 24 import org.apache.commons.math.stat.descriptive.StatisticalSummary; 25 26 /** 27 * Represents an <a href="http://random.mat.sbg.ac.at/~ste/dipl/node11.html"> 28 * empirical probability distribution</a> -- a probability distribution derived 29 * from observed data without making any assumptions about the functional form 30 * of the population distribution that the data come from.<p> 31 * Implementations of this interface maintain data structures, called 32 * <i>distribution digests</i>, that describe empirical distributions and 33 * support the following operations: <ul> 34 * <li>loading the distribution from a file of observed data values</li> 35 * <li>dividing the input data into "bin ranges" and reporting bin frequency 36 * counts (data for histogram)</li> 37 * <li>reporting univariate statistics describing the full set of data values 38 * as well as the observations within each bin</li> 39 * <li>generating random values from the distribution</li> 40 * </ul> 41 * Applications can use <code>EmpiricalDistribution</code> implementations to 42 * build grouped frequnecy histograms representing the input data or to 43 * generate random values "like" those in the input file -- i.e., the values 44 * generated will follow the distribution of the values in the file. 45 * 46 * @version $Revision: 155427 $ $Date: 2005-02-26 06:11:52 -0700 (Sat, 26 Feb 2005) $ 47 */ 48 public interface EmpiricalDistribution { 49 50 /** 51 * Computes the empirical distribution from the provided 52 * array of numbers. 53 * 54 * @param dataArray the data array 55 */ 56 void load(double[] dataArray); 57 58 /** 59 * Computes the empirical distribution from the input file. 60 * 61 * @param file the input file 62 * @throws IOException if an IO error occurs 63 */ 64 void load(File file) throws IOException; 65 66 /** 67 * Computes the empirical distribution using data read from a URL. 68 * 69 * @param url url of the input file 70 * @throws IOException if an IO error occurs 71 */ 72 void load(URL url) throws IOException; 73 74 /** 75 * Generates a random value from this distribution. 76 * <strong>Preconditions:</strong><ul> 77 * <li>the distribution must be loaded before invoking this method</li></ul> 78 * @return the random value. 79 * 80 * @throws IllegalStateException if the distribution has not been loaded 81 */ 82 double getNextValue() throws IllegalStateException; 83 84 85 /** 86 * Returns a 87 * {@link org.apache.commons.math.stat.descriptive.StatisticalSummary} 88 * describing this distribution. 89 * <strong>Preconditions:</strong><ul> 90 * <li>the distribution must be loaded before invoking this method</li> 91 * </ul> 92 * 93 * @return the sample statistics 94 * @throws IllegalStateException if the distribution has not been loaded 95 */ 96 StatisticalSummary getSampleStats() throws IllegalStateException; 97 98 /** 99 * Property indicating whether or not the distribution has been loaded. 100 * 101 * @return true if the distribution has been loaded 102 */ 103 boolean isLoaded(); 104 105 /** 106 * Returns the number of bins. 107 * 108 * @return the number of bins 109 */ 110 int getBinCount(); 111 112 /** 113 * Returns a list of 114 * {@link org.apache.commons.math.stat.descriptive.SummaryStatistics} 115 * containing statistics describing the values in each of the bins. The 116 * List is indexed on the bin number. 117 * 118 * @return List of bin statistics 119 */ 120 List getBinStats(); 121 122 /** 123 * Returns the array of upper bounds for the bins. Bins are: <br/> 124 * [min,upperBounds[0]],(upperBounds[0],upperBounds[1]],..., 125 * (upperBounds[binCount-1],max]. 126 * 127 * @return array of bin upper bounds 128 */ 129 double[] getUpperBounds(); 130 131 }