1 /* 2 * Copyright 2004 The Apache Software Foundation. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 package org.apache.commons.math.stat.inference; 17 18 import org.apache.commons.math.MathException; 19 20 /** 21 * An interface for Chi-Square tests. 22 * 23 * @version $Revision: 155427 $ $Date: 2005-02-26 06:11:52 -0700 (Sat, 26 Feb 2005) $ 24 */ 25 public interface ChiSquareTest { 26 27 /** 28 * Computes the <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda35f.htm"> 29 * Chi-Square statistic</a> comparing <code>observed</code> and <code>expected</code> 30 * freqeuncy counts. 31 * <p> 32 * This statistic can be used to perform a Chi-Square test evaluating the null hypothesis that 33 * the observed counts follow the expected distribution. 34 * <p> 35 * <strong>Preconditions</strong>: <ul> 36 * <li>Expected counts must all be positive. 37 * </li> 38 * <li>Observed counts must all be >= 0. 39 * </li> 40 * <li>The observed and expected arrays must have the same length and 41 * their common length must be at least 2. 42 * </li></ul><p> 43 * If any of the preconditions are not met, an 44 * <code>IllegalArgumentException</code> is thrown. 45 * 46 * @param observed array of observed frequency counts 47 * @param expected array of expected frequency counts 48 * @return chiSquare statistic 49 * @throws IllegalArgumentException if preconditions are not met 50 */ 51 double chiSquare(double[] expected, long[] observed) 52 throws IllegalArgumentException; 53 54 /** 55 * Returns the <i>observed significance level</i>, or <a href= 56 * "http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue"> 57 * p-value</a>, associated with a 58 * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda35f.htm"> 59 * Chi-square goodness of fit test</a> comparing the <code>observed</code> 60 * frequency counts to those in the <code>expected</code> array. 61 * <p> 62 * The number returned is the smallest significance level at which one can reject 63 * the null hypothesis that the observed counts conform to the frequency distribution 64 * described by the expected counts. 65 * <p> 66 * <strong>Preconditions</strong>: <ul> 67 * <li>Expected counts must all be positive. 68 * </li> 69 * <li>Observed counts must all be >= 0. 70 * </li> 71 * <li>The observed and expected arrays must have the same length and 72 * their common length must be at least 2. 73 * </li></ul><p> 74 * If any of the preconditions are not met, an 75 * <code>IllegalArgumentException</code> is thrown. 76 * 77 * @param observed array of observed frequency counts 78 * @param expected array of expected frequency counts 79 * @return p-value 80 * @throws IllegalArgumentException if preconditions are not met 81 * @throws MathException if an error occurs computing the p-value 82 */ 83 double chiSquareTest(double[] expected, long[] observed) 84 throws IllegalArgumentException, MathException; 85 86 /** 87 * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda35f.htm"> 88 * Chi-square goodness of fit test</a> evaluating the null hypothesis that the observed counts 89 * conform to the frequency distribution described by the expected counts, with 90 * significance level <code>alpha</code>. Returns true iff the null hypothesis can be rejected 91 * with 100 * (1 - alpha) percent confidence. 92 * <p> 93 * <strong>Example:</strong><br> 94 * To test the hypothesis that <code>observed</code> follows 95 * <code>expected</code> at the 99% level, use <p> 96 * <code>chiSquareTest(expected, observed, 0.01) </code> 97 * <p> 98 * <strong>Preconditions</strong>: <ul> 99 * <li>Expected counts must all be positive. 100 * </li> 101 * <li>Observed counts must all be >= 0. 102 * </li> 103 * <li>The observed and expected arrays must have the same length and 104 * their common length must be at least 2. 105 * <li> <code> 0 < alpha < 0.5 </code> 106 * </li></ul><p> 107 * If any of the preconditions are not met, an 108 * <code>IllegalArgumentException</code> is thrown. 109 * 110 * @param observed array of observed frequency counts 111 * @param expected array of expected frequency counts 112 * @param alpha significance level of the test 113 * @return true iff null hypothesis can be rejected with confidence 114 * 1 - alpha 115 * @throws IllegalArgumentException if preconditions are not met 116 * @throws MathException if an error occurs performing the test 117 */ 118 boolean chiSquareTest(double[] expected, long[] observed, double alpha) 119 throws IllegalArgumentException, MathException; 120 121 /** 122 * Computes the Chi-Square statistic associated with a 123 * <a href="http://www.itl.nist.gov/div898/handbook/prc/section4/prc45.htm"> 124 * chi-square test of independence</a> based on the input <code>counts</code> 125 * array, viewed as a two-way table. 126 * <p> 127 * The rows of the 2-way table are <code>count[0], ... , count[count.length - 1] </code> 128 * <p> 129 * <strong>Preconditions</strong>: <ul> 130 * <li>All counts must be >= 0. 131 * </li> 132 * <li>The count array must be rectangular (i.e. all count[i] subarrays must have the same length). 133 * </li> 134 * <li>The 2-way table represented by <code>counts</code> must have at least 2 columns and 135 * at least 2 rows. 136 * </li> 137 * </li></ul><p> 138 * If any of the preconditions are not met, an 139 * <code>IllegalArgumentException</code> is thrown. 140 * 141 * @param counts array representation of 2-way table 142 * @return chiSquare statistic 143 * @throws IllegalArgumentException if preconditions are not met 144 */ 145 double chiSquare(long[][] counts) 146 throws IllegalArgumentException; 147 148 /** 149 * Returns the <i>observed significance level</i>, or <a href= 150 * "http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue"> 151 * p-value</a>, associated with a 152 * <a href="http://www.itl.nist.gov/div898/handbook/prc/section4/prc45.htm"> 153 * chi-square test of independence</a> based on the input <code>counts</code> 154 * array, viewed as a two-way table. 155 * <p> 156 * The rows of the 2-way table are <code>count[0], ... , count[count.length - 1] </code> 157 * <p> 158 * <strong>Preconditions</strong>: <ul> 159 * <li>All counts must be >= 0. 160 * </li> 161 * <li>The count array must be rectangular (i.e. all count[i] subarrays must have the same length). 162 * </li> 163 * <li>The 2-way table represented by <code>counts</code> must have at least 2 columns and 164 * at least 2 rows. 165 * </li> 166 * </li></ul><p> 167 * If any of the preconditions are not met, an 168 * <code>IllegalArgumentException</code> is thrown. 169 * 170 * @param counts array representation of 2-way table 171 * @return p-value 172 * @throws IllegalArgumentException if preconditions are not met 173 * @throws MathException if an error occurs computing the p-value 174 */ 175 double chiSquareTest(long[][] counts) 176 throws IllegalArgumentException, MathException; 177 178 /** 179 * Performs a <a href="http://www.itl.nist.gov/div898/handbook/prc/section4/prc45.htm"> 180 * chi-square test of independence</a> evaluating the null hypothesis that the classifications 181 * represented by the counts in the columns of the input 2-way table are independent of the rows, 182 * with significance level <code>alpha</code>. Returns true iff the null hypothesis can be rejected 183 * with 100 * (1 - alpha) percent confidence. 184 * <p> 185 * The rows of the 2-way table are <code>count[0], ... , count[count.length - 1] </code> 186 * <p> 187 * <strong>Example:</strong><br> 188 * To test the null hypothesis that the counts in <code>count[0], ... , count[count.length - 1] </code> 189 * all correspond to the same underlying probability distribution at the 99% level, use <p> 190 * <code>chiSquareTest(counts, 0.01) </code> 191 * <p> 192 * <strong>Preconditions</strong>: <ul> 193 * <li>All counts must be >= 0. 194 * </li> 195 * <li>The count array must be rectangular (i.e. all count[i] subarrays must have the same length). 196 * </li> 197 * <li>The 2-way table represented by <code>counts</code> must have at least 2 columns and 198 * at least 2 rows. 199 * </li> 200 * </li></ul><p> 201 * If any of the preconditions are not met, an 202 * <code>IllegalArgumentException</code> is thrown. 203 * 204 * @param counts array representation of 2-way table 205 * @param alpha significance level of the test 206 * @return true iff null hypothesis can be rejected with confidence 207 * 1 - alpha 208 * @throws IllegalArgumentException if preconditions are not met 209 * @throws MathException if an error occurs performing the test 210 */ 211 boolean chiSquareTest(long[][] counts, double alpha) 212 throws IllegalArgumentException, MathException; 213 }