source: trunk/yat/statistics/Fisher.h @ 3561

Last change on this file since 3561 was 3561, checked in by Peter, 6 years ago

Merged release 0.13.1 into trunk

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 6.9 KB
Line 
1#ifndef _theplu_yat_statistics_fisher_
2#define _theplu_yat_statistics_fisher_
3
4// $Id: Fisher.h 3561 2017-01-04 00:55:10Z peter $
5
6/*
7  Copyright (C) 2004, 2005 Peter Johansson
8  Copyright (C) 2006, 2007, 2008 Jari Häkkinen, Peter Johansson
9  Copyright (C) 2009, 2011, 2013, 2014, 2015 Peter Johansson
10
11  This file is part of the yat library, http://dev.thep.lu.se/yat
12
13  The yat library is free software; you can redistribute it and/or
14  modify it under the terms of the GNU General Public License as
15  published by the Free Software Foundation; either version 3 of the
16  License, or (at your option) any later version.
17
18  The yat library is distributed in the hope that it will be useful,
19  but WITHOUT ANY WARRANTY; without even the implied warranty of
20  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21  General Public License for more details.
22
23  You should have received a copy of the GNU General Public License
24  along with yat. If not, see <http://www.gnu.org/licenses/>.
25*/
26
27#include <yat/utility/deprecate.h>
28
29namespace theplu {
30namespace yat {
31namespace statistics {
32
33  /**
34     @brief Fisher's exact test.
35
36     Fisher's Exact test is a procedure that you can use for data
37     in a two by two contingency table: \f[ \begin{tabular}{|c|c|}
38     \hline a&b \tabularnewline \hline c&d \tabularnewline \hline
39     \end{tabular} \f] Fisher's Exact Test is based on exact
40     probabilities from a specific distribution (the hypergeometric
41     distribution). There's really no lower bound on the amount of
42     data that is needed for Fisher's Exact Test. You do have to
43     have at least one data value in each row and one data value in
44     each column. If an entire row or column is zero, then you
45     don't really have a 2 by 2 table. But you can use Fisher's
46     Exact Test when one of the cells in your table has a zero in
47     it. Fisher's Exact Test is also very useful for highly
48     imbalanced tables. If one or two of the cells in a two by two
49     table have numbers in the thousands and one or two of the
50     other cells have numbers less than 5, you can still use
51     Fisher's Exact Test. For very large tables (where all four
52     entries in the two by two table are large), your computer may
53     take too much time to compute Fisher's Exact Test. In these
54     situations, though, you might as well use the Chi-square test
55     because a large sample approximation (that the Chi-square test
56     relies on) is very reasonable. If all elements are larger than
57     10 a Chi-square test is reasonable to use.
58
59     @note The test assumes that each column and row sum,
60     respectively, is fixed. Just because you have a 2x2 table, this
61     assumption does not necessarily match your experimental setup. See
62     e.g. Barnard's test for an alternative.
63  */
64
65  class Fisher
66  {
67
68  public:
69    ///
70    /// Constructor; also usable as default constructor.
71    ///
72    /// \param yates if true, Yates's correction is used for the
73    /// chi-squared calculation
74    ///
75    /// \since Constructor with argument was introduced in yat 0.13
76    ///
77    Fisher(bool yates=false);
78
79    ///
80    /// Destructor
81    ///
82    virtual ~Fisher(void);
83
84
85    /**
86       The Chi2 score is calculated as \f$ \sum
87       \frac{(O_i-E_i)^2}{E_i}\f$ where \a E is the expected value and
88       \a O is the observed value.
89
90       If indicated in the constructor, Yates's correction is used, i.e.,
91       Chi2 is calculated as \f$ \sum \frac{(|O_i-E_i|-0.5)^2}{E_i} \f$
92
93
94       \see expected(double&, double&, double&, double&)
95
96       \return Chi2 score
97    */
98    double Chi2(void) const;
99
100    /**
101       Stores the expected values under the null hypothesis in the arguments:
102       \f$ a' = \frac{(a+c)(a+b)}{a+b+c+d} \f$,
103       \f$ b' = \frac{(a+b)(b+d)}{a+b+c+d} \f$,
104       \f$ c' = \frac{(a+c)(c+d)}{a+b+c+d} \f$,
105       \f$ d' = \frac{(b+d)(c+d)}{a+b+c+d} \f$.
106    */
107    void expected(double& a, double& b, double& c, double& d) const;
108
109    ///
110    /// If all elements in the table are at least minimum_size(), a Chi2
111    /// approximation is used for p-value calculation.
112    ///
113    /// @return reference to minimum_size, which allows modifying it
114    ///
115    unsigned int& minimum_size(void);
116
117    ///
118    /// If all elements in the table are at least minimum_size(), a Chi2
119    /// approximation is used for p-value calculation.
120    ///
121    /// @return const reference to minimum_size
122    ///
123    const unsigned int& minimum_size(void) const;
124
125    /**
126       Calculates the probability of getting the oddsratio (or smaller).
127
128       If all elements in the table are at least minimum_size(), a Chi2
129       approximation is used.
130
131       \since New in yat 0.11
132     */
133    double p_left(void) const;
134
135    /**
136       Calculates the probability of getting the oddsratio (or greater).
137
138       If all elements in the table are at least minimum_size(), a Chi2
139       approximation is used.
140
141       \since New in yat 0.11
142     */
143    double p_right(void) const;
144
145    /**
146       If all elements in the table are at least minimum_size(), a Chi2
147       approximation is used.
148
149       Otherwise a two-sided p-value is calculated using the
150       hypergeometric distribution
151       \f$ \sum_k P(k) \f$ where summation runs over \a k such that
152       \f$ P(k) \le P(a) \f$.
153
154       \return two-sided p-value
155    */
156    double p_value(void) const;
157
158    ///
159    /// One-sided p-value is the probability of getting a larger (or equal) oddsratio.
160    ///
161    /// If all elements in the table are at least minimum_size(), a Chi2
162    /// approximation is used.
163    ///
164    /// @return One-sided p-value
165    ///
166    /// \deprecated Provided for backward compatibility with the 0.10
167    /// API. Use p_right() instead.
168    ///
169    double p_value_one_sided() const YAT_DEPRECATE;
170
171    /**
172       Function calculating the odds ratio from the 2x2 table
173       \f[ \begin{tabular}{|c|c|}
174       \hline a&b \tabularnewline \hline c&d \tabularnewline \hline
175       \end{tabular} \f] as \f$ \frac{ad}{bc} \f$
176
177       Object will remember the values of \a a, \a b, \a c, and \a d.
178
179       @return odds ratio.
180
181       @throw runtime_error if the table is invalid. A table
182       is invalid if a row or column sum is zero.
183    */
184    double oddsratio(const unsigned int a, const unsigned int b,
185                     const unsigned int c, const unsigned int d);
186
187    /**
188       \return odds ratio set via the four-argument overload of oddsratio
189
190       \since New in yat 0.8
191     */
192    double oddsratio(void) const;
193
194  private:
195    bool calculate_p_exact(void) const; // NOTE(review): presumably decides exact test vs. Chi2 approximation -- confirm
196
197    // two-sided p-value; NOTE(review): presumably the Chi2 approximation -- confirm
198    double p_value_approximative(void) const;
199    // two-sided p-value; NOTE(review): presumably from the hypergeometric distribution -- confirm
200    double p_value_exact(void) const;
201    // calculate two-sided p-value to get k (or fewer) wins when
202    // drawing t samples out of a population of n1 wins and n2 losses
203    double p_value_exact(unsigned int k, unsigned int n1, unsigned int n2,
204                         unsigned int t) const;
205    // NOTE(review): presumably P(X<=k) with k, n1, n2, t as for p_value_exact above -- confirm
206    double lower_tail(unsigned int k, unsigned int n1, unsigned int n2,
207                      unsigned int t) const;
208
209    // return P(X=k+1) / P(X=k)
210    double hypergeometric_ratio(unsigned int k, unsigned int n1,
211                                unsigned int n2, unsigned int t) const;
212    double choose_ratio(unsigned int n, unsigned int k) const;
213
214    double p_left_exact(void) const;
215    double p_right_exact(void) const;
216    // NOTE(review): presumably the Yates-corrected Chi2 term for observed o and expected e -- confirm
217    double yates(unsigned int o, double e) const;
218    // 2x2 table entries; presumably the values remembered by oddsratio(a, b, c, d)
219    unsigned int a_;
220    unsigned int b_;
221    unsigned int c_;
222    unsigned int d_;
223    unsigned int minimum_size_; // presumably the value exposed by minimum_size()
224    double oddsratio_; // presumably the value returned by oddsratio(void)
225    bool yates_; // presumably the flag passed to the constructor
226  };
227
228}}} // of namespace statistics, yat, and theplu
229
230#endif
Note: See TracBrowser for help on using the repository browser.