source: trunk/yat/statistics/Fisher.h @ 3004

Last change on this file since 3004 was 3004, checked in by Peter, 10 years ago

refs #689. Deprecate Fisher::one_sided_p; implement Fisher::left_p and right_p.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 6.0 KB
Line 
1#ifndef _theplu_yat_statistics_fisher_
2#define _theplu_yat_statistics_fisher_
3
4// $Id: Fisher.h 3004 2013-03-24 00:51:14Z peter $
5
6/*
7  Copyright (C) 2004, 2005 Peter Johansson
8  Copyright (C) 2006, 2007, 2008 Jari Häkkinen, Peter Johansson
9  Copyright (C) 2009, 2011, 2013 Peter Johansson
10
11  This file is part of the yat library, http://dev.thep.lu.se/yat
12
13  The yat library is free software; you can redistribute it and/or
14  modify it under the terms of the GNU General Public License as
15  published by the Free Software Foundation; either version 3 of the
16  License, or (at your option) any later version.
17
18  The yat library is distributed in the hope that it will be useful,
19  but WITHOUT ANY WARRANTY; without even the implied warranty of
20  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21  General Public License for more details.
22
23  You should have received a copy of the GNU General Public License
24  along with yat. If not, see <http://www.gnu.org/licenses/>.
25*/
26
27#include <yat/utility/deprecate.h>
28
29namespace theplu {
30namespace yat {
31namespace statistics {
32
33  /**
34     @brief Fisher's exact test.
35
36     Fisher's Exact test is a procedure that you can use for data
37     in a two by two contingency table: \f[ \begin{tabular}{|c|c|}
38     \hline a&b \tabularnewline \hline c&d \tabularnewline \hline
39     \end{tabular} \f] Fisher's Exact Test is based on exact
40     probabilities from a specific distribution (the hypergeometric
41     distribution). There's really no lower bound on the amount of
42     data that is needed for Fisher's Exact Test. You do have to
43     have at least one data value in each row and one data value in
44     each column. If an entire row or column is zero, then you
45     don't really have a 2 by 2 table. But you can use Fisher's
46     Exact Test when one of the cells in your table has a zero in
47     it. Fisher's Exact Test is also very useful for highly
48     imbalanced tables. If one or two of the cells in a two by two
49     table have numbers in the thousands and one or two of the
50     other cells has numbers less than 5, you can still use
51     Fisher's Exact Test. For very large tables (where all four
52     entries in the two by two table are large), your computer may
53     take too much time to compute Fisher's Exact Test. In these
54     situations, though, you might as well use the Chi-square test
55     because a large sample approximation (that the Chi-square test
56     relies on) is very reasonable. If all elements are larger than
57     10 a Chi-square test is reasonable to use.
58
59     @note The statistic assumes that each column and row sum,
60     respectively, is fixed. Just because you have a 2x2 table, this
61     assumption does not necessarily match your experimental setup. See
62     e.g. Barnard's test for an alternative.
63  */
64
65  class Fisher
66  {
67
68  public:
69    ///
70    /// Default constructor.
71    ///
72    Fisher(void);
73
74    ///
75    /// Virtual destructor.
76    ///
77    virtual ~Fisher(void);
78
79
80    /**
81       The Chi2 score is calculated as \f$ \sum
82       \frac{(O_i-E_i)^2}{E_i}\f$ where \a E is the expected value and
83       \a O is the observed value.
84
85       \see expected(double&, double&, double&, double&)
86
87       \return Chi2 score
88    */
89    double Chi2(void) const;
90
91    /**
92       Calculates the expected values under the null hypothesis and stores them in the reference arguments:
93       \f$ a' = \frac{(a+c)(a+b)}{a+b+c+d} \f$,
94       \f$ b' = \frac{(a+b)(b+d)}{a+b+c+d} \f$,
95       \f$ c' = \frac{(a+c)(c+d)}{a+b+c+d} \f$,
96       \f$ d' = \frac{(b+d)(c+d)}{a+b+c+d} \f$.
97    */
98    void expected(double& a, double& b, double& c, double& d) const;
99
100    ///
101    /// If all elements in the table are at least minimum_size(), a Chi2
102    /// approximation is used for p-value calculation.
103    ///
104    /// @return mutable reference to minimum_size, through which the threshold can be changed
105    ///
106    unsigned int& minimum_size(void);
107
108    ///
109    /// If all elements in the table are at least minimum_size(), a Chi2
110    /// approximation is used for p-value calculation.
111    ///
112    /// @return const reference to minimum_size
113    ///
114    const unsigned int& minimum_size(void) const;
115
116    /**
117       Calculates the probability of getting the oddsratio (or a smaller one).
118
119       If all elements in the table are at least minimum_size(), a Chi2
120       approximation is used.
121
122       \since New in yat 0.11
123     */
124    double p_left(void) const;
125
126    /**
127       Calculates the probability of getting the oddsratio (or a greater one).
128
129       If all elements in the table are at least minimum_size(), a Chi2
130       approximation is used.
131
132       \since New in yat 0.11
133     */
134    double p_right(void) const;
135
136    /**
137       If all elements in the table are at least minimum_size(), a Chi2
138       approximation is used.
139
140       Otherwise a two-sided p-value is calculated using the
141       hypergeometric distribution
142       \f$ \sum_k P(k) \f$ where the summation runs over \a k such that
143       \f$ |k-<a>| \ge |a-<a>| \f$.
144
145       \return two-sided p-value
146    */
147    double p_value(void) const;
148
149    ///
150    /// One-sided p-value is the probability of getting a larger (or equal) oddsratio.
151    ///
152    /// If all elements in the table are at least minimum_size(), a Chi2
153    /// approximation is used.
154    ///
155    /// @return One-sided p-value
156    ///
157    /// \deprecated Provided for backward compatibility with the 0.10
158    /// API. Use p_right() instead.
159    ///
160    double p_value_one_sided() const YAT_DEPRECATE;
161
162    /**
163       Function calculating the odds ratio from a 2x2 table
164       \f[ \begin{tabular}{|c|c|}
165       \hline a&b \tabularnewline \hline c&d \tabularnewline \hline
166       \end{tabular} \f] as \f$ \frac{ad}{bc} \f$
167
168       The object will remember the values of \a a, \a b, \a c, and \a d.
169
170       @return odds ratio.
171
172       @throw runtime_error if the table is invalid. A table
173       is invalid if a row or column sum is zero.
174    */
175    double oddsratio(const unsigned int a, const unsigned int b,
176                     const unsigned int c, const unsigned int d);
177
178    /**
179       \return odds ratio loaded via the four-argument oddsratio()
180
181       \since New in yat 0.8
182     */
183    double oddsratio(void) const;
184
185  private:
186    bool calculate_p_exact(void) const; // NOTE(review): presumably selects exact vs. Chi2 calculation (cf. minimum_size()) -- confirm in implementation
187
188    // two-sided; NOTE(review): presumably the Chi2 approximation -- confirm in implementation
189    double p_value_approximative(void) const;
190    double p_left_exact(void) const;  // NOTE(review): presumably the exact counterpart of p_left() -- confirm
191    double p_right_exact(void) const; // NOTE(review): presumably the exact counterpart of p_right() -- confirm
192    // two-sided
193    double p_value_exact(void) const;
194
195    unsigned int a_; // table cell a; presumably stored by oddsratio(a, b, c, d)
196    unsigned int b_; // table cell b; presumably stored by oddsratio(a, b, c, d)
197    unsigned int c_; // table cell c; presumably stored by oddsratio(a, b, c, d)
198    unsigned int d_; // table cell d; presumably stored by oddsratio(a, b, c, d)
199    unsigned int minimum_size_; // presumably the threshold exposed by minimum_size()
200    double oddsratio_; // presumably the value returned by oddsratio(void)
201  };
202
203}}} // of namespace statistics, yat, and theplu
204
205#endif
Note: See TracBrowser for help on using the repository browser.