# source:trunk/yat/statistics/Fisher.h@2555

Last change on this file since 2555 was 2555, checked in by Peter, 11 years ago

doc typo

• Property svn:eol-style set to native
• Property svn:keywords set to Author Date Id Revision
File size: 5.3 KB
Line
1#ifndef _theplu_yat_statistics_fisher_
2#define _theplu_yat_statistics_fisher_
3
4// $Id: Fisher.h 2555 2011-08-18 15:44:59Z peter$
5
6/*
7  Copyright (C) 2004, 2005 Peter Johansson
8  Copyright (C) 2006, 2007, 2008 Jari Häkkinen, Peter Johansson
9  Copyright (C) 2009, 2011 Peter Johansson
10
11  This file is part of the yat library, http://dev.thep.lu.se/yat
12
13  The yat library is free software; you can redistribute it and/or
14  modify it under the terms of the GNU General Public License as
15  published by the Free Software Foundation; either version 3 of the
16  License, or (at your option) any later version.
17
18  The yat library is distributed in the hope that it will be useful,
19  but WITHOUT ANY WARRANTY; without even the implied warranty of
20  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21  General Public License for more details.
22
23  You should have received a copy of the GNU General Public License
24  along with yat. If not, see <http://www.gnu.org/licenses/>.
25*/
26
27namespace theplu {
28namespace yat {
29namespace statistics {
30  /**
31     @brief Fisher's exact test.
32
33     Fisher's Exact test is a procedure that you can use for data
34     in a two by two contingency table: \f[ \begin{tabular}{|c|c|}
35     \hline a&b \tabularnewline \hline c&d \tabularnewline \hline
36     \end{tabular} \f] Fisher's Exact Test is based on exact
37     probabilities from a specific distribution (the hypergeometric
38     distribution). There's really no lower bound on the amount of
39     data that is needed for Fisher's Exact Test. You do have to
40     have at least one data value in each row and one data value in
41     each column. If an entire row or column is zero, then you
42     don't really have a 2 by 2 table. But you can use Fisher's
43     Exact Test when one of the cells in your table has a zero in
44     it. Fisher's Exact Test is also very useful for highly
45     imbalanced tables. If one or two of the cells in a two by two
46     table have numbers in the thousands and one or two of the
47     other cells have numbers less than 5, you can still use
48     Fisher's Exact Test. For very large tables (where all four
49     entries in the two by two table are large), your computer may
50     take too much time to compute Fisher's Exact Test. In these
51     situations, though, you might as well use the Chi-square test
52     because a large sample approximation (that the Chi-square test
53     relies on) is very reasonable. If all elements are larger than
54     10 a Chi-square test is reasonable to use.
55
56     @note The statistic assumes that each column and row sum,
57     respectively, are fixed. Just because you have a 2x2 table, this
58     assumption does not necessarily match your experimental setup. See
59     e.g. Barnard's test for an alternative.
60  */
61
62  class Fisher
63  {
64
65  public:
66    ///
67    /// Default Constructor.
68    ///
69    Fisher(void);
70
71    ///
72    /// Destructor
73    ///
74    virtual ~Fisher(void);
75
76
77    /**
78       The Chi2 score is calculated as \f$ \sum
79       \frac{(O_i-E_i)^2}{E_i}\f$ where \a E is expected value and \a
80       O is observed value.
81
82       \return Chi2 score
83    */
84    double Chi2(void) const;
85
86    /**
87       Calculates the expected values under the null hypothesis:
88       \f$a' = \frac{(a+c)(a+b)}{a+b+c+d} \f$,
89       \f$b' = \frac{(a+b)(b+d)}{a+b+c+d} \f$,
90       \f$c' = \frac{(a+c)(c+d)}{a+b+c+d} \f$,
91       \f$d' = \frac{(b+d)(c+d)}{a+b+c+d} \f$.
92    */
93    void expected(double& a, double& b, double& c, double& d) const;
94
95    ///
96    /// If all elements in the table are at least minimum_size(), a
97    /// Chi2 approximation is used for p-value calculation.
98    ///
99    /// @return reference to minimum_size
100    ///
101    unsigned int& minimum_size(void);
102
103    ///
104    /// If all elements in the table are at least minimum_size(), a
105    /// Chi2 approximation is used for p-value calculation.
106    ///
107    /// @return const reference to minimum_size
108    ///
109    const unsigned int& minimum_size(void) const;
110
111    /**
112       If all elements in the table are at least minimum_size(), a
113       Chi2 approximation is used.
114
115       Otherwise a two-sided p-value is calculated using the
116       hypergeometric distribution
117       \f$\sum_k P(k) \f$ where summation runs over \a k such that
118       \f$|k-<a>| \ge |a-<a>| \f$.
119
120       \return two-sided p-value
121    */
122    double p_value() const;
123
124    ///
125    /// One-sided p-value is the probability of getting a larger (or
126    /// equal) odds ratio.
127    /// If all elements in the table are at least minimum_size(), a
128    /// Chi2 approximation is used.
129    ///
130    /// @return One-sided p-value
131    ///
132    double p_value_one_sided() const;
133
134    /**
135       Function calculating odds ratio from 2x2 table
136       \f[ \begin{tabular}{|c|c|}
137       \hline a&b \tabularnewline \hline c&d \tabularnewline \hline
138       \end{tabular} \f] as \f$\frac{ad}{bc} \f$
139
140       Object will remember the values of \a a, \a b, \a c, and \a d.
141
142       @return odds ratio.
143
144       @throw runtime_error if the table is invalid. A table
145       is invalid if a row or column sum is zero.
146    */
147    double oddsratio(const unsigned int a, const unsigned int b,
148                     const unsigned int c, const unsigned int d);
149
150    /**
151       \return odds ratio loaded via the four-argument oddsratio()
152
153       \since New in yat 0.8
154     */
155    double oddsratio(void) const;
156
157  private:
158    bool calculate_p_exact() const; // NOTE(review): presumably decides exact vs. Chi2 approximation (cf. minimum_size()) - confirm in implementation
159
160    // two-sided p-value, approximative calculation
161    double p_value_approximative(void) const;
162    // two-sided p-value, exact calculation
163    double p_value_exact(void) const;
164
165    unsigned int a_; // presumably table cell a as passed to oddsratio(a, b, c, d)
166    unsigned int b_; // presumably table cell b
167    unsigned int c_; // presumably table cell c
168    unsigned int d_; // presumably table cell d
169    unsigned int minimum_size_; // presumably the value exposed by minimum_size()
170    double oddsratio_; // presumably the value returned by oddsratio(void)
171  };
172
173}}} // of namespace statistics, yat, and theplu
174
175#endif
Note: See TracBrowser for help on using the repository browser.