source:trunk/yat/statistics/Fisher.h@1701

Last change on this file since 1701 was 1487, checked in by Jari Häkkinen, 13 years ago

• Property svn:eol-style set to native
• Property svn:keywords set to Author Date Id Revision
File size: 5.1 KB
Line
1#ifndef _theplu_yat_statistics_fisher_
2#define _theplu_yat_statistics_fisher_
3
4// $Id: Fisher.h 1487 2008-09-10 08:41:36Z jari$
5
6/*
7  Copyright (C) 2004, 2005 Peter Johansson
8  Copyright (C) 2006, 2007 Jari Häkkinen, Peter Johansson
9  Copyright (C) 2008 Peter Johansson
10
11  This file is part of the yat library, http://dev.thep.lu.se/yat
12
13  The yat library is free software; you can redistribute it and/or
14  modify it under the terms of the GNU General Public License as
15  published by the Free Software Foundation; either version 3 of the
16  License, or (at your option) any later version.
17
18  The yat library is distributed in the hope that it will be useful,
19  but WITHOUT ANY WARRANTY; without even the implied warranty of
20  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21  General Public License for more details.
22
23  You should have received a copy of the GNU General Public License
24  along with yat. If not, see <http://www.gnu.org/licenses/>.
25*/
26
27#include "Score.h"
28
29#include <cmath>
30
31namespace theplu {
32namespace yat {
33namespace utility {
34  class vector; // forward declaration; not referenced by any declaration visible in this header
35}
36namespace statistics {
37  /**
38     @brief Fisher's exact test.
39
40     Fisher's Exact test is a procedure that you can use for data
41     in a two by two contingency table: \f[ \begin{tabular}{|c|c|}
42     \hline a&b \tabularnewline \hline c&d \tabularnewline \hline
43     \end{tabular} \f] Fisher's Exact Test is based on exact
44     probabilities from a specific distribution (the hypergeometric
45     distribution). There's really no lower bound on the amount of
46     data that is needed for Fisher's Exact Test. You do have to
47     have at least one data value in each row and one data value in
48     each column. If an entire row or column is zero, then you
49     don't really have a 2 by 2 table. But you can use Fisher's
50     Exact Test when one of the cells in your table has a zero in
51     it. Fisher's Exact Test is also very useful for highly
52     imbalanced tables. If one or two of the cells in a two by two
53     table have numbers in the thousands and one or two of the
54     other cells have numbers less than 5, you can still use
55     Fisher's Exact Test. For very large tables (where all four
56     entries in the two by two table are large), your computer may
57     take too much time to compute Fisher's Exact Test. In these
58     situations, though, you might as well use the Chi-square test
59     because a large sample approximation (that the Chi-square test
60     relies on) is very reasonable. If all elements are larger than
61     10 a Chi-square test is reasonable to use.
62
63     @note The statistic assumes that each column and row sum,
64     respectively, are fixed. Just because you have a 2x2 table, this
65     assumption does not necessarily match your experimental setup. See
66     e.g. Barnard's test for an alternative.
67  */
68
69  class Fisher
70  {
71
72  public:
73    ///
74    /// @brief Default constructor.
75    ///
76    Fisher(void);
77
78    ///
79    /// @brief Destructor (declared virtual).
80    ///
81    virtual ~Fisher(void);
82
83
84    ///
85    /// @return Chi2 score
86    ///
87    double Chi2(void) const;
88
89    /**
90       Calculates the expected values under the null hypothesis:
91       \f$a' = \frac{(a+c)(a+b)}{a+b+c+d} \f$,
92       \f$b' = \frac{(a+b)(b+d)}{a+b+c+d} \f$,
93       \f$c' = \frac{(a+c)(c+d)}{a+b+c+d} \f$,
94       \f$d' = \frac{(b+d)(c+d)}{a+b+c+d} \f$.
95    */
96    void expected(double& a, double& b, double& c, double& d) const;
97
98    ///
99    /// If all elements in the table are at least minimum_size(), a Chi2
100    /// approximation is used for p-value calculation.
101    ///
102    /// @return reference to minimum_size (non-const, so it can be assigned to)
103    ///
104    unsigned int& minimum_size(void);
105
106    ///
107    /// If all elements in the table are at least minimum_size(), a Chi2
108    /// approximation is used for p-value calculation.
109    ///
110    /// @return const reference to minimum_size
111    ///
112    const unsigned int& minimum_size(void) const;
113
114    ///
115    /// If oddsratio is larger than unity, two-sided p-value is equal
116    /// to 2*p_value_one_sided(). If oddsratio is smaller than unity,
117    /// two-sided p-value is equal to 2*(1-p_value_one_sided()). If
118    /// oddsratio is unity, two-sided p-value is equal to unity.
119    ///
120    /// If all elements in the table are at least minimum_size(), a Chi2
121    /// approximation is used.
122    ///
123    /// @return 2-sided p-value
124    ///
125    double p_value() const;
126
127    ///
128    /// One-sided p-value is the probability to get a larger (or equal) oddsratio.
129    ///
130    /// If all elements in the table are at least minimum_size(), a Chi2
131    /// approximation is used.
132    ///
133    /// @return One-sided p-value
134    ///
135    double p_value_one_sided() const;
136
137    /**
138       Function calculating odds ratio from 2x2 table
139       \f[ \begin{tabular}{|c|c|}
140       \hline a&b \tabularnewline \hline c&d \tabularnewline \hline
141       \end{tabular} \f] as \f$\frac{ad}{bc} \f$
142
143       @return odds ratio.
144
145       @throw runtime_error if the table is invalid. A table
146       is invalid if a row or column sum is zero.
147    */
148    double oddsratio(const unsigned int a, const unsigned int b,
149                     const unsigned int c, const unsigned int d);
150
151  private:
152    bool calculate_p_exact() const; // NOTE(review): presumably selects exact vs. Chi2 p-value -- confirm in the .cc
153
154    // two-sided
155    double p_value_approximative(void) const;
156    // two-sided
157    double p_value_exact(void) const;
158
159    unsigned int a_; // presumably cell a of the 2x2 table -- confirm in the .cc
160    unsigned int b_; // presumably cell b of the 2x2 table -- confirm in the .cc
161    unsigned int c_; // presumably cell c of the 2x2 table -- confirm in the .cc
162    unsigned int d_; // presumably cell d of the 2x2 table -- confirm in the .cc
163    unsigned int minimum_size_; // presumably the value exposed by minimum_size() -- confirm
164    double oddsratio_; // presumably the odds ratio computed by oddsratio() -- confirm
165  };
166
167}}} // of namespace statistics, yat, and theplu
168
169#endif
Note: See TracBrowser for help on using the repository browser.