source: trunk/yat/statistics/Fisher.h @ 1000

Last change on this file since 1000 was 1000, checked in by Jari Häkkinen, 14 years ago

trac moved to new location.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 5.1 KB
Line 
1#ifndef _theplu_yat_statistics_fisher_
2#define _theplu_yat_statistics_fisher_
3
4// $Id: Fisher.h 1000 2007-12-23 20:09:15Z jari $
5
6/*
7  Copyright (C) 2004, 2005 Peter Johansson
8  Copyright (C) 2006 Jari Häkkinen, Peter Johansson
9  Copyright (C) 2007 Peter Johansson
10
11  This file is part of the yat library, http://trac.thep.lu.se/yat
12
13  The yat library is free software; you can redistribute it and/or
14  modify it under the terms of the GNU General Public License as
15  published by the Free Software Foundation; either version 2 of the
16  License, or (at your option) any later version.
17
18  The yat library is distributed in the hope that it will be useful,
19  but WITHOUT ANY WARRANTY; without even the implied warranty of
20  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21  General Public License for more details.
22
23  You should have received a copy of the GNU General Public License
24  along with this program; if not, write to the Free Software
25  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
26  02111-1307, USA.
27*/
28
29#include "Score.h"
30
31#include <sys/types.h>
32
33#include <cmath>
34
35namespace theplu {
36namespace yat {
37namespace utility {
38  class vector;
39}
40namespace statistics { 
41  /**
42     @brief Fisher's exact test.   
43
44     Fisher's Exact test is a procedure that you can use for data
45     in a two by two contingency table: \f[ \begin{tabular}{|c|c|}
46     \hline a&b \tabularnewline \hline c&d \tabularnewline \hline
47     \end{tabular} \f] Fisher's Exact Test is based on exact
48     probabilities from a specific distribution (the hypergeometric
49     distribution). There's really no lower bound on the amount of
50     data that is needed for Fisher's Exact Test. You do have to
51     have at least one data value in each row and one data value in
52     each column. If an entire row or column is zero, then you
53     don't really have a 2 by 2 table. But you can use Fisher's
54     Exact Test when one of the cells in your table has a zero in
55     it. Fisher's Exact Test is also very useful for highly
56     imbalanced tables. If one or two of the cells in a two by two
57     table have numbers in the thousands and one or two of the
58     other cells has numbers less than 5, you can still use
59     Fisher's Exact Test. For very large tables (where all four
60     entries in the two by two table are large), your computer may
61     take too much time to compute Fisher's Exact Test. In these
62     situations, though, you might as well use the Chi-square test
63     because a large sample approximation (that the Chi-square test
64     relies on) is very reasonable. If all elements are larger than
65     10 a Chi-square test is reasonable to use.
66     
67     @note The statistica assumes that each column and row sum,
68     respectively, are fixed. Just because you have a 2x2 table, this
69     assumtion does not necessarily match you experimental upset. See
70     e.g. Barnard's test for alternative.
71  */
72 
73  class Fisher
74  {
75 
76  public:
77    ///
78    /// Default Constructor.
79    ///
80    Fisher(void);
81
82    ///
83    /// Destructor
84    ///
85    virtual ~Fisher(void);
86         
87   
88    ///
89    /// @return Chi2 score
90    ///
91    double Chi2(void) const;
92
93    /**
94       Calculates the expected values under the null hypothesis.
95       \f$ a' = \frac{(a+c)(a+b)}{a+b+c+d} \f$,
96       \f$ b' = \frac{(a+b)(b+d)}{a+b+c+d} \f$,
97       \f$ c' = \frac{(a+c)(c+d)}{a+b+c+d} \f$,
98       \f$ d' = \frac{(b+d)(c+d)}{a+b+c+d} \f$,
99    */
100    void expected(double& a, double& b, double& c, double& d) const;
101
102    ///
103    /// If all elements in table is at least minimum_size(), a Chi2
104    /// approximation is used for p-value calculation.
105    ///
106    /// @return reference to minimum_size
107    ///
108    u_int& minimum_size(void);
109
110    ///
111    /// If all elements in table is at least minimum_size(), a Chi2
112    /// approximation is used for p-value calculation.
113    ///
114    /// @return const reference to minimum_size
115    ///
116    const u_int& minimum_size(void) const;
117
118    ///
119    /// If oddsratio is larger than unity, two-sided p-value is equal
120    /// to 2*p_value_one_sided(). If oddsratio is smaller than unity
121    /// two-sided p-value is equal to 2*(1-p_value_one_sided()). If
122    /// oddsratio is unity two-sided p-value is equal to unity.
123    ///
124    /// If all elements in table is at least minimum_size(), a Chi2
125    /// approximation is used.
126    ///
127    /// @return 2-sided p-value
128    ///
129    double p_value() const;
130   
131    ///
132    /// One-sided p-value is probability to get larger (or equal) oddsratio.
133    ///
134    /// If all elements in table is at least minimum_size(), a Chi2
135    /// approximation is used.
136    ///
137    /// @return One-sided p-value
138    ///
139    double p_value_one_sided() const;
140   
141    /**
142       Function calculating odds ratio from 2x2 table
143       \f[ \begin{tabular}{|c|c|}
144       \hline a&b \tabularnewline \hline c&d \tabularnewline \hline
145       \end{tabular} \f] as \f$ \frac{ad}{bc} \f$
146
147       @return odds ratio.
148
149       @throw If table is invalid a runtime_error is thrown. A table
150       is invalid if a row or column sum is zero.
151    */
152    double oddsratio(const u_int a, const u_int b, 
153                     const u_int c, const u_int d);
154
155  private:
156    bool calculate_p_exact() const;
157
158    // two-sided
159    double p_value_approximative(void) const;
160    //two-sided
161    double p_value_exact(void) const;
162
163    u_int a_;
164    u_int b_;
165    u_int c_;
166    u_int d_;
167    u_int minimum_size_;
168    double oddsratio_;
169  };
170
171}}} // of namespace statistics, yat, and theplu
172
173#endif
Note: See TracBrowser for help on using the repository browser.