source: trunk/yat/statistics/Fisher.h @ 777

Last change on this file since 777 was 777, checked in by Peter, 15 years ago

Changed Fisher interface dramatically. No longer inherited from Score. Removed several functions since they encourage inappropriate usage. Removed support for weights. refs #101

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 5.1 KB
Line 
1#ifndef _theplu_yat_statistics_fisher_
2#define _theplu_yat_statistics_fisher_
3
4// $Id: Fisher.h 777 2007-03-04 12:34:17Z peter $
5
6/*
7  Copyright (C) The authors contributing to this file.
8
9  This file is part of the yat library, http://lev.thep.lu.se/trac/yat
10
11  The yat library is free software; you can redistribute it and/or
12  modify it under the terms of the GNU General Public License as
13  published by the Free Software Foundation; either version 2 of the
14  License, or (at your option) any later version.
15
16  The yat library is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  General Public License for more details.
20
21  You should have received a copy of the GNU General Public License
22  along with this program; if not, write to the Free Software
23  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
24  02111-1307, USA.
25*/
26
27#include "Score.h"
28
29#include <sys/types.h>
30
31#include <cmath>
32
namespace theplu {
namespace yat {
namespace utility {
  class vector;
}
namespace statistics {
  /**
     @brief Fisher's exact test.

     Fisher's Exact test is a procedure that you can use for data
     in a two by two contingency table: \f[ \begin{tabular}{|c|c|}
     \hline a&b \tabularnewline \hline c&d \tabularnewline \hline
     \end{tabular} \f] Fisher's Exact Test is based on exact
     probabilities from a specific distribution (the hypergeometric
     distribution). There's really no lower bound on the amount of
     data that is needed for Fisher's Exact Test. You do have to
     have at least one data value in each row and one data value in
     each column. If an entire row or column is zero, then you
     don't really have a 2 by 2 table. But you can use Fisher's
     Exact Test when one of the cells in your table has a zero in
     it. Fisher's Exact Test is also very useful for highly
     imbalanced tables. If one or two of the cells in a two by two
     table have numbers in the thousands and one or two of the
     other cells have numbers less than 5, you can still use
     Fisher's Exact Test. For very large tables (where all four
     entries in the two by two table are large), your computer may
     take too much time to compute Fisher's Exact Test. In these
     situations, though, you might as well use the Chi-square test
     because a large sample approximation (that the Chi-square test
     relies on) is very reasonable. If all elements are larger than
     10 a Chi-square test is reasonable to use.

     @note The statistic assumes that each column sum and each row
     sum, respectively, is fixed. Just because you have a 2x2 table,
     this assumption does not necessarily match your experimental
     setup. See e.g. Barnard's test for an alternative.
  */

  class Fisher
  {

  public:
    ///
    /// @brief Default constructor.
    ///
    Fisher();

    ///
    /// @brief Destructor.
    ///
    virtual ~Fisher();


    ///
    /// @return Chi2 score
    ///
    double Chi2() const;

    /**
       Calculates the expected values under the null hypothesis:
       \f$ a' = \frac{(a+c)(a+b)}{a+b+c+d} \f$,
       \f$ b' = \frac{(a+b)(b+d)}{a+b+c+d} \f$,
       \f$ c' = \frac{(a+c)(c+d)}{a+b+c+d} \f$,
       \f$ d' = \frac{(b+d)(c+d)}{a+b+c+d} \f$.

       The function returns nothing; the expected values are handed
       back through the reference arguments.

       @param a receives \f$ a' \f$
       @param b receives \f$ b' \f$
       @param c receives \f$ c' \f$
       @param d receives \f$ d' \f$
    */
    void expected(double& a, double& b, double& c, double& d) const;

    ///
    /// If all elements in the table are at least minimum_size(), a
    /// Chi2 approximation is used for p-value calculation.
    ///
    /// The type is spelled \c unsigned \c int rather than the
    /// non-standard BSD typedef \c u_int; it is the same type on
    /// platforms that provide \c u_int.
    ///
    /// @return reference to minimum_size
    ///
    unsigned int& minimum_size();

    ///
    /// If all elements in the table are at least minimum_size(), a
    /// Chi2 approximation is used for p-value calculation.
    ///
    /// @return const reference to minimum_size
    ///
    const unsigned int& minimum_size() const;

    ///
    /// If oddsratio is larger than unity, two-sided p-value is equal
    /// to 2*p_value_one_sided(). If oddsratio is smaller than unity,
    /// two-sided p-value is equal to 2*(1-p_value_one_sided()). If
    /// oddsratio is unity, two-sided p-value is equal to unity.
    ///
    /// If all elements in the table are at least minimum_size(), a
    /// Chi2 approximation is used.
    ///
    /// @return 2-sided p-value
    ///
    double p_value() const;

    ///
    /// One-sided p-value is the probability to get a larger (or
    /// equal) oddsratio.
    ///
    /// If all elements in the table are at least minimum_size(), a
    /// Chi2 approximation is used.
    ///
    /// @return One-sided p-value
    ///
    double p_value_one_sided() const;

    /**
       Function calculating odds ratio from 2x2 table
       \f[ \begin{tabular}{|c|c|}
       \hline a&b \tabularnewline \hline c&d \tabularnewline \hline
       \end{tabular} \f] as \f$ \frac{ad}{bc} \f$

       @param a upper left element of the table
       @param b upper right element of the table
       @param c lower left element of the table
       @param d lower right element of the table

       @return odds ratio.

       @throw If table is invalid a runtime_error is thrown. A table
       is invalid if a row or column sum is zero.
    */
    double oddsratio(const unsigned int a, const unsigned int b,
                     const unsigned int c, const unsigned int d);

  private:
    // NOTE(review): presumably tells whether the exact calculation
    // should be used instead of the Chi2 approximation - confirm
    // against the implementation file.
    bool calculate_p_exact() const;

    // two-sided
    double p_value_approximative() const;
    // two-sided
    double p_value_exact() const;

    // Elements of the 2x2 table; the lettering presumably follows the
    // a/b/c/d layout used in the documentation above - confirm against
    // the implementation file.
    unsigned int a_;
    unsigned int b_;
    unsigned int c_;
    unsigned int d_;
    // Value exposed through minimum_size().
    unsigned int minimum_size_;
    // NOTE(review): presumably the odds ratio of the current table as
    // computed by oddsratio() - confirm against the implementation file.
    double oddsratio_;
  };

}}} // of namespace statistics, yat, and theplu
170
171#endif
Note: See TracBrowser for help on using the repository browser.