source: trunk/c++_tools/statistics/Fisher.h @ 648

Last change on this file since 648 was 648, checked in by Peter, 15 years ago

fixes #133: removed all errors reported by Doxygen. Only one error is left, which says that Index is not documented. I don't want it to be documented; we actually use Doxygen's preprocessor to skip documenting that class, yet Doxygen still complains that the class is not documented. The only solution would be to move that class to its own file rather than keeping it together with SVM.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 4.9 KB
Line 
1#ifndef _theplu_statistics_fisher_
2#define _theplu_statistics_fisher_
3
4// $Id: Fisher.h 648 2006-09-14 03:04:17Z peter $
5
6#include <c++_tools/statistics/Score.h>
7#include <c++_tools/utility/vector.h>
8
9#include <cmath>
10
11namespace theplu {
12namespace statistics { 
13  /**
14     @brief Fisher's exact test.   
15
16     Fisher's Exact test is a procedure that you can use for data
17     in a two by two contingency table: \f[ \begin{tabular}{|c|c|}
18     \hline a&b \tabularnewline \hline c&d \tabularnewline \hline
19     \end{tabular} \f] Fisher's Exact Test is based on exact
20     probabilities from a specific distribution (the hypergeometric
21     distribution). There's really no lower bound on the amount of
22     data that is needed for Fisher's Exact Test. You do have to
23     have at least one data value in each row and one data value in
24     each column. If an entire row or column is zero, then you
25     don't really have a 2 by 2 table. But you can use Fisher's
26     Exact Test when one of the cells in your table has a zero in
27     it. Fisher's Exact Test is also very useful for highly
28     imbalanced tables. If one or two of the cells in a two by two
29     table have numbers in the thousands and one or two of the
30     other cells has numbers less than 5, you can still use
31     Fisher's Exact Test. For very large tables (where all four
32     entries in the two by two table are large), your computer may
33     take too much time to compute Fisher's Exact Test. In these
34     situations, though, you might as well use the Chi-square test
35     because a large sample approximation (that the Chi-square test
36     relies on) is very reasonable. If all elements are larger than
37     10 a Chi-square test is reasonable to use.
38     
39     @note The statistica assumes that each column and row sum,
40     respectively, are fixed. Just because you have a 2x2 table, this
41     assumtion does not necessarily match you experimental upset. See
42     e.g. Barnard's test for alternative.
43  */
44 
45  class Fisher : public Score
46  {
47 
48  public:
49    ///
50    /// Default Constructor.
51    ///
52    Fisher(bool absolute=true);
53
54    ///
55    /// Destructor
56    ///
57    virtual ~Fisher(void) {};
58         
59   
60    ///
61    /// @return Chi2 score
62    ///
63    double Chi2(void) const;
64
65    ///
66    /// Cutoff sets the limit whether a value should go into the left
67    /// or the right row. @see score
68    ///
69    /// @return reference to cutoff for row
70    ///
71    inline double& value_cutoff(void) { return value_cutoff_; }
72
73    ///
74    /// Calculates the expected values under the null hypothesis.
75    /// a' = (a+c)(a+b)/(a+b+c+d)
76    ///
77    void expected(double& a, double& b, double& c, double& d) const;
78
79    ///
80    /// minimum_size is the threshold for when the p-value calculation
81    /// is performed using a Chi2 approximation.
82    ///
83    /// @return reference to minimum_size
84    ///
85    inline u_int& minimum_size(void){ return minimum_size_; } 
86
87    ///
88    /// If absolute, the p-value is the two-sided p-value. If all
89    /// elements in table is at least minimum_size, a Chi2
90    /// approximation is used.
91    ///
92    /// @return p-value
93    ///
94    /// @note in weighted case, approximation Chi2 is always used.
95    ///
96    double p_value() const;
97   
98    ///
99    /// Function calculating score from 2x2 table for which the
100    /// elements are calculated as follows \n
101    /// target.binary(i) sample i in group a or c otherwise in b or d
102    /// \f$ value(i) > \f$ value_cutoff() sample i in group a or b
103    /// otherwise c or d\n
104    ///
105    /// @return odds ratio. If absolute_ is true and odds ratio is
106    /// less than unity 1 divided by odds ratio is returned
107    ///
108    double score(const classifier::Target& target, 
109                 const utility::vector& value);
110
111    ///
112    /// Weighted version of score. Each element in 2x2 table is
113    /// calculated as \f$ \sum w_i \f$, so when each weight is
114    /// unitary the same table is created as in the unweighted version
115    ///
116    /// @return odds ratio
117    ///
118    /// @see score
119    ///
120    double score(const classifier::Target& target, 
121                 const classifier::DataLookupWeighted1D& value);
122
123
124    ///
125    /// Weighted version of score. Each element in 2x2 table is
126    /// calculated as \f$ \sum w_i \f$, so when each weight is
127    /// unitary the same table is created as in the unweighted version
128    ///
129    /// @return odds ratio
130    ///
131    /// @see score
132    ///
133    double score(const classifier::Target& target, 
134                 const utility::vector& value,
135                 const utility::vector& weight); 
136
137    ///
138    /// \f$ \frac{ad}{bc} \f$
139    ///
140    /// @return odds ratio. If absolute_ is true and odds ratio is
141    /// less than unity, 1 divided by odds ratio is returned
142    ///
143    double score(const u_int a, const u_int b, 
144                 const u_int c, const u_int d); 
145   
146
147         
148  private:
149    double oddsratio(const double a, const double b, 
150                     const double c, const double d);
151
152    // two-sided
153    double p_value_approximative(void) const;
154    //two-sided
155    double p_value_exact(void) const;
156
157    double a_;
158    double b_;
159    double c_;
160    double d_;
161    u_int minimum_size_;
162    double oddsratio_;
163    double value_cutoff_;
164  };
165
166}} // of namespace statistics and namespace theplu
167
168#endif
169
Note: See TracBrowser for help on using the repository browser.