source: trunk/src/Fisher.h @ 186

Last change on this file since 186 was 186, checked in by Peter, 17 years ago

Moving Fisher's exact test from Statistics.cc to having its own class inherit from abstract score base class

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 3.1 KB
Line 
1// $Id: Fisher.h 186 2004-10-07 18:51:13Z peter $
2
3#ifndef _theplu_cpptools_fisher_
4#define _theplu_cpptools_fisher_
5
6// C++ tools include
7/////////////////////
8#include "Score.h"
9#include "vector.h"
10//#include <gsl/gsl_cdf.h>
11
12// Standard C++ includes
13////////////////////////
14//#include <utility>
15//#include <vector>
16
17namespace theplu {
18namespace cpptools { 
19  ///
20  /// Class for Fisher's exact test.   
21  /// Fisher's Exact test is a procedure that you can use for data
22  /// in a two by two contingency table: \f[ \begin{tabular}{|c|c|}
23  /// \hline a&b \tabularnewline \hline c&d \tabularnewline \hline
24  /// \end{tabular} \f] Fisher's Exact Test is based on exact
25  /// probabilities from a specific distribution (the hypergeometric
26  /// distribution). There's really no lower bound on the amount of
27  /// data that is needed for Fisher's Exact Test. You do have to
28  /// have at least one data value in each row and one data value in
29  /// each column. If an entire row or column is zero, then you
30  /// don't really have a 2 by 2 table. But you can use Fisher's
31  /// Exact Test when one of the cells in your table has a zero in
32  /// it. Fisher's Exact Test is also very useful for highly
33  /// imbalanced tables. If one or two of the cells in a two by two
34  /// table have numbers in the thousands and one or two of the
35  /// other cells has numbers less than 5, you can still use
36  /// Fisher's Exact Test. For very large tables (where all four
37  /// entries in the two by two table are large), your computer may
38  /// take too much time to compute Fisher's Exact Test. In these
39  /// situations, though, you might as well use the Chi-square test
40  /// because a large sample approximation (that the Chi-square test
41  /// relies on) is very reasonable. If all elements are larger than
42  /// 10 a Chi-square test is reasonable to use.
43  ///
44
45 
46  class Fisher : public Score
47  {
48 
49  public:
50    ///
51    /// Default constructor.
52    /// @param absolute defaults to true; presumably sets the absolute_ flag that score() consults -- TODO confirm in Fisher.cc
53    Fisher(bool absolute=true);
54
55    ///
56    /// Virtual destructor with an empty inline body.
57    ///
58    virtual ~Fisher(void) {};
59         
60   
61    ///
62    /// @return p-value. Takes no arguments, so it is presumably evaluated from the table held in a_, b_, c_ and d_ -- TODO confirm in Fisher.cc
63    ///
64    double p_value();
65    ///
66    /// Unweighted version. The last argument defaults to an empty index
67    /// vector (presumably a sample subset, cf. train_set_ -- TODO confirm).
68    /// @return odds ratio; if absolute_ is true and the odds ratio is less than unity, 1 divided by the odds ratio is returned
69    ///
70    double score(const gslapi::vector&, const gslapi::vector&, 
71                 const std::vector<size_t>& = std::vector<size_t>());
72
73    ///
74    /// Weighted version of odds ratio. Each element in the 2x2 table is
75    /// calculated as \f$ \sum weight_i \f$, so when every weight equals
76    /// one the same table is created as in the unweighted version.
77    /// @return odds ratio
78    ///
79    double score(const gslapi::vector&, const gslapi::vector&, 
80                 const gslapi::vector&,
81                 const std::vector<size_t>& = std::vector<size_t>());
82
83    ///
84    /// @return odds ratio \f$ ad/bc \f$; the four arguments are presumably a, b, c, d in that order -- TODO confirm
85    ///
86    double score(const u_int, const u_int, const u_int, const u_int); 
87    /// Const helper taking four doubles; undocumented in the original -- presumably the odds ratio of the given 2x2 cell values (TODO confirm).
88    double oddsratio(const double, const double, 
89                     const double, const double) const;
90
91         
92  private:
93    std::vector<size_t> train_set_; ///< index set; presumably the optional last argument of the vector score() overloads -- TODO confirm
94    gslapi::vector weight_; ///< presumably the weights handed to the weighted score() overload -- TODO confirm
95    u_int a_; ///< a_, b_, c_, d_: presumably the four cells of the 2x2 table shown in the class comment -- TODO confirm
96    u_int b_; ///< see a_
97    u_int c_; ///< see a_
98    u_int d_; ///< see a_
99
100
101  };
102
103}} // of namespace cpptools and namespace theplu
104
105#endif
106
Note: See TracBrowser for help on using the repository browser.