source: trunk/src/Fisher.h @ 187

Last change on this file since 187 was 187, checked in by Peter, 17 years ago

fixed bug with inheritance of p_value()

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 3.1 KB
Line 
1// $Id: Fisher.h 187 2004-10-18 16:06:05Z peter $
2
3#ifndef _theplu_cpptools_fisher_
4#define _theplu_cpptools_fisher_
5
6// C++ tools include
7/////////////////////
8#include "Score.h"
9#include "vector.h"
10//#include <gsl/gsl_cdf.h>
11
12// Standard C++ includes
13////////////////////////
14//#include <utility>
15//#include <vector>
16
17namespace theplu {
18namespace cpptools { 
19  ///
20  /// Class for Fisher's exact test.   
21  /// Fisher's Exact test is a procedure that you can use for data
22  /// in a two by two contingency table: \f[ \begin{tabular}{|c|c|}
23  /// \hline a&b \tabularnewline \hline c&d \tabularnewline \hline
24  /// \end{tabular} \f] Fisher's Exact Test is based on exact
25  /// probabilities from a specific distribution (the hypergeometric
26  /// distribution). There's really no lower bound on the amount of
27  /// data that is needed for Fisher's Exact Test. You do have to
28  /// have at least one data value in each row and one data value in
29  /// each column. If an entire row or column is zero, then you
30  /// don't really have a 2 by 2 table. But you can use Fisher's
31  /// Exact Test when one of the cells in your table has a zero in
32  /// it. Fisher's Exact Test is also very useful for highly
33  /// imbalanced tables. If one or two of the cells in a two by two
34  /// table have numbers in the thousands and one or two of the
35  /// other cells has numbers less than 5, you can still use
36  /// Fisher's Exact Test. For very large tables (where all four
37  /// entries in the two by two table are large), your computer may
38  /// take too much time to compute Fisher's Exact Test. In these
39  /// situations, though, you might as well use the Chi-square test
40  /// because a large sample approximation (that the Chi-square test
41  /// relies on) is very reasonable. If all elements are larger than
42  /// 10 a Chi-square test is reasonable to use.
43  ///
44
45 
46  class Fisher : public Score
47  {
48 
49  public:
50    ///
51    /// Default Constructor.
52    ///
53    Fisher(bool absolute=true);
54
55    ///
56    /// Destructor
57    ///
58    virtual ~Fisher(void) {};
59         
60   
61    ///
62    /// @return p-value
63    ///
64    double p_value() const;
65   
66    ///
67    /// @return odds ratio. If absolute_ is true and odds ratio is
68    /// less than unity 1 divided by odds ratio is returned
69    ///
70    double score(const gslapi::vector&, const gslapi::vector&, 
71                 const std::vector<size_t>& = std::vector<size_t>());
72
73    ///
74    /// Weighted version of odds ratio. Each element in 2x2 table is
75    /// calculated as \f$ \sum weight_i \f$, so when each weight is
76    /// unitary the same table is created as in the unweighted version
77    /// @return odds ratio
78    ///
79    double score(const gslapi::vector&, const gslapi::vector&, 
80                 const gslapi::vector&,
81                 const std::vector<size_t>& = std::vector<size_t>());
82
83    ///
84    /// @return odds ratio  \f$ ad/bc\f$ 
85    ///
86    double score(const u_int, const u_int, const u_int, const u_int); 
87   
88    double oddsratio(const double, const double, 
89                     const double, const double) const;
90
91         
92  private:
93    std::vector<size_t> train_set_;
94    gslapi::vector weight_;
95    u_int a_;
96    u_int b_;
97    u_int c_;
98    u_int d_;
99
100
101  };
102
103}} // of namespace cpptools and namespace theplu
104
105#endif
106
Note: See TracBrowser for help on using the repository browser.