# source:trunk/lib/statistics/Fisher.h@295

Last change on this file since 295 was 295, checked in by Peter, 17 years ago

file structure modifications. NOTE, this revision is not working, please wait for the next...

• Property svn:eol-style set to native
• Property svn:keywords set to Author Date Id Revision
File size: 3.4 KB
Line
1// $Id: Fisher.h 295 2005-04-29 09:15:58Z peter$
2
3#ifndef _theplu_statistics_fisher_
4#define _theplu_statistics_fisher_
5
6#include <c++_tools/statistics/Score.h>
7#include <c++_tools/gslapi/vector.h>
8
9
10namespace theplu {
11namespace statistics {
12  ///
13  /// Class for Fisher's exact test.
14  /// Fisher's Exact test is a procedure that you can use for data
15  /// in a two by two contingency table: \f[ \begin{tabular}{|c|c|}
16  /// \hline a&b \tabularnewline \hline c&d \tabularnewline \hline
17  /// \end{tabular} \f] Fisher's Exact Test is based on exact
18  /// probabilities from a specific distribution (the hypergeometric
19  /// distribution). There's really no lower bound on the amount of
20  /// data that is needed for Fisher's Exact Test. You do have to
21  /// have at least one data value in each row and one data value in
22  /// each column. If an entire row or column is zero, then you
23  /// don't really have a 2 by 2 table. But you can use Fisher's
24  /// Exact Test when one of the cells in your table has a zero in
25  /// it. Fisher's Exact Test is also very useful for highly
26  /// imbalanced tables. If one or two of the cells in a two by two
27  /// table have numbers in the thousands and one or two of the
28  /// other cells has numbers less than 5, you can still use
29  /// Fisher's Exact Test. For very large tables (where all four
30  /// entries in the two by two table are large), your computer may
31  /// take too much time to compute Fisher's Exact Test. In these
32  /// situations, though, you might as well use the Chi-square test
33  /// because a large sample approximation (that the Chi-square test
34  /// relies on) is very reasonable. If all elements are larger than
35  /// 10, a Chi-square test is reasonable to use.
36  ///
37
38
39  class Fisher : public Score
40  {
41
42  public:
43    ///
44    /// Default Constructor.
45    ///
46    Fisher(bool absolute=true);
47
48    ///
49    /// Destructor
50    ///
51    virtual ~Fisher(void) {};
52
53
54    ///
55    /// @return p-value
56    ///
57    double p_value() const;
58
59    ///
60    /// Function calculating score from 2x2 table for which the
61    /// elements are calculated as follows \n
62    /// a: #data \f$x=1 \f$ AND \f$y=1 \f$ \n
63    /// b: #data \f$x=-1 \f$ AND \f$y=1 \f$ \n
64    /// c: #data \f$x=1 \f$ AND \f$y=-1 \f$ \n
65    /// d: #data \f$x=-1 \f$ AND \f$y=1 \f$ \n
66    ///
67    /// @return odds ratio. If absolute_ is true and odds ratio is
68    /// less than unity 1 divided by odds ratio is returned
69    ///
70    double score(const gslapi::vector& x, const gslapi::vector& y,
71                 const std::vector<size_t>& = std::vector<size_t>());
72
73    ///
74    /// Weighted version of score. Each element in 2x2 table is
75    /// calculated as \f$\sum w_i \f$, so when each weight is
76    /// unitary the same table is created as in the unweighted version
77    /// @return odds ratio
78    ///
79    double score(const gslapi::vector& x, const gslapi::vector& y,
80                 const gslapi::vector& w,
81                 const std::vector<size_t>& = std::vector<size_t>());
82
83    ///
84    /// \f$\frac{ad}{bc} \f$
85    ///
86    /// @return odds ratio. If absolute_ is true and odds ratio is
87    /// less than unity, 1 divided by odds ratio is returned
88    ///
89    double score(const u_int a, const u_int b,
90                 const u_int c, const u_int d);
91
92
93
94  private:
95    std::vector<size_t> train_set_;
96    gslapi::vector weight_;
97    u_int a_;
98    u_int b_;
99    u_int c_;
100    u_int d_;
101
102    double oddsratio(const double a, const double b,
103                     const double c, const double d) const;
104
105  };
106
107}} // of namespace statistics and namespace theplu
108
109#endif
110
Note: See TracBrowser for help on using the repository browser.