source: trunk/yat/statistics/Fisher.cc @ 3004

Last change on this file since 3004 was 3004, checked in by Peter, 10 years ago

refs #689. Deprecate Fisher::one_sided_p; implement Fisher::left_p and right_p.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 4.4 KB
Line 
1// $Id: Fisher.cc 3004 2013-03-24 00:51:14Z peter $
2
3/*
4  Copyright (C) 2004 Jari Häkkinen, Peter Johansson
5  Copyright (C) 2005 Peter Johansson
6  Copyright (C) 2006, 2007, 2008 Jari Häkkinen, Peter Johansson
7  Copyright (C) 2009, 2010, 2011, 2012, 2013 Peter Johansson
8
9  This file is part of the yat library, http://dev.thep.lu.se/yat
10
11  The yat library is free software; you can redistribute it and/or
12  modify it under the terms of the GNU General Public License as
13  published by the Free Software Foundation; either version 3 of the
14  License, or (at your option) any later version.
15
16  The yat library is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  General Public License for more details.
20
21  You should have received a copy of the GNU General Public License
22  along with yat. If not, see <http://www.gnu.org/licenses/>.
23*/
24
25#include <config.h>
26
27#include "Fisher.h"
28#include "utility.h"
29
30#include "yat/utility/Exception.h"
31
32#include <gsl/gsl_cdf.h>
33#include <gsl/gsl_randist.h>
34
35#include <algorithm>
36#include <cassert>
37
38namespace theplu {
39namespace yat {
40namespace statistics {
41
42
43  Fisher::Fisher()
44    : a_(0), b_(0), c_(0), d_(0), minimum_size_(10), oddsratio_(1.0)
45  {
46  }
47
48
49  Fisher::~Fisher()
50  {
51  }
52
53
54  bool Fisher::calculate_p_exact(void) const
55  {
56    return ( a_<minimum_size_ || b_<minimum_size_ ||
57             c_<minimum_size_ || d_<minimum_size_);
58  }
59
60  double Fisher::Chi2(void) const
61  {
62    double a,b,c,d;
63    expected(a,b,c,d);
64    return (a-a_)*(a-a_)/a + (b-b_)*(b-b_)/b +
65      (c-c_)*(c-c_)/c + (d-d_)*(d-d_)/d;
66  }
67
68
69  void Fisher::expected(double& a, double& b, double& c, double& d) const
70  {
71    // use floting point arithmetic to avoid overflow
72    double a1 = a_;
73    double b1 = b_;
74    double c1 = c_;
75    double d1 = d_;
76    double N = a1 + b1 + c1 + d1;
77    a =((a1+b1)*(a1+c1)) / N;
78    b =((a1+b1)*(b1+d1)) / N;
79    c =((c1+d1)*(a1+c1)) / N;
80    d =((c1+d1)*(b1+d1)) / N;
81  }
82
83
84  unsigned int& Fisher::minimum_size(void)
85  {
86    return minimum_size_;
87  }
88
89
90  const unsigned int& Fisher::minimum_size(void) const
91  {
92    return minimum_size_;
93  }
94
95
96  double Fisher::oddsratio(const unsigned int a,
97                           const unsigned int b,
98                           const unsigned int c,
99                           const unsigned int d)
100  {
101    // If a column sum or a row sum is zero, the table is nonsense
102    if ((a==0 || d==0) && (c==0 || b==0)){
103      throw utility::runtime_error("Table in Fisher is not valid\n");
104    }
105    a_ = a;
106    b_ = b;
107    c_ = c;
108    d_ = d;
109
110    oddsratio_= (static_cast<double>(a) / static_cast<double>(b) *
111                 static_cast<double>(d) / static_cast<double>(c) );
112    return oddsratio_;
113  }
114
115
116  double Fisher::oddsratio(void) const
117  {
118    return oddsratio_;
119  }
120
121
122  double Fisher::p_left(void) const
123  {
124    if (!calculate_p_exact()) {
125      if (oddsratio_>1.0)
126        return 1.0-p_value_approximative()/2.0;
127      return p_value_approximative()/2.0;
128    }
129    // check for overflow
130    assert(c_ <= c_+d_ && d_ <= c_+d_);
131    assert(a_ <= a_+b_ && b_ <= a_+b_);
132    assert(a_ <= a_+c_ && c_ <= a_+c_);
133
134    return cdf_hypergeometric_P(a_, a_+b_, c_+d_, a_+c_);
135  }
136
137
138  double Fisher::p_value(void) const
139  {
140    if (calculate_p_exact())
141      return p_value_exact();
142    return p_value_approximative();
143  }
144
145
146  double Fisher::p_right(void) const
147  {
148    if (!calculate_p_exact()) {
149      if (oddsratio_<1.0)
150        return 1.0-p_value_approximative()/2.0;
151      return p_value_approximative()/2.0;
152    }
153    // check for overflow
154    assert(c_ <= c_+d_ && d_ <= c_+d_);
155    assert(a_ <= a_+b_ && b_ <= a_+b_);
156    assert(a_ <= a_+c_ && c_ <= a_+c_);
157
158    return cdf_hypergeometric_P(c_, c_+d_, a_+b_, a_+c_);
159  }
160
161
162  double Fisher::p_value_one_sided(void) const
163  {
164    return p_right();
165  }
166
167
168  double Fisher::p_value_approximative(void) const
169  {
170    return gsl_cdf_chisq_Q(Chi2(), 1.0);
171  }
172
173
174  double Fisher::p_left_exact(void) const
175  {
176    return 0;
177  }
178
179
180  double Fisher::p_right_exact(void) const
181  {
182    return 0;
183  }
184
185
186  double Fisher::p_value_exact(void) const
187  {
188    double res=0;
189    double a, c, tmp;
190    expected(a, tmp, c, tmp);
191    // sum P(x) over x for which abs(x-E(a))>=abs(a-E(a))
192
193    // counting left tail
194    int k = static_cast<int>(a - std::abs(a_-a));
195    if (k>=0)
196      res = cdf_hypergeometric_P(k, a_+b_, c_+d_, a_+c_);
197    // counting right tail
198    k = static_cast<int>(c - std::abs(c_-c));
199    if (k>=0)
200      res += cdf_hypergeometric_P(k, c_+d_, a_+b_, a_+c_);
201
202    // avoid p>1 due to double counting middle one
203    return std::min(1.0, res);
204  }
205
206}}} // of namespace statistics, yat, and theplu
Note: See TracBrowser for help on using the repository browser.