source: trunk/yat/statistics/Fisher.cc @ 2523

Last change on this file since 2523 was 2523, checked in by Peter, 10 years ago

added function to retrieve stored oddsratio in Fisher

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 3.8 KB
Line 
1// $Id: Fisher.cc 2523 2011-07-18 21:32:36Z peter $
2
3/*
4  Copyright (C) 2004 Jari Häkkinen, Peter Johansson
5  Copyright (C) 2005 Peter Johansson
6  Copyright (C) 2006, 2007, 2008 Jari Häkkinen, Peter Johansson
7  Copyright (C) 2009, 2010, 2011 Peter Johansson
8
9  This file is part of the yat library, http://dev.thep.lu.se/yat
10
11  The yat library is free software; you can redistribute it and/or
12  modify it under the terms of the GNU General Public License as
13  published by the Free Software Foundation; either version 3 of the
14  License, or (at your option) any later version.
15
16  The yat library is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  General Public License for more details.
20
21  You should have received a copy of the GNU General Public License
22  along with yat. If not, see <http://www.gnu.org/licenses/>.
23*/
24
25#include "Fisher.h"
26#include "utility.h"
27
28#include "yat/utility/Exception.h"
29
30#include <gsl/gsl_cdf.h>
31#include <gsl/gsl_randist.h>
32
33#include <algorithm>
34#include <cassert>
35
36namespace theplu {
37namespace yat {
38namespace statistics {
39
40 
41  Fisher::Fisher()
42    : a_(0), b_(0), c_(0), d_(0), minimum_size_(10), oddsratio_(1.0)
43  {
44  }
45
46
47  Fisher::~Fisher()
48  {
49  }
50
51
52  bool Fisher::calculate_p_exact(void) const
53  {
54    return ( a_<minimum_size_ || b_<minimum_size_ || 
55             c_<minimum_size_ || d_<minimum_size_); 
56  }
57
58  double Fisher::Chi2() const
59  {
60    double a,b,c,d;
61    expected(a,b,c,d);
62    return (a-a_)*(a-a_)/a + (b-b_)*(b-b_)/b + 
63      (c-c_)*(c-c_)/c + (d-d_)*(d-d_)/d;
64  }
65
66
67  void Fisher::expected(double& a, double& b, double& c, double& d) const
68  {
69    // use floting point arithmetic to avoid overflow
70    double a1 = a_;
71    double b1 = b_;
72    double c1 = c_;
73    double d1 = d_;
74    double N = a1 + b1 + c1 + d1;
75    a =((a1+b1)*(a1+c1)) / N;
76    b =((a1+b1)*(b1+d1)) / N;
77    c =((c1+d1)*(a1+c1)) / N;
78    d =((c1+d1)*(b1+d1)) / N;
79  }
80
81
82  unsigned int& Fisher::minimum_size(void)
83  {
84    return minimum_size_;
85  }
86
87
88  const unsigned int& Fisher::minimum_size(void) const
89  {
90    return minimum_size_;
91  }
92
93
94  double Fisher::oddsratio(const unsigned int a,
95                           const unsigned int b,
96                           const unsigned int c,
97                           const unsigned int d) 
98  {
99    // If a column sum or a row sum is zero, the table is nonsense
100    if ((a==0 || d==0) && (c==0 || b==0)){
101      throw utility::runtime_error("Table in Fisher is not valid\n");
102    }
103    a_ = a;
104    b_ = b;
105    c_ = c;
106    d_ = d;
107
108    oddsratio_= (static_cast<double>(a) / static_cast<double>(b) * 
109                 static_cast<double>(d) / static_cast<double>(c) );
110    return oddsratio_;
111  }
112
113
114  double Fisher::oddsratio(void) const
115  {
116    return oddsratio_;
117  }
118
119
120  double Fisher::p_value() const
121  {
122    if (calculate_p_exact())
123      return p_value_exact();
124    return p_value_approximative();
125  }
126
127
128  double Fisher::p_value_one_sided() const
129  {
130    if (!calculate_p_exact()) {
131      if (oddsratio_<1.0)
132        return 1.0-p_value_approximative()/2.0;
133      return p_value_approximative()/2.0;
134    }
135    // check for overflow
136    assert(c_ <= c_+d_ && d_ <= c_+d_);
137    assert(a_ <= a_+b_ && b_ <= a_+b_);
138    assert(a_ <= a_+c_ && c_ <= a_+c_);
139
140    return statistics::cdf_hypergeometric_P(c_, c_+d_, a_+b_, a_+c_);
141  }
142
143
144  double Fisher::p_value_approximative() const
145  {
146    return gsl_cdf_chisq_Q(Chi2(), 1.0);
147  }
148
149  double Fisher::p_value_exact() const
150  { 
151    double res=0;
152    double a, c, tmp;
153    expected(a, tmp, c, tmp);
154    // sum P(x) over x for which abs(x-E(a))>=abs(a-E(a)) 
155
156    // counting left tail
157    int k = static_cast<int>(a - std::abs(a_-a));
158    if (k>=0)
159      res = cdf_hypergeometric_P(k, a_+b_, c_+d_, a_+c_);
160    // counting right tail
161    k = static_cast<int>(c - std::abs(c_-c));
162    if (k>=0)
163      res += cdf_hypergeometric_P(k, c_+d_, a_+b_, a_+c_);
164
165    // avoid p>1 due to double counting middle one
166    return std::min(1.0, res);
167  }
168
169}}} // of namespace statistics, yat, and theplu
Note: See TracBrowser for help on using the repository browser.