source: branches/0.6-stable/yat/statistics/Fisher.cc @ 2317

Last change on this file since 2317 was 2317, checked in by Peter, 13 years ago

updating copyright years

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 3.7 KB
Line 
1// $Id: Fisher.cc 2317 2010-08-20 04:10:07Z peter $
2
3/*
4  Copyright (C) 2004 Jari Häkkinen, Peter Johansson
5  Copyright (C) 2005 Peter Johansson
6  Copyright (C) 2006, 2007, 2008 Jari Häkkinen, Peter Johansson
7  Copyright (C) 2009, 2010 Peter Johansson
8
9  This file is part of the yat library, http://dev.thep.lu.se/yat
10
11  The yat library is free software; you can redistribute it and/or
12  modify it under the terms of the GNU General Public License as
13  published by the Free Software Foundation; either version 3 of the
14  License, or (at your option) any later version.
15
16  The yat library is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  General Public License for more details.
20
21  You should have received a copy of the GNU General Public License
22  along with yat. If not, see <http://www.gnu.org/licenses/>.
23*/
24
25#include "Fisher.h"
26#include "utility.h"
27
28#include <gsl/gsl_cdf.h>
29#include <gsl/gsl_randist.h>
30
31#include <algorithm>
32#include <cassert>
33
34namespace theplu {
35namespace yat {
36namespace statistics {
37
38 
39  Fisher::Fisher()
40    : a_(0), b_(0), c_(0), d_(0), minimum_size_(10), oddsratio_(1.0)
41  {
42  }
43
44
45  Fisher::~Fisher()
46  {
47  }
48
49
50  bool Fisher::calculate_p_exact(void) const
51  {
52    return ( a_<minimum_size_ || b_<minimum_size_ || 
53             c_<minimum_size_ || d_<minimum_size_); 
54  }
55
56  double Fisher::Chi2() const
57  {
58    double a,b,c,d;
59    expected(a,b,c,d);
60    return (a-a_)*(a-a_)/a + (b-b_)*(b-b_)/b + 
61      (c-c_)*(c-c_)/c + (d-d_)*(d-d_)/d;
62  }
63
64
65  void Fisher::expected(double& a, double& b, double& c, double& d) const
66  {
67    // use floting point arithmetic to avoid overflow
68    double N = static_cast<double>(a_) + static_cast<double>(b_) 
69      + static_cast<double>(c_) + static_cast<double>(d_);
70    a =((a_+b_)*(a_+c_)) / N;
71    b =((a_+b_)*(b_+d_)) / N;
72    c =((c_+d_)*(a_+c_)) / N;
73    d =((c_+d_)*(b_+d_)) / N;
74  }
75
76
77  unsigned int& Fisher::minimum_size(void)
78  {
79    return minimum_size_;
80  }
81
82
83  const unsigned int& Fisher::minimum_size(void) const
84  {
85    return minimum_size_;
86  }
87
88
89  double Fisher::oddsratio(const unsigned int a,
90                           const unsigned int b,
91                           const unsigned int c,
92                           const unsigned int d) 
93  {
94    // If a column sum or a row sum is zero, the table is nonsense
95    if ((a==0 || d==0) && (c==0 || b==0)){
96      throw std::runtime_error("runtime_error: Table in Fisher is not valid\n");
97    }
98    a_ = a;
99    b_ = b;
100    c_ = c;
101    d_ = d;
102
103    oddsratio_= (static_cast<double>(a) / static_cast<double>(b) * 
104                 static_cast<double>(d) / static_cast<double>(c) );
105    return oddsratio_;
106  }
107
108
109  double Fisher::p_value() const
110  {
111    if (calculate_p_exact())
112      return p_value_exact();
113    return p_value_approximative();
114  }
115
116
117  double Fisher::p_value_one_sided() const
118  {
119    if (!calculate_p_exact()) {
120      if (oddsratio_<1.0)
121        return 1.0-p_value_approximative()/2.0;
122      return p_value_approximative()/2.0;
123    }
124    // check for overflow
125    assert(c_ <= c_+d_ && d_ <= c_+d_);
126    assert(a_ <= a_+b_ && b_ <= a_+b_);
127    assert(a_ <= a_+c_ && c_ <= a_+c_);
128
129    return statistics::cdf_hypergeometric_P(c_, c_+d_, a_+b_, a_+c_);
130  }
131
132
133  double Fisher::p_value_approximative() const
134  {
135    return gsl_cdf_chisq_Q(Chi2(), 1.0);
136  }
137
138  double Fisher::p_value_exact() const
139  { 
140    double res=0;
141    double a, c, tmp;
142    expected(a, tmp, c, tmp);
143    // sum P(x) over x for which abs(x-E(a))>=abs(a-E(a)) 
144
145    // counting left tail
146    int k = static_cast<int>(a - std::abs(a_-a));
147    if (k>=0)
148      res = cdf_hypergeometric_P(k, a_+b_, c_+d_, a_+c_);
149    // counting right tail
150    k = static_cast<int>(c - std::abs(c_-c));
151    if (k>=0)
152      res += cdf_hypergeometric_P(k, c_+d_, a_+b_, a_+c_);
153
154    // avoid p>1 due to double counting middle one
155    return std::min(1.0, res);
156  }
157
158}}} // of namespace statistics, yat, and theplu
Note: See TracBrowser for help on using the repository browser.