source: branches/0.4-stable/yat/statistics/Fisher.cc @ 1624

Last change on this file since 1624 was 1624, checked in by Peter, 15 years ago

fixes #461. Also modified implementation of cdf_hypergeometric_P, which may cause conflict with modifications done in trunk (refs #87). If so, go with the trunk version (which uses GSL 1.8).

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 3.4 KB
Line 
1// $Id: Fisher.cc 1624 2008-11-12 22:10:52Z peter $
2
3/*
4  Copyright (C) 2004 Jari Häkkinen, Peter Johansson
5  Copyright (C) 2005 Peter Johansson
6  Copyright (C) 2006, 2007 Jari Häkkinen, Peter Johansson
7  Copyright (C) 2008 Peter Johansson
8
9  This file is part of the yat library, http://dev.thep.lu.se/yat
10
11  The yat library is free software; you can redistribute it and/or
12  modify it under the terms of the GNU General Public License as
13  published by the Free Software Foundation; either version 2 of the
14  License, or (at your option) any later version.
15
16  The yat library is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  General Public License for more details.
20
21  You should have received a copy of the GNU General Public License
22  along with this program; if not, write to the Free Software
23  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
24  02111-1307, USA.
25*/
26
27#include "Fisher.h"
28#include "utility.h"
29
30#include <gsl/gsl_cdf.h>
31#include <gsl/gsl_randist.h>
32
33#include <algorithm>
34
35namespace theplu {
36namespace yat {
37namespace statistics {
38
39 
40  Fisher::Fisher()
41    : a_(0), b_(0), c_(0), d_(0), minimum_size_(10), oddsratio_(1.0)
42  {
43  }
44
45
46  Fisher::~Fisher()
47  {
48  }
49
50
51  bool Fisher::calculate_p_exact(void) const
52  {
53    return ( a_<minimum_size_ || b_<minimum_size_ || 
54             c_<minimum_size_ || d_<minimum_size_); 
55  }
56
57  double Fisher::Chi2() const
58  {
59    double a,b,c,d;
60    expected(a,b,c,d);
61    return (a-a_)*(a-a_)/a + (b-b_)*(b-b_)/b + 
62      (c-c_)*(c-c_)/c + (d-d_)*(d-d_)/d;
63  }
64
65
66  void Fisher::expected(double& a, double& b, double& c, double& d) const
67  {
68    double N = a_+b_+c_+d_;
69    a =((a_+b_)*(a_+c_)) / N;
70    b =((a_+b_)*(b_+d_)) / N;
71    c =((c_+d_)*(a_+c_)) / N;
72    d =((c_+d_)*(b_+d_)) / N;
73  }
74
75
76  unsigned int& Fisher::minimum_size(void)
77  {
78    return minimum_size_;
79  }
80
81
82  const unsigned int& Fisher::minimum_size(void) const
83  {
84    return minimum_size_;
85  }
86
87
88  double Fisher::oddsratio(const unsigned int a,
89                           const unsigned int b,
90                           const unsigned int c,
91                           const unsigned int d) 
92  {
93    // If a column sum or a row sum is zero, the table is nonsense
94    if ((a==0 || d==0) && (c==0 || b==0)){
95      throw std::runtime_error("runtime_error: Table in Fisher is not valid\n");
96    }
97    a_ = a;
98    b_ = b;
99    c_ = c;
100    d_ = d;
101
102    oddsratio_=static_cast<double>(a*d)/static_cast<double>(b*c);
103    return oddsratio_;
104  }
105
106
107  double Fisher::p_value() const
108  {
109    if (calculate_p_exact())
110      return p_value_exact();
111    return p_value_approximative();
112  }
113
114
115  double Fisher::p_value_one_sided() const
116  {
117    if (!calculate_p_exact()) {
118      if (oddsratio_<1.0)
119        return 1.0-p_value_approximative()/2.0;
120      return p_value_approximative()/2.0;
121    }
122    return statistics::cdf_hypergeometric_P(c_, c_+d_, a_+b_, a_+c_);
123  }
124
125
126  double Fisher::p_value_approximative() const
127  {
128    return gsl_cdf_chisq_Q(Chi2(), 1.0);
129  }
130
131  double Fisher::p_value_exact() const
132  { 
133    double res=0;
134    double a, c, tmp;
135    expected(a, tmp, c, tmp);
136    // sum P(x) over x for which abs(x-E(a))>=abs(a-E(a)) 
137
138    // counting left tail
139    int k = static_cast<int>(a - std::abs(a_-a));
140    if (k>=0)
141      res = cdf_hypergeometric_P(k, a_+b_, c_+d_, a_+c_);
142    // counting right tail
143    k = static_cast<int>(c - std::abs(c_-c));
144    if (k>=0)
145      res += cdf_hypergeometric_P(k, c_+d_, a_+b_, a_+c_);
146
147    // avoid p>1 due to double counting middle one
148    return std::min(1.0, res);
149  }
150
151
152}}} // of namespace statistics, yat, and theplu
Note: See TracBrowser for help on using the repository browser.