source: branches/0.4-stable/yat/statistics/Fisher.cc @ 1621

Last change on this file since 1621 was 1621, checked in by Peter, 15 years ago

fixes #460

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 3.8 KB
Line 
1// $Id: Fisher.cc 1621 2008-11-10 21:39:48Z peter $
2
3/*
4  Copyright (C) 2004 Jari Häkkinen, Peter Johansson
5  Copyright (C) 2005 Peter Johansson
6  Copyright (C) 2006, 2007 Jari Häkkinen, Peter Johansson
7  Copyright (C) 2008 Peter Johansson
8
9  This file is part of the yat library, http://dev.thep.lu.se/yat
10
11  The yat library is free software; you can redistribute it and/or
12  modify it under the terms of the GNU General Public License as
13  published by the Free Software Foundation; either version 2 of the
14  License, or (at your option) any later version.
15
16  The yat library is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  General Public License for more details.
20
21  You should have received a copy of the GNU General Public License
22  along with this program; if not, write to the Free Software
23  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
24  02111-1307, USA.
25*/
26
27#include "Fisher.h"
28#include "utility.h"
29
30#include <gsl/gsl_cdf.h>
31#include <gsl/gsl_randist.h>
32
33namespace theplu {
34namespace yat {
35namespace statistics {
36
37 
38  Fisher::Fisher()
39    : a_(0), b_(0), c_(0), d_(0), minimum_size_(10), oddsratio_(1.0)
40  {
41  }
42
43
44  Fisher::~Fisher()
45  {
46  }
47
48
49  bool Fisher::calculate_p_exact(void) const
50  {
51    return ( a_<minimum_size_ || b_<minimum_size_ || 
52             c_<minimum_size_ || d_<minimum_size_); 
53  }
54
55  double Fisher::Chi2() const
56  {
57    double a,b,c,d;
58    expected(a,b,c,d);
59    return (a-a_)*(a-a_)/a + (b-b_)*(b-b_)/b + 
60      (c-c_)*(c-c_)/c + (d-d_)*(d-d_)/d;
61  }
62
63
64  void Fisher::expected(double& a, double& b, double& c, double& d) const
65  {
66    double N = a_+b_+c_+d_;
67    a =((a_+b_)*(a_+c_)) / N;
68    b =((a_+b_)*(b_+d_)) / N;
69    c =((c_+d_)*(a_+c_)) / N;
70    d =((c_+d_)*(b_+d_)) / N;
71  }
72
73
74  unsigned int& Fisher::minimum_size(void)
75  {
76    return minimum_size_;
77  }
78
79
80  const unsigned int& Fisher::minimum_size(void) const
81  {
82    return minimum_size_;
83  }
84
85
86  double Fisher::oddsratio(const unsigned int a,
87                           const unsigned int b,
88                           const unsigned int c,
89                           const unsigned int d) 
90  {
91    // If a column sum or a row sum is zero, the table is nonsense
92    if ((a==0 || d==0) && (c==0 || b==0)){
93      throw std::runtime_error("runtime_error: Table in Fisher is not valid\n");
94    }
95    a_ = a;
96    b_ = b;
97    c_ = c;
98    d_ = d;
99
100    oddsratio_=static_cast<double>(a*d)/static_cast<double>(b*c);
101    return oddsratio_;
102  }
103
104
105  double Fisher::p_value() const
106  {
107    if (oddsratio_>1.0)
108      return 2*p_value_one_sided();
109    if (oddsratio_<1.0){
110      // If oddsratio is less than unity
111      // Two-sided p-value is
112      // P(X <= oddsratio) + P(X >= 1/oddsratio) =
113      // P(X <= oddsratio) + P(X <= oddsratio)
114      // 2 * (P(X < oddsratio) + P(X = oddsratio))
115      // 2 * (1 - P(X >= oddsratio) + P(X = oddsratio))
116      if (calculate_p_exact())
117        return 2*(1-p_value_one_sided()+
118                  gsl_ran_hypergeometric_pdf(a_,a_+b_,c_+d_, a_+c_)
119                  );
120      // if p is calculated approximatively correction is not needed
121      return 2*(1-p_value_one_sided());
122
123    }
124    return 1.0;
125  }
126
127
128  double Fisher::p_value_one_sided() const
129  {
130    if ( calculate_p_exact() )
131      return p_value_exact();
132    return p_value_approximative();
133  }
134
135
136  double Fisher::p_value_approximative() const
137  {
138    return gsl_cdf_chisq_Q(Chi2(), 1.0);
139  }
140
141  double Fisher::p_value_exact() const
142  { 
143    // Since the calculation is symmetric and cdf_hypergeometric_P
144    // loops up to k we choose the smallest number to be k and mirror
145    // the matrix. This choice makes the p-value two-sided.
146
147    if (a_<b_ && a_<c_ && a_<d_)
148      return statistics::cdf_hypergeometric_P(a_,a_+b_,c_+d_,a_+c_);
149    else if (b_<a_ && b_<c_ && b_<d_)
150      return  statistics::cdf_hypergeometric_P(b_,a_+b_,c_+d_,b_+d_);
151    else if (c_<a_ && c_<b_ && c_<d_)
152      return  statistics::cdf_hypergeometric_P(c_,c_+d_,a_+b_,a_+c_);
153
154    return statistics::cdf_hypergeometric_P(d_,c_+d_,a_+b_,b_+d_);
155
156  }
157
158
159}}} // of namespace statistics, yat, and theplu
Note: See TracBrowser for help on using the repository browser.