source: trunk/yat/statistics/Fisher.cc @ 1703

Last change on this file since 1703 was 1487, checked in by Jari Häkkinen, 13 years ago

Addresses #436. GPL license copy reference should also be updated.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 3.7 KB
Line 
1// $Id: Fisher.cc 1487 2008-09-10 08:41:36Z jari $
2
3/*
4  Copyright (C) 2004 Jari Häkkinen, Peter Johansson
5  Copyright (C) 2005 Peter Johansson
6  Copyright (C) 2006, 2007 Jari Häkkinen, Peter Johansson
7  Copyright (C) 2008 Peter Johansson
8
9  This file is part of the yat library, http://dev.thep.lu.se/yat
10
11  The yat library is free software; you can redistribute it and/or
12  modify it under the terms of the GNU General Public License as
13  published by the Free Software Foundation; either version 3 of the
14  License, or (at your option) any later version.
15
16  The yat library is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  General Public License for more details.
20
21  You should have received a copy of the GNU General Public License
22  along with yat. If not, see <http://www.gnu.org/licenses/>.
23*/
24
25#include "Fisher.h"
26#include "utility.h"
27
28#include <gsl/gsl_cdf.h>
29#include <gsl/gsl_randist.h>
30
31namespace theplu {
32namespace yat {
33namespace statistics {
34
35 
36  Fisher::Fisher()
37    : a_(0), b_(0), c_(0), d_(0), oddsratio_(1.0)
38  {
39  }
40
41
42  Fisher::~Fisher()
43  {
44  }
45
46
47  bool Fisher::calculate_p_exact(void) const
48  {
49    return ( a_<minimum_size_ || b_<minimum_size_ || 
50             c_<minimum_size_ || d_<minimum_size_); 
51  }
52
53  double Fisher::Chi2() const
54  {
55    double a,b,c,d;
56    expected(a,b,c,d);
57    return (a-a_)*(a-a_)/a + (b-b_)*(b-b_)/b + 
58      (c-c_)*(c-c_)/c + (d-d_)*(d-d_)/d;
59  }
60
61
62  void Fisher::expected(double& a, double& b, double& c, double& d) const
63  {
64    double N = a_+b_+c_+d_;
65    a =((a_+b_)*(a_+c_)) / N;
66    b =((a_+b_)*(b_+d_)) / N;
67    c =((c_+d_)*(a_+c_)) / N;
68    d =((c_+d_)*(b_+d_)) / N;
69  }
70
71
72  unsigned int& Fisher::minimum_size(void)
73  {
74    return minimum_size_;
75  }
76
77
78  const unsigned int& Fisher::minimum_size(void) const
79  {
80    return minimum_size_;
81  }
82
83
84  double Fisher::oddsratio(const unsigned int a,
85                           const unsigned int b,
86                           const unsigned int c,
87                           const unsigned int d) 
88  {
89    // If a column sum or a row sum is zero, the table is nonsense
90    if ((a==0 || d==0) && (c==0 || b==0)){
91      throw std::runtime_error("runtime_error: Table in Fisher is not valid\n");
92    }
93
94    oddsratio_=(a*d)/(b*d);
95    return oddsratio_;
96  }
97
98
99  double Fisher::p_value() const
100  {
101    if (oddsratio_>1.0)
102      return 2*p_value_one_sided();
103    if (oddsratio_<1.0){
104      // If oddsratio is less than unity
105      // Two-sided p-value is
106      // P(X <= oddsratio) + P(X >= 1/oddsratio) =
107      // P(X <= oddsratio) + P(X <= oddsratio)
108      // 2 * (P(X < oddsratio) + P(X = oddsratio))
109      // 2 * (1 - P(X >= oddsratio) + P(X = oddsratio))
110      if (calculate_p_exact())
111        return 2*(1-p_value_one_sided()+
112                  gsl_ran_hypergeometric_pdf(a_,a_+b_,c_+d_, a_+c_)
113                  );
114      // if p is calculated approximatively correction is not needed
115      return 2*(1-p_value_one_sided());
116
117    }
118    return 1.0;
119  }
120
121
122  double Fisher::p_value_one_sided() const
123  {
124    if ( calculate_p_exact() )
125      return p_value_exact();
126    return p_value_approximative();
127  }
128
129
130  double Fisher::p_value_approximative() const
131  {
132    return gsl_cdf_chisq_Q(Chi2(), 1.0);
133  }
134
135  double Fisher::p_value_exact() const
136  { 
137    // Since the calculation is symmetric and cdf_hypergeometric_P
138    // loops up to k we choose the smallest number to be k and mirror
139    // the matrix. This choice makes the p-value two-sided.
140
141    if (a_<b_ && a_<c_ && a_<d_)
142      return statistics::cdf_hypergeometric_P(a_,a_+b_,c_+d_,a_+c_);
143    else if (b_<a_ && b_<c_ && b_<d_)
144      return  statistics::cdf_hypergeometric_P(b_,a_+b_,c_+d_,b_+d_);
145    else if (c_<a_ && c_<b_ && c_<d_)
146      return  statistics::cdf_hypergeometric_P(c_,c_+d_,a_+b_,a_+c_);
147
148    return statistics::cdf_hypergeometric_P(d_,c_+d_,a_+b_,b_+d_);
149
150  }
151
152
153}}} // of namespace statistics, yat, and theplu
Note: See TracBrowser for help on using the repository browser.