source: trunk/yat/statistics/Fisher.cc @ 777

Last change on this file since 777 was 777, checked in by Peter, 15 years ago

Changed Fisher interface dramatically. No longer inherited from Score. Removed several functions since they encourage inappropriate usage. Removed support for weights. refs #101

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 3.6 KB
Line 
1// $Id: Fisher.cc 777 2007-03-04 12:34:17Z peter $
2
3/*
4  Copyright (C) The authors contributing to this file.
5
6  This file is part of the yat library, http://lev.thep.lu.se/trac/yat
7
8  The yat library is free software; you can redistribute it and/or
9  modify it under the terms of the GNU General Public License as
10  published by the Free Software Foundation; either version 2 of the
11  License, or (at your option) any later version.
12
13  The yat library is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  General Public License for more details.
17
18  You should have received a copy of the GNU General Public License
19  along with this program; if not, write to the Free Software
20  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
21  02111-1307, USA.
22*/
23
24#include "Fisher.h"
25#include "utility.h"
26
27#include <gsl/gsl_cdf.h>
28#include <gsl/gsl_randist.h>
29
30namespace theplu {
31namespace yat {
32namespace statistics {
33
34 
35  Fisher::Fisher()
36    : a_(0), b_(0), c_(0), d_(0), oddsratio_(1.0)
37  {
38  }
39
40
41  Fisher::~Fisher()
42  {
43  }
44
45
46  bool Fisher::calculate_p_exact(void) const
47  {
48    return ( a_<minimum_size_ || b_<minimum_size_ || 
49             c_<minimum_size_ || d_<minimum_size_); 
50  }
51
52  double Fisher::Chi2() const
53  {
54    double a,b,c,d;
55    expected(a,b,c,d);
56    return (a-a_)*(a-a_)/a + (b-b_)*(b-b_)/b + 
57      (c-c_)*(c-c_)/c + (d-d_)*(d-d_)/d;
58  }
59
60
61  void Fisher::expected(double& a, double& b, double& c, double& d) const
62  {
63    double N = a_+b_+c_+d_;
64    a =((a_+b_)*(a_+c_)) / N;
65    b =((a_+b_)*(b_+d_)) / N;
66    c =((c_+d_)*(a_+c_)) / N;
67    d =((c_+d_)*(b_+d_)) / N;
68  }
69
70
71  u_int& Fisher::minimum_size(void)
72  {
73    return minimum_size_;
74  }
75
76
77  const u_int& Fisher::minimum_size(void) const
78  {
79    return minimum_size_;
80  }
81
82
83  double Fisher::oddsratio(const u_int a,
84                           const u_int b,
85                           const u_int c,
86                           const u_int d) 
87  {
88    // If a column sum or a row sum is zero, the table is nonsense
89    if ((a==0 || d==0) && (c==0 || b==0)){
90      throw std::runtime_error("runtime_error: Table in Fisher is not valid\n");
91    }
92
93    oddsratio_=(a*d)/(b*d);
94    return oddsratio_;
95  }
96
97
98  double Fisher::p_value() const
99  {
100    if (oddsratio_>1.0)
101      return 2*p_value_one_sided();
102    if (oddsratio_<1.0){
103      // If oddsratio is less than unity
104      // Two-sided p-value is
105      // P(X <= oddsratio) + P(X >= 1/oddsratio) =
106      // P(X <= oddsratio) + P(X <= oddsratio)
107      // 2 * (P(X < oddsratio) + P(X = oddsratio))
108      // 2 * (1 - P(X >= oddsratio) + P(X = oddsratio))
109      if (calculate_p_exact())
110        return 2*(1-p_value_one_sided()+
111                  gsl_ran_hypergeometric_pdf(a_,a_+b_,c_+d_, a_+c_)
112                  );
113      // if p is calculated approximatively correction is not needed
114      return 2*(1-p_value_one_sided());
115
116    }
117    return 1.0;
118  }
119
120
121  double Fisher::p_value_one_sided() const
122  {
123    if ( calculate_p_exact() )
124      return p_value_exact();
125    return p_value_approximative();
126  }
127
128
129  double Fisher::p_value_approximative() const
130  {
131    return gsl_cdf_chisq_Q(Chi2(), 1.0);
132  }
133
134  double Fisher::p_value_exact() const
135  { 
136    // Since the calculation is symmetric and cdf_hypergeometric_P
137    // loops up to k we choose the smallest number to be k and mirror
138    // the matrix. This choice makes the p-value two-sided.
139
140    if (a_<b_ && a_<c_ && a_<d_)
141      return statistics::cdf_hypergeometric_P(a_,a_+b_,c_+d_,a_+c_);
142    else if (b_<a_ && b_<c_ && b_<d_)
143      return  statistics::cdf_hypergeometric_P(b_,a_+b_,c_+d_,b_+d_);
144    else if (c_<a_ && c_<b_ && c_<d_)
145      return  statistics::cdf_hypergeometric_P(c_,c_+d_,a_+b_,a_+c_);
146
147    return statistics::cdf_hypergeometric_P(d_,c_+d_,a_+b_,b_+d_);
148
149  }
150
151
152}}} // of namespace statistics, yat, and theplu
Note: See TracBrowser for help on using the repository browser.