Changeset 777 for trunk/yat/statistics/Fisher.cc
 Timestamp:
 Mar 4, 2007, 1:34:17 PM (16 years ago)
 File:

 1 edited
Legend:
 Unmodified
 Added
 Removed

trunk/yat/statistics/Fisher.cc
r760 r777 23 23 24 24 #include "Fisher.h" 25 #include "Score.h"26 25 #include "utility.h" 27 #include "yat/classifier/DataLookupWeighted1D.h"28 #include "yat/classifier/Target.h"29 26 30 27 #include <gsl/gsl_cdf.h> … … 36 33 37 34 38 Fisher::Fisher( bool b)39 : Score(b), a_(0), b_(0), c_(0), d_(0), oddsratio_(1.0), value_cutoff_(0)35 Fisher::Fisher() 36 : a_(0), b_(0), c_(0), d_(0), oddsratio_(1.0) 40 37 { 41 38 } 42 39 40 41 Fisher::~Fisher() 42 { 43 } 44 45 46 bool Fisher::calculate_p_exact(void) const 47 { 48 return ( a_<minimum_size_  b_<minimum_size_  49 c_<minimum_size_  d_<minimum_size_); 50 } 43 51 44 52 double Fisher::Chi2() const … … 67 75 68 76 69 double Fisher::oddsratio(const double a, 70 const double b, 71 const double c, 72 const double d) 77 const u_int& Fisher::minimum_size(void) const 78 { 79 return minimum_size_; 80 } 81 82 83 double Fisher::oddsratio(const u_int a, 84 const u_int b, 85 const u_int c, 86 const u_int d) 73 87 { 74 88 // If a column sum or a row sum is zero, the table is nonsense … … 78 92 79 93 oddsratio_=(a*d)/(b*d); 80 if (absolute_ && oddsratio_<1)81 return 1/oddsratio_;82 94 return oddsratio_; 83 95 } … … 86 98 double Fisher::p_value() const 87 99 { 88 double p=1; 89 if ( ( a_<minimum_size_  b_<minimum_size_  90 c_<minimum_size_  d_<minimum_size_) && !weighted_ ){ 100 if (oddsratio_>1.0) 101 return 2*p_value_one_sided(); 102 if (oddsratio_<1.0){ 103 // If oddsratio is less than unity 104 // Twosided pvalue is 105 // P(X <= oddsratio) + P(X >= 1/oddsratio) = 106 // P(X <= oddsratio) + P(X <= oddsratio) 107 // 2 * (P(X < oddsratio) + P(X = oddsratio)) 108 // 2 * (1  P(X >= oddsratio) + P(X = oddsratio)) 109 if (calculate_p_exact()) 110 return 2*(1p_value_one_sided()+ 111 gsl_ran_hypergeometric_pdf(a_,a_+b_,c_+d_, a_+c_) 112 ); 113 // if p is calculated approximatively correction is not needed 114 return 2*(1p_value_one_sided()); 91 115 92 p=p_value_exact();93 116 } 94 else95 p=p_value_approximative();117 return 1.0; 118 } 96 119 97 if (!absolute_){ 98 p=p/2; 99 if (oddsratio_<0.5){ 100 // last term because >= not equal to !(<=) 101 u_int a = static_cast<u_int>(a_); 102 u_int b = static_cast<u_int>(b_); 103 u_int c = static_cast<u_int>(c_); 104 u_int d = static_cast<u_int>(d_); 105 return 1p+gsl_ran_hypergeometric_pdf(a, a+b, c+d, a+c); 106 } 107 } 108 return p; 120 121 double Fisher::p_value_one_sided() const 122 { 123 if ( calculate_p_exact() ) 124 return p_value_exact(); 125 return p_value_approximative(); 109 126 } 110 127 … … 117 134 double Fisher::p_value_exact() const 118 135 { 119 u_int a = static_cast<u_int>(a_);120 u_int b = static_cast<u_int>(b_);121 u_int c = static_cast<u_int>(c_);122 u_int d = static_cast<u_int>(d_);123 124 136 // Since the calculation is symmetric and cdf_hypergeometric_P 125 137 // loops up to k we choose the smallest number to be k and mirror 126 138 // the matrix. This choice makes the pvalue twosided. 127 139 128 if (a <b && a<c && a<d)129 return statistics::cdf_hypergeometric_P(a ,a+b,c+d,a+c);130 else if (b <a && b<c && b<d)131 return statistics::cdf_hypergeometric_P(b ,a+b,c+d,b+d);132 else if (c <a && c<b && c<d)133 return statistics::cdf_hypergeometric_P(c ,c+d,a+b,a+c);140 if (a_<b_ && a_<c_ && a_<d_) 141 return statistics::cdf_hypergeometric_P(a_,a_+b_,c_+d_,a_+c_); 142 else if (b_<a_ && b_<c_ && b_<d_) 143 return statistics::cdf_hypergeometric_P(b_,a_+b_,c_+d_,b_+d_); 144 else if (c_<a_ && c_<b_ && c_<d_) 145 return statistics::cdf_hypergeometric_P(c_,c_+d_,a_+b_,a_+c_); 134 146 135 return statistics::cdf_hypergeometric_P(d ,c+d,a+b,b+d);147 return statistics::cdf_hypergeometric_P(d_,c_+d_,a_+b_,b_+d_); 136 148 137 149 } 138 150 139 151 140 double Fisher::score(const classifier::Target& target,141 const utility::vector& value)142 {143 weighted_=false;144 a_=b_=c_=d_=0;145 for (size_t i=0; i<target.size(); i++)146 if (target.binary(i))147 if (value(i)>value_cutoff_)148 a_++;149 else150 c_++;151 else152 if (value(i)>value_cutoff_)153 b_++;154 else155 d_++;156 157 return oddsratio(a_,b_,c_,d_);158 }159 160 double Fisher::score(const classifier::Target& target,161 const classifier::DataLookupWeighted1D& value)162 {163 weighted_=true;164 a_=b_=c_=d_=0;165 for (size_t i=0; i<target.size(); i++)166 if (target.binary(i))167 if (value.data(i)>value_cutoff_)168 a_+=value.weight(i);169 else170 c_+=value.weight(i);171 else172 if (value.data(i)>value_cutoff_)173 b_+=value.weight(i);174 else175 d_+=value.weight(i);176 177 return oddsratio(a_,b_,c_,d_);178 }179 180 double Fisher::score(const classifier::Target& target,181 const utility::vector& value,182 const utility::vector& weight)183 {184 weighted_=true;185 a_=b_=c_=d_=0;186 for (size_t i=0; i<target.size(); i++)187 if (target.binary(i))188 if (value(i)>value_cutoff_)189 a_+=weight(i);190 else191 c_+=weight(i);192 else193 if (value(i)>value_cutoff_)194 b_+=weight(i);195 else196 d_+=weight(i);197 198 return oddsratio(a_,b_,c_,d_);199 }200 201 double Fisher::score(const u_int a, const u_int b,202 const u_int c, const u_int d)203 {204 a_=a;205 b_=b;206 c_=c;207 d_=d;208 return oddsratio(a,b,c,d);209 }210 211 212 double& Fisher::value_cutoff(void)213 {214 return value_cutoff_;215 }216 217 152 }}} // of namespace statistics, yat, and theplu
Note: See TracChangeset
for help on using the changeset viewer.