- Timestamp:
- Mar 4, 2007, 1:34:17 PM (16 years ago)
- Location:
- trunk/yat/statistics
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/yat/statistics/Fisher.cc
r760 r777 23 23 24 24 #include "Fisher.h" 25 #include "Score.h"26 25 #include "utility.h" 27 #include "yat/classifier/DataLookupWeighted1D.h"28 #include "yat/classifier/Target.h"29 26 30 27 #include <gsl/gsl_cdf.h> … … 36 33 37 34 38 Fisher::Fisher( bool b)39 : Score(b), a_(0), b_(0), c_(0), d_(0), oddsratio_(1.0), value_cutoff_(0)35 Fisher::Fisher() 36 : a_(0), b_(0), c_(0), d_(0), oddsratio_(1.0) 40 37 { 41 38 } 42 39 40 41 Fisher::~Fisher() 42 { 43 } 44 45 46 bool Fisher::calculate_p_exact(void) const 47 { 48 return ( a_<minimum_size_ || b_<minimum_size_ || 49 c_<minimum_size_ || d_<minimum_size_); 50 } 43 51 44 52 double Fisher::Chi2() const … … 67 75 68 76 69 double Fisher::oddsratio(const double a, 70 const double b, 71 const double c, 72 const double d) 77 const u_int& Fisher::minimum_size(void) const 78 { 79 return minimum_size_; 80 } 81 82 83 double Fisher::oddsratio(const u_int a, 84 const u_int b, 85 const u_int c, 86 const u_int d) 73 87 { 74 88 // If a column sum or a row sum is zero, the table is nonsense … … 78 92 79 93 oddsratio_=(a*d)/(b*d); 80 if (absolute_ && oddsratio_<1)81 return 1/oddsratio_;82 94 return oddsratio_; 83 95 } … … 86 98 double Fisher::p_value() const 87 99 { 88 double p=1; 89 if ( ( a_<minimum_size_ || b_<minimum_size_ || 90 c_<minimum_size_ || d_<minimum_size_) && !weighted_ ){ 100 if (oddsratio_>1.0) 101 return 2*p_value_one_sided(); 102 if (oddsratio_<1.0){ 103 // If oddsratio is less than unity 104 // Two-sided p-value is 105 // P(X <= oddsratio) + P(X >= 1/oddsratio) = 106 // P(X <= oddsratio) + P(X <= oddsratio) 107 // 2 * (P(X < oddsratio) + P(X = oddsratio)) 108 // 2 * (1 - P(X >= oddsratio) + P(X = oddsratio)) 109 if (calculate_p_exact()) 110 return 2*(1-p_value_one_sided()+ 111 gsl_ran_hypergeometric_pdf(a_,a_+b_,c_+d_, a_+c_) 112 ); 113 // if p is calculated approximatively correction is not needed 114 return 2*(1-p_value_one_sided()); 91 115 92 p=p_value_exact();93 116 } 94 else95 p=p_value_approximative();117 return 1.0; 118 } 96 119 97 if (!absolute_){ 98 p=p/2; 99 if (oddsratio_<0.5){ 100 // last term because >= not equal to !(<=) 101 u_int a = static_cast<u_int>(a_); 102 u_int b = static_cast<u_int>(b_); 103 u_int c = static_cast<u_int>(c_); 104 u_int d = static_cast<u_int>(d_); 105 return 1-p+gsl_ran_hypergeometric_pdf(a, a+b, c+d, a+c); 106 } 107 } 108 return p; 120 121 double Fisher::p_value_one_sided() const 122 { 123 if ( calculate_p_exact() ) 124 return p_value_exact(); 125 return p_value_approximative(); 109 126 } 110 127 … … 117 134 double Fisher::p_value_exact() const 118 135 { 119 u_int a = static_cast<u_int>(a_);120 u_int b = static_cast<u_int>(b_);121 u_int c = static_cast<u_int>(c_);122 u_int d = static_cast<u_int>(d_);123 124 136 // Since the calculation is symmetric and cdf_hypergeometric_P 125 137 // loops up to k we choose the smallest number to be k and mirror 126 138 // the matrix. This choice makes the p-value two-sided. 127 139 128 if (a <b && a<c && a<d)129 return statistics::cdf_hypergeometric_P(a ,a+b,c+d,a+c);130 else if (b <a && b<c && b<d)131 return statistics::cdf_hypergeometric_P(b ,a+b,c+d,b+d);132 else if (c <a && c<b && c<d)133 return statistics::cdf_hypergeometric_P(c ,c+d,a+b,a+c);140 if (a_<b_ && a_<c_ && a_<d_) 141 return statistics::cdf_hypergeometric_P(a_,a_+b_,c_+d_,a_+c_); 142 else if (b_<a_ && b_<c_ && b_<d_) 143 return statistics::cdf_hypergeometric_P(b_,a_+b_,c_+d_,b_+d_); 144 else if (c_<a_ && c_<b_ && c_<d_) 145 return statistics::cdf_hypergeometric_P(c_,c_+d_,a_+b_,a_+c_); 134 146 135 return statistics::cdf_hypergeometric_P(d ,c+d,a+b,b+d);147 return statistics::cdf_hypergeometric_P(d_,c_+d_,a_+b_,b_+d_); 136 148 137 149 } 138 150 139 151 140 double Fisher::score(const classifier::Target& target,141 const utility::vector& value)142 {143 weighted_=false;144 a_=b_=c_=d_=0;145 for (size_t i=0; i<target.size(); i++)146 if (target.binary(i))147 if (value(i)>value_cutoff_)148 a_++;149 else150 c_++;151 else152 if (value(i)>value_cutoff_)153 b_++;154 else155 d_++;156 157 return oddsratio(a_,b_,c_,d_);158 }159 160 double Fisher::score(const classifier::Target& target,161 const classifier::DataLookupWeighted1D& value)162 {163 weighted_=true;164 a_=b_=c_=d_=0;165 for (size_t i=0; i<target.size(); i++)166 if (target.binary(i))167 if (value.data(i)>value_cutoff_)168 a_+=value.weight(i);169 else170 c_+=value.weight(i);171 else172 if (value.data(i)>value_cutoff_)173 b_+=value.weight(i);174 else175 d_+=value.weight(i);176 177 return oddsratio(a_,b_,c_,d_);178 }179 180 double Fisher::score(const classifier::Target& target,181 const utility::vector& value,182 const utility::vector& weight)183 {184 weighted_=true;185 a_=b_=c_=d_=0;186 for (size_t i=0; i<target.size(); i++)187 if (target.binary(i))188 if (value(i)>value_cutoff_)189 a_+=weight(i);190 else191 c_+=weight(i);192 else193 if (value(i)>value_cutoff_)194 b_+=weight(i);195 else196 d_+=weight(i);197 198 return oddsratio(a_,b_,c_,d_);199 }200 201 double Fisher::score(const u_int a, const u_int b,202 const u_int c, const u_int d)203 {204 a_=a;205 b_=b;206 c_=c;207 d_=d;208 return oddsratio(a,b,c,d);209 }210 211 212 double& Fisher::value_cutoff(void)213 {214 return value_cutoff_;215 }216 217 152 }}} // of namespace statistics, yat, and theplu -
trunk/yat/statistics/Fisher.h
r760 r777 69 69 */ 70 70 71 class Fisher : public Score71 class Fisher 72 72 { 73 73 … … 76 76 /// Default Constructor. 77 77 /// 78 Fisher( bool absolute=true);78 Fisher(void); 79 79 80 80 /// 81 81 /// Destructor 82 82 /// 83 virtual ~Fisher(void) {};83 virtual ~Fisher(void); 84 84 85 85 … … 89 89 double Chi2(void) const; 90 90 91 /// 92 /// Calculates the expected values under the null hypothesis. 93 /// a' = (a+c)(a+b)/(a+b+c+d) 94 /// 91 /** 92 Calculates the expected values under the null hypothesis. 93 \f$ a' = \frac{(a+c)(a+b)}{a+b+c+d} \f$, 94 \f$ b' = \frac{(a+b)(b+d)}{a+b+c+d} \f$, 95 \f$ c' = \frac{(a+c)(c+d)}{a+b+c+d} \f$, 96 \f$ d' = \frac{(b+d)(c+d)}{a+b+c+d} \f$, 97 */ 95 98 void expected(double& a, double& b, double& c, double& d) const; 96 99 97 100 /// 98 /// minimum_size is the threshold for when the p-value calculation99 /// is performed using a Chi2 approximation.101 /// If all elements in table is at least minimum_size(), a Chi2 102 /// approximation is used for p-value calculation. 100 103 /// 101 104 /// @return reference to minimum_size … … 104 107 105 108 /// 106 /// If absolute, the p-value is the two-sided p-value. If all 107 /// elements in table is at least minimum_size, a Chi2 109 /// If all elements in table is at least minimum_size(), a Chi2 110 /// approximation is used for p-value calculation. 111 /// 112 /// @return const reference to minimum_size 113 /// 114 const u_int& minimum_size(void) const; 115 116 /// 117 /// If oddsratio is larger than unity, two-sided p-value is equal 118 /// to 2*p_value_one_sided(). If oddsratio is smaller than unity 119 /// two-sided p-value is equal to 2*(1-p_value_one_sided()). If 120 /// oddsratio is unity two-sided p-value is equal to unity. 121 /// 122 /// If all elements in table is at least minimum_size(), a Chi2 108 123 /// approximation is used. 109 124 /// 110 /// @return p-value 111 /// 112 /// @note in weighted case, approximation Chi2 is always used. 125 /// @return 2-sided p-value 113 126 /// 114 127 double p_value() const; 115 128 116 129 /// 117 /// Function calculating score from 2x2 table for which the 118 /// elements are calculated as follows \n 119 /// target.binary(i) sample i in group a or c otherwise in b or d 120 /// \f$ value(i) > \f$ value_cutoff() sample i in group a or b 121 /// otherwise c or d\n 130 /// One-sided p-value is probability to get larger (or equal) oddsratio. 122 131 /// 123 /// @return odds ratio. If absolute_ is true and odds ratio is124 /// less than unity 1 divided by odds ratio is returned132 /// If all elements in table is at least minimum_size(), a Chi2 133 /// approximation is used. 125 134 /// 126 /// @ throw If table is invalid a runtime_error is thrown.135 /// @return One-sided p-value 127 136 /// 128 double score(const classifier::Target& target, 129 const utility::vector& value); 137 double p_value_one_sided() const; 138 139 /** 140 Function calculating odds ratio from 2x2 table 141 \f[ \begin{tabular}{|c|c|} 142 \hline a&b \tabularnewline \hline c&d \tabularnewline \hline 143 \end{tabular} \f] as \f$ \frac{ad}{bc} \f$ 130 144 131 /// 132 /// Weighted version of score. Each element in 2x2 table is 133 /// calculated as \f$ \sum w_i \f$, so when each weight is 134 /// unitary the same table is created as in the unweighted version 135 /// 136 /// @return odds ratio 137 /// 138 /// @see score 139 /// 140 /// @throw If table is invalid a runtime_error is thrown. 141 /// 142 double score(const classifier::Target& target, 143 const classifier::DataLookupWeighted1D& value); 145 @return odds ratio. 144 146 145 146 /// 147 /// Weighted version of score. Each element in 2x2 table is 148 /// calculated as \f$ \sum w_i \f$, so when each weight is 149 /// unitary the same table is created as in the unweighted version 150 /// 151 /// @return odds ratio 152 /// 153 /// @see score 154 /// 155 /// @throw If table is invalid a runtime_error is thrown. 156 /// 157 double score(const classifier::Target& target, 158 const utility::vector& value, 159 const utility::vector& weight); 160 161 /// 162 /// \f$ \frac{ad}{bc} \f$ 163 /// 164 /// @return odds ratio. If absolute_ is true and odds ratio is 165 /// less than unity, 1 divided by odds ratio is returned 166 /// 167 /// @throw If table is invalid a runtime_error is thrown. 168 /// 169 double score(const u_int a, const u_int b, 170 const u_int c, const u_int d); 171 172 /// 173 /// Cutoff sets the limit whether a value should go into the left 174 /// or the right row. @see score 175 /// 176 /// @return reference to cutoff for row 177 /// 178 double& value_cutoff(void); 147 @throw If table is invalid a runtime_error is thrown. A table 148 is invalid if a row or column sum is zero. 149 */ 150 double oddsratio(const u_int a, const u_int b, 151 const u_int c, const u_int d); 179 152 180 153 private: 181 double oddsratio(const double a, const double b, 182 const double c, const double d); 154 bool calculate_p_exact() const; 183 155 184 156 // two-sided … … 187 159 double p_value_exact(void) const; 188 160 189 doublea_;190 doubleb_;191 doublec_;192 doubled_;161 u_int a_; 162 u_int b_; 163 u_int c_; 164 u_int d_; 193 165 u_int minimum_size_; 194 166 double oddsratio_; 195 double value_cutoff_;196 167 }; 197 168
Note: See TracChangeset
for help on using the changeset viewer.