Changeset 3441
- Timestamp:
- Nov 23, 2015, 9:20:08 AM (7 years ago)
- Location:
- trunk
- Files:
-
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/test/fisher.cc
r3298 r3441 126 126 f.minimum_size() = 1000; 127 127 f.oddsratio(10,20,10,20); 128 suite.add(suite.equal(f.p_value(), 1.0 ));128 suite.add(suite.equal(f.p_value(), 1.0, 20)); 129 129 130 130 f.oddsratio(10, 20, 20, 200); -
trunk/yat/statistics/Fisher.cc
r3425 r3441 86 86 87 87 88 /* 89 ( n ) 90 (k+1) (k+1)!(n-k)! 91 ----- = ------------ 92 (n) k!(n-k-1)! 93 (k) 94 */ 95 double Fisher::choose_ratio(unsigned int n, unsigned int k) const 96 { 97 if (k==0) 98 return n; 99 if (k==n-1) 100 return k+1; 101 return static_cast<double>(n-k) / (k+1); 102 } 103 104 105 double Fisher::hypergeometric_ratio(unsigned int k, unsigned int n1, 106 unsigned int n2, unsigned int t) const 107 { 108 return choose_ratio(n1, k) / choose_ratio(n2, t-k-1); 109 } 110 111 double Fisher::lower_tail(unsigned int k, unsigned int n1, unsigned int n2, 112 unsigned int t) const 113 { 114 double sum = 0; 115 116 // P(X=k) 117 double p0 = gsl_ran_hypergeometric_pdf(k, n1, n2, t); 118 119 // minimum possible outcome for X is max(0, t-n2) 120 unsigned int i = std::max(n2, t)-n2; 121 double p = gsl_ran_hypergeometric_pdf(i, n1, n2, t); 122 // avoid double dipping P(k) 123 for ( ; i<k; ++i) { 124 if (p<=p0) 125 sum += p; 126 else 127 break; 128 129 // calculate p(i+1) using recursive function 130 p *= hypergeometric_ratio(i, n1, n2, t); 131 } 132 133 return sum; 134 } 135 136 88 137 unsigned int& Fisher::minimum_size(void) 89 138 { … … 131 180 return p_value_approximative()/2.0; 132 181 } 182 return p_left_exact(); 183 } 184 185 186 double Fisher::p_left_exact(void) const 187 { 133 188 // check for overflow 134 189 assert(c_ <= c_+d_ && d_ <= c_+d_); … … 155 210 return p_value_approximative()/2.0; 156 211 } 212 return p_right_exact(); 213 } 214 215 216 double Fisher::p_right_exact(void) const 217 { 157 218 // check for overflow 158 219 assert(c_ <= c_+d_ && d_ <= c_+d_); … … 178 239 double Fisher::p_value_exact(void) const 179 240 { 180 double res=0; 181 double a, c, tmp; 182 expected(a, tmp, c, tmp); 183 // sum P(x) over x for which abs(x-E(a))>=abs(a-E(a)) 184 185 // counting left tail 186 int k = static_cast<int>(a - std::abs(a_-a)); 187 if (k>=0) 188 res = cdf_hypergeometric_P(k, a_+b_, c_+d_, a_+c_); 189 // counting right tail 190 k = static_cast<int>(c - std::abs(c_-c)); 191 if (k>=0) 192 res += cdf_hypergeometric_P(k, c_+d_, a_+b_, a_+c_); 193 194 // avoid p>1 due to double counting middle one 195 return std::min(1.0, res); 241 // The hypergeometric distribution is unimodal (only one peak) and the 242 // peak (mode) occurs at: (t+1)*(n1+1)/(n+1) 243 244 double mode = static_cast<double>(a_+c_+1)*static_cast<double>(a_+b_+1)/ 245 static_cast<double>(a_+b_+c_+d_+1); 246 if (a_ >= mode) 247 return p_value_exact(a_, a_+b_, c_+d_, a_+c_); 248 return p_value_exact(c_, c_+d_, a_+b_, a_+c_); 249 } 250 251 252 /* 253 a | b | n1 254 c | d | n2 255 ------------------- 256 t | | n 257 258 259 The p is calculated as the sum of all P(x) for all x such that 260 P(x)<=P(k) 261 */ 262 double Fisher::p_value_exact(unsigned int k, unsigned int n1, 263 unsigned int n2, unsigned int t) const 264 { 265 assert(k<=t); 266 assert(t<=n1+n2); 267 assert(n1); 268 assert(n2); 269 270 assert(k); 271 // P(X >= k) = P(X > k-1) 272 double sum = gsl_cdf_hypergeometric_Q(k-1, n1, n2, t); 273 // calculate the other tail 274 sum += lower_tail(k, n1, n2, t); 275 return sum; 196 276 } 197 277 -
trunk/yat/statistics/Fisher.h
r3425 r3441 148 148 hypergeometric distribution 149 149 \f$ \sum_k P(k) \f$ where summation runs over \a k such that 150 \f$ |k-<a>| \ge |a-<a>|\f$.150 \f$ P(k) \le P(a) \f$. 151 151 152 152 \return two-sided p-value … … 195 195 // two-sided 196 196 double p_value_approximative(void) const; 197 // two-sided197 // two-sided 198 198 double p_value_exact(void) const; 199 // calculate two-sided p-value to get k (or fewer) wins when 200 // drawing t samples out of of a population of n1 wins and n2 losses 201 double p_value_exact(unsigned int k, unsigned int n1, unsigned int n2, 202 unsigned int t) const; 203 204 double lower_tail(unsigned int k, unsigned int n1, unsigned int n2, 205 unsigned int t) const; 206 207 // return P(X=k+1) / P(X=k) 208 double hypergeometric_ratio(unsigned int k, unsigned int n1, 209 unsigned int n2, unsigned int t) const; 210 double choose_ratio(unsigned int n, unsigned int k) const; 211 212 double p_left_exact(void) const; 213 double p_right_exact(void) const; 199 214 200 215 double yates(unsigned int o, double e) const;
Note: See TracChangeset
for help on using the changeset viewer.