Changeset 3739 for branches/0.15-stable
- Timestamp:
- Jul 5, 2018, 7:59:40 AM (5 years ago)
- Location:
- branches/0.15-stable
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/0.15-stable/test/fisher.cc
r3455 r3739 26 26 #include "yat/statistics/Fisher.h" 27 27 28 #include <gsl/gsl_cdf.h> 29 #include <gsl/gsl_randist.h> 28 30 #include <climits> 29 31 … … 34 36 void test_large_numbers(test::Suite&); 35 37 void test_yates(test::Suite&); 38 void test_cdf_hypergeometric_Q(test::Suite&); 39 void test_ticket908(test::Suite&); 36 40 37 41 int main(int argc, char* argv[]) … … 73 77 test_large_numbers(suite); 74 78 test_yates(suite); 79 test_cdf_hypergeometric_Q(suite); 80 test_ticket908(suite); 75 81 return suite.return_value(); 76 82 } … … 168 174 } 169 175 } 176 177 178 void test_cdf_hypergeometric_Q(test::Suite& suite) 179 { 180 suite.out() << "\ntest " << __func__ << "\n"; 181 unsigned int n1 = 16; 182 unsigned int n2 = 3; 183 unsigned int t = 16; 184 for (unsigned int k=0; k<=t; ++k) { 185 // P(X=k) 186 double p = gsl_ran_hypergeometric_pdf(k, n1, n2, t); 187 // P(X<=k) 188 double P = gsl_cdf_hypergeometric_P(k, n1, n2, t); 189 // P(X>k) 190 double Q = gsl_cdf_hypergeometric_Q(k, n1, n2, t); 191 suite.out() << k << " " << p << " " << P << " " << Q << "\n"; 192 double sum = 0; 193 for (unsigned int x=0; x<=k; ++x) 194 sum += gsl_ran_hypergeometric_pdf(x, n1, n2, t); 195 if (!suite.equal(sum, P, 20)) { 196 suite.add(false); 197 suite.err() << "error: gsl_cdf_hypergeometric_P for k=" << k << "\n"; 198 } 199 if (!suite.equal(Q, 1-P, 1000)) { 200 suite.add(false); 201 suite.err() << "failed: Q = 1-P for k=" << k << "\n"; 202 } 203 } 204 } 205 206 207 // test bug #908 reported in http://dev.thep.lu.se/yat/ticket/908 208 void test_ticket908(test::Suite& suite) 209 { 210 suite.out() << "===\ntesting ticket 908\n"; 211 statistics::Fisher f; 212 int a1 = 13; 213 int a2 = 3; 214 int b1 = 3; 215 int b2 = 0; 216 217 double correct_p = 0; 218 unsigned int n1 = a1 + a2; 219 unsigned int n2 = b1 + b2; 220 unsigned int t = a1+b1; 221 for (unsigned int k=0; k<=n1; ++k) { 222 double P = gsl_ran_hypergeometric_pdf(k, n1, n2, t); 223 if (P <= gsl_ran_hypergeometric_pdf(a1, n1, n2, t)) { 224 suite.out() << "counting " << k << " " << P << "\n"; 225 correct_p += P; 226 } 227 } 228 statistics::Fisher fisher; 229 std::vector<double> p; 230 fisher.oddsratio(a1, a2, b1, b2); 231 p.push_back(fisher.p_value()); 232 fisher.oddsratio(a2, a1, b2, b1); 233 p.push_back(fisher.p_value()); 234 fisher.oddsratio(b1, b2, a1, a2); 235 p.push_back(fisher.p_value()); 236 fisher.oddsratio(b2, b1, a2, a1); 237 p.push_back(fisher.p_value()); 238 for (size_t i=0; i<p.size(); ++i) { 239 if (!suite.add(suite.equal(correct_p, p[i], 20))) 240 suite.err() << "error: incorrect p: " << p[i] << " for case " 241 << i << "; expected: " << correct_p << "\n"; 242 } 243 } -
branches/0.15-stable/yat/statistics/Fisher.cc
r3455 r3739 87 87 88 88 /* 89 (n) n! 90 ( ) = -------- 91 (k) k!(n-k)! 92 93 94 89 95 ( n ) 90 (k+1) (k+1)!(n-k)!91 ----- = ------------ 92 (n) k!(n-k-1)!96 (k+1) k!(n-k)! 97 ----- = ------------ = (n-k) / (k+1) 98 (n) (k+1)!(n-k-1)! 93 99 (k) 94 100 */ 95 101 double Fisher::choose_ratio(unsigned int n, unsigned int k) const 96 102 { 97 if (k==0) 98 return n; 99 if (k==n-1) 100 return k+1; 103 assert(k+1 <= n); 101 104 return static_cast<double>(n-k) / (k+1); 102 105 } … … 106 109 unsigned int n2, unsigned int t) const 107 110 { 111 // P(X = k+1) / P(X = k) = 112 // (choose(n1, k+1) * choose(n2, t-k-1) / choose(n1+n2, t) ) / 113 // (choose(n1, k) * choose(n2, t-k) / choose(n1+n2, t) ) = 114 // choose(n1, k+1) / choose(n1, k) /( choose(n2, t-k)/choose(n2,t-k-1) ) = 115 // choose(n1,k+1)/choose(n1,k) / (choose(n2,(t-k-1)+1)/choose(n2,t-k-1)) 116 // choose_ratio(n1,k) / choose_ratio(n2, t-k-1) 117 // where choose_ratio(a, b) = choose(a, b+1) / choose(a,b) 108 118 return choose_ratio(n1, k) / choose_ratio(n2, t-k-1); 109 119 } … … 240 250 { 241 251 // The hypergeometric distribution is unimodal (only one peak) and the 242 // peak (mode) occurs at: (t+1)*(n1+1)/(n+1) 243 244 double mode = static_cast<double>(a_+c_+1)*static_cast<double>(a_+b_+1)/ 245 static_cast<double>(a_+b_+c_+d_+1); 252 // peak (mode) occurs at: floor((t+1)*(n1+1)/(n+1)) 253 254 double mode = std::floor((a_+c_+1.0)*(a_+b_+1.0)/(a_+b_+c_+d_+1.0)); 246 255 if (a_ >= mode) 247 256 return p_value_exact(a_, a_+b_, c_+d_, a_+c_); … … 267 276 assert(n1); 268 277 assert(n2); 278 279 // special case when k=0 and mode (peak of distribution) is at 280 // zero as well. If mode is not zero 2x2 table should have been 281 // mirrored before calling this function. 282 if (k == 0) { 283 assert(std::floor((a_+c_+1.0)*(a_+b_+1.0)/(a_+b_+c_+d_+1.0)) == 0.0); 284 return 1.0; 285 } 269 286 270 287 assert(k);
Note: See TracChangeset
for help on using the changeset viewer.