Changeset 2708
- Timestamp:
- Mar 14, 2012, 8:06:01 AM (11 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/test/roc.cc
r2707 r2708 24 24 25 25 #include "yat/classifier/DataLookupWeighted1D.h" 26 #include "yat/random/random.h" 26 27 #include "yat/classifier/Target.h" 28 #include "yat/statistics/Averager.h" 27 29 #include "yat/statistics/Fisher.h" 28 30 #include "yat/statistics/ROC.h" … … 30 32 #include "yat/utility/Vector.h" 31 33 34 #include <gsl/gsl_cdf.h> 35 32 36 #include <cassert> 33 37 #include <cmath> 38 #include <deque> 34 39 #include <fstream> 35 40 #include <iostream> 36 41 #include <set> 42 #include <vector> 37 43 38 44 using namespace theplu::yat; 39 45 40 46 void test_empty(test::Suite&); 47 void test_p_approx_weighted(test::Suite& suite); 48 void test_p_approx_with_ties(test::Suite& suite); 49 void test_p_approx(test::Suite& suite); 50 void test_p_double_weights(test::Suite& suite); 51 void test_p_exact(test::Suite& suite); 52 void test_p_exact_weighted(test::Suite& suite); 53 void test_p_exact_with_ties(test::Suite& suite); 54 void test_p_with_weights(test::Suite& suite); 41 55 void test_ties(test::Suite& suite); 42 void test_p_exact(test::Suite& suite);43 void test_p_approx(test::Suite& suite);44 void test_p_exact_with_ties(test::Suite& suite);45 void test_p_approx_with_ties(test::Suite& suite);46 56 47 57 int main(int argc, char* argv[]) … … 108 118 test_p_exact(suite); 109 119 test_empty(suite); 120 test_p_with_weights(suite); 110 121 return suite.return_value(); 111 122 } … … 227 238 suite.err() << "test empty done\n"; 228 239 } 240 241 242 void test_p_with_weights(test::Suite& suite) 243 { 244 suite.out() << "test p with weights\n"; 245 test_p_double_weights(suite); 246 test_p_exact_weighted(suite); 247 test_p_approx_weighted(suite); 248 } 249 250 251 void test_p_double_weights(test::Suite& suite) 252 { 253 suite.out() << "test p with double weights\n"; 254 std::vector<double> w(5,1.0); 255 w[0]=0.1; 256 w[4]=10; 257 std::vector<double> x(5); 258 for (size_t i=0; i<x.size(); ++i) 259 x[i] = i; 260 statistics::ROC roc; 261 statistics::ROC roc2; 262 for (size_t i=0; i<x.size(); ++i) { 263 roc.add(x[i], i<2, w[i]); 264 roc2.add(x[i], i<2, 2*w[i]); 265 } 266 if (!suite.equal(roc.area(), roc2.area())) { 267 suite.add(false); 268 suite.err() << "area failed\n"; 269 } 270 if (!suite.equal(roc.p_value_one_sided(), roc2.p_value_one_sided())) { 271 suite.add(false); 272 suite.err() << "exact p failed\n"; 273 } 274 roc.minimum_size() = 0; 275 roc2.minimum_size() = 0; 276 if (!suite.equal(roc.p_value_one_sided(), roc2.p_value_one_sided())) { 277 suite.xadd(false); 278 suite.err() << "approximative p failed\n"; 279 } 280 } 281 282 283 void test_p_exact_weighted(test::Suite& suite) 284 { 285 suite.out() << "test p exact weighted\n"; 286 std::vector<double> x(10); 287 std::vector<std::pair<bool, double> > w(10); 288 for (size_t i=0; i<x.size(); ++i) { 289 x[i] = i; 290 w[i].first = false; 291 w[i].second = 1.0; 292 } 293 w[3].first = true; 294 w[7].first = true; 295 w[8].first = true; 296 w[1].second = 10; 297 w[7].second = 10; 298 w[9].second = 0.1; 299 300 statistics::ROC roc; 301 for (size_t i=0; i<x.size(); ++i) 302 roc.add(x[i], w[i].first, w[i].second); 303 roc.minimum_size() = 0; 304 305 std::sort(w.begin(), w.end()); 306 unsigned long perm = 0; 307 unsigned long k = 0; 308 while (true) { 309 ++perm; 310 statistics::ROC roc2; 311 for (size_t i=0; i<x.size(); ++i) 312 roc2.add(x[i], w[i].first, w[i].second); 313 if (roc2.area() >= roc.area()) 314 ++k; 315 316 if (!next_permutation(w.begin(), w.end())) 317 break; 318 } 319 if (!suite.xadd(suite.equal(roc.p_value_one_sided(), 320 static_cast<double>(k)/perm))) { 321 suite.out() << "area: " << roc.area() << "\n" 322 << perm << " permutations of which\n" 323 << k << " with larger (or equal) area " 324 << "corresponding to P=" << static_cast<double>(k)/perm << "\n" 325 << "p_value_one_sided() returned: " << roc.p_value_one_sided() 326 << "\n"; 327 } 328 329 } 330 331 332 void test_p_approx_weighted(test::Suite& suite) 333 { 334 suite.out() << "test p approx weighted\n"; 335 std::vector<double> x(100); 336 std::vector<double> w(100, 1.0); 337 std::deque<bool> label(100); 338 339 for (size_t i=0; i<x.size(); ++i) { 340 x[i] = i; 341 label[i] = i>30 && i<70; 342 w[i] = 100.0 / (i+100); 343 } 344 345 statistics::ROC roc; 346 for (size_t i=0; i<x.size(); ++i) 347 roc.add(x[i], label[i], w[i]); 348 suite.out() << "area: " << roc.area() << std::endl; 349 roc.minimum_size() = 0; 350 double p = roc.p_value_one_sided(); 351 suite.out() << "p: " << p << std::endl; 352 353 std::set<size_t> checkpoints; 354 size_t perm = 1000000; 355 checkpoints.insert(10); 356 checkpoints.insert(100); 357 checkpoints.insert(1000); 358 checkpoints.insert(10000); 359 checkpoints.insert(100000); 360 checkpoints.insert(perm); 361 statistics::Averager averager; 362 for (size_t i=1; i<=perm; ++i) { 363 random::random_shuffle(x.begin(), x.end()); 364 statistics::ROC roc2; 365 for (size_t j=0; j<x.size(); ++j) 366 roc2.add(x[j], label[j], w[j]); 367 if (roc2.area()>=roc.area()) 368 averager.add(1.0); 369 else 370 averager.add(0.0); 371 if (checkpoints.find(i)!=checkpoints.end()) { 372 if (gsl_cdf_binomial_P(averager.sum_x(), p, averager.n())<1e-10 || 373 gsl_cdf_binomial_Q(averager.sum_x(), p, averager.n())<1e-10) { 374 suite.err() << "error: approx p value and permutation p-value " 375 << "deviate more than expected\n" 376 << "approx p: " << p << "\n" 377 << "permutations: " << averager.n() << "\n" 378 << "successful: " << averager.sum_x() << "\n" 379 << "corresponds to P=" << averager.mean() << "\n"; 380 suite.xadd(false); 381 return; 382 } 383 } 384 } 385 }
Note: See TracChangeset
for help on using the changeset viewer.