Changeset 2708


Ignore:
Timestamp:
Mar 14, 2012, 8:06:01 AM (11 years ago)
Author:
Peter
Message:

tests for weighted ROC p-value. refs #211

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/test/roc.cc

    r2707 r2708  
    2424
    2525#include "yat/classifier/DataLookupWeighted1D.h"
     26#include "yat/random/random.h"
    2627#include "yat/classifier/Target.h"
     28#include "yat/statistics/Averager.h"
    2729#include "yat/statistics/Fisher.h"
    2830#include "yat/statistics/ROC.h"
     
    3032#include "yat/utility/Vector.h"
    3133
     34#include <gsl/gsl_cdf.h>
     35
    3236#include <cassert>
    3337#include <cmath>
     38#include <deque>
    3439#include <fstream>
    3540#include <iostream>
    36 
     41#include <set>
     42#include <vector>
    3743
    3844using namespace theplu::yat;
    3945
    4046void test_empty(test::Suite&);
     47void test_p_approx_weighted(test::Suite& suite);
     48void test_p_approx_with_ties(test::Suite& suite);
     49void test_p_approx(test::Suite& suite);
     50void test_p_double_weights(test::Suite& suite);
     51void test_p_exact(test::Suite& suite);
     52void test_p_exact_weighted(test::Suite& suite);
     53void test_p_exact_with_ties(test::Suite& suite);
     54void test_p_with_weights(test::Suite& suite);
    4155void test_ties(test::Suite& suite);
    42 void test_p_exact(test::Suite& suite);
    43 void test_p_approx(test::Suite& suite);
    44 void test_p_exact_with_ties(test::Suite& suite);
    45 void test_p_approx_with_ties(test::Suite& suite);
    4656
    4757int main(int argc, char* argv[])
     
    108118  test_p_exact(suite);
    109119  test_empty(suite);
     120  test_p_with_weights(suite);
    110121  return suite.return_value();
    111122}
     
    227238  suite.err() << "test empty done\n";
    228239}
     240
     241
     242void test_p_with_weights(test::Suite& suite)
     243{
     244  suite.out() << "test p with weights\n";
     245  test_p_double_weights(suite);
     246  test_p_exact_weighted(suite);
     247  test_p_approx_weighted(suite);
     248}
     249
     250
     251void test_p_double_weights(test::Suite& suite)
     252{
     253  suite.out() << "test p with double weights\n";
     254  std::vector<double> w(5,1.0);
     255  w[0]=0.1;
     256  w[4]=10;
     257  std::vector<double> x(5);
     258  for (size_t i=0; i<x.size(); ++i)
     259    x[i] = i;
     260  statistics::ROC roc;
     261  statistics::ROC roc2;
     262  for (size_t i=0; i<x.size(); ++i) {
     263    roc.add(x[i], i<2, w[i]);
     264    roc2.add(x[i], i<2, 2*w[i]);
     265  }
     266  if (!suite.equal(roc.area(), roc2.area())) {
     267    suite.add(false);
     268    suite.err() << "area failed\n";
     269  }
     270  if (!suite.equal(roc.p_value_one_sided(), roc2.p_value_one_sided())) {
     271    suite.add(false);
     272    suite.err() << "exact p failed\n";
     273  }
     274  roc.minimum_size() = 0;
     275  roc2.minimum_size() = 0;
     276  if (!suite.equal(roc.p_value_one_sided(), roc2.p_value_one_sided())) {
     277    suite.xadd(false);
     278    suite.err() << "approximative p failed\n";
     279  }
     280}
     281
     282
     283void test_p_exact_weighted(test::Suite& suite)
     284{
     285  suite.out() << "test p exact weighted\n";
     286  std::vector<double> x(10);
     287  std::vector<std::pair<bool, double> > w(10);
     288  for (size_t i=0; i<x.size(); ++i) {
     289    x[i] = i;
     290    w[i].first = false;
     291    w[i].second = 1.0;
     292  }
     293  w[3].first = true;
     294  w[7].first = true;
     295  w[8].first = true;
     296  w[1].second = 10;
     297  w[7].second = 10;
     298  w[9].second = 0.1;
     299
     300  statistics::ROC roc;
     301  for (size_t i=0; i<x.size(); ++i)
     302    roc.add(x[i], w[i].first, w[i].second);
     303  roc.minimum_size() = 0;
     304
     305  std::sort(w.begin(), w.end());
     306  unsigned long perm = 0;
     307  unsigned long k = 0;
     308  while (true) {
     309    ++perm;
     310    statistics::ROC roc2;
     311    for (size_t i=0; i<x.size(); ++i)
     312      roc2.add(x[i], w[i].first, w[i].second);
     313    if (roc2.area() >= roc.area())
     314      ++k;
     315
     316    if (!next_permutation(w.begin(), w.end()))
     317      break;
     318  }
     319  if (!suite.xadd(suite.equal(roc.p_value_one_sided(),
     320                              static_cast<double>(k)/perm))) {
     321    suite.out() << "area: " << roc.area() << "\n"
     322                << perm << " permutations of which\n"
     323                << k << " with larger (or equal) area "
     324                << "corresponding to P=" << static_cast<double>(k)/perm << "\n"
     325                << "p_value_one_sided() returned: " << roc.p_value_one_sided()
     326                << "\n";
     327  }
     328
     329}
     330
     331
     332void test_p_approx_weighted(test::Suite& suite)
     333{
     334  suite.out() << "test p approx weighted\n";
     335  std::vector<double> x(100);
     336  std::vector<double> w(100, 1.0);
     337  std::deque<bool> label(100);
     338
     339  for (size_t i=0; i<x.size(); ++i) {
     340    x[i] = i;
     341    label[i] = i>30 && i<70;
     342    w[i] = 100.0 / (i+100);
     343  }
     344
     345  statistics::ROC roc;
     346  for (size_t i=0; i<x.size(); ++i)
     347    roc.add(x[i], label[i], w[i]);
     348  suite.out() << "area: " << roc.area() << std::endl;
     349  roc.minimum_size() = 0;
     350  double p = roc.p_value_one_sided();
     351  suite.out() << "p: " << p << std::endl;
     352
     353  std::set<size_t> checkpoints;
     354  size_t perm = 1000000;
     355  checkpoints.insert(10);
     356  checkpoints.insert(100);
     357  checkpoints.insert(1000);
     358  checkpoints.insert(10000);
     359  checkpoints.insert(100000);
     360  checkpoints.insert(perm);
     361  statistics::Averager averager;
     362  for (size_t i=1; i<=perm; ++i) {
     363    random::random_shuffle(x.begin(), x.end());
     364    statistics::ROC roc2;
     365    for (size_t j=0; j<x.size(); ++j)
     366      roc2.add(x[j], label[j], w[j]);
     367    if (roc2.area()>=roc.area())
     368      averager.add(1.0);
     369    else
     370      averager.add(0.0);
     371    if (checkpoints.find(i)!=checkpoints.end()) {
     372      if (gsl_cdf_binomial_P(averager.sum_x(), p, averager.n())<1e-10 ||
     373          gsl_cdf_binomial_Q(averager.sum_x(), p, averager.n())<1e-10) {
     374        suite.err() << "error: approx p value and permutation p-value "
     375                    << "deviate more than expected\n"
     376                    << "approx p: " << p << "\n"
     377                    << "permutations: " << averager.n() << "\n"
     378                    << "successful: " << averager.sum_x() << "\n"
     379                    << "corresponds to P=" << averager.mean() << "\n";
     380        suite.xadd(false);
     381        return;
     382      }
     383    }
     384  }
     385}
Note: See TracChangeset for help on using the changeset viewer.