source: trunk/test/roc.cc @ 2595

Last change on this file since 2595 was 2595, checked in by Peter, 11 years ago

fixes #678. ROC p_value

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 5.6 KB
Line 
1// $Id: roc.cc 2595 2011-10-30 03:27:20Z peter $
2
3/*
4  Copyright (C) 2007, 2008 Jari Häkkinen, Peter Johansson
5  Copyright (C) 2011 Peter Johansson
6
7  This file is part of the yat library, http://dev.thep.lu.se/yat
8
9  The yat library is free software; you can redistribute it and/or
10  modify it under the terms of the GNU General Public License as
11  published by the Free Software Foundation; either version 3 of the
12  License, or (at your option) any later version.
13
14  The yat library is distributed in the hope that it will be useful,
15  but WITHOUT ANY WARRANTY; without even the implied warranty of
16  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  General Public License for more details.
18
19  You should have received a copy of the GNU General Public License
20  along with yat. If not, see <http://www.gnu.org/licenses/>.
21*/
22
23#include "Suite.h"
24
25#include "yat/classifier/DataLookupWeighted1D.h"
26#include "yat/classifier/Target.h"
27#include "yat/statistics/Fisher.h"
28#include "yat/statistics/ROC.h"
29#include "yat/statistics/utility.h"
30#include "yat/utility/Vector.h"
31
32#include <cassert>
33#include <cmath>
34#include <fstream>
35#include <iostream>
36
37
38using namespace theplu::yat;
39
40void test_ties(test::Suite& suite);
41void test_p_exact(test::Suite& suite);
42void test_p_approx(test::Suite& suite);
43void test_p_exact_with_ties(test::Suite& suite);
44void test_p_approx_with_ties(test::Suite& suite);
45
46int main(int argc, char* argv[])
47{ 
48  test::Suite suite(argc, argv);
49
50  suite.err() << "testing ROC" << std::endl;
51  utility::Vector value(31);
52  std::vector<std::string> label(31,"negative");
53  for (size_t i=0; i<16; i++) 
54    label[i] = "positive";
55  classifier::Target target(label);
56  for (size_t i=0; i<value.size(); i++) 
57    value(i)=i;
58  statistics::ROC roc;
59  add(roc, value.begin(), value.end(), target);
60  double area = roc.area();
61  if (!suite.equal(area,0.0)){
62    suite.err() << "test_roc: area is " << area << " should be 0.0" 
63           << std::endl;
64    suite.add(false);
65  }
66  target.set_binary(0,false);
67  target.set_binary(1,true);
68  roc.reset();
69  add(roc, value.begin(), value.end(), target);
70  area = roc.area();
71  if (!suite.equal(area,1.0)){
72    suite.err() << "test_roc: area is " << area << " should be 1.0" 
73           << std::endl;
74    suite.add(false);
75  }
76 
77  double p = roc.p_value_one_sided();
78  double p2 = roc.p_value();
79  double p_matlab = 0.00000115;
80  if (!(p/p_matlab < 1.01 && p/p_matlab > 0.99)){
81    suite.err() << "get_p_approx: p-value not correct" << std::endl;
82    suite.err() << p << " expected " << p_matlab << std::endl;
83    suite.add(false);
84  }
85  if (!(p2==2*p)) {
86    suite.add(false);
87    suite.err() << "Two-sided P-value should equal 2 * one-sided P-value.\n";
88  }
89  roc.minimum_size() = 20;
90  p = roc.p_value_one_sided();
91  p2 = roc.p_value();
92  if (!( p < 1e-8 && p > 1e-9) ){
93    suite.err() << "get_p_exact: p-value not correct" << std::endl;
94    suite.add(false);
95  }
96  if (!( p2==2*p)) {
97    suite.add(false);
98    suite.err() << "Two-sided P-value should equal 2 * one-sided P-value.\n";
99  }
100 
101  classifier::DataLookupWeighted1D dlw(target.size(),1.3);
102  add(roc, dlw.begin(), dlw.end(), target);
103  test_ties(suite);
104  test_p_approx_with_ties(suite);
105  test_p_exact_with_ties(suite);
106  test_p_approx(suite);
107  test_p_exact(suite);
108
109  return suite.return_value();
110}
111
112
113void test_p_exact_with_ties(test::Suite& suite)
114{
115  suite.out() << "test p exact with ties\n";
116  statistics::ROC roc;
117  /*
118    +++-- 6
119    ++-+- 5 4.5 *** our case ***
120    +-++- 4 4.5
121    ++--+ 4 3.5
122    +-+-+ 3 3.5
123    +--++ 2 2
124    -+++- 3 3
125    -++-+ 2 2
126    -+-++ 1 0.5 *** our second case ***
127    --+++ 0 0.5
128   */
129  roc.add(2, true);
130  roc.add(1, true);
131  roc.add(1, false);
132  roc.add(0, true);
133  roc.add(-1, false);
134  roc.area();
135  if (!suite.equal(roc.p_value_one_sided(), 3.0/10.0)) {
136    suite.add(false);
137    suite.out() << "  p_value_one_sided: expected 0.3\n";
138  }
139  else
140    suite.add(true);
141  if (!suite.equal(roc.p_value(), 5.0/10.0)) {
142    suite.add(false);
143    suite.out() << "  (two-sided) p_value: expected 0.5\n";
144  }
145  else
146    suite.add(true);
147
148  suite.out() << "test p exact with ties II\n";
149  roc.reset();
150  roc.add(2, false);
151  roc.add(1, true);
152  roc.add(1, false);
153  roc.add(0, true);
154  roc.add(-1, true);
155  suite.add(suite.equal(roc.area(), 0.5/6));
156  if (!suite.add(suite.equal(roc.p_value_one_sided(), 10.0/10.0)))
157    suite.out() << "  p_value_one_sided: expected 0.3\n";
158  if (!suite.add(suite.equal(roc.p_value(), 3.0/10.0)))
159    suite.out() << "  (two-sided) p_value: expected 0.5\n";
160}
161
162
163void test_p_approx_with_ties(test::Suite& suite)
164{
165  suite.out() << "test p approx with ties\n";
166  statistics::ROC roc;
167  for (size_t i=0; i<100; ++i) {
168    roc.add(1, i<60);
169    roc.add(0, i<40);
170  }
171  suite.add(suite.equal(roc.area(), 0.6));
172  // Having only two data values, 0 and 1, data can be represented as
173  // a 2x2 contigency table, and ROC test is same as Fisher's exact
174  // test.
175  statistics::Fisher fisher;
176  fisher.oddsratio(60, 40, 40, 60);
177  suite.add(suite.equal_fix(roc.p_value(), fisher.p_value(), 0.0002));
178}
179
180void test_ties(test::Suite& suite)
181{
182  suite.out() << "test ties\n";
183  statistics::ROC roc;
184  for (size_t i=0; i<20; ++i)
185    roc.add(10.0, i<10);
186  if (!suite.add(suite.equal(roc.area(), 0.5))) {
187    suite.err() << "error: roc with ties: area: " << roc.area() << "\n";
188  }
189}
190
191void test_p_exact(test::Suite& suite)
192{
193  suite.out() << "test_p_exact\n";
194  statistics::ROC roc;
195  for (size_t i=0; i<9; ++i)
196    roc.add(i, i<5);
197  if (roc.p_value_one_sided()<0.5) {
198    suite.add(false);
199    suite.err() << "error: expected p-value>0.5\n  found: " 
200                << roc.p_value_one_sided() << "\n";
201  }
202}
203
204
205void test_p_approx(test::Suite& suite)
206{
207  suite.out() << "test_p_approx\n";
208  statistics::ROC roc;
209  for (size_t i=0; i<100; ++i)
210    roc.add(i, i<50);
211  if (roc.p_value_one_sided()<0.5) {
212    suite.add(false);
213    suite.err() << "error: expected p-value>0.5\n  found: " 
214                << roc.p_value_one_sided() << "\n";
215  }
216}
Note: See TracBrowser for help on using the repository browser.