Changeset 2709
- Timestamp: Mar 15, 2012, 4:26:47 AM (11 years ago)
- Location: trunk
- Files: 3 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/test/roc.cc
r2708 r2709 275 275 roc2.minimum_size() = 0; 276 276 if (!suite.equal(roc.p_value_one_sided(), roc2.p_value_one_sided())) { 277 suite. xadd(false);277 suite.add(false); 278 278 suite.err() << "approximative p failed\n"; 279 279 } … … 333 333 { 334 334 suite.out() << "test p approx weighted\n"; 335 std::vector<double> x( 100);336 std::vector<double> w( 100, 1.0);337 std::deque<bool> label( 100);335 std::vector<double> x(200); 336 std::vector<double> w(200, 1.0); 337 std::deque<bool> label(200); 338 338 339 339 for (size_t i=0; i<x.size(); ++i) { 340 340 x[i] = i; 341 341 label[i] = i>30 && i<70; 342 w[i] = 100.0 / (i+100); 342 if (i<100) 343 w[i] = 100.0 / (100+i); 344 else 345 w[i] = 0.0001; 343 346 } 344 347 … … 346 349 for (size_t i=0; i<x.size(); ++i) 347 350 roc.add(x[i], label[i], w[i]); 348 suite.out() << "area: " << roc.area() << std::endl;349 351 roc.minimum_size() = 0; 350 352 double p = roc.p_value_one_sided(); 351 suite.out() << "p: " << p << std::endl;352 353 353 354 std::set<size_t> checkpoints; 354 size_t perm = 100000 0;355 size_t perm = 100000; 355 356 checkpoints.insert(10); 356 357 checkpoints.insert(100); 357 358 checkpoints.insert(1000); 358 359 checkpoints.insert(10000); 359 checkpoints.insert(100000);360 360 checkpoints.insert(perm); 361 361 statistics::Averager averager; … … 374 374 suite.err() << "error: approx p value and permutation p-value " 375 375 << "deviate more than expected\n" 376 << "area: " << roc.area() << "\n" 376 377 << "approx p: " << p << "\n" 377 378 << "permutations: " << averager.n() << "\n" 378 379 << "successful: " << averager.sum_x() << "\n" 379 380 << "corresponds to P=" << averager.mean() << "\n"; 380 suite. xadd(false);381 suite.add(false); 381 382 return; 382 383 } -
trunk/yat/statistics/ROC.cc
r2697 r2709 4 4 Copyright (C) 2004, 2005 Peter Johansson 5 5 Copyright (C) 2006, 2007, 2008 Jari Häkkinen, Peter Johansson 6 Copyright (C) 2011 Peter Johansson6 Copyright (C) 2011, 2012 Peter Johansson 7 7 8 8 This file is part of the yat library, http://dev.thep.lu.se/yat … … 54 54 multimap_.insert(lower, element); 55 55 if (target) 56 w_pos_+=w;56 pos_weights_.add(w); 57 57 else 58 w_neg_+=w;58 neg_weights_.add(w); 59 59 area_ = std::numeric_limits<double>::quiet_NaN(); 60 60 } … … 73 73 double ROC::get_p_approx(double x) const 74 74 { 75 size_t n_pos = nof_points(pos_weights_); 76 size_t n_neg = nof_points(neg_weights_); 77 size_t nof_samples = n_pos + n_neg; 75 78 // make x standard normal 76 79 x -= 0.5; 77 80 // Not integrating from the middle of the bin, but from the inner edge. 78 81 if (x>0) 79 x -= 0.5/(n_pos ()*n_neg());82 x -= 0.5/(n_pos*n_neg); 80 83 else if(x<0) 81 x += 0.5/(n_pos ()*n_neg());84 x += 0.5/(n_pos*n_neg); 82 85 else 83 86 return 0.5; 84 double var = 1.0+n ();87 double var = 1.0+nof_samples; 85 88 if (has_ties_) { 86 89 double correction = 0; … … 100 103 mn/12 [ N+1 - 1/(N(N-1)) * sum(t(t-1)(t+1)) ] 101 104 */ 102 var -= correction/(n () * (n()-1));103 } 104 var = var / (12*n_pos ()*n_neg());105 var -= correction/(nof_samples * (nof_samples-1)); 106 } 107 var = var / (12*n_pos*n_neg); 105 108 return gsl_cdf_gaussian_Q(x, std::sqrt(var)); 106 109 } … … 127 130 double ROC::n_neg(void) const 128 131 { 129 return w_neg_;132 return neg_weights_.sum_x(); 130 133 } 131 134 … … 133 136 double ROC::n_pos(void) const 134 137 { 135 return w_pos_; 138 return pos_weights_.sum_x(); 139 } 140 141 142 size_t ROC::nof_points(const Averager& a) const 143 { 144 return a.sum_x()*a.sum_x()/a.sum_xx(); 136 145 } 137 146 … … 140 149 { 141 150 return p_exact_with_ties(multimap_.begin(), multimap_.end(), 142 area* w_pos_*w_neg_, w_pos_, w_neg_);151 area*n_pos()*n_neg(), n_pos(), n_neg()); 143 152 } 144 153 … … 159 168 if (has_ties_) { 160 169 p += 
p_exact_with_ties(multimap_.rbegin(), multimap_.rend(), 161 abs_area* w_pos_*w_neg_, w_pos_, w_neg_);170 abs_area*n_pos()*n_neg(), n_pos(), n_neg()); 162 171 } 163 172 else … … 189 198 area_ = std::numeric_limits<double>::quiet_NaN(); 190 199 has_ties_ = false; 191 w_pos_=0;192 w_neg_=0;200 neg_weights_.reset(); 201 pos_weights_.reset(); 193 202 multimap_.clear(); 194 203 } -
trunk/yat/statistics/ROC.h
r2696 r2709 24 24 along with yat. If not, see <http://www.gnu.org/licenses/>. 25 25 */ 26 27 #include "Averager.h" 26 28 27 29 #include <gsl/gsl_randist.h> … … 154 156 155 157 where sum runs over different data values (of ties) and \f$ n_x 156 \f$ is number data points with that value. The sum i a158 \f$ is number data points with that value. The sum is a 157 159 correction term for ties and is zero if there are no ties. 158 160 161 The number of samples in a group, \f$ n^+ \f$, is calculated as 162 \f$ n = (\sum w)^2 / \sum w^2 \f$ 163 159 164 \return \f$ P(a \ge \textrm{area}) \f$ 160 161 \note Weights should be -1, 0, or 1; otherwise the p-value is162 undefined and may change in future versions.163 165 */ 164 166 double p_value_one_sided(void) const; … … 194 196 double get_p_approx(double) const; 195 197 198 /** 199 return (sum x)^2 / sum x^2 200 */ 201 size_t nof_points(const Averager& a) const; 202 196 203 /* 197 204 */ … … 214 221 bool has_ties_; 215 222 unsigned int minimum_size_; 216 double w_neg_;217 double w_pos_;223 Averager neg_weights_; 224 Averager pos_weights_; 218 225 Map multimap_; 219 226 };
Note: See TracChangeset for help on using the changeset viewer.