source: branches/0.7-stable/yat/statistics/ROC.cc @ 2548

Last change on this file since 2548 was 2548, checked in by Peter, 11 years ago

avoid crash. return nan when area is nan. fixes #669

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 3.5 KB
Line 
1// $Id: ROC.cc 2548 2011-08-07 16:36:44Z peter $
2
3/*
4  Copyright (C) 2004, 2005 Peter Johansson
5  Copyright (C) 2006, 2007, 2008 Jari Häkkinen, Peter Johansson
6  Copyright (C) 2011 Peter Johansson
7
8  This file is part of the yat library, http://dev.thep.lu.se/yat
9
10  The yat library is free software; you can redistribute it and/or
11  modify it under the terms of the GNU General Public License as
12  published by the Free Software Foundation; either version 3 of the
13  License, or (at your option) any later version.
14
15  The yat library is distributed in the hope that it will be useful,
16  but WITHOUT ANY WARRANTY; without even the implied warranty of
17  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18  General Public License for more details.
19
20  You should have received a copy of the GNU General Public License
21  along with yat. If not, see <http://www.gnu.org/licenses/>.
22*/
23
24#include "ROC.h"
25#include "AUC.h"
26#include "yat/classifier/DataLookupWeighted1D.h"
27#include "yat/classifier/Target.h"
28#include "yat/utility/stl_utility.h"
29#include "yat/utility/VectorBase.h"
30
31#include <gsl/gsl_cdf.h>
32
33#include <cassert>
34#include <cmath>
35#include <limits>
36#include <utility>
37#include <vector>
38
39namespace theplu {
40namespace yat {
41namespace statistics { 
42
43  ROC::ROC(void) 
44    :minimum_size_(10)
45  {
46    reset();
47  }
48
49  ROC::~ROC(void)
50  {
51  }
52
53
54  void ROC::add(double x, bool target, double w)
55  {
56    if (!w)
57      return;
58    multimap_.insert(std::make_pair(x, std::make_pair(target, w)));
59    if (target)
60      w_pos_+=w;
61    else
62      w_neg_+=w;
63    area_=-1;
64  }
65
66
67  double ROC::area(void)
68  {
69    if (area_==-1){
70      AUC auc(false);
71      area_=auc.score(multimap_);
72    }
73    return area_;
74  }
75
76
77  double ROC::get_p_approx(double x) const
78  {
79    // make x standard normal
80    x -= 0.5;
81    // Not integrating from the middle of the bin, but from the inner edge.
82    if (x>0)
83      x -= 0.5/(n_pos()*n_neg());
84    else if(x<0)
85      x += 0.5/(n_pos()*n_neg());
86    else if (x==0)
87      return 0.5;
88    double sigma = std::sqrt( ( n()+1.0 )/(12*n_pos()*n_neg()) );
89    return gsl_cdf_gaussian_Q(x, sigma);
90  }
91
92  double ROC::get_p_exact(const double block, const double nof_pos, 
93                          const double nof_neg) const
94  {
95    if (block <= 0.0)
96      return 1.0;
97    if (block > nof_neg*nof_pos)
98      return 0.0;
99    assert(nof_pos>0);
100    assert(nof_neg>0);
101    double p1 = get_p_exact(block-nof_neg, nof_pos-1, nof_neg);
102    double p2 = get_p_exact(block, nof_pos, nof_neg-1);
103    return nof_pos/(nof_pos+nof_neg)*p1 + nof_neg/(nof_pos+nof_neg)*p2;
104  }
105
106  unsigned int& ROC::minimum_size(void)
107  {
108    return minimum_size_;
109  }
110
111
112  const unsigned int& ROC::minimum_size(void) const
113  {
114    return minimum_size_;
115  }
116
117
118  double ROC::n(void) const
119  {
120    return n_pos()+n_neg();
121  }
122
123
124  double ROC::n_neg(void) const
125  {
126    return w_neg_;
127  }
128
129
130  double ROC::n_pos(void) const
131  {
132    return w_pos_;
133  }
134
135
136  double ROC::p_value() const
137  {
138    double p = 2*p_value_one_sided();
139    return std::min(p, 2-p); 
140  }
141
142
143  double ROC::p_value_one_sided() const
144  {
145    double area(area_);
146    if (area_==-1){
147      AUC auc(false);
148      area = auc.score(multimap_);
149    }
150    if (std::isnan(area))
151      return std::numeric_limits<double>::quiet_NaN();
152    if (n_pos() < minimum_size_ && n_neg() < minimum_size_) {
153      // for small areas we calculate probabilitu to get larger area -
154      // not larger or equal.
155      if (area<0.5)
156        return 1-get_p_exact((1-area)*n_pos()*n_neg(), n_pos(), n_neg());
157      return get_p_exact(area*n_pos()*n_neg(), n_pos(), n_neg());
158    }
159    return get_p_approx(area);
160  }
161
162
163  void ROC::reset(void)
164  {
165    area_=-1;
166    w_pos_=0;
167    w_neg_=0;
168    multimap_.clear();
169  }
170
171}}} // of namespace statistics, yat, and theplu
Note: See TracBrowser for help on using the repository browser.