source: trunk/yat/statistics/ROC.cc @ 2601

Last change on this file since 2601 was 2601, checked in by Peter, 11 years ago

merge release 0.7.3 into trunk. A couple of conflict and needed fix test related to #669

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 4.0 KB
Line 
1// $Id: ROC.cc 2601 2011-10-30 05:08:14Z peter $
2
3/*
4  Copyright (C) 2004, 2005 Peter Johansson
5  Copyright (C) 2006, 2007, 2008 Jari Häkkinen, Peter Johansson
6  Copyright (C) 2011 Peter Johansson
7
8  This file is part of the yat library, http://dev.thep.lu.se/yat
9
10  The yat library is free software; you can redistribute it and/or
11  modify it under the terms of the GNU General Public License as
12  published by the Free Software Foundation; either version 3 of the
13  License, or (at your option) any later version.
14
15  The yat library is distributed in the hope that it will be useful,
16  but WITHOUT ANY WARRANTY; without even the implied warranty of
17  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18  General Public License for more details.
19
20  You should have received a copy of the GNU General Public License
21  along with yat. If not, see <http://www.gnu.org/licenses/>.
22*/
23
24#include "ROC.h"
25#include "AUC.h"
26
27#include <gsl/gsl_cdf.h>
28
29#include <algorithm>
30#include <cassert>
31#include <cmath>
32#include <limits>
33#include <utility>
34
35namespace theplu {
36namespace yat {
37namespace statistics { 
38
39  ROC::ROC(void) 
40    :minimum_size_(10)
41  {
42    reset();
43  }
44
45
46  void ROC::add(double x, bool target, double w)
47  {
48    if (!w)
49      return;
50    ROC::Map::value_type element(x, std::make_pair(target, w));
51    ROC::Map::iterator lower = multimap_.lower_bound(x);
52    if (lower!=multimap_.end() && lower->first == x)
53      has_ties_ = true;
54    multimap_.insert(lower, element);
55    if (target)
56      w_pos_+=w;
57    else
58      w_neg_+=w;
59    area_=-1;
60  }
61
62
63  double ROC::area(void)
64  {
65    if (area_==-1){
66      AUC auc(false);
67      area_=auc.score(multimap_);
68    }
69    return area_;
70  }
71
72
73  double ROC::get_p_approx(double x) const
74  {
75    // make x standard normal
76    x -= 0.5;
77    // Not integrating from the middle of the bin, but from the inner edge.
78    if (x>0)
79      x -= 0.5/(n_pos()*n_neg());
80    else if(x<0)
81      x += 0.5/(n_pos()*n_neg());
82    else
83      return 0.5;
84    double var = 1.0+n();
85    if (has_ties_) {
86      double correction = 0;
87      Map::const_iterator first = multimap_.begin();
88      Map::const_iterator last = multimap_.begin();
89      while (first!=multimap_.end()) {
90        size_t n = 0;
91        while (first->first == last->first) {
92          ++n;
93          ++last;
94        }
95        correction += n * (n-1) * (n+1);
96        first = last;
97      }
98      /*
99        mn(N+1)/12-[mn/(12N(N-1)) * sum(t(t-1)(t+1))] =
100        mn/12 [ N+1 - 1/(N(N-1)) * sum(t(t-1)(t+1)) ]
101      */
102      var -= correction/(n() * (n()-1));
103    }
104    var = var / (12*n_pos()*n_neg());
105    return gsl_cdf_gaussian_Q(x, std::sqrt(var));
106  }
107
108
109  unsigned int& ROC::minimum_size(void)
110  {
111    return minimum_size_;
112  }
113
114
115  const unsigned int& ROC::minimum_size(void) const
116  {
117    return minimum_size_;
118  }
119
120
121  double ROC::n(void) const
122  {
123    return n_pos()+n_neg();
124  }
125
126
127  double ROC::n_neg(void) const
128  {
129    return w_neg_;
130  }
131
132
133  double ROC::n_pos(void) const
134  {
135    return w_pos_;
136  }
137
138
139  double ROC::p_exact(double area) const
140  {
141    return p_exact_with_ties(multimap_.begin(), multimap_.end(),
142                             area*w_pos_*w_neg_, w_pos_, w_neg_);
143  }
144
145
146  double ROC::p_value() const
147  {
148    double area(area_);
149    if (area_==-1){
150      AUC auc(false);
151      area = auc.score(multimap_);
152    }
153    if (std::isnan(area))
154      return std::numeric_limits<double>::quiet_NaN();
155    if (use_exact_method()) {
156      double p = 0;
157      double abs_area = std::max(area, 1-area);
158      p = p_exact(abs_area);
159      if (has_ties_) {
160        p += p_exact_with_ties(multimap_.rbegin(), multimap_.rend(),
161                               abs_area*w_pos_*w_neg_, w_pos_, w_neg_);
162      }
163      else
164        p *= 2.0;
165      // avoid double counting when area is 0.5
166      return std::min(p, 1.0);
167    }
168    return 2*get_p_approx(area);
169  }
170
171
172  double ROC::p_value_one_sided() const
173  {
174    double area(area_);
175    if (area_==-1){
176      AUC auc(false);
177      area = auc.score(multimap_);
178    }
179    if (std::isnan(area))
180      return std::numeric_limits<double>::quiet_NaN();
181    if (use_exact_method())
182      return p_exact(area);
183    return get_p_approx(area);
184  }
185
186
187  void ROC::reset(void)
188  {
189    area_=-1;
190    has_ties_ = false;
191    w_pos_=0;
192    w_neg_=0;
193    multimap_.clear();
194  }
195
196
197  bool ROC::use_exact_method(void) const
198  {
199    return (n_pos() < minimum_size_) && (n_neg() < minimum_size_);
200  }
201
202}}} // of namespace statistics, yat, and theplu
Note: See TracBrowser for help on using the repository browser.