source: trunk/yat/statistics/ROC.cc @ 2556

Last change on this file since 2556 was 2556, checked in by Peter, 10 years ago

ROC: implement correction for ties in large sample approximation o p-value. refs #144.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 3.9 KB
Line 
1// $Id: ROC.cc 2556 2011-08-20 02:28:52Z peter $
2
3/*
4  Copyright (C) 2004, 2005 Peter Johansson
5  Copyright (C) 2006, 2007, 2008 Jari Häkkinen, Peter Johansson
6
7  This file is part of the yat library, http://dev.thep.lu.se/yat
8
9  The yat library is free software; you can redistribute it and/or
10  modify it under the terms of the GNU General Public License as
11  published by the Free Software Foundation; either version 3 of the
12  License, or (at your option) any later version.
13
14  The yat library is distributed in the hope that it will be useful,
15  but WITHOUT ANY WARRANTY; without even the implied warranty of
16  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  General Public License for more details.
18
19  You should have received a copy of the GNU General Public License
20  along with yat. If not, see <http://www.gnu.org/licenses/>.
21*/
22
23#include <iostream>
24
25#include "ROC.h"
26#include "AUC.h"
27
28#include <gsl/gsl_cdf.h>
29
30#include <cassert>
31#include <cmath>
32#include <utility>
33#include <vector>
34
35namespace theplu {
36namespace yat {
37namespace statistics { 
38
39  ROC::ROC(void) 
40    :minimum_size_(10)
41  {
42    reset();
43  }
44
45
46  void ROC::add(double x, bool target, double w)
47  {
48    if (!w)
49      return;
50    typedef std::multimap<double, std::pair<bool, double> > Map;
51    Map::value_type element(x, std::make_pair(target, w));
52    Map::iterator lower = multimap_.lower_bound(x);
53    if (lower!=multimap_.end() && lower->first == x)
54      has_ties_ = true;
55    multimap_.insert(lower, element);
56    //    multimap_.insert(std::make_pair(x, std::make_pair(target, w)));
57    if (target)
58      w_pos_+=w;
59    else
60      w_neg_+=w;
61    area_=-1;
62  }
63
64
65  double ROC::area(void)
66  {
67    if (area_==-1){
68      AUC auc(false);
69      area_=auc.score(multimap_);
70    }
71    return area_;
72  }
73
74
75  double ROC::get_p_approx(double x) const
76  {
77    // make x standard normal
78    x -= 0.5;
79    // Not integrating from the middle of the bin, but from the inner edge.
80    if (x>0)
81      x -= 0.5/(n_pos()*n_neg());
82    else if(x<0)
83      x += 0.5/(n_pos()*n_neg());
84    else
85      return 0.5;
86    double var = 1.0+n();
87    if (has_ties_) {
88      double correction = 0;
89      Map::const_iterator first = multimap_.begin();
90      Map::const_iterator last = multimap_.begin();
91      while (first!=multimap_.end()) {
92        size_t n = 0;
93        while (first->first == last->first) {
94          ++n;
95          ++last;
96        }
97        correction += n * (n-1) * (n+1);
98        first = last;
99      }
100      /*
101        mn(N+1)/12-[mn/(12N(N-1)) * sum(t(t-1)(t+1))] =
102        mn/12 [ N+1 - 1/(N(N-1)) * sum(t(t-1)(t+1)) ]
103      */
104      var -= correction/(n() * (n()-1));
105    }
106    var = var / (12*n_pos()*n_neg());
107    return gsl_cdf_gaussian_Q(x, std::sqrt(var));
108  }
109
110  double ROC::get_p_exact(const double block, const double nof_pos, 
111                          const double nof_neg) const
112  {
113    if (block <= 0.0)
114      return 1.0;
115    if (block > nof_neg*nof_pos)
116      return 0.0;
117    double p1 = get_p_exact(block-nof_neg, nof_pos-1, nof_neg);
118    double p2 = get_p_exact(block, nof_pos, nof_neg-1);
119    return nof_pos/(nof_pos+nof_neg)*p1 + nof_neg/(nof_pos+nof_neg)*p2;
120  }
121
122  unsigned int& ROC::minimum_size(void)
123  {
124    return minimum_size_;
125  }
126
127
128  const unsigned int& ROC::minimum_size(void) const
129  {
130    return minimum_size_;
131  }
132
133
134  double ROC::n(void) const
135  {
136    return n_pos()+n_neg();
137  }
138
139
140  double ROC::n_neg(void) const
141  {
142    return w_neg_;
143  }
144
145
146  double ROC::n_pos(void) const
147  {
148    return w_pos_;
149  }
150
151
152  double ROC::p_value() const
153  {
154    double p = 2*p_value_one_sided();
155    return std::min(p, 2-p); 
156  }
157
158
159  double ROC::p_value_one_sided() const
160  {
161    double area(area_);
162    if (area_==-1){
163      AUC auc(false);
164      area = auc.score(multimap_);
165    }
166    if (n_pos() < minimum_size_ && n_neg() < minimum_size_) {
167      // for small areas we calculate probabilitu to get larger area -
168      // not larger or equal.
169      if (area<0.5)
170        return 1-get_p_exact((1-area)*n_pos()*n_neg(), n_pos(), n_neg());
171      return get_p_exact(area*n_pos()*n_neg(), n_pos(), n_neg());
172    }
173    return get_p_approx(area);
174  }
175
176
177  void ROC::reset(void)
178  {
179    area_=-1;
180    has_ties_ = false;
181    w_pos_=0;
182    w_neg_=0;
183    multimap_.clear();
184  }
185
186}}} // of namespace statistics, yat, and theplu
Note: See TracBrowser for help on using the repository browser.