source: trunk/yat/statistics/ROC.cc @ 2572

Last change on this file since 2572 was 2572, checked in by Peter, 10 years ago

remove debug include

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 3.9 KB
Line 
1// $Id: ROC.cc 2572 2011-09-28 14:36:29Z peter $
2
3/*
4  Copyright (C) 2004, 2005 Peter Johansson
5  Copyright (C) 2006, 2007, 2008 Jari Häkkinen, Peter Johansson
6  Copyright (C) 2011 Peter Johansson
7
8  This file is part of the yat library, http://dev.thep.lu.se/yat
9
10  The yat library is free software; you can redistribute it and/or
11  modify it under the terms of the GNU General Public License as
12  published by the Free Software Foundation; either version 3 of the
13  License, or (at your option) any later version.
14
15  The yat library is distributed in the hope that it will be useful,
16  but WITHOUT ANY WARRANTY; without even the implied warranty of
17  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18  General Public License for more details.
19
20  You should have received a copy of the GNU General Public License
21  along with yat. If not, see <http://www.gnu.org/licenses/>.
22*/
23
24#include "ROC.h"
25#include "AUC.h"
26
27#include <gsl/gsl_cdf.h>
28
29#include <cassert>
30#include <cmath>
31#include <utility>
32#include <vector>
33
34namespace theplu {
35namespace yat {
36namespace statistics { 
37
38  ROC::ROC(void) 
39    :minimum_size_(10)
40  {
41    reset();
42  }
43
44
45  void ROC::add(double x, bool target, double w)
46  {
47    if (!w)
48      return;
49    typedef std::multimap<double, std::pair<bool, double> > Map;
50    Map::value_type element(x, std::make_pair(target, w));
51    Map::iterator lower = multimap_.lower_bound(x);
52    if (lower!=multimap_.end() && lower->first == x)
53      has_ties_ = true;
54    multimap_.insert(lower, element);
55    //    multimap_.insert(std::make_pair(x, std::make_pair(target, w)));
56    if (target)
57      w_pos_+=w;
58    else
59      w_neg_+=w;
60    area_=-1;
61  }
62
63
64  double ROC::area(void)
65  {
66    if (area_==-1){
67      AUC auc(false);
68      area_=auc.score(multimap_);
69    }
70    return area_;
71  }
72
73
74  double ROC::get_p_approx(double x) const
75  {
76    // make x standard normal
77    x -= 0.5;
78    // Not integrating from the middle of the bin, but from the inner edge.
79    if (x>0)
80      x -= 0.5/(n_pos()*n_neg());
81    else if(x<0)
82      x += 0.5/(n_pos()*n_neg());
83    else
84      return 0.5;
85    double var = 1.0+n();
86    if (has_ties_) {
87      double correction = 0;
88      Map::const_iterator first = multimap_.begin();
89      Map::const_iterator last = multimap_.begin();
90      while (first!=multimap_.end()) {
91        size_t n = 0;
92        while (first->first == last->first) {
93          ++n;
94          ++last;
95        }
96        correction += n * (n-1) * (n+1);
97        first = last;
98      }
99      /*
100        mn(N+1)/12-[mn/(12N(N-1)) * sum(t(t-1)(t+1))] =
101        mn/12 [ N+1 - 1/(N(N-1)) * sum(t(t-1)(t+1)) ]
102      */
103      var -= correction/(n() * (n()-1));
104    }
105    var = var / (12*n_pos()*n_neg());
106    return gsl_cdf_gaussian_Q(x, std::sqrt(var));
107  }
108
109  double ROC::get_p_exact(const double block, const double nof_pos, 
110                          const double nof_neg) const
111  {
112    if (block <= 0.0)
113      return 1.0;
114    if (block > nof_neg*nof_pos)
115      return 0.0;
116    double p1 = get_p_exact(block-nof_neg, nof_pos-1, nof_neg);
117    double p2 = get_p_exact(block, nof_pos, nof_neg-1);
118    return nof_pos/(nof_pos+nof_neg)*p1 + nof_neg/(nof_pos+nof_neg)*p2;
119  }
120
121  unsigned int& ROC::minimum_size(void)
122  {
123    return minimum_size_;
124  }
125
126
127  const unsigned int& ROC::minimum_size(void) const
128  {
129    return minimum_size_;
130  }
131
132
133  double ROC::n(void) const
134  {
135    return n_pos()+n_neg();
136  }
137
138
139  double ROC::n_neg(void) const
140  {
141    return w_neg_;
142  }
143
144
145  double ROC::n_pos(void) const
146  {
147    return w_pos_;
148  }
149
150
151  double ROC::p_value() const
152  {
153    double p = 2*p_value_one_sided();
154    return std::min(p, 2-p); 
155  }
156
157
158  double ROC::p_value_one_sided() const
159  {
160    double area(area_);
161    if (area_==-1){
162      AUC auc(false);
163      area = auc.score(multimap_);
164    }
165    if (n_pos() < minimum_size_ && n_neg() < minimum_size_) {
166      // for small areas we calculate probabilitu to get larger area -
167      // not larger or equal.
168      if (area<0.5)
169        return 1-get_p_exact((1-area)*n_pos()*n_neg(), n_pos(), n_neg());
170      return get_p_exact(area*n_pos()*n_neg(), n_pos(), n_neg());
171    }
172    return get_p_approx(area);
173  }
174
175
176  void ROC::reset(void)
177  {
178    area_=-1;
179    has_ties_ = false;
180    w_pos_=0;
181    w_neg_=0;
182    multimap_.clear();
183  }
184
185}}} // of namespace statistics, yat, and theplu
Note: See TracBrowser for help on using the repository browser.