source: trunk/yat/statistics/ROC.cc @ 2564

Last change on this file since 2564 was 2564, checked in by Peter, 10 years ago

update copyright years

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 3.9 KB
Line 
1// $Id: ROC.cc 2564 2011-09-25 20:03:41Z peter $
2
3/*
4  Copyright (C) 2004, 2005 Peter Johansson
5  Copyright (C) 2006, 2007, 2008 Jari Häkkinen, Peter Johansson
6  Copyright (C) 2011 Peter Johansson
7
8  This file is part of the yat library, http://dev.thep.lu.se/yat
9
10  The yat library is free software; you can redistribute it and/or
11  modify it under the terms of the GNU General Public License as
12  published by the Free Software Foundation; either version 3 of the
13  License, or (at your option) any later version.
14
15  The yat library is distributed in the hope that it will be useful,
16  but WITHOUT ANY WARRANTY; without even the implied warranty of
17  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18  General Public License for more details.
19
20  You should have received a copy of the GNU General Public License
21  along with yat. If not, see <http://www.gnu.org/licenses/>.
22*/
23
24#include <iostream>
25
26#include "ROC.h"
27#include "AUC.h"
28
29#include <gsl/gsl_cdf.h>
30
31#include <cassert>
32#include <cmath>
33#include <utility>
34#include <vector>
35
36namespace theplu {
37namespace yat {
38namespace statistics { 
39
40  ROC::ROC(void) 
41    :minimum_size_(10)
42  {
43    reset();
44  }
45
46
47  void ROC::add(double x, bool target, double w)
48  {
49    if (!w)
50      return;
51    typedef std::multimap<double, std::pair<bool, double> > Map;
52    Map::value_type element(x, std::make_pair(target, w));
53    Map::iterator lower = multimap_.lower_bound(x);
54    if (lower!=multimap_.end() && lower->first == x)
55      has_ties_ = true;
56    multimap_.insert(lower, element);
57    //    multimap_.insert(std::make_pair(x, std::make_pair(target, w)));
58    if (target)
59      w_pos_+=w;
60    else
61      w_neg_+=w;
62    area_=-1;
63  }
64
65
66  double ROC::area(void)
67  {
68    if (area_==-1){
69      AUC auc(false);
70      area_=auc.score(multimap_);
71    }
72    return area_;
73  }
74
75
76  double ROC::get_p_approx(double x) const
77  {
78    // make x standard normal
79    x -= 0.5;
80    // Not integrating from the middle of the bin, but from the inner edge.
81    if (x>0)
82      x -= 0.5/(n_pos()*n_neg());
83    else if(x<0)
84      x += 0.5/(n_pos()*n_neg());
85    else
86      return 0.5;
87    double var = 1.0+n();
88    if (has_ties_) {
89      double correction = 0;
90      Map::const_iterator first = multimap_.begin();
91      Map::const_iterator last = multimap_.begin();
92      while (first!=multimap_.end()) {
93        size_t n = 0;
94        while (first->first == last->first) {
95          ++n;
96          ++last;
97        }
98        correction += n * (n-1) * (n+1);
99        first = last;
100      }
101      /*
102        mn(N+1)/12-[mn/(12N(N-1)) * sum(t(t-1)(t+1))] =
103        mn/12 [ N+1 - 1/(N(N-1)) * sum(t(t-1)(t+1)) ]
104      */
105      var -= correction/(n() * (n()-1));
106    }
107    var = var / (12*n_pos()*n_neg());
108    return gsl_cdf_gaussian_Q(x, std::sqrt(var));
109  }
110
111  double ROC::get_p_exact(const double block, const double nof_pos, 
112                          const double nof_neg) const
113  {
114    if (block <= 0.0)
115      return 1.0;
116    if (block > nof_neg*nof_pos)
117      return 0.0;
118    double p1 = get_p_exact(block-nof_neg, nof_pos-1, nof_neg);
119    double p2 = get_p_exact(block, nof_pos, nof_neg-1);
120    return nof_pos/(nof_pos+nof_neg)*p1 + nof_neg/(nof_pos+nof_neg)*p2;
121  }
122
123  unsigned int& ROC::minimum_size(void)
124  {
125    return minimum_size_;
126  }
127
128
129  const unsigned int& ROC::minimum_size(void) const
130  {
131    return minimum_size_;
132  }
133
134
135  double ROC::n(void) const
136  {
137    return n_pos()+n_neg();
138  }
139
140
141  double ROC::n_neg(void) const
142  {
143    return w_neg_;
144  }
145
146
147  double ROC::n_pos(void) const
148  {
149    return w_pos_;
150  }
151
152
153  double ROC::p_value() const
154  {
155    double p = 2*p_value_one_sided();
156    return std::min(p, 2-p); 
157  }
158
159
160  double ROC::p_value_one_sided() const
161  {
162    double area(area_);
163    if (area_==-1){
164      AUC auc(false);
165      area = auc.score(multimap_);
166    }
167    if (n_pos() < minimum_size_ && n_neg() < minimum_size_) {
168      // for small areas we calculate probabilitu to get larger area -
169      // not larger or equal.
170      if (area<0.5)
171        return 1-get_p_exact((1-area)*n_pos()*n_neg(), n_pos(), n_neg());
172      return get_p_exact(area*n_pos()*n_neg(), n_pos(), n_neg());
173    }
174    return get_p_approx(area);
175  }
176
177
178  void ROC::reset(void)
179  {
180    area_=-1;
181    has_ties_ = false;
182    w_pos_=0;
183    w_neg_=0;
184    multimap_.clear();
185  }
186
187}}} // of namespace statistics, yat, and theplu
Note: See TracBrowser for help on using the repository browser.