source: trunk/yat/statistics/ROC.h @ 821

Last change on this file since 821 was 821, checked in by Peter, 15 years ago

Modified ROC class to use AUC class in calculation of ROC area. Refs #101

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 3.9 KB
Line 
1#ifndef _theplu_yat_statistics_roc_
2#define _theplu_yat_statistics_roc_
3
4// $Id: ROC.h 821 2007-03-18 16:00:05Z peter $
5
6/*
7  Copyright (C) The authors contributing to this file.
8
9  This file is part of the yat library, http://lev.thep.lu.se/trac/yat
10
11  The yat library is free software; you can redistribute it and/or
12  modify it under the terms of the GNU General Public License as
13  published by the Free Software Foundation; either version 2 of the
14  License, or (at your option) any later version.
15
16  The yat library is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  General Public License for more details.
20
21  You should have received a copy of the GNU General Public License
22  along with this program; if not, write to the Free Software
23  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
24  02111-1307, USA.
25*/
26
27#include <algorithm>
28#include <map>
29#include <utility>
30
31namespace theplu {
32namespace yat {
33namespace classifier {
34  class DataLookup1D;
35  class DataLookupWeighted1D;
36  class Target;
37}
38namespace utility {
39  class vector;
40}
41namespace statistics { 
42
43  ///
44  /// @brief Class for Reciever Operating Characteristic.
45  ///   
46  /// As the area under an ROC curve is equivalent to Mann-Whitney U
47  /// statistica, this class can be used to perform a Mann-Whitney
48  /// U-test (aka Wilcoxon).
49  ///
50  class ROC
51  {
52 
53  public:
54    ///
55    /// @brief Default constructor
56    ///
57    ROC(void);
58         
59    ///
60    /// @brief The destructor
61    ///
62    virtual ~ROC(void);
63         
64    /**
65       Adding a data value to ROC.
66    */
67    void add(double value, bool target, double weight=1.0);
68
69    /**
70       The area is defines as \f$ \frac{\sum w^+w^-} {\sum w^+w^-}\f$,
71       where the sum in the numerator goes over all pairs where value+
72       is larger than value-. The denominator goes over all pairs.
73
74       @return Area under curve.
75    */
76    double area(void);
77
78    ///
79    /// minimum_size is the threshold for when a normal
80    /// approximation is used for the p-value calculation.
81    ///
82    /// @return reference to minimum_size
83    ///
84    u_int& minimum_size(void);
85
86    /**
87       minimum_size is the threshold for when a normal
88       approximation is used for the p-value calculation.
89       
90       @return const reference to minimum_size
91    */
92    const u_int& minimum_size(void) const;
93
94    ///
95    /// @return sum of weights
96    ///
97    size_t n(void) const;
98
99    ///
100    /// @return sum of weights with negative target
101    ///
102    size_t n_neg(void) const;
103
104    ///
105    /// @return sum of weights with positive target
106    ///
107    size_t n_pos(void) const;
108
109    ///
110    ///Calculates the p-value, i.e. the probability of observing an
111    ///area equally or larger if the null hypothesis is true. If P is
112    ///near zero, this casts doubt on this hypothesis. The null
113    ///hypothesis is that the values from the 2 classes are generated
114    ///from 2 identical distributions. The alternative is that the
115    ///median of the first distribution is shifted from the median of
116    ///the second distribution by a non-zero amount. If the smallest
117    ///group size is larger than minimum_size (default = 10), then P
118    ///is calculated using a normal approximation. 
119    ///
120    /// \note Weights should be either zero or unity, else present
121    /// implementation is nonsense.
122    ///
123    /// @return One-sided p-value.
124    ///
125    double p_value_one_sided(void) const;
126   
127    /**
128       @brief Two-sided p-value.
129
130       @return min(2*p_value_one_sided, 2-2*p_value_one_sided)
131    */
132    double p_value(void) const;
133
134    /**
135       @brief Set everything to zero
136    */
137    void reset(void);
138
139  private:
140   
141    /// Implemented as in MatLab 13.1
142    double get_p_approx(double) const;
143
144    /// Implemented as in MatLab 13.1
145    double get_p_exact(const double, const double, const double) const;
146
147    double area_;
148    u_int minimum_size_;
149    double w_neg_;
150    double w_pos_;
151    // <data pair<class, weight> >
152    std::multimap<double, std::pair<bool, double> > multimap_;
153  };
154
155}}} // of namespace statistics, yat, and theplu
156
157#endif
Note: See TracBrowser for help on using the repository browser.