source: branches/0.3.1/yat/statistics/ROC.h @ 843

Last change on this file since 843 was 843, checked in by Peter, 16 years ago

Fixes #221. regression_test fails on peter@lev (see refs #222), yet I commit this change.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 4.0 KB
Line 
1#ifndef _theplu_yat_statistics_roc_
2#define _theplu_yat_statistics_roc_
3
4// $Id: ROC.h 843 2007-04-25 14:10:44Z peter $
5
6/*
7  Copyright (C) 2004 Peter Johansson
8  Copyright (C) 2005, 2006 Jari Häkkinen, Peter Johansson
9  Copyright (C) 2007 Peter Johansson
10
11  This file is part of the yat library, http://lev.thep.lu.se/trac/yat
12
13  The yat library is free software; you can redistribute it and/or
14  modify it under the terms of the GNU General Public License as
15  published by the Free Software Foundation; either version 2 of the
16  License, or (at your option) any later version.
17
18  The yat library is distributed in the hope that it will be useful,
19  but WITHOUT ANY WARRANTY; without even the implied warranty of
20  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21  General Public License for more details.
22
23  You should have received a copy of the GNU General Public License
24  along with this program; if not, write to the Free Software
25  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
26  02111-1307, USA.
27*/
28
29#include <algorithm>
30#include <map>
31#include <utility>
32
33namespace theplu {
34namespace yat {
35namespace classifier {
36  class DataLookup1D;
37  class DataLookupWeighted1D;
38  class Target;
39}
40namespace utility {
41  class vector;
42}
43namespace statistics { 
44
45  ///
46  /// @brief Class for Reciever Operating Characteristic.
47  ///   
48  /// As the area under an ROC curve is equivalent to Mann-Whitney U
49  /// statistica, this class can be used to perform a Mann-Whitney
50  /// U-test (aka Wilcoxon).
51  ///
52  class ROC
53  {
54 
55  public:
56    ///
57    /// @brief Default constructor
58    ///
59    ROC(void);
60         
61    ///
62    /// @brief The destructor
63    ///
64    virtual ~ROC(void);
65         
66    /**
67       Adding a data value to ROC.
68    */
69    void add(double value, bool target, double weight=1.0);
70
71    /**
72       The area is defines as \f$ \frac{\sum w^+w^-} {\sum w^+w^-}\f$,
73       where the sum in the numerator goes over all pairs where value+
74       is larger than value-. The denominator goes over all pairs.
75
76       @return Area under curve.
77    */
78    double area(void);
79
80    ///
81    /// minimum_size is the threshold for when a normal
82    /// approximation is used for the p-value calculation.
83    ///
84    /// @return reference to minimum_size
85    ///
86    u_int& minimum_size(void);
87
88    /**
89       minimum_size is the threshold for when a normal
90       approximation is used for the p-value calculation.
91       
92       @return const reference to minimum_size
93    */
94    const u_int& minimum_size(void) const;
95
96    ///
97    /// @return sum of weights
98    ///
99    double n(void) const;
100
101    ///
102    /// @return sum of weights with negative target
103    ///
104    double n_neg(void) const;
105
106    ///
107    /// @return sum of weights with positive target
108    ///
109    double n_pos(void) const;
110
111    ///
112    ///Calculates the p-value, i.e. the probability of observing an
113    ///area equally or larger if the null hypothesis is true. If P is
114    ///near zero, this casts doubt on this hypothesis. The null
115    ///hypothesis is that the values from the 2 classes are generated
116    ///from 2 identical distributions. The alternative is that the
117    ///median of the first distribution is shifted from the median of
118    ///the second distribution by a non-zero amount. If the smallest
119    ///group size is larger than minimum_size (default = 10), then P
120    ///is calculated using a normal approximation. 
121    ///
122    /// \note Weights should be either zero or unity, else present
123    /// implementation is nonsense.
124    ///
125    /// @return One-sided p-value.
126    ///
127    double p_value_one_sided(void) const;
128   
129    /**
130       @brief Two-sided p-value.
131
132       @return min(2*p_value_one_sided, 2-2*p_value_one_sided)
133    */
134    double p_value(void) const;
135
136    /**
137       @brief Set everything to zero
138    */
139    void reset(void);
140
141  private:
142   
143    /// Implemented as in MatLab 13.1
144    double get_p_approx(double) const;
145
146    /// Implemented as in MatLab 13.1
147    double get_p_exact(const double, const double, const double) const;
148
149    double area_;
150    u_int minimum_size_;
151    double w_neg_;
152    double w_pos_;
153    // <data pair<class, weight> >
154    std::multimap<double, std::pair<bool, double> > multimap_;
155  };
156
157}}} // of namespace statistics, yat, and theplu
158
159#endif
Note: See TracBrowser for help on using the repository browser.