source: trunk/yat/statistics/ROC.h @ 1486

Last change on this file since 1486 was 1486, checked in by Jari Häkkinen, 13 years ago

Addresses #436.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 4.1 KB
Line 
1#ifndef _theplu_yat_statistics_roc_
2#define _theplu_yat_statistics_roc_
3
4// $Id: ROC.h 1486 2008-09-09 21:17:19Z jari $
5
6/*
7  Copyright (C) 2004 Peter Johansson
8  Copyright (C) 2005, 2006, 2007, 2008 Jari Häkkinen, Peter Johansson
9
10  This file is part of the yat library, http://dev.thep.lu.se/yat
11
12  The yat library is free software; you can redistribute it and/or
13  modify it under the terms of the GNU General Public License as
14  published by the Free Software Foundation; either version 3 of the
15  License, or (at your option) any later version.
16
17  The yat library is distributed in the hope that it will be useful,
18  but WITHOUT ANY WARRANTY; without even the implied warranty of
19  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20  General Public License for more details.
21
22  You should have received a copy of the GNU General Public License
23  along with this program; if not, write to the Free Software
24  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
25  02111-1307, USA.
26*/
27
28#include "yat/classifier/Target.h"
29#include "yat/utility/iterator_traits.h"
30
31#include <algorithm>
32#include <map>
33#include <utility>
34
35namespace theplu {
36namespace yat {
37namespace statistics { 
38
39  ///
40  /// @brief Class for Receiver Operating Characteristic (ROC).
41  ///
42  /// As the area under an ROC curve is equivalent to the Mann-Whitney U
43  /// statistic, this class can be used to perform a Mann-Whitney
44  /// U-test (aka Wilcoxon).
45  ///
46  class ROC
47  {
48 
49  public:
50    ///
51    /// @brief Default constructor
52    ///
53    ROC(void);
54         
55    ///
56    /// @brief The destructor
57    ///
58    virtual ~ROC(void);
59         
60    /**
61       @brief Add a data value to the ROC.
62
         @param value the data value
         @param target class of the data value; presumably true means
         positive and false means negative (cf. n_pos() and n_neg()) -
         TODO confirm against the implementation
         @param weight weight of the data value; 1.0 when omitted

         NOTE(review): the overload referenced below is not declared in
         this class; presumably it is a function template declared
         elsewhere - confirm that the reference is still valid.

63       \see add(T &o, ForwardIterator first, ForwardIterator last,
64       const classifier::Target &target)
65    */
66    void add(double value, bool target, double weight=1.0);
67
68    /**
69       The area is defined as \f$ \frac{\sum w^+w^-} {\sum w^+w^-}\f$,
70       where the sum in the numerator goes over all pairs where value+
71       is larger than value-. The denominator goes over all pairs.
72
         NOTE(review): unlike the other accessors this function is not
         const; presumably it stores its result in area_ - confirm.

73       @return Area under curve.
74    */
75    double area(void);
76
77    ///
78    /// minimum_size is the threshold for when a normal
79    /// approximation is used for the p-value calculation.
80    ///
81    /// @return non-const reference to minimum_size, so the threshold
      /// can be modified through the returned reference
82    ///
83    unsigned int& minimum_size(void);
84
85    /**
86       minimum_size is the threshold for when a normal
87       approximation is used for the p-value calculation.
88       
89       @return const reference to minimum_size
90    */
91    const unsigned int& minimum_size(void) const;
92
93    ///
94    /// @return sum of weights
95    ///
96    double n(void) const;
97
98    ///
99    /// @return sum of weights with negative target
100    ///
101    double n_neg(void) const;
102
103    ///
104    /// @return sum of weights with positive target
105    ///
106    double n_pos(void) const;
107
108    ///
109    /// Calculates the p-value, i.e. the probability of observing an
110    /// area equal to or larger if the null hypothesis is true. If P is
111    /// near zero, this casts doubt on this hypothesis. The null
112    /// hypothesis is that the values from the 2 classes are generated
113    /// from 2 identical distributions. The alternative is that the
114    /// median of the first distribution is shifted from the median of
115    /// the second distribution by a non-zero amount. If the smallest
116    /// group size is larger than minimum_size (default = 10), then P
117    /// is calculated using a normal approximation.
118    ///
119    /// \note Weights should be either zero or unity, else present
120    /// implementation is nonsense.
121    ///
122    /// @return One-sided p-value.
123    ///
124    double p_value_one_sided(void) const;
125   
126    /**
127       @brief Two-sided p-value.
128
129       @return min(2*p_value_one_sided, 2-2*p_value_one_sided)
130    */
131    double p_value(void) const;
132
133    /**
134       @brief Set everything to zero
135    */
136    void reset(void);
137
138  private:
139   
140    /// Implemented as in MatLab 13.1
       /// NOTE(review): presumably the normal approximation mentioned in
       /// p_value_one_sided(); the parameter is unnamed here - confirm
       /// its meaning against the implementation.
141    double get_p_approx(double) const;
142
143    /// Implemented as in MatLab 13.1
       /// NOTE(review): presumably the exact calculation used when the
       /// normal approximation is not; the three parameters are unnamed
       /// here - confirm their meaning against the implementation.
144    double get_p_exact(const double, const double, const double) const;
145
146    double area_; // presumably the area returned by area() - TODO confirm
147    unsigned int minimum_size_; // presumably the threshold exposed by minimum_size() - TODO confirm
148    double w_neg_; // presumably sum of weights with negative target (cf. n_neg()) - TODO confirm
149    double w_pos_; // presumably sum of weights with positive target (cf. n_pos()) - TODO confirm
150    // key: data value; mapped value: pair<class, weight>
151    std::multimap<double, std::pair<bool, double> > multimap_;
152  };
153
154}}} // of namespace statistics, yat, and theplu
155
156#endif
Note: See TracBrowser for help on using the repository browser.