source: trunk/yat/statistics/ROC.h @ 1141

Last change on this file since 1141 was 1141, checked in by Peter, 14 years ago

adding iterator to ROC interface - refs #292

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 4.4 KB
Line 
1#ifndef _theplu_yat_statistics_roc_
2#define _theplu_yat_statistics_roc_
3
4// $Id: ROC.h 1141 2008-02-25 13:22:25Z peter $
5
6/*
7  Copyright (C) 2004 Peter Johansson
8  Copyright (C) 2005, 2006 Jari Häkkinen, Peter Johansson
9  Copyright (C) 2007 Peter Johansson
10
11  This file is part of the yat library, http://trac.thep.lu.se/yat
12
13  The yat library is free software; you can redistribute it and/or
14  modify it under the terms of the GNU General Public License as
15  published by the Free Software Foundation; either version 2 of the
16  License, or (at your option) any later version.
17
18  The yat library is distributed in the hope that it will be useful,
19  but WITHOUT ANY WARRANTY; without even the implied warranty of
20  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21  General Public License for more details.
22
23  You should have received a copy of the GNU General Public License
24  along with this program; if not, write to the Free Software
25  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
26  02111-1307, USA.
27*/
28
29#include "yat/classifier/Target.h"
30#include "yat/utility/iterator_traits.h"
31
32#include <algorithm>
33#include <map>
34#include <utility>
35
36namespace theplu {
37namespace yat {
38namespace statistics { 
39
/**
   @brief Class for Receiver Operating Characteristic.

   As the area under an ROC curve is equivalent to the Mann-Whitney U
   statistic, this class can be used to perform a Mann-Whitney
   U-test (aka Wilcoxon).
*/
class ROC
{

public:
  ///
  /// @brief Default constructor
  ///
  ROC(void);

  ///
  /// @brief The destructor
  ///
  virtual ~ROC(void);

  /**
     @brief Add a data value to ROC.

     @param value the data value
     @param target binary class label of \a value
     @param weight weight of the data point; defaults to 1.0
  */
  void add(double value, bool target, double weight=1.0);

  /**
     The area is defined as
     \f$ \frac{\sum_{x^+ > x^-} w^+w^-} {\sum w^+w^-} \f$,
     where the sum in the numerator goes over all pairs where value+
     is larger than value-. The denominator goes over all pairs.

     @return Area under curve.
  */
  double area(void);

  /**
     minimum_size is the threshold for when a normal
     approximation is used for the p-value calculation.

     @return reference to minimum_size
  */
  // unsigned int rather than the non-standard u_int typedef, which is
  // only available where <sys/types.h> happens to provide it.
  unsigned int& minimum_size(void);

  /**
     minimum_size is the threshold for when a normal
     approximation is used for the p-value calculation.

     @return const reference to minimum_size
  */
  const unsigned int& minimum_size(void) const;

  ///
  /// @return sum of weights
  ///
  double n(void) const;

  ///
  /// @return sum of weights with negative target
  ///
  double n_neg(void) const;

  ///
  /// @return sum of weights with positive target
  ///
  double n_pos(void) const;

  /**
     Calculates the p-value, i.e. the probability of observing an
     area equal or larger if the null hypothesis is true. If P is
     near zero, this casts doubt on this hypothesis. The null
     hypothesis is that the values from the 2 classes are generated
     from 2 identical distributions. The alternative is that the
     median of the first distribution is shifted from the median of
     the second distribution by a non-zero amount. If the smallest
     group size is larger than minimum_size (default = 10), then P
     is calculated using a normal approximation.

     \note Weights should be either zero or unity, else present
     implementation is nonsense.

     @return One-sided p-value.
  */
  double p_value_one_sided(void) const;

  /**
     @brief Two-sided p-value.

     @return min(2*p_value_one_sided, 2-2*p_value_one_sided)
  */
  double p_value(void) const;

  /**
     @brief Set everything to zero
  */
  void reset(void);

private:

  /// Implemented as in MatLab 13.1
  double get_p_approx(double) const;

  /// Implemented as in MatLab 13.1
  double get_p_exact(const double, const double, const double) const;

  double area_;
  // threshold exposed through minimum_size()
  unsigned int minimum_size_;
  double w_neg_;
  double w_pos_;
  // <data, pair<class, weight> >
  std::multimap<double, std::pair<bool, double> > multimap_;
};
151
152  /**
153     Add a range [first, last) of values to ROC. The first last-first
154     elements in Target.binary are used.
155   */
156  template<typename ForwardIterator>
157  void add(ROC& roc, 
158           ForwardIterator first, ForwardIterator last, 
159           const classifier::Target& target)
160  {
161    for (size_t i=0; first!=last; ++i, ++first)
162      roc.add(utility::iterator_traits<ForwardIterator>().data(),
163              target.binary(i), 
164              utility::iterator_traits<ForwardIterator>().weight());
165  }
166}}} // of namespace statistics, yat, and theplu
167
168#endif
Note: See TracBrowser for help on using the repository browser.