source: trunk/yat/statistics/ROC.h @ 747

Last change on this file since 747 was 747, checked in by Peter, 15 years ago

replaced includes in header files with forward declarations when possible. Added some includes in cc files.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 5.4 KB
Line 
1#ifndef _theplu_yat_statistics_roc_
2#define _theplu_yat_statistics_roc_
3
4// $Id: ROC.h 747 2007-02-11 13:26:41Z peter $
5
6/*
7  Copyright (C) The authors contributing to this file.
8
9  This file is part of the yat library, http://lev.thep.lu.se/trac/yat
10
11  The yat library is free software; you can redistribute it and/or
12  modify it under the terms of the GNU General Public License as
13  published by the Free Software Foundation; either version 2 of the
14  License, or (at your option) any later version.
15
16  The yat library is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  General Public License for more details.
20
21  You should have received a copy of the GNU General Public License
22  along with this program; if not, write to the Free Software
23  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
24  02111-1307, USA.
25*/
26
27#include "Score.h"
28
29#include <utility>
30#include <vector>
31
32namespace theplu {
33namespace yat {
34namespace classifier {
35  class Target; // forward declaration; this header only uses Target by const reference
36} // NOTE(review): classifier::DataLookupWeighted1D is used by ROC::score but is not declared here - presumably Score.h declares it; confirm
37namespace utility {
38  class vector; // forward declaration; this header only uses vector by const reference
39}
40namespace statistics { 
41
42  ///
43  /// Class for ROC (Reciever Operating Characteristic).
44  ///   
45  /// As the area under an ROC curve is equivalent to Mann-Whitney U
46  /// statistica, this class can be used to perform a Mann-Whitney
47  /// U-test (aka Wilcoxon).
48  ///
49  class ROC : public Score
50  {
51 
52  public:
53    ///
54    /// @brief Default constructor
55    ///
56    ROC(bool absolute=true);
57         
58    ///
59    /// @brief The destructor
60    ///
61    virtual ~ROC(void);
62         
63    ///
64    /// minimum_size is the threshold for when a normal
65    /// approximation is used for the p-value calculation.
66    ///
67    /// @return reference to minimum_size
68    ///
69    u_int& minimum_size(void);
70
71    ///
72    /// @return number of samples
73    ///
74    size_t n(void) const;
75
76    ///
77    /// @return number of positive samples (Target.binary()==true)
78    ///
79    size_t n_pos(void) const;
80
81    ///
82    ///Calculates the p-value, i.e. the probability of observing an
83    ///area equally or larger if the null hypothesis is true. If P is
84    ///near zero, this casts doubt on this hypothesis. The null
85    ///hypothesis is that the values from the 2 classes are generated
86    ///from 2 identical distributions. The alternative is that the
87    ///median of the first distribution is shifted from the median of
88    ///the second distribution by a non-zero amount. If the smallest
89    ///group size is larger than minimum_size (default = 10), then P
90    ///is calculated using a normal approximation.  @return the
91    ///one-sided p-value( if absolute true is used this is equivalent
92    ///to the two-sided p-value.)
93    ///
94    double p_value(void) const;
95   
96    /// Function taking \a value, \a target (+1 or -1) and vector
97    /// defining what samples to use. The score is equivalent to
98    /// Mann-Whitney statistics.
99    /// @return the area under the ROC curve. If the area is less
100    /// than 0.5 and absolute=true, 1-area is returned. Complexity is
101    /// \f$ N\log N \f$ where \f$ N \f$ is number of samples.
102    ///
103    double score(const classifier::Target& target, 
104                 const utility::vector& value); 
105   
106    /**
107        Function taking values, target, weight and a vector defining
108        what samples to use. The area is defines as \f$ \frac{\sum
109        w^+w^-}{\sum w^+w^-}\f$, where the sum in the numerator goes
110        over all pairs where value+ is larger than value-. The
111        denominator goes over all pairs. If target is equal to 1,
112        sample belonges to class + otherwise sample belongs to class
113        -. @return wheighted version of area under the ROC curve. If
114        the area is less than 0.5 and absolute=true, 1-area is
115        returned. Complexity is \f$ N^2 \f$ where \f$ N \f$ is number
116        of samples.
117    */
118    double score(const classifier::Target& target, 
119                 const classifier::DataLookupWeighted1D& value); 
120
121    /**
122        Function taking values, target, weight and a vector defining
123        what samples to use. The area is defines as \f$ \frac{\sum
124        w^+w^-}{\sum w^+w^-}\f$, where the sum in the numerator goes
125        over all pairs where value+ is larger than value-. The
126        denominator goes over all pairs. If target is equal to 1,
127        sample belonges to class + otherwise sample belongs to class
128        -. @return wheighted version of area under the ROC curve. If
129        the area is less than 0.5 and absolute=true, 1-area is
130        returned. Complexity is \f$ N^2 \f$ where \f$ N \f$ is number
131        of samples.
132    */
133    double score(const classifier::Target& target, 
134                 const utility::vector& value, 
135                 const utility::vector& weight); 
136
137    ///
138    /// Function returning true if target is positive (binary()) for
139    /// the sample with ith lowest data value, so i=0 corresponds to
140    /// the sample with the lowest data value and i=n()-1 the sample
141    /// with highest data value.
142    ///
143    bool target(const size_t i) const;
144
145  private:
146   
147    /// Implemented as in MatLab 13.1
148    double get_p_approx(const double) const;
149
150    /// Implemented as in MatLab 13.1
151    double get_p_exact(const double, const double, const double) const;
152
153    double area_;
154    u_int minimum_size_;
155    u_int nof_pos_;
156    std::vector<std::pair<bool, double> > vec_pair_; // class-value-pair
157  };
158
159  ///
160  /// The output operator for the ROC class. The output is an Nx2
161  /// matrix, where the first column is the sensitivity and the
162  /// second column is the specificity.
163  /// @return a std::ostream reference (presumably the stream \a s; confirm in ROC.cc)
164  std::ostream& operator<< (std::ostream& s, const ROC&);
165
166}}} // of namespace statistics, yat, and theplu
167
168#endif
Note: See TracBrowser for help on using the repository browser.