source: trunk/c++_tools/statistics/ROC.h @ 675

Last change on this file since 675 was 675, checked in by Jari Häkkinen, 17 years ago

References #83. Changing project name to yat. Compilation will fail in this revision.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 5.5 KB
Line 
1#ifndef _theplu_statistics_roc_
2#define _theplu_statistics_roc_
3
4// $Id: ROC.h 675 2006-10-10 12:08:45Z jari $
5
6/*
7  Copyright (C) The authors contributing to this file.
8
9  This file is part of the yat library, http://lev.thep.lu.se/trac/yat
10
11  The yat library is free software; you can redistribute it and/or
12  modify it under the terms of the GNU General Public License as
13  published by the Free Software Foundation; either version 2 of the
14  License, or (at your option) any later version.
15
16  The yat library is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  General Public License for more details.
20
21  You should have received a copy of the GNU General Public License
22  along with this program; if not, write to the Free Software
23  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
24  02111-1307, USA.
25*/
26
27#include "yat/classifier/Target.h"
28#include "yat/statistics/Score.h"
29
30#include <utility>
31#include <vector>
32
33namespace theplu {
34  namespace utility {
35    class vector;
36  }
37namespace statistics { 
38
39  ///
40  /// Class for ROC (Reciever Operating Characteristic).
41  ///   
42  /// As the area under an ROC curve is equivalent to Mann-Whitney U
43  /// statistica, this class can be used to perform a Mann-Whitney
44  /// U-test (aka Wilcoxon).
45  ///
46  class ROC : public Score
47  {
48 
49  public:
50    ///
51    /// Default constructor
52    ///
53    ROC(bool absolute=true);
54         
55    ///
56    /// Destructor
57    ///
58    virtual ~ROC(void) {};
59         
60    /// Function taking \a value, \a target (+1 or -1) and vector
61    /// defining what samples to use. The score is equivalent to
62    /// Mann-Whitney statistics.
63    /// @return the area under the ROC curve. If the area is less
64    /// than 0.5 and absolute=true, 1-area is returned. Complexity is
65    /// \f$ N\log N \f$ where \f$ N \f$ is number of samples.
66    ///
67    double score(const classifier::Target& target, 
68                 const utility::vector& value); 
69   
70    /**
71        Function taking values, target, weight and a vector defining
72        what samples to use. The area is defines as \f$ \frac{\sum
73        w^+w^-}{\sum w^+w^-}\f$, where the sum in the numerator goes
74        over all pairs where value+ is larger than value-. The
75        denominator goes over all pairs. If target is equal to 1,
76        sample belonges to class + otherwise sample belongs to class
77        -. @return wheighted version of area under the ROC curve. If
78        the area is less than 0.5 and absolute=true, 1-area is
79        returned. Complexity is \f$ N^2 \f$ where \f$ N \f$ is number
80        of samples.
81    */
82    double score(const classifier::Target& target, 
83                 const classifier::DataLookupWeighted1D& value); 
84       
85
86    /**
87        Function taking values, target, weight and a vector defining
88        what samples to use. The area is defines as \f$ \frac{\sum
89        w^+w^-}{\sum w^+w^-}\f$, where the sum in the numerator goes
90        over all pairs where value+ is larger than value-. The
91        denominator goes over all pairs. If target is equal to 1,
92        sample belonges to class + otherwise sample belongs to class
93        -. @return wheighted version of area under the ROC curve. If
94        the area is less than 0.5 and absolute=true, 1-area is
95        returned. Complexity is \f$ N^2 \f$ where \f$ N \f$ is number
96        of samples.
97    */
98    double score(const classifier::Target& target, 
99                 const utility::vector& value, 
100                 const utility::vector& weight); 
101       
102
103    ///
104    ///Calculates the p-value, i.e. the probability of observing an
105    ///area equally or larger if the null hypothesis is true. If P is
106    ///near zero, this casts doubt on this hypothesis. The null
107    ///hypothesis is that the values from the 2 classes are generated
108    ///from 2 identical distributions. The alternative is that the
109    ///median of the first distribution is shifted from the median of
110    ///the second distribution by a non-zero amount. If the smallest
111    ///group size is larger than minimum_size (default = 10), then P
112    ///is calculated using a normal approximation.  @return the
113    ///one-sided p-value( if absolute true is used this is equivalent
114    ///to the two-sided p-value.)
115    ///
116    double p_value(void) const;
117   
118    ///
119    /// minimum_size is the threshold for when a normal
120    /// approximation is used for the p-value calculation.
121    ///
122    /// @return reference to minimum_size
123    ///
124    inline u_int& minimum_size(void){ return minimum_size_; } 
125
126    ///
127    /// Function returning true if target is positive (binary()) for
128    /// the sample with ith lowest data value, so i=0 corresponds to
129    /// the sample with the lowest data value and i=n()-1 the sample
130    /// with highest data value.
131    ///
132    bool target(const size_t i) const;
133
134    ///
135    /// @return number of samples
136    ///
137    inline size_t n(void) const { return vec_pair_.size(); }
138
139    ///
140    /// @return number of positive samples (Target.binary()==true)
141    ///
142    inline size_t n_pos(void) const { return nof_pos_; }
143
144  private:
145   
146    /// Implemented as in MatLab 13.1
147    double get_p_approx(const double) const;
148
149    /// Implemented as in MatLab 13.1
150    double get_p_exact(const double, const double, const double) const;
151
152    double area_;
153    u_int minimum_size_;
154    u_int nof_pos_;
155    std::vector<std::pair<bool, double> > vec_pair_; // class-value-pair
156  };
157
158  ///
159  /// Stream insertion for ROC objects. The output is an Nx2 matrix
160  /// written to \a os: the first column holds the sensitivity and
161  /// the second column holds the specificity.
162  ///
163  std::ostream& operator<<(std::ostream& os, const ROC& roc);
164
165
166}} // of namespace statistics and namespace theplu
167
168#endif
169
Note: See TracBrowser for help on using the repository browser.