source: trunk/yat/statistics/ROC.h @ 703

Last change on this file since 703 was 703, checked in by Jari Häkkinen, 15 years ago

Addresses #65 and #170.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 5.5 KB
Line 
1#ifndef _theplu_yat_statistics_roc_
2#define _theplu_yat_statistics_roc_
3
4// $Id: ROC.h 703 2006-12-18 00:47:44Z jari $
5
6/*
7  Copyright (C) The authors contributing to this file.
8
9  This file is part of the yat library, http://lev.thep.lu.se/trac/yat
10
11  The yat library is free software; you can redistribute it and/or
12  modify it under the terms of the GNU General Public License as
13  published by the Free Software Foundation; either version 2 of the
14  License, or (at your option) any later version.
15
16  The yat library is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  General Public License for more details.
20
21  You should have received a copy of the GNU General Public License
22  along with this program; if not, write to the Free Software
23  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
24  02111-1307, USA.
25*/
26
#include "Score.h"
#include "yat/classifier/Target.h"

#include <iosfwd>
#include <utility>
#include <vector>
32
33namespace theplu {
34namespace yat {
35  namespace utility {
36    class vector;
37  }
38namespace statistics { 
39
40  ///
41  /// Class for ROC (Reciever Operating Characteristic).
42  ///   
43  /// As the area under an ROC curve is equivalent to Mann-Whitney U
44  /// statistica, this class can be used to perform a Mann-Whitney
45  /// U-test (aka Wilcoxon).
46  ///
47  class ROC : public Score
48  {
49 
50  public:
51    ///
52    /// @brief Default constructor
53    ///
54    ROC(bool absolute=true);
55         
56    ///
57    /// @brief The destructor
58    ///
59    virtual ~ROC(void);
60         
61    /// Function taking \a value, \a target (+1 or -1) and vector
62    /// defining what samples to use. The score is equivalent to
63    /// Mann-Whitney statistics.
64    /// @return the area under the ROC curve. If the area is less
65    /// than 0.5 and absolute=true, 1-area is returned. Complexity is
66    /// \f$ N\log N \f$ where \f$ N \f$ is number of samples.
67    ///
68    double score(const classifier::Target& target, 
69                 const utility::vector& value); 
70   
71    /**
72        Function taking values, target, weight and a vector defining
73        what samples to use. The area is defines as \f$ \frac{\sum
74        w^+w^-}{\sum w^+w^-}\f$, where the sum in the numerator goes
75        over all pairs where value+ is larger than value-. The
76        denominator goes over all pairs. If target is equal to 1,
77        sample belonges to class + otherwise sample belongs to class
78        -. @return wheighted version of area under the ROC curve. If
79        the area is less than 0.5 and absolute=true, 1-area is
80        returned. Complexity is \f$ N^2 \f$ where \f$ N \f$ is number
81        of samples.
82    */
83    double score(const classifier::Target& target, 
84                 const classifier::DataLookupWeighted1D& value); 
85       
86
87    /**
88        Function taking values, target, weight and a vector defining
89        what samples to use. The area is defines as \f$ \frac{\sum
90        w^+w^-}{\sum w^+w^-}\f$, where the sum in the numerator goes
91        over all pairs where value+ is larger than value-. The
92        denominator goes over all pairs. If target is equal to 1,
93        sample belonges to class + otherwise sample belongs to class
94        -. @return wheighted version of area under the ROC curve. If
95        the area is less than 0.5 and absolute=true, 1-area is
96        returned. Complexity is \f$ N^2 \f$ where \f$ N \f$ is number
97        of samples.
98    */
99    double score(const classifier::Target& target, 
100                 const utility::vector& value, 
101                 const utility::vector& weight); 
102       
103
104    ///
105    ///Calculates the p-value, i.e. the probability of observing an
106    ///area equally or larger if the null hypothesis is true. If P is
107    ///near zero, this casts doubt on this hypothesis. The null
108    ///hypothesis is that the values from the 2 classes are generated
109    ///from 2 identical distributions. The alternative is that the
110    ///median of the first distribution is shifted from the median of
111    ///the second distribution by a non-zero amount. If the smallest
112    ///group size is larger than minimum_size (default = 10), then P
113    ///is calculated using a normal approximation.  @return the
114    ///one-sided p-value( if absolute true is used this is equivalent
115    ///to the two-sided p-value.)
116    ///
117    double p_value(void) const;
118   
119    ///
120    /// minimum_size is the threshold for when a normal
121    /// approximation is used for the p-value calculation.
122    ///
123    /// @return reference to minimum_size
124    ///
125    inline u_int& minimum_size(void){ return minimum_size_; } 
126
127    ///
128    /// Function returning true if target is positive (binary()) for
129    /// the sample with ith lowest data value, so i=0 corresponds to
130    /// the sample with the lowest data value and i=n()-1 the sample
131    /// with highest data value.
132    ///
133    bool target(const size_t i) const;
134
135    ///
136    /// @return number of samples
137    ///
138    inline size_t n(void) const { return vec_pair_.size(); }
139
140    ///
141    /// @return number of positive samples (Target.binary()==true)
142    ///
143    inline size_t n_pos(void) const { return nof_pos_; }
144
145  private:
146   
147    /// Implemented as in MatLab 13.1
148    double get_p_approx(const double) const;
149
150    /// Implemented as in MatLab 13.1
151    double get_p_exact(const double, const double, const double) const;
152
153    double area_;
154    u_int minimum_size_;
155    u_int nof_pos_;
156    std::vector<std::pair<bool, double> > vec_pair_; // class-value-pair
157  };
158
159  ///
160  /// The output operator for the ROC class. The output is an Nx2
161  /// matrix, where the first column is the sensitivity and second
162  /// is the specificity.
163  ///
164  std::ostream& operator<< (std::ostream& s, const ROC&);
165
166}}} // of namespace statistics, yat, and theplu
167
168#endif
Note: See TracBrowser for help on using the repository browser.