source: trunk/src/ROC.h @ 112

Last change on this file since 112 was 112, checked in by Peter, 18 years ago

added the choice to not use all data points but just the train_set

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 3.3 KB
Line 
1// $Id: ROC.h 112 2004-07-07 10:23:44Z peter $
2
3#ifndef _theplu_cpptools_roc_
4#define _theplu_cpptools_roc_
5
6// C++ tools include
7/////////////////////
8#include "Score.h"
9#include "vector.h"
10#include <gsl/gsl_cdf.h>
11
// Standard C++ includes
////////////////////////
#include <iosfwd>
#include <utility>
#include <vector>
16
17namespace theplu {
18namespace cpptools { 
19  ///
20  /// Class for ROC (Reciever Operating Characteristic).
21  ///   
22 
23  class ROC : public Score
24  {
25 
26  public:
27    ///
28    /// Default constructor
29    ///
30    ROC();
31         
32    ///
33    /// Constructor taking a value vector, a target vector (+1 or -1)
34    /// and a vector defining what samples to use.
35    ///
36    ROC(const gslapi::vector&, const gslapi::vector&, 
37        const std::vector<size_t>& = std::vector<size_t>());
38         
39    ///
40    /// Destructor
41    ///
42    virtual ~ROC(void) {};
43         
44    /// Equivalent to the Mann-Whitney score, but normalized to be
45    /// between zero and one.  @return the area under the ROC curve
46    ///
47    double score() ;
48   
49    /// Function taking a vector of values and a vector of target (+1
50    /// or -1). The score is equivalent to the Mann-Whitney score but
51    /// normalized to be between zero and one. @return the area under
52    /// the ROC curve
53    ///
54    double score(const gslapi::vector&, const gslapi::vector&, 
55                 const std::vector<size_t>& = std::vector<size_t>());
56       
57    ///
58    ///Calculates the p-value, i.e. the probability of observing an area
59    ///equally or larger if the null hypothesis is true. If P is near zero,
60    ///this casts doubt on this hypothesis. The null hypothesis is that the
61    ///values from the 2 classes are generated from 2 identical
62    ///distributions. The alternative is that the median of the first
63    ///distribution is shifted from the median of the second distribution by a
64    ///non-zero amount. If the smallest group size is larger than minimum_size
65    ///(default = 10), then P is calculated using a normal approximation.
66    /// @return the one-sided p-value
67    ///
68    double p_value() ;
69         
70    ///
71    /// @return the targets in train_set sorted with respect to the
72    /// corresponding data
73    ///
74    gslapi::vector ROC::target(void) const;
75
76    ///
77    /// Changes minimum_size , i.e. the threshold when a normal
78    /// approximation is used for the p-value calculation.
79    ///
80    inline void minimum_size(const u_int minimum_size) 
81    {minimum_size_ = minimum_size; } 
82
83  private:
84    double area_;
85    gslapi::vector data_;
86    u_int minimum_size_;
87    u_int nof_pos_;
88    gslapi::vector target_;
89    std::vector<size_t> train_set_; 
90    /// pair of target and data. should always be sorted with respect to
91    /// data.
92    std::vector<std::pair<double, double> > value_; 
93   
94    ///
95    ///
96    /// Implemented as in MatLab 13.1
97    /// @return the p-value
98    ///
99    double ROC::get_p_approx(const double) const;
100   
101    ///
102    /// @return the p-value
103    ///
104    double ROC::get_p_exact(const double, const double, 
105                            const double);
106 
107    ///
108    /// sorting value_, should always be done when changing train_set_
109    ///
110    void ROC::sort();
111       
112  };
113
114  ///
115  /// The output operator for the ROC class. The output is an Nx2
116  /// matrix, where the first column is the sensitivity and second
117  /// is the specificity.
118  ///
119  std::ostream& operator<< (std::ostream& s, const ROC&);
120
121
122}} // of namespace cpptools and namespace theplu
123
124#endif
125
Note: See TracBrowser for help on using the repository browser.