source: trunk/lib/statistics/ROC.h @ 447

Last change on this file since 447 was 447, checked in by Peter, 17 years ago

added copy constructor for KernelView and added construction of KernelView in test

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 3.6 KB
Line 
1// $Id: ROC.h 447 2005-12-15 18:51:18Z peter $
2
3#ifndef _theplu_statistics_roc_
4#define _theplu_statistics_roc_
5
6#include <c++_tools/gslapi/vector.h>
7#include <c++_tools/statistics/Score.h>
8
9#include <utility>
10#include <vector>
11
12namespace theplu {
13namespace statistics { 
14
15  ///
16  /// Class for ROC (Reciever Operating Characteristic).
17  ///   
18  class ROC : public Score
19  {
20 
21  public:
22    ///
23    /// Default constructor
24    ///
25    ROC(bool absolute=true);
26         
27    ///
28    /// Destructor
29    ///
30    virtual ~ROC(void) {};
31         
32    /// Function taking \a value, \a target (+1 or -1) and vector
33    /// defining what samples to use. The score is equivalent to
34    /// Mann-Whitney statistics. If target is equal to 1, sample
35    /// belonges to class + otherwise sample belongs to class
36    /// -. @return the area under the ROC curve. If the area is less
37    /// than 0.5 and absolute=true, 1-area is returned.
38    ///
39    double score(const gslapi::vector& target, const gslapi::vector& value, 
40                 const std::vector<size_t>& = std::vector<size_t>());
41   
42    /// Function taking values, target, weight and a vector defining
43    /// what samples to use. The area is defines as \f$ \frac{\sum
44    /// w^+w^-}{\sum w^+w^-}\f$, where the sum in the numerator goes
45    /// over all pairs where value+ is larger than value-. The
46    /// denominator goes over all pairs. If target is equal to 1,
47    /// sample belonges to class + otherwise sample belongs to class
48    /// -. @return wheighted version of area under the ROC curve. If
49    /// the area is less than 0.5 and absolute=true, 1-area is
50    /// returned.
51    ///
52    double score(const gslapi::vector& target, const gslapi::vector& value,
53                 const gslapi::vector& weight, 
54                 const std::vector<size_t>& = std::vector<size_t>());
55       
56
57    ///
58    ///Calculates the p-value, i.e. the probability of observing an
59    ///area equally or larger if the null hypothesis is true. If P is
60    ///near zero, this casts doubt on this hypothesis. The null
61    ///hypothesis is that the values from the 2 classes are generated
62    ///from 2 identical distributions. The alternative is that the
63    ///median of the first distribution is shifted from the median of
64    ///the second distribution by a non-zero amount. If the smallest
65    ///group size is larger than minimum_size (default = 10), then P
66    ///is calculated using a normal approximation.  @return the
67    ///one-sided p-value( if absolute true is used this is equivalent
68    ///to the two-sided p-value.)
69    ///
70    double p_value(void) const;
71         
72    ///
73    /// @return the targets in train_set sorted with respect to the
74    /// corresponding data
75    ///
76    gslapi::vector target(void) const;
77
78    ///
79    /// minimum_size is the threshold for when a normal
80    /// approximation is used for the p-value calculation.
81    ///
82    /// @return reference to minimum_size
83    ///
84    inline u_int& minimum_size(void){ return minimum_size_; } 
85
86  private:
87    double area_;
88    u_int minimum_size_;
89    u_int nof_pos_;
90    /// pair of target and data. should always be sorted with respect to
91    /// data.
92    std::vector<std::pair<double, double> > value_; 
93   
94    /// Implemented as in MatLab 13.1
95    double get_p_approx(const double) const;
96
97    /// Implemented as in MatLab 13.1
98    double get_p_exact(const double, const double, const double) const;
99
100    /// sorting value_, should always be done when changing train_set_
101    void sort(void);
102       
103  };
104
105  ///
106  /// The output operator for the ROC class. The output is an Nx2
107  /// matrix, where the first column is the sensitivity and second
108  /// is the specificity.
109  ///
110  std::ostream& operator<< (std::ostream& s, const ROC&);
111
112
113}} // of namespace statistics and namespace theplu
114
115#endif
116
Note: See TracBrowser for help on using the repository browser.