source: trunk/lib/statistics/ROC.h @ 465

Last change on this file since 465 was 465, checked in by Peter, 17 years ago

Implemented score for WilcoxonFoldChange.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 3.8 KB
Line 
1// $Id: ROC.h 465 2005-12-16 23:19:24Z peter $
2
3#ifndef _theplu_statistics_roc_
4#define _theplu_statistics_roc_
5
6#include <c++_tools/gslapi/vector.h>
7#include <c++_tools/statistics/Score.h>
8
9#include <utility>
10#include <vector>
11
12namespace theplu {
13namespace statistics { 
14
15  ///
16  /// Class for ROC (Receiver Operating Characteristic).
17  ///   
18  class ROC : public Score
19  {
20 
21  public:
22    ///
23    /// Default constructor. @param absolute if true, score() returns
24    /// 1-area whenever the area is less than 0.5 (see score()).
25    ROC(bool absolute=true);
26         
27    ///
28    /// Destructor
29    ///
30    virtual ~ROC(void) {};
31         
32    /// Function taking \a target (+1 or -1), \a value and a vector
33    /// defining what samples to use. The score is equivalent to the
34    /// Mann-Whitney statistic. If target is equal to 1, the sample
35    /// belongs to class +, otherwise the sample belongs to class
36    /// -. @return the area under the ROC curve. If the area is less
37    /// than 0.5 and absolute=true, 1-area is returned. Complexity is
38    /// \f$ N\log N \f$ where \f$ N \f$ is number of samples.
39    ///
40    double score(const gslapi::vector& target, const gslapi::vector& value, 
41                 const std::vector<size_t>& = std::vector<size_t>());
42   
43    /// Function taking target, values, weight and a vector defining
44    /// what samples to use. The area is defined as \f$ \frac{\sum_{x^+>x^-}
45    /// w^+w^-}{\sum w^+w^-}\f$, where the sum in the numerator goes
46    /// over all pairs where value+ is larger than value-. The
47    /// denominator goes over all pairs. If target is equal to 1, the
48    /// sample belongs to class +, otherwise the sample belongs to class
49    /// -. @return weighted version of area under the ROC curve. If
50    /// the area is less than 0.5 and absolute=true, 1-area is
51    /// returned. Complexity is \f$ N^2 \f$ where \f$ N \f$ is number
52    /// of samples.
53    ///
54    double score(const gslapi::vector& target, const gslapi::vector& value,
55                 const gslapi::vector& weight, 
56                 const std::vector<size_t>& = std::vector<size_t>());
57       
58
59    ///
60    /// Calculates the p-value, i.e. the probability of observing an
61    /// area equal or larger if the null hypothesis is true. If P is
62    /// near zero, this casts doubt on this hypothesis. The null
63    /// hypothesis is that the values from the 2 classes are generated
64    /// from 2 identical distributions. The alternative is that the
65    /// median of the first distribution is shifted from the median of
66    /// the second distribution by a non-zero amount. If the smallest
67    /// group size is larger than minimum_size (default = 10), then P
68    /// is calculated using a normal approximation. @return the
69    /// one-sided p-value (if absolute=true is used, this is equivalent
70    /// to the two-sided p-value).
71    ///
72    double p_value(void) const;
73         
74    ///
75    /// @return the targets in train_set sorted with respect to the
76    /// corresponding data
77    ///
78    gslapi::vector target(void) const;
79
80    ///
81    /// minimum_size is the threshold for when a normal
82    /// approximation is used for the p-value calculation.
83    ///
84    /// @return mutable reference to minimum_size, so callers can change it
85    ///
86    inline u_int& minimum_size(void){ return minimum_size_; } 
87
88  private:
89    double area_; ///< presumably the area from the latest score() call - verify in ROC.cc
90    u_int minimum_size_; ///< threshold exposed through minimum_size()
91    u_int nof_pos_; ///< presumably the number of samples in class + - verify in ROC.cc
92    /// Pairs of target and data. Should always be sorted with respect to
93    /// data.
94    std::vector<std::pair<double, double> > value_; 
95   
96    /// Implemented as in MatLab 13.1 (presumably the normal-approximation p-value - verify)
97    double get_p_approx(const double) const;
98
99    /// Implemented as in MatLab 13.1 (presumably the exact p-value for small groups - verify)
100    double get_p_exact(const double, const double, const double) const;
101
102    /// Sorts value_; should always be done when train_set_ changes.
103    void sort(void);
104       
105  };
106
107  ///
108  /// The output operator for the ROC class. The output is an Nx2
109  /// matrix, where the first column is the sensitivity and the second
110  /// column is the specificity.
111  ///
112  std::ostream& operator<< (std::ostream& s, const ROC&);
113
114
115}} // of namespace statistics and namespace theplu
116
117#endif
118
Note: See TracBrowser for help on using the repository browser.