# source:trunk/c++_tools/statistics/tScore.h@669

Last change on this file since 669 was 669, checked in by Peter, 16 years ago

#closes #88. SAM score class added

• Property svn:eol-style set to native
• Property svn:keywords set to Author Date Id Revision
File size: 3.3 KB
Line
1#ifndef _theplu_statistics_tscore_
2#define _theplu_statistics_tscore_
3
4// $Id: tScore.h 669 2006-10-07 04:42:44Z peter$
5
6// C++ tools include
7/////////////////////
8#include <c++_tools/statistics/Score.h>
9
10#include <gsl/gsl_cdf.h>
11
12
13namespace theplu {
14  namespace utility {
15    class vector;
16  }
17namespace statistics {
18
19  ///
20  /// Class for Student's t-test.
21  ///
22  /// See <a href="http://en.wikipedia.org/wiki/Student's_t-test">
23  /// http://en.wikipedia.org/wiki/Student's_t-test</a> for more
24  /// details on the t-test.
25  ///
26  class tScore : public Score
27  {
28
29  public:
30    /// Constructor; usable as the default constructor.
31    /// @param absolute if true (default), the score functions return
32    /// the absolute value of the t-score (see their @return notes).
33    tScore(bool absolute=true);
34
35
36    /**
37       Calculates the value of t-score, i.e. the ratio between
38       difference in mean and standard deviation of this
39       difference. \f$t = \frac{ m_x - m_y }{s\sqrt{\frac{1}{n_x}+\frac{1}{n_y}}} \f$ where \f$m \f$ is the
41       mean, \f$n \f$ is the number of data points and \f$s^2 = \frac{ \sum_i (x_i-m_x)^2 + \sum_i (y_i-m_y)^2 }{ n_x + n_y - 2 } \f$
44
45       @return t-score. If absolute=true absolute value of t-score
46       is returned
47    */
48    double score(const classifier::Target& target,
49                 const utility::vector& value);
50
51    /**
52       Calculates the weighted t-score, i.e. the ratio between
53       difference in mean and standard deviation of this
54       difference. \f$t = \frac{ m_x - m_y }{ s\sqrt{\frac{1}{n_x}+\frac{1}{n_y}}} \f$ where \f$m \f$ is the
56       weighted mean, n is the weighted version of number of data
57       points \f$\frac{\left(\sum w_i\right)^2}{\sum w_i^2} \f$, and
58       \f$s^2 \f$ is an estimate of the variance \f$s^2 = \frac{ \sum_i w_i(x_i-m_x)^2 + \sum_i w_i(y_i-m_y)^2 }{ n_x + n_y - 2 } \f$. See AveragerWeighted for details.
61
62       @return t-score. If absolute=true absolute value of t-score
63       is returned
64    */
65    double score(const classifier::Target& target,
66                 const classifier::DataLookupWeighted1D& value);
67
68    ///
69    /// Calculates the weighted t-score, i.e. the ratio between
70    /// difference in mean and standard deviation of this
71    /// difference. \f$t = \frac{ m_x - m_y }{ \sqrt{\frac{s^2}{n_x}+\frac{s^2}{n_y}}} \f$ where \f$m \f$ is the
73    /// weighted mean, n is the weighted version of number of data
74    /// points and \f$s^2 \f$ is an estimate of the variance \f$s^2 = \frac{ \sum_i w_i(x_i-m_x)^2 + \sum_i w_i(y_i-m_y)^2 }{ n_x + n_y - 2 } \f$. See AveragerWeighted for details.
77    ///
78    /// @return t-score. If absolute=true absolute value of t-score
79    /// is returned
80    ///
81    double score(const classifier::Target& target,
82                 const utility::vector& value,
83                 const utility::vector& weight);
84
85    ///
86    /// Calculates the p-value, i.e. the probability of observing a
87    /// t-score equal or larger if the null hypothesis is true. If P
88    /// is near zero, this casts doubt on this hypothesis. The null
89    /// hypothesis is that the means of the two distributions are
90    /// equal. Assumptions for this test are that the two distributions
91    /// are normal distributions with equal variance. The latter
92    /// assumption is dropped in Welch's t-test.
93    ///
94    /// @return the one-sided p-value (if absolute=true is used,
95    /// the two-sided p-value)
96    ///
97    double p_value() const;
98
99
100
101  private:
102    double t_; // NOTE(review): presumably the t-score cached by the last score() call - confirm in the implementation
103    double dof_; // NOTE(review): presumably the degrees of freedom used by p_value() - confirm in the implementation
104
105  };
106
107}} // of namespace statistics and namespace theplu
108
109#endif
110
Note: See TracBrowser for help on using the repository browser.