# source:trunk/c++_tools/statistics/tScore.h@623

Last change on this file since 623 was 623, checked in by Peter, 15 years ago

fixes #112 and refs #123: added overloaded function score taking Target and DataLookupWeighted1D, which is needed for InputRanker.

• Property svn:eol-style set to native
• Property svn:keywords set to Author Date Id Revision
File size: 3.3 KB
Line
1#ifndef _theplu_statistics_tscore_
2#define _theplu_statistics_tscore_
3
4// $Id: tScore.h 623 2006-09-05 02:13:12Z peter$
5
6// C++ tools include
7/////////////////////
8#include <c++_tools/statistics/Score.h>
9
10#include <gsl/gsl_cdf.h>
11
12
13namespace theplu {
14  namespace utility {
15    class vector;
16  }
17namespace statistics {
18
19  ///
20  /// Class for Student's t-test.
21  ///
22  /// See <a href="http://en.wikipedia.org/wiki/Student's_t-test">
23  /// http://en.wikipedia.org/wiki/Student's_t-test</a> for more
24  /// details on the t-test.
25  ///
26  class tScore : public Score
27  {
28
29  public:
30    ///
31    /// Default Constructor.
32    /// @param absolute if true, score() returns the absolute value of the t-score.
33    tScore(bool absolute=true);
34
35    ///
36    /// Calculates the value of t-score, i.e. the ratio between
37    /// difference in mean and standard deviation of this
38    /// difference. \f$t = \frac{ m_x - m_y }{\frac{s^2}{n_x}+\frac{s^2}{n_y}} \f$ where \f$m \f$ is the
40    /// mean, \f$n \f$ is the number of data points and \f$s^2 = \frac{ \sum_i (x_i-m_x)^2 + \sum_i (y_i-m_y)^2 }{ n_x + n_y - 2 } \f$
43    /// NOTE(review): the text says "standard deviation" but the denominator above is a variance; presumably a square root is intended -- confirm against the implementation.
44    /// @return the t-score; if absolute=true the absolute value of the
45    /// t-score is returned instead.
46    ///
47    double score(const classifier::Target& target,
48                 const utility::vector& value);
49
50    ///
51    /// Calculates the weighted t-score, i.e. the ratio between
52    /// difference in mean and standard deviation of this
53    /// difference. \f$t = \frac{ m_x - m_y }{ \frac{s2}{n_x}+\frac{s2}{n_y}} \f$ where \f$m \f$ is the
55    /// weighted mean, n is the weighted version of number of data
56    /// points and \f$s2 \f$ is an estimation of the variance \f$s^2 = \frac{ \sum_i w_i(x_i-m_x)^2 + \sum_i w_i(y_i-m_y)^2 }{ n_x + n_y - 2 } \f$. See AveragerWeighted for details.
59    ///
60    /// @return the t-score; if absolute=true the absolute value of the
61    /// t-score is returned instead.
62    ///
63    double score(const classifier::Target& target,
64                 const classifier::DataLookupWeighted1D& value);
65
66    ///
67    /// Calculates the weighted t-score, i.e. the ratio between
68    /// difference in mean and standard deviation of this
69    /// difference. \f$t = \frac{ m_x - m_y }{ \frac{s2}{n_x}+\frac{s2}{n_y}} \f$ where \f$m \f$ is the
71    /// weighted mean, n is the weighted version of number of data
72    /// points and \f$s2 \f$ is an estimation of the variance \f$s^2 = \frac{ \sum_i w_i(x_i-m_x)^2 + \sum_i w_i(y_i-m_y)^2 }{ n_x + n_y - 2 } \f$. See AveragerWeighted for details.
75    ///
76    /// @return the t-score; if absolute=true the absolute value of the
77    /// t-score is returned instead.
78    ///
79    double score(const classifier::Target& target,
80                 const utility::vector& value,
81                 const utility::vector& weight);
82
83    ///
84    /// Calculates the p-value, i.e. the probability of observing a
85    /// t-score equal or larger if the null hypothesis is true. If P
86    /// is near zero, this casts doubt on this hypothesis. The null
87    /// hypothesis is that the means of the two distributions are
88    /// equal. Assumptions for this test are that the two distributions
89    /// are normal distributions with equal variance. The latter
90    /// assumption is dropped in Welch's t-test.
91    ///
92    /// @return the one-sided p-value (if absolute=true is used,
93    /// the two-sided p-value)
94    ///
95    double p_value() const;
96
97
98
99  private:
100    double t_; // presumably the t-score from the latest score() call -- confirm in the implementation
101    double dof_; // presumably the degrees of freedom used by p_value() -- confirm in the implementation
102
103  };
104
105}} // of namespace statistics and namespace theplu
106
107#endif
108
Note: See TracBrowser for help on using the repository browser.