source: trunk/yat/statistics/tScore.h @ 703

Last change on this file since 703 was 703, checked in by Jari Häkkinen, 15 years ago

Addresses #65 and #170.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 4.1 KB
Line 
1#ifndef _theplu_yat_statistics_tscore_
2#define _theplu_yat_statistics_tscore_
3
4// $Id: tScore.h 703 2006-12-18 00:47:44Z jari $
5
6/*
7  Copyright (C) The authors contributing to this file.
8
9  This file is part of the yat library, http://lev.thep.lu.se/trac/yat
10
11  The yat library is free software; you can redistribute it and/or
12  modify it under the terms of the GNU General Public License as
13  published by the Free Software Foundation; either version 2 of the
14  License, or (at your option) any later version.
15
16  The yat library is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  General Public License for more details.
20
21  You should have received a copy of the GNU General Public License
22  along with this program; if not, write to the Free Software
23  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
24  02111-1307, USA.
25*/
26
27#include "Score.h"
28
29#include <gsl/gsl_cdf.h>
30
31namespace theplu {
32namespace yat {
33  namespace utility {
34    class vector; // forward declaration; only used as const utility::vector& in this header
35  }
36namespace statistics { 
37
38  ///
39  /// Class for Student's t-test.
40  ///   
41  /// See <a href="http://en.wikipedia.org/wiki/Student's_t-test">
42  /// http://en.wikipedia.org/wiki/Student's_t-test</a> for more
43  /// details on the t-test.
44  ///
45  class tScore : public Score
46  {
47 
48  public:
49    ///
50    /// @brief Default Constructor.
51    /// @param absolute if true, score() returns the absolute value of the t-score.
52    tScore(bool absolute=true);
53
54   
55    /**
56       Calculates the value of t-score, i.e. the ratio between
57       difference in mean and standard deviation of this
58       difference. \f$ t = \frac{ m_x - m_y }
59       {s\sqrt{\frac{1}{n_x}+\frac{1}{n_y}}} \f$ where \f$ m \f$ is the
60       mean, \f$ n \f$ is the number of data points and \f$ s^2 =
61       \frac{ \sum_i (x_i-m_x)^2 + \sum_i (y_i-m_y)^2 }{ n_x + n_y -
62       2 } \f$
63       
64       @return t-score. If absolute=true absolute value of t-score
65       is returned
66    */
67    double score(const classifier::Target& target, 
68                 const utility::vector& value); 
69
70    /**
71       Calculates the weighted t-score, i.e. the ratio between
72       difference in mean and standard deviation of this
73       difference. \f$ t = \frac{ m_x - m_y }{
74       s\sqrt{\frac{1}{n_x}+\frac{1}{n_y}}} \f$ where \f$ m \f$ is the
75       weighted mean, n is the weighted version of number of data
76       points \f$ \frac{\left(\sum w_i\right)^2}{\sum w_i^2} \f$, and
77       \f$ s^2 \f$ is an estimation of the variance \f$ s^2 = \frac{
78       \sum_i w_i(x_i-m_x)^2 + \sum_i w_i(y_i-m_y)^2 }{ n_x + n_y - 2
79       } \f$. See AveragerWeighted for details.
80       
81       @return t-score. If absolute=true absolute value of t-score
82       is returned
83    */
84    double score(const classifier::Target& target, 
85                 const classifier::DataLookupWeighted1D& value); 
86
87    ///
88    /// Calculates the weighted t-score, i.e. the ratio between
89    /// difference in mean and standard deviation of this
90    /// difference. \f$ t = \frac{ m_x - m_y }{
91    /// s\sqrt{\frac{1}{n_x}+\frac{1}{n_y}}} \f$ where \f$ m \f$ is the
92    /// weighted mean, n is the weighted version of number of data
93    /// points and \f$ s^2 \f$ is an estimation of the variance \f$ s^2
94    /// = \frac{ \sum_i w_i(x_i-m_x)^2 + \sum_i w_i(y_i-m_y)^2 }{ n_x
95    /// + n_y - 2 } \f$. See AveragerWeighted for details.
96    ///
97    /// @return t-score. If absolute=true absolute value of t-score
98    /// is returned
99    ///
100    double score(const classifier::Target& target, 
101                 const utility::vector& value, 
102                 const utility::vector& weight); 
103
104    ///
105    /// Calculates the p-value, i.e. the probability of observing a
106    /// t-score equal or larger if the null hypothesis is true. If P
107    /// is near zero, this casts doubt on this hypothesis. The null
108    /// hypothesis is that the means of the two distributions are
109    /// equal. Assumptions for this test are that the two distributions
110    /// are normal distributions with equal variance. The latter
111    /// assumption is dropped in Welch's t-test.
112    ///
113    /// @return the one-sided p-value (if absolute=true is used,
114    /// the two-sided p-value)
115    ///
116    double p_value() const;
117
118  private:
119    double t_;   // NOTE(review): presumably the t-score from the latest score() call - confirm in the implementation
120    double dof_; // NOTE(review): presumably the degrees of freedom used by p_value() - confirm in the implementation
121       
122  };
123
124}}} // of namespace statistics, yat, and theplu
125
126#endif
Note: See TracBrowser for help on using the repository browser.