source: trunk/yat/statistics/tScore.h @ 1703

Last change on this file since 1703 was 1487, checked in by Jari Häkkinen, 13 years ago

Addresses #436. GPL license copy reference should also be updated.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 7.1 KB
Line 
1#ifndef _theplu_yat_statistics_tscore_
2#define _theplu_yat_statistics_tscore_
3
4// $Id: tScore.h 1487 2008-09-10 08:41:36Z jari $
5
6/*
7  Copyright (C) 2004, 2005 Peter Johansson
8  Copyright (C) 2006 Jari Häkkinen, Peter Johansson, Markus Ringnér
9  Copyright (C) 2007 Jari Häkkinen, Peter Johansson
10  Copyright (C) 2008 Peter Johansson
11
12  This file is part of the yat library, http://dev.thep.lu.se/yat
13
14  The yat library is free software; you can redistribute it and/or
15  modify it under the terms of the GNU General Public License as
16  published by the Free Software Foundation; either version 3 of the
17  License, or (at your option) any later version.
18
19  The yat library is distributed in the hope that it will be useful,
20  but WITHOUT ANY WARRANTY; without even the implied warranty of
21  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22  General Public License for more details.
23
24  You should have received a copy of the GNU General Public License
25  along with yat. If not, see <http://www.gnu.org/licenses/>.
26*/
27
28#include "Score.h"
29
30#include <cmath>
31#include <gsl/gsl_cdf.h>
32
33namespace theplu {
34namespace yat {
35namespace utility {
36  class VectorBase;
37}
38namespace statistics { 
39
40  ///
41  /// @brief Class for Fisher's t-test.
42  ///   
43  /// See <a href="http://en.wikipedia.org/wiki/Student's_t-test">
44  /// http://en.wikipedia.org/wiki/Student's_t-test</a> for more
45  /// details on the t-test.
46  ///
47  class tScore : public Score
48  {
49 
50  public:
51    ///
52    /// @brief Default Constructor.
53    ///
54    tScore(bool absolute=true);
55
56   
57    /**
58       Calculates the value of t-score, i.e. the ratio between
59       difference in mean and standard deviation of this
60       difference. \f$ t = \frac{ m_x - m_y }
61       {s\sqrt{\frac{1}{n_x}+\frac{1}{n_y}}} \f$ where \f$ m \f$ is the
62       mean, \f$ n \f$ is the number of data points and \f$ s^2 =
63       \frac{ \sum_i (x_i-m_x)^2 + \sum_i (y_i-m_y)^2 }{ n_x + n_y -
64       2 } \f$
65
66       @return t-score. If absolute=true absolute value of t-score
67       is returned
68    */
69    double score(const classifier::Target& target, 
70                 const utility::VectorBase& value) const; 
71
72    /**
73       Calculates the value of t-score, i.e. the ratio between
74       difference in mean and standard deviation of this
75       difference. \f$ t = \frac{ m_x - m_y }
76       {s\sqrt{\frac{1}{n_x}+\frac{1}{n_y}}} \f$ where \f$ m \f$ is the
77       mean, \f$ n \f$ is the number of data points and \f$ s^2 =
78       \frac{ \sum_i (x_i-m_x)^2 + \sum_i (y_i-m_y)^2 }{ n_x + n_y -
79       2 } \f$
80       
81       \param target Target defining the two groups
82       \param value Vector with data points on which calculation is based
83       @param dof double pointer in which approximation of degrees of
84       freedom is returned: pos.n()+neg.n()-2. See AveragerWeighted.
85
86       @return t-score. If absolute=true absolute value of t-score
87       is returned
88    */
89    double score(const classifier::Target& target, 
90                 const utility::VectorBase& value, double* dof) const; 
91
92    /**
93       Calculates the weighted t-score, i.e. the ratio between
94       difference in mean and standard deviation of this
95       difference. \f$ t = \frac{ m_x - m_y }{
96       s\sqrt{\frac{1}{n_x}+\frac{1}{n_y}}} \f$ where \f$ m \f$ is the
97       weighted mean, n is the weighted version of number of data
98       points \f$ \frac{\left(\sum w_i\right)^2}{\sum w_i^2} \f$, and
99       \f$ s^2 \f$ is an estimation of the variance \f$ s^2 = \frac{
100       \sum_i w_i(x_i-m_x)^2 + \sum_i w_i(y_i-m_y)^2 }{ n_x + n_y - 2
101       } \f$. See AveragerWeighted for details.
102       
103       \param target Target defining the two groups
104       \param value Vector with values/weights on which calculation is based
105       @param dof double pointer in which approximation of degrees of
106       freedom is returned: pos.n()+neg.n()-2. See AveragerWeighted.
107
108       @return t-score. If absolute=true absolute value of t-score
109       is returned
110    */
111    double score(const classifier::Target& target, 
112                 const classifier::DataLookupWeighted1D& value,
113                 double* dof=0) const; 
114
115    /**
116       Calculates the weighted t-score, i.e. the ratio between
117       difference in mean and standard deviation of this
118       difference. \f$ t = \frac{ m_x - m_y }{
119       s\sqrt{\frac{1}{n_x}+\frac{1}{n_y}}} \f$ where \f$ m \f$ is the
120       weighted mean, n is the weighted version of number of data
121       points \f$ \frac{\left(\sum w_i\right)^2}{\sum w_i^2} \f$, and
122       \f$ s^2 \f$ is an estimation of the variance \f$ s^2 = \frac{
123       \sum_i w_i(x_i-m_x)^2 + \sum_i w_i(y_i-m_y)^2 }{ n_x + n_y - 2
124       } \f$. See AveragerWeighted for details.
125       
126       @return t-score. If absolute=true absolute value of t-score
127       is returned
128    */
129    double score(const classifier::Target& target, 
130                 const classifier::DataLookupWeighted1D& value) const; 
131
132    /**
133       Calculates the weighted t-score, i.e. the ratio between
134       difference in mean and standard deviation of this
135       difference. \f$ t = \frac{ m_x - m_y }{
136       \frac{s2}{n_x}+\frac{s2}{n_y}} \f$ where \f$ m \f$ is the
137       weighted mean, n is the weighted version of number of data
138       points and \f$ s2 \f$ is an estimation of the variance \f$ s^2
139       = \frac{ \sum_i w_i(x_i-m_x)^2 + \sum_i w_i(y_i-m_y)^2 }{ n_x
140       + n_y - 2 } \f$. See AveragerWeighted for details.
141     
142       @return t-score if absolute=true absolute value of t-score
143       is returned
144    */
145    double score(const classifier::Target& target, 
146                 const utility::VectorBase& value, 
147                 const utility::VectorBase& weight) const; 
148
149    /**
150       Calculates the weighted t-score, i.e. the ratio between
151       difference in mean and standard deviation of this
152       difference. \f$ t = \frac{ m_x - m_y }{
153       \frac{s2}{n_x}+\frac{s2}{n_y}} \f$ where \f$ m \f$ is the
154       weighted mean, n is the weighted version of number of data
155       points and \f$ s2 \f$ is an estimation of the variance \f$ s^2
156       = \frac{ \sum_i w_i(x_i-m_x)^2 + \sum_i w_i(y_i-m_y)^2 }{ n_x
157       + n_y - 2 } \f$. See AveragerWeighted for details.
158     
159       \param target Target defining the two groups
160       \param value Vector with data values on which calculation is based
161       \param weight Vector with weight associated to \a value
162       @param dof double pointer in which approximation of degrees of
163       freedom is returned: pos.n()+neg.n()-2. See AveragerWeighted.
164
165       @return t-score if absolute=true absolute value of t-score
166       is returned
167    */
168    double score(const classifier::Target& target, 
169                 const utility::VectorBase& value, 
170                 const utility::VectorBase& weight,
171                 double* dof=0) const; 
172
173    /**
174       Calcultate t-score from Averager like objects. Requirements for
175       T1 and T2 are: double mean(), double n(), double sum_xx_centered()
176       
177       If \a dof is not a null pointer it is assigned to number of
178       degrees of freedom.
179    */
180    template<typename T1, typename T2> 
181    double score(const T1& pos, const T2& neg, double* dof=0) const;
182
183  private:
184   
185  };
186
187  template<typename T1, typename T2> 
188  double tScore::score(const T1& pos, const T2& neg, double* dof) const
189  {
190    double diff = pos.mean() - neg.mean();
191    if (dof)
192      *dof=pos.n()+neg.n()-2;
193    double s2=( (pos.sum_xx_centered()+neg.sum_xx_centered())/
194                (pos.n()+neg.n()-2));
195    double t=diff/sqrt(s2/pos.n()+s2/neg.n());
196    if (t<0 && absolute_)
197      return -t;
198    return t;
199  }
200
201}}} // of namespace statistics, yat, and theplu
202
203#endif
Note: See TracBrowser for help on using the repository browser.