source: trunk/yat/statistics/tScore.h

Last change on this file was 2119, checked in by Peter, 12 years ago

converted files to utf-8. fixes #577

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 7.0 KB
Line 
1#ifndef _theplu_yat_statistics_tscore_
2#define _theplu_yat_statistics_tscore_
3
4// $Id: tScore.h 2119 2009-12-12 23:11:43Z peter $
5
6/*
7  Copyright (C) 2004, 2005 Peter Johansson
8  Copyright (C) 2006 Jari Häkkinen, Peter Johansson, Markus Ringnér
9  Copyright (C) 2007, 2008 Jari Häkkinen, Peter Johansson
10
11  This file is part of the yat library, http://dev.thep.lu.se/yat
12
13  The yat library is free software; you can redistribute it and/or
14  modify it under the terms of the GNU General Public License as
15  published by the Free Software Foundation; either version 3 of the
16  License, or (at your option) any later version.
17
18  The yat library is distributed in the hope that it will be useful,
19  but WITHOUT ANY WARRANTY; without even the implied warranty of
20  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21  General Public License for more details.
22
23  You should have received a copy of the GNU General Public License
24  along with yat. If not, see <http://www.gnu.org/licenses/>.
25*/
26
27#include "Score.h"
28
29#include <cmath>
30#include <gsl/gsl_cdf.h>
31
32namespace theplu {
33namespace yat {
34namespace utility {
35  class VectorBase;
36}
37namespace statistics { 
38
39  ///
40  /// @brief Class for Fisher's t-test.
41  ///   
42  /// See <a href="http://en.wikipedia.org/wiki/Student's_t-test">
43  /// http://en.wikipedia.org/wiki/Student's_t-test</a> for more
44  /// details on the t-test.
45  ///
46  class tScore : public Score
47  {
48 
49  public:
50    ///
51    /// @brief Default Constructor.
52    ///
53    tScore(bool absolute=true);
54
55   
56    /**
57       Calculates the value of t-score, i.e. the ratio between
58       difference in mean and standard deviation of this
59       difference. \f$ t = \frac{ m_x - m_y }
60       {s\sqrt{\frac{1}{n_x}+\frac{1}{n_y}}} \f$ where \f$ m \f$ is the
61       mean, \f$ n \f$ is the number of data points and \f$ s^2 =
62       \frac{ \sum_i (x_i-m_x)^2 + \sum_i (y_i-m_y)^2 }{ n_x + n_y -
63       2 } \f$
64
65       @return t-score. If absolute=true absolute value of t-score
66       is returned
67    */
68    double score(const classifier::Target& target, 
69                 const utility::VectorBase& value) const; 
70
71    /**
72       Calculates the value of t-score, i.e. the ratio between
73       difference in mean and standard deviation of this
74       difference. \f$ t = \frac{ m_x - m_y }
75       {s\sqrt{\frac{1}{n_x}+\frac{1}{n_y}}} \f$ where \f$ m \f$ is the
76       mean, \f$ n \f$ is the number of data points and \f$ s^2 =
77       \frac{ \sum_i (x_i-m_x)^2 + \sum_i (y_i-m_y)^2 }{ n_x + n_y -
78       2 } \f$
79       
80       \param target Target defining the two groups
81       \param value Vector with data points on which calculation is based
82       @param dof double pointer in which approximation of degrees of
83       freedom is returned: pos.n()+neg.n()-2. See AveragerWeighted.
84
85       @return t-score. If absolute=true absolute value of t-score
86       is returned
87    */
88    double score(const classifier::Target& target, 
89                 const utility::VectorBase& value, double* dof) const; 
90
91    /**
92       Calculates the weighted t-score, i.e. the ratio between
93       difference in mean and standard deviation of this
94       difference. \f$ t = \frac{ m_x - m_y }{
95       s\sqrt{\frac{1}{n_x}+\frac{1}{n_y}}} \f$ where \f$ m \f$ is the
96       weighted mean, n is the weighted version of number of data
97       points \f$ \frac{\left(\sum w_i\right)^2}{\sum w_i^2} \f$, and
98       \f$ s^2 \f$ is an estimation of the variance \f$ s^2 = \frac{
99       \sum_i w_i(x_i-m_x)^2 + \sum_i w_i(y_i-m_y)^2 }{ n_x + n_y - 2
100       } \f$. See AveragerWeighted for details.
101       
102       \param target Target defining the two groups
103       \param value Vector with values/weights on which calculation is based
104       @param dof double pointer in which approximation of degrees of
105       freedom is returned: pos.n()+neg.n()-2. See AveragerWeighted.
106
107       @return t-score. If absolute=true absolute value of t-score
108       is returned
109    */
110    double score(const classifier::Target& target, 
111                 const classifier::DataLookupWeighted1D& value,
112                 double* dof=0) const; 
113
114    /**
115       Calculates the weighted t-score, i.e. the ratio between
116       difference in mean and standard deviation of this
117       difference. \f$ t = \frac{ m_x - m_y }{
118       s\sqrt{\frac{1}{n_x}+\frac{1}{n_y}}} \f$ where \f$ m \f$ is the
119       weighted mean, n is the weighted version of number of data
120       points \f$ \frac{\left(\sum w_i\right)^2}{\sum w_i^2} \f$, and
121       \f$ s^2 \f$ is an estimation of the variance \f$ s^2 = \frac{
122       \sum_i w_i(x_i-m_x)^2 + \sum_i w_i(y_i-m_y)^2 }{ n_x + n_y - 2
123       } \f$. See AveragerWeighted for details.
124       
125       @return t-score. If absolute=true absolute value of t-score
126       is returned
127    */
128    double score(const classifier::Target& target, 
129                 const classifier::DataLookupWeighted1D& value) const; 
130
131    /**
132       Calculates the weighted t-score, i.e. the ratio between
133       difference in mean and standard deviation of this
134       difference. \f$ t = \frac{ m_x - m_y }{
135       \frac{s2}{n_x}+\frac{s2}{n_y}} \f$ where \f$ m \f$ is the
136       weighted mean, n is the weighted version of number of data
137       points and \f$ s2 \f$ is an estimation of the variance \f$ s^2
138       = \frac{ \sum_i w_i(x_i-m_x)^2 + \sum_i w_i(y_i-m_y)^2 }{ n_x
139       + n_y - 2 } \f$. See AveragerWeighted for details.
140     
141       @return t-score if absolute=true absolute value of t-score
142       is returned
143    */
144    double score(const classifier::Target& target, 
145                 const utility::VectorBase& value, 
146                 const utility::VectorBase& weight) const; 
147
148    /**
149       Calculates the weighted t-score, i.e. the ratio between
150       difference in mean and standard deviation of this
151       difference. \f$ t = \frac{ m_x - m_y }{
152       \frac{s2}{n_x}+\frac{s2}{n_y}} \f$ where \f$ m \f$ is the
153       weighted mean, n is the weighted version of number of data
154       points and \f$ s2 \f$ is an estimation of the variance \f$ s^2
155       = \frac{ \sum_i w_i(x_i-m_x)^2 + \sum_i w_i(y_i-m_y)^2 }{ n_x
156       + n_y - 2 } \f$. See AveragerWeighted for details.
157     
158       \param target Target defining the two groups
159       \param value Vector with data values on which calculation is based
160       \param weight Vector with weight associated to \a value
161       @param dof double pointer in which approximation of degrees of
162       freedom is returned: pos.n()+neg.n()-2. See AveragerWeighted.
163
164       @return t-score if absolute=true absolute value of t-score
165       is returned
166    */
167    double score(const classifier::Target& target, 
168                 const utility::VectorBase& value, 
169                 const utility::VectorBase& weight,
170                 double* dof=0) const; 
171
172    /**
173       Calcultate t-score from Averager like objects. Requirements for
174       T1 and T2 are: double mean(), double n(), double sum_xx_centered()
175       
176       If \a dof is not a null pointer it is assigned to number of
177       degrees of freedom.
178    */
179    template<typename T1, typename T2> 
180    double score(const T1& pos, const T2& neg, double* dof=0) const;
181
182  private:
183   
184  };
185
186  template<typename T1, typename T2> 
187  double tScore::score(const T1& pos, const T2& neg, double* dof) const
188  {
189    double diff = pos.mean() - neg.mean();
190    if (dof)
191      *dof=pos.n()+neg.n()-2;
192    double s2=( (pos.sum_xx_centered()+neg.sum_xx_centered())/
193                (pos.n()+neg.n()-2));
194    double t=diff/sqrt(s2/pos.n()+s2/neg.n());
195    if (t<0 && absolute_)
196      return -t;
197    return t;
198  }
199
200}}} // of namespace statistics, yat, and theplu
201
202#endif
Note: See TracBrowser for help on using the repository browser.