source: trunk/yat/statistics/tScore.h @ 1275

Last change on this file since 1275 was 1275, checked in by Jari Häkkinen, 13 years ago

Updating copyright statements.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 7.1 KB
Line 
1#ifndef _theplu_yat_statistics_tscore_
2#define _theplu_yat_statistics_tscore_
3
4// $Id: tScore.h 1275 2008-04-11 06:10:12Z jari $
5
6/*
7  Copyright (C) 2004, 2005 Peter Johansson
8  Copyright (C) 2006 Jari Häkkinen, Peter Johansson, Markus Ringnér
9  Copyright (C) 2007 Jari Häkkinen, Peter Johansson
10  Copyright (C) 2008 Peter Johansson
11
12  This file is part of the yat library, http://trac.thep.lu.se/yat
13
14  The yat library is free software; you can redistribute it and/or
15  modify it under the terms of the GNU General Public License as
16  published by the Free Software Foundation; either version 2 of the
17  License, or (at your option) any later version.
18
19  The yat library is distributed in the hope that it will be useful,
20  but WITHOUT ANY WARRANTY; without even the implied warranty of
21  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22  General Public License for more details.
23
24  You should have received a copy of the GNU General Public License
25  along with this program; if not, write to the Free Software
26  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
27  02111-1307, USA.
28*/
29
30#include "Score.h"
31
32#include <cmath>
33#include <gsl/gsl_cdf.h>
34
35namespace theplu {
36namespace yat {
37namespace utility {
38  class VectorBase;
39}
40namespace statistics { 
41
42  ///
43  /// @brief Class for Fisher's t-test.
44  ///   
45  /// See <a href="http://en.wikipedia.org/wiki/Student's_t-test">
46  /// http://en.wikipedia.org/wiki/Student's_t-test</a> for more
47  /// details on the t-test.
48  ///
49  class tScore : public Score
50  {
51 
52  public:
53    ///
54    /// @brief Default Constructor.
55    ///
56    tScore(bool absolute=true);
57
58   
59    /**
60       Calculates the value of t-score, i.e. the ratio between
61       difference in mean and standard deviation of this
62       difference. \f$ t = \frac{ m_x - m_y }
63       {s\sqrt{\frac{1}{n_x}+\frac{1}{n_y}}} \f$ where \f$ m \f$ is the
64       mean, \f$ n \f$ is the number of data points and \f$ s^2 =
65       \frac{ \sum_i (x_i-m_x)^2 + \sum_i (y_i-m_y)^2 }{ n_x + n_y -
66       2 } \f$
67
68       @return t-score. If absolute=true absolute value of t-score
69       is returned
70    */
71    double score(const classifier::Target& target, 
72                 const utility::VectorBase& value) const; 
73
74    /**
75       Calculates the value of t-score, i.e. the ratio between
76       difference in mean and standard deviation of this
77       difference. \f$ t = \frac{ m_x - m_y }
78       {s\sqrt{\frac{1}{n_x}+\frac{1}{n_y}}} \f$ where \f$ m \f$ is the
79       mean, \f$ n \f$ is the number of data points and \f$ s^2 =
80       \frac{ \sum_i (x_i-m_x)^2 + \sum_i (y_i-m_y)^2 }{ n_x + n_y -
81       2 } \f$
82       
83       \param target Target defining the two groups
84       \param value Vector with data points on which calculation is based
85       @param dof double pointer in which approximation of degrees of
86       freedom is returned: pos.n()+neg.n()-2. See AveragerWeighted.
87
88       @return t-score. If absolute=true absolute value of t-score
89       is returned
90    */
91    double score(const classifier::Target& target, 
92                 const utility::VectorBase& value, double* dof) const; 
93
94    /**
95       Calculates the weighted t-score, i.e. the ratio between
96       difference in mean and standard deviation of this
97       difference. \f$ t = \frac{ m_x - m_y }{
98       s\sqrt{\frac{1}{n_x}+\frac{1}{n_y}}} \f$ where \f$ m \f$ is the
99       weighted mean, n is the weighted version of number of data
100       points \f$ \frac{\left(\sum w_i\right)^2}{\sum w_i^2} \f$, and
101       \f$ s^2 \f$ is an estimation of the variance \f$ s^2 = \frac{
102       \sum_i w_i(x_i-m_x)^2 + \sum_i w_i(y_i-m_y)^2 }{ n_x + n_y - 2
103       } \f$. See AveragerWeighted for details.
104       
105       \param target Target defining the two groups
106       \param value Vector with values/weights on which calculation is based
107       @param dof double pointer in which approximation of degrees of
108       freedom is returned: pos.n()+neg.n()-2. See AveragerWeighted.
109
110       @return t-score. If absolute=true absolute value of t-score
111       is returned
112    */
113    double score(const classifier::Target& target, 
114                 const classifier::DataLookupWeighted1D& value,
115                 double* dof=0) const; 
116
117    /**
118       Calculates the weighted t-score, i.e. the ratio between
119       difference in mean and standard deviation of this
120       difference. \f$ t = \frac{ m_x - m_y }{
121       s\sqrt{\frac{1}{n_x}+\frac{1}{n_y}}} \f$ where \f$ m \f$ is the
122       weighted mean, n is the weighted version of number of data
123       points \f$ \frac{\left(\sum w_i\right)^2}{\sum w_i^2} \f$, and
124       \f$ s^2 \f$ is an estimation of the variance \f$ s^2 = \frac{
125       \sum_i w_i(x_i-m_x)^2 + \sum_i w_i(y_i-m_y)^2 }{ n_x + n_y - 2
126       } \f$. See AveragerWeighted for details.
127       
128       @return t-score. If absolute=true absolute value of t-score
129       is returned
130    */
131    double score(const classifier::Target& target, 
132                 const classifier::DataLookupWeighted1D& value) const; 
133
134    /**
135       Calculates the weighted t-score, i.e. the ratio between
136       difference in mean and standard deviation of this
137       difference. \f$ t = \frac{ m_x - m_y }{
138       \frac{s2}{n_x}+\frac{s2}{n_y}} \f$ where \f$ m \f$ is the
139       weighted mean, n is the weighted version of number of data
140       points and \f$ s2 \f$ is an estimation of the variance \f$ s^2
141       = \frac{ \sum_i w_i(x_i-m_x)^2 + \sum_i w_i(y_i-m_y)^2 }{ n_x
142       + n_y - 2 } \f$. See AveragerWeighted for details.
143     
144       @return t-score if absolute=true absolute value of t-score
145       is returned
146    */
147    double score(const classifier::Target& target, 
148                 const utility::VectorBase& value, 
149                 const utility::VectorBase& weight) const; 
150
151    /**
152       Calculates the weighted t-score, i.e. the ratio between
153       difference in mean and standard deviation of this
154       difference. \f$ t = \frac{ m_x - m_y }{
155       \frac{s2}{n_x}+\frac{s2}{n_y}} \f$ where \f$ m \f$ is the
156       weighted mean, n is the weighted version of number of data
157       points and \f$ s2 \f$ is an estimation of the variance \f$ s^2
158       = \frac{ \sum_i w_i(x_i-m_x)^2 + \sum_i w_i(y_i-m_y)^2 }{ n_x
159       + n_y - 2 } \f$. See AveragerWeighted for details.
160     
161       \param target Target defining the two groups
162       \param value Vector with data values on which calculation is based
163       \param weight Vector with weight associated to \a value
164       @param dof double pointer in which approximation of degrees of
165       freedom is returned: pos.n()+neg.n()-2. See AveragerWeighted.
166
167       @return t-score if absolute=true absolute value of t-score
168       is returned
169    */
170    double score(const classifier::Target& target, 
171                 const utility::VectorBase& value, 
172                 const utility::VectorBase& weight,
173                 double* dof=0) const; 
174
175    /**
176       Calcultate t-score from Averager like objects. Requirements for
177       T1 and T2 are: double mean(), double n(), double sum_xx_centered()
178       
179       If \a dof is not a null pointer it is assigned to number of
180       degrees of freedom.
181    */
182    template<typename T1, typename T2> 
183    double score(const T1& pos, const T2& neg, double* dof=0) const;
184
185  private:
186   
187  };
188
189  template<typename T1, typename T2> 
190  double tScore::score(const T1& pos, const T2& neg, double* dof) const
191  {
192    double diff = pos.mean() - neg.mean();
193    if (dof)
194      *dof=pos.n()+neg.n()-2;
195    double s2=( (pos.sum_xx_centered()+neg.sum_xx_centered())/
196                (pos.n()+neg.n()-2));
197    double t=diff/sqrt(s2/pos.n()+s2/neg.n());
198    if (t<0 && absolute_)
199      return -t;
200    return t;
201  }
202
203}}} // of namespace statistics, yat, and theplu
204
205#endif
Note: See TracBrowser for help on using the repository browser.