source: trunk/yat/statistics/tScore.h @ 865

Last change on this file since 865 was 865, checked in by Peter, 14 years ago

changing URL to http://trac.thep.lu.se/trac/yat

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 6.7 KB
Line 
1#ifndef _theplu_yat_statistics_tscore_
2#define _theplu_yat_statistics_tscore_
3
4// $Id: tScore.h 865 2007-09-10 19:41:04Z peter $
5
6/*
7  Copyright (C) 2004, 2005 Peter Johansson
8  Copyright (C) 2006 Jari Häkkinen, Markus Ringnér, Peter Johansson
9  Copyright (C) 2007 Peter Johansson
10
11  This file is part of the yat library, http://trac.thep.lu.se/trac/yat
12
13  The yat library is free software; you can redistribute it and/or
14  modify it under the terms of the GNU General Public License as
15  published by the Free Software Foundation; either version 2 of the
16  License, or (at your option) any later version.
17
18  The yat library is distributed in the hope that it will be useful,
19  but WITHOUT ANY WARRANTY; without even the implied warranty of
20  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21  General Public License for more details.
22
23  You should have received a copy of the GNU General Public License
24  along with this program; if not, write to the Free Software
25  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
26  02111-1307, USA.
27*/
28
29#include "Score.h"
30
31#include <cmath>
32#include <gsl/gsl_cdf.h>
33
34namespace theplu {
35namespace yat {
36  namespace utility {
37    class vector;
38  }
39namespace statistics { 
40
41  ///
42  /// @brief Class for Fisher's t-test.
43  ///   
44  /// See <a href="http://en.wikipedia.org/wiki/Student's_t-test">
45  /// http://en.wikipedia.org/wiki/Student's_t-test</a> for more
46  /// details on the t-test.
47  ///
48  class tScore : public Score
49  {
50 
51  public:
52    ///
53    /// @brief Default Constructor.
54    ///
55    tScore(bool absolute=true);
56
57   
58    /**
59       Calculates the value of t-score, i.e. the ratio between
60       difference in mean and standard deviation of this
61       difference. \f$ t = \frac{ m_x - m_y }
62       {s\sqrt{\frac{1}{n_x}+\frac{1}{n_y}}} \f$ where \f$ m \f$ is the
63       mean, \f$ n \f$ is the number of data points and \f$ s^2 =
64       \frac{ \sum_i (x_i-m_x)^2 + \sum_i (y_i-m_y)^2 }{ n_x + n_y -
65       2 } \f$
66
67       @return t-score. If absolute=true absolute value of t-score
68       is returned
69    */
70    double score(const classifier::Target& target, 
71                 const utility::vector& value) const; 
72
73    /**
74       Calculates the value of t-score, i.e. the ratio between
75       difference in mean and standard deviation of this
76       difference. \f$ t = \frac{ m_x - m_y }
77       {s\sqrt{\frac{1}{n_x}+\frac{1}{n_y}}} \f$ where \f$ m \f$ is the
78       mean, \f$ n \f$ is the number of data points and \f$ s^2 =
79       \frac{ \sum_i (x_i-m_x)^2 + \sum_i (y_i-m_y)^2 }{ n_x + n_y -
80       2 } \f$
81       
82       @param dof double pointer in which approximation of degrees of
83       freedom is returned: pos.n()+neg.n()-2. See AveragerWeighted.
84
85       @return t-score. If absolute=true absolute value of t-score
86       is returned
87    */
88    double score(const classifier::Target& target, 
89                 const utility::vector& value, double* dof) const; 
90
91    /**
92       Calculates the weighted t-score, i.e. the ratio between
93       difference in mean and standard deviation of this
94       difference. \f$ t = \frac{ m_x - m_y }{
95       s\sqrt{\frac{1}{n_x}+\frac{1}{n_y}}} \f$ where \f$ m \f$ is the
96       weighted mean, n is the weighted version of number of data
97       points \f$ \frac{\left(\sum w_i\right)^2}{\sum w_i^2} \f$, and
98       \f$ s^2 \f$ is an estimation of the variance \f$ s^2 = \frac{
99       \sum_i w_i(x_i-m_x)^2 + \sum_i w_i(y_i-m_y)^2 }{ n_x + n_y - 2
100       } \f$. See AveragerWeighted for details.
101       
102       @param dof double pointer in which approximation of degrees of
103       freedom is returned: pos.n()+neg.n()-2. See AveragerWeighted.
104
105       @return t-score. If absolute=true absolute value of t-score
106       is returned
107    */
108    double score(const classifier::Target& target, 
109                 const classifier::DataLookupWeighted1D& value,
110                 double* dof=0) const; 
111
112    /**
113       Calculates the weighted t-score, i.e. the ratio between
114       difference in mean and standard deviation of this
115       difference. \f$ t = \frac{ m_x - m_y }{
116       s\sqrt{\frac{1}{n_x}+\frac{1}{n_y}}} \f$ where \f$ m \f$ is the
117       weighted mean, n is the weighted version of number of data
118       points \f$ \frac{\left(\sum w_i\right)^2}{\sum w_i^2} \f$, and
119       \f$ s^2 \f$ is an estimation of the variance \f$ s^2 = \frac{
120       \sum_i w_i(x_i-m_x)^2 + \sum_i w_i(y_i-m_y)^2 }{ n_x + n_y - 2
121       } \f$. See AveragerWeighted for details.
122       
123       @return t-score. If absolute=true absolute value of t-score
124       is returned
125    */
126    double score(const classifier::Target& target, 
127                 const classifier::DataLookupWeighted1D& value) const; 
128
129    /**
130       Calculates the weighted t-score, i.e. the ratio between
131       difference in mean and standard deviation of this
132       difference. \f$ t = \frac{ m_x - m_y }{
133       \frac{s2}{n_x}+\frac{s2}{n_y}} \f$ where \f$ m \f$ is the
134       weighted mean, n is the weighted version of number of data
135       points and \f$ s2 \f$ is an estimation of the variance \f$ s^2
136       = \frac{ \sum_i w_i(x_i-m_x)^2 + \sum_i w_i(y_i-m_y)^2 }{ n_x
137       + n_y - 2 } \f$. See AveragerWeighted for details.
138     
139       @return t-score if absolute=true absolute value of t-score
140       is returned
141    */
142    double score(const classifier::Target& target, 
143                 const utility::vector& value, 
144                 const utility::vector& weight) const; 
145
146    /**
147       Calculates the weighted t-score, i.e. the ratio between
148       difference in mean and standard deviation of this
149       difference. \f$ t = \frac{ m_x - m_y }{
150       \frac{s2}{n_x}+\frac{s2}{n_y}} \f$ where \f$ m \f$ is the
151       weighted mean, n is the weighted version of number of data
152       points and \f$ s2 \f$ is an estimation of the variance \f$ s^2
153       = \frac{ \sum_i w_i(x_i-m_x)^2 + \sum_i w_i(y_i-m_y)^2 }{ n_x
154       + n_y - 2 } \f$. See AveragerWeighted for details.
155     
156       @param dof double pointer in which approximation of degrees of
157       freedom is returned: pos.n()+neg.n()-2. See AveragerWeighted.
158
159       @return t-score if absolute=true absolute value of t-score
160       is returned
161    */
162    double score(const classifier::Target& target, 
163                 const utility::vector& value, 
164                 const utility::vector& weight,
165                 double* dof=0) const; 
166
167    /**
168       Calcultate t-score from Averager like objects. Requirements for
169       T1 and T2 are: double mean(), double n(), double sum_xx_centered()
170       
171       If \a dof is not a null pointer it is assigned to number of
172       degrees of freedom.
173    */
174    template<typename T1, typename T2> 
175    double score(const T1& pos, const T2& neg, double* dof=0) const;
176
177  private:
178   
179  };
180
181  template<typename T1, typename T2> 
182  double tScore::score(const T1& pos, const T2& neg, double* dof) const
183  {
184    double diff = pos.mean() - neg.mean();
185    if (dof)
186      *dof=pos.n()+neg.n()-2;
187    double s2=( (pos.sum_xx_centered()+neg.sum_xx_centered())/
188                (pos.n()+neg.n()-2));
189    double t=diff/sqrt(s2/pos.n()+s2/neg.n());
190    if (t<0 && absolute_)
191      return -t;
192    return t;
193  }
194
195}}} // of namespace statistics, yat, and theplu
196
197#endif
Note: See TracBrowser for help on using the repository browser.