source: trunk/yat/statistics/tScore.h @ 966

Last change on this file since 966 was 966, checked in by Peter, 14 years ago

fixing some doxygen warnings

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 7.1 KB
Line 
1#ifndef _theplu_yat_statistics_tscore_
2#define _theplu_yat_statistics_tscore_
3
4// $Id: tScore.h 966 2007-10-11 17:01:01Z peter $
5
6/*
7  Copyright (C) 2004, 2005 Peter Johansson
8  Copyright (C) 2006 Jari Häkkinen, Markus Ringnér, Peter Johansson
9  Copyright (C) 2007 Peter Johansson
10
11  This file is part of the yat library, http://trac.thep.lu.se/trac/yat
12
13  The yat library is free software; you can redistribute it and/or
14  modify it under the terms of the GNU General Public License as
15  published by the Free Software Foundation; either version 2 of the
16  License, or (at your option) any later version.
17
18  The yat library is distributed in the hope that it will be useful,
19  but WITHOUT ANY WARRANTY; without even the implied warranty of
20  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21  General Public License for more details.
22
23  You should have received a copy of the GNU General Public License
24  along with this program; if not, write to the Free Software
25  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
26  02111-1307, USA.
27*/
28
29#include "Score.h"
30
31#include <cmath>
32#include <gsl/gsl_cdf.h>
33
34namespace theplu {
35namespace yat {
36  namespace utility {
37    class vector;
38  }
39namespace statistics { 
40
41  ///
42  /// @brief Class for Student's t-test.
43  ///
44  /// See <a href="http://en.wikipedia.org/wiki/Student's_t-test">
45  /// http://en.wikipedia.org/wiki/Student's_t-test</a> for more
46  /// details on the t-test.
47  ///
48  class tScore : public Score
49  {
50 
51  public:
52    /// @brief Constructor, callable without arguments.
53    /// @param absolute if true, the score() functions return the
54    /// absolute value of the t-score (see their documentation).
55    tScore(bool absolute=true);
56
57   
58    /**
59       Calculates the t-score, i.e. the ratio between the difference
60       in means and the standard deviation of this difference: \f$ t =
61       \frac{ m_x - m_y }{s\sqrt{\frac{1}{n_x}+\frac{1}{n_y}}} \f$
62       where \f$ m \f$ is the mean, \f$ n \f$ is the number of data
63       points and \f$ s^2 = \frac{ \sum_i (x_i-m_x)^2 + \sum_i
64       (y_i-m_y)^2 }{ n_x + n_y - 2 } \f$.
65       \param target Target defining the two groups
66       \param value Vector with data points on which calculation is based
67       @return t-score. If absolute=true the absolute value of the
68       t-score is returned.
69    */
70    double score(const classifier::Target& target, 
71                 const utility::vector& value) const; 
72
73    /**
74       Calculates the t-score, i.e. the ratio between the difference
75       in means and the standard deviation of this difference:
76       \f$ t = \frac{ m_x - m_y }
77       {s\sqrt{\frac{1}{n_x}+\frac{1}{n_y}}} \f$ where \f$ m \f$ is the
78       mean, \f$ n \f$ is the number of data points and \f$ s^2 =
79       \frac{ \sum_i (x_i-m_x)^2 + \sum_i (y_i-m_y)^2 }{ n_x + n_y -
80       2 } \f$.
81
82       \param target Target defining the two groups
83       \param value Vector with data points on which calculation is based
84       @param dof double pointer in which approximation of degrees of
85       freedom is returned: pos.n()+neg.n()-2. See AveragerWeighted.
86
87       @return t-score. If absolute=true the absolute value of the
88       t-score is returned.
89    */
90    double score(const classifier::Target& target, 
91                 const utility::vector& value, double* dof) const; 
92
93    /**
94       Calculates the weighted t-score, i.e. the ratio between the
95       difference in means and the standard deviation of this
96       difference: \f$ t = \frac{ m_x - m_y }{
97       s\sqrt{\frac{1}{n_x}+\frac{1}{n_y}}} \f$ where \f$ m \f$ is the
98       weighted mean, n is the weighted version of number of data
99       points \f$ \frac{\left(\sum w_i\right)^2}{\sum w_i^2} \f$, and
100       \f$ s^2 \f$ is an estimation of the variance \f$ s^2 = \frac{
101       \sum_i w_i(x_i-m_x)^2 + \sum_i w_i(y_i-m_y)^2 }{ n_x + n_y - 2
102       } \f$. See AveragerWeighted for details.
103       \param target Target defining the two groups
104       \param value Vector with values/weights on which calculation is based
105       @param dof double pointer in which approximation of degrees of
106       freedom is returned: pos.n()+neg.n()-2. See AveragerWeighted.
107       @return t-score. If absolute=true the absolute value of the
108       t-score is returned.
109       \note NOTE(review): \a dof is defaulted although an overload
110       without \a dof exists, so a direct call with only \a target and
111       \a value on a tScore looks ambiguous - TODO confirm. */
112    double score(const classifier::Target& target, 
113                 const classifier::DataLookupWeighted1D& value,
114                 double* dof=0) const; 
115
116    /**
117       Calculates the weighted t-score, i.e. the ratio between the
118       difference in means and the standard deviation of this
119       difference: \f$ t = \frac{ m_x - m_y }{
120       s\sqrt{\frac{1}{n_x}+\frac{1}{n_y}}} \f$ where \f$ m \f$ is the
121       weighted mean, n is the weighted version of number of data
122       points \f$ \frac{\left(\sum w_i\right)^2}{\sum w_i^2} \f$, and
123       \f$ s^2 \f$ is an estimation of the variance \f$ s^2 = \frac{
124       \sum_i w_i(x_i-m_x)^2 + \sum_i w_i(y_i-m_y)^2 }{ n_x + n_y - 2
125       } \f$. See AveragerWeighted for details.
126       \param target Target defining the two groups
127       \param value Vector with values/weights on which calculation is based
128       @return t-score. If absolute=true the absolute value is returned.
129    */
130    double score(const classifier::Target& target, 
131                 const classifier::DataLookupWeighted1D& value) const; 
132
133    /**
134       Calculates the weighted t-score, i.e. the ratio between the
135       difference in means and the standard deviation of this difference:
136       \f$ t = \frac{ m_x - m_y }{ \sqrt{\frac{s^2}{n_x}+\frac{s^2}{n_y}}} \f$
137       where \f$ m \f$ is the weighted mean, n is the weighted version of
138       number of data points and \f$ s^2 = \frac{ \sum_i w_i(x_i-m_x)^2 +
139       \sum_i w_i(y_i-m_y)^2 }{ n_x + n_y - 2 } \f$ is an estimation of the
140       variance. See AveragerWeighted for details.
141       \param target Target defining the two groups
142       \param value Vector with data values on which calculation is based
143       \param weight Vector with weight associated to \a value
144       @return t-score. If absolute=true the absolute value is returned.
145    */
146    double score(const classifier::Target& target, 
147                 const utility::vector& value, 
148                 const utility::vector& weight) const; 
149
150    /**
151       Calculates the weighted t-score, i.e. the ratio between the
152       difference in means and the standard deviation of this
153       difference: \f$ t = \frac{ m_x - m_y }{
154       \sqrt{\frac{s^2}{n_x}+\frac{s^2}{n_y}}} \f$ where \f$ m \f$ is
155       the weighted mean, n is the weighted version of number of data
156       points and \f$ s^2 \f$ is an estimation of the variance \f$ s^2
157       = \frac{ \sum_i w_i(x_i-m_x)^2 + \sum_i w_i(y_i-m_y)^2 }{ n_x
158       + n_y - 2 } \f$. See AveragerWeighted for details.
159       \param target Target defining the two groups
160       \param value Vector with data values on which calculation is based
161       \param weight Vector with weight associated to \a value
162       @param dof double pointer in which approximation of degrees of
163       freedom is returned: pos.n()+neg.n()-2. See AveragerWeighted.
164       @return t-score. If absolute=true the absolute value of the
165       t-score is returned.
166       \note NOTE(review): \a dof is defaulted although an overload
167       without \a dof exists, so a direct call with only \a target,
168       \a value and \a weight looks ambiguous - TODO confirm. */
169    double score(const classifier::Target& target, 
170                 const utility::vector& value, 
171                 const utility::vector& weight,
172                 double* dof=0) const; 
173
174    /**
175       Calculate t-score from Averager like objects. Requirements for
176       T1 and T2 are: double mean(), double n(), double sum_xx_centered().
177       If \a dof is not a null pointer it is assigned the number of
178       degrees of freedom, pos.n()+neg.n()-2.
179       \return t-score; the absolute value if absolute=true
180    */
181    template<typename T1, typename T2> 
182    double score(const T1& pos, const T2& neg, double* dof=0) const;
183
184  private:
185   
186  };
187
188  template<typename T1, typename T2> 
189  double tScore::score(const T1& pos, const T2& neg, double* dof) const
190  {
191    double diff = pos.mean() - neg.mean();
192    if (dof)
193      *dof=pos.n()+neg.n()-2;
194    double s2=( (pos.sum_xx_centered()+neg.sum_xx_centered())/
195                (pos.n()+neg.n()-2));
196    double t=diff/sqrt(s2/pos.n()+s2/neg.n());
197    if (t<0 && absolute_)
198      return -t;
199    return t;
200  }
201
202}}} // of namespace statistics, yat, and theplu
203
204#endif
Note: See TracBrowser for help on using the repository browser.