source: trunk/yat/statistics/tScore.h @ 779

Last change on this file since 779 was 779, checked in by Peter, 15 years ago

Refs #101

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 6.2 KB
Line 
1#ifndef _theplu_yat_statistics_tscore_
2#define _theplu_yat_statistics_tscore_
3
4// $Id: tScore.h 779 2007-03-05 18:58:30Z peter $
5
6/*
7  Copyright (C) The authors contributing to this file.
8
9  This file is part of the yat library, http://lev.thep.lu.se/trac/yat
10
11  The yat library is free software; you can redistribute it and/or
12  modify it under the terms of the GNU General Public License as
13  published by the Free Software Foundation; either version 2 of the
14  License, or (at your option) any later version.
15
16  The yat library is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  General Public License for more details.
20
21  You should have received a copy of the GNU General Public License
22  along with this program; if not, write to the Free Software
23  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
24  02111-1307, USA.
25*/
26
27#include "Score.h"
28
29#include <cmath>
30#include <gsl/gsl_cdf.h>
31
32namespace theplu {
33namespace yat {
34  namespace utility {
35    class vector;
36  }
37namespace statistics { 
38
39  ///
40  /// @brief Class for Fisher's t-test.
41  ///   
42  /// See <a href="http://en.wikipedia.org/wiki/Student's_t-test">
43  /// http://en.wikipedia.org/wiki/Student's_t-test</a> for more
44  /// details on the t-test.
45  ///
46  class tScore : public Score
47  {
48 
49  public:
50    ///
51    /// @brief Default Constructor.
52    ///
53    tScore(bool absolute=true);
54
55   
56    /**
57       Calculates the value of t-score, i.e. the ratio between
58       difference in mean and standard deviation of this
59       difference. \f$ t = \frac{ m_x - m_y }
60       {s\sqrt{\frac{1}{n_x}+\frac{1}{n_y}}} \f$ where \f$ m \f$ is the
61       mean, \f$ n \f$ is the number of data points and \f$ s^2 =
62       \frac{ \sum_i (x_i-m_x)^2 + \sum_i (y_i-m_y)^2 }{ n_x + n_y -
63       2 } \f$
64
65       @return t-score. If absolute=true absolute value of t-score
66       is returned
67    */
68    double score(const classifier::Target& target, 
69                 const utility::vector& value) const; 
70
71    /**
72       Calculates the value of t-score, i.e. the ratio between
73       difference in mean and standard deviation of this
74       difference. \f$ t = \frac{ m_x - m_y }
75       {s\sqrt{\frac{1}{n_x}+\frac{1}{n_y}}} \f$ where \f$ m \f$ is the
76       mean, \f$ n \f$ is the number of data points and \f$ s^2 =
77       \frac{ \sum_i (x_i-m_x)^2 + \sum_i (y_i-m_y)^2 }{ n_x + n_y -
78       2 } \f$
79       
80       @param dof double pointer in which approximation of degrees of
81       freedom is returned: pos.n()+neg.n()-2. See AveragerWeighted.
82
83       @return t-score. If absolute=true absolute value of t-score
84       is returned
85    */
86    double score(const classifier::Target& target, 
87                 const utility::vector& value, double* dof) const; 
88
89    /**
90       Calculates the weighted t-score, i.e. the ratio between
91       difference in mean and standard deviation of this
92       difference. \f$ t = \frac{ m_x - m_y }{
93       s\sqrt{\frac{1}{n_x}+\frac{1}{n_y}}} \f$ where \f$ m \f$ is the
94       weighted mean, n is the weighted version of number of data
95       points \f$ \frac{\left(\sum w_i\right)^2}{\sum w_i^2} \f$, and
96       \f$ s^2 \f$ is an estimation of the variance \f$ s^2 = \frac{
97       \sum_i w_i(x_i-m_x)^2 + \sum_i w_i(y_i-m_y)^2 }{ n_x + n_y - 2
98       } \f$. See AveragerWeighted for details.
99       
100       @param dof double pointer in which approximation of degrees of
101       freedom is returned: pos.n()+neg.n()-2. See AveragerWeighted.
102
103       @return t-score. If absolute=true absolute value of t-score
104       is returned
105    */
106    double score(const classifier::Target& target, 
107                 const classifier::DataLookupWeighted1D& value,
108                 double* dof=0) const; 
109
110    /**
111       Calculates the weighted t-score, i.e. the ratio between
112       difference in mean and standard deviation of this
113       difference. \f$ t = \frac{ m_x - m_y }{
114       s\sqrt{\frac{1}{n_x}+\frac{1}{n_y}}} \f$ where \f$ m \f$ is the
115       weighted mean, n is the weighted version of number of data
116       points \f$ \frac{\left(\sum w_i\right)^2}{\sum w_i^2} \f$, and
117       \f$ s^2 \f$ is an estimation of the variance \f$ s^2 = \frac{
118       \sum_i w_i(x_i-m_x)^2 + \sum_i w_i(y_i-m_y)^2 }{ n_x + n_y - 2
119       } \f$. See AveragerWeighted for details.
120       
121       @return t-score. If absolute=true absolute value of t-score
122       is returned
123    */
124    double score(const classifier::Target& target, 
125                 const classifier::DataLookupWeighted1D& value) const; 
126
127    /**
128       Calculates the weighted t-score, i.e. the ratio between
129       difference in mean and standard deviation of this
130       difference. \f$ t = \frac{ m_x - m_y }{
131       \frac{s2}{n_x}+\frac{s2}{n_y}} \f$ where \f$ m \f$ is the
132       weighted mean, n is the weighted version of number of data
133       points and \f$ s2 \f$ is an estimation of the variance \f$ s^2
134       = \frac{ \sum_i w_i(x_i-m_x)^2 + \sum_i w_i(y_i-m_y)^2 }{ n_x
135       + n_y - 2 } \f$. See AveragerWeighted for details.
136     
137       @return t-score if absolute=true absolute value of t-score
138       is returned
139    */
140    double score(const classifier::Target& target, 
141                 const utility::vector& value, 
142                 const utility::vector& weight) const; 
143
144    /**
145       Calculates the weighted t-score, i.e. the ratio between
146       difference in mean and standard deviation of this
147       difference. \f$ t = \frac{ m_x - m_y }{
148       \frac{s2}{n_x}+\frac{s2}{n_y}} \f$ where \f$ m \f$ is the
149       weighted mean, n is the weighted version of number of data
150       points and \f$ s2 \f$ is an estimation of the variance \f$ s^2
151       = \frac{ \sum_i w_i(x_i-m_x)^2 + \sum_i w_i(y_i-m_y)^2 }{ n_x
152       + n_y - 2 } \f$. See AveragerWeighted for details.
153     
154       @param dof double pointer in which approximation of degrees of
155       freedom is returned: pos.n()+neg.n()-2. See AveragerWeighted.
156
157       @return t-score if absolute=true absolute value of t-score
158       is returned
159    */
160    double score(const classifier::Target& target, 
161                 const utility::vector& value, 
162                 const utility::vector& weight,
163                 double* dof=0) const; 
164
165  private:
166
167    template<class T> 
168    double score(const T& pos, const T& neg, double* dof) const
169    {
170      double diff = pos.mean() - neg.mean();
171      if (dof)
172        *dof=pos.n()+neg.n()-2;
173      double s2=( (pos.sum_xx_centered()+neg.sum_xx_centered())/
174                  (pos.n()+neg.n()-2));
175      double t=diff/sqrt(s2/pos.n()+s2/(neg.n()));
176      if (t<0 && absolute_)
177        return -t;
178      return t;
179    }
180  };
181
182}}} // of namespace statistics, yat, and theplu
183
184#endif
Note: See TracBrowser for help on using the repository browser.