#ifndef _theplu_yat_statistics_tscore_ #define _theplu_yat_statistics_tscore_ // $Id: tScore.h 1487 2008-09-10 08:41:36Z jari$ /* Copyright (C) 2004, 2005 Peter Johansson Copyright (C) 2006 Jari Häkkinen, Peter Johansson, Markus Ringnér Copyright (C) 2007 Jari Häkkinen, Peter Johansson Copyright (C) 2008 Peter Johansson This file is part of the yat library, http://dev.thep.lu.se/yat The yat library is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. The yat library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with yat. If not, see . */ #include "Score.h" #include #include namespace theplu { namespace yat { namespace utility { class VectorBase; } namespace statistics { /// /// @brief Class for Fisher's t-test. /// /// See /// http://en.wikipedia.org/wiki/Student's_t-test for more /// details on the t-test. /// class tScore : public Score { public: /// /// @brief Default Constructor. /// tScore(bool absolute=true); /** Calculates the value of t-score, i.e. the ratio between difference in mean and standard deviation of this difference. \f$t = \frac{ m_x - m_y } {s\sqrt{\frac{1}{n_x}+\frac{1}{n_y}}} \f$ where \f$m \f$ is the mean, \f$n \f$ is the number of data points and \f$s^2 = \frac{ \sum_i (x_i-m_x)^2 + \sum_i (y_i-m_y)^2 }{ n_x + n_y - 2 } \f$ @return t-score. If absolute=true absolute value of t-score is returned */ double score(const classifier::Target& target, const utility::VectorBase& value) const; /** Calculates the value of t-score, i.e. the ratio between difference in mean and standard deviation of this difference. \f$t = \frac{ m_x - m_y } {s\sqrt{\frac{1}{n_x}+\frac{1}{n_y}}} \f$ where \f$m \f$ is the mean, \f$n \f$ is the number of data points and \f$s^2 = \frac{ \sum_i (x_i-m_x)^2 + \sum_i (y_i-m_y)^2 }{ n_x + n_y - 2 } \f$ \param target Target defining the two groups \param value Vector with data points on which calculation is based @param dof double pointer in which approximation of degrees of freedom is returned: pos.n()+neg.n()-2. See AveragerWeighted. @return t-score. If absolute=true absolute value of t-score is returned */ double score(const classifier::Target& target, const utility::VectorBase& value, double* dof) const; /** Calculates the weighted t-score, i.e. the ratio between difference in mean and standard deviation of this difference. \f$t = \frac{ m_x - m_y }{ s\sqrt{\frac{1}{n_x}+\frac{1}{n_y}}} \f$ where \f$m \f$ is the weighted mean, n is the weighted version of number of data points \f$\frac{\left(\sum w_i\right)^2}{\sum w_i^2} \f$, and \f$s^2 \f$ is an estimation of the variance \f$s^2 = \frac{ \sum_i w_i(x_i-m_x)^2 + \sum_i w_i(y_i-m_y)^2 }{ n_x + n_y - 2 } \f$. See AveragerWeighted for details. \param target Target defining the two groups \param value Vector with values/weights on which calculation is based @param dof double pointer in which approximation of degrees of freedom is returned: pos.n()+neg.n()-2. See AveragerWeighted. @return t-score. If absolute=true absolute value of t-score is returned */ double score(const classifier::Target& target, const classifier::DataLookupWeighted1D& value, double* dof=0) const; /** Calculates the weighted t-score, i.e. the ratio between difference in mean and standard deviation of this difference. \f$t = \frac{ m_x - m_y }{ s\sqrt{\frac{1}{n_x}+\frac{1}{n_y}}} \f$ where \f$m \f$ is the weighted mean, n is the weighted version of number of data points \f$\frac{\left(\sum w_i\right)^2}{\sum w_i^2} \f$, and \f$s^2 \f$ is an estimation of the variance \f$s^2 = \frac{ \sum_i w_i(x_i-m_x)^2 + \sum_i w_i(y_i-m_y)^2 }{ n_x + n_y - 2 } \f$. See AveragerWeighted for details. @return t-score. If absolute=true absolute value of t-score is returned */ double score(const classifier::Target& target, const classifier::DataLookupWeighted1D& value) const; /** Calculates the weighted t-score, i.e. the ratio between difference in mean and standard deviation of this difference. \f$t = \frac{ m_x - m_y }{ \frac{s2}{n_x}+\frac{s2}{n_y}} \f$ where \f$m \f$ is the weighted mean, n is the weighted version of number of data points and \f$s2 \f$ is an estimation of the variance \f$s^2 = \frac{ \sum_i w_i(x_i-m_x)^2 + \sum_i w_i(y_i-m_y)^2 }{ n_x + n_y - 2 } \f$. See AveragerWeighted for details. @return t-score if absolute=true absolute value of t-score is returned */ double score(const classifier::Target& target, const utility::VectorBase& value, const utility::VectorBase& weight) const; /** Calculates the weighted t-score, i.e. the ratio between difference in mean and standard deviation of this difference. \f$t = \frac{ m_x - m_y }{ \frac{s2}{n_x}+\frac{s2}{n_y}} \f$ where \f$m \f$ is the weighted mean, n is the weighted version of number of data points and \f$s2 \f$ is an estimation of the variance \f$s^2 = \frac{ \sum_i w_i(x_i-m_x)^2 + \sum_i w_i(y_i-m_y)^2 }{ n_x + n_y - 2 } \f$. See AveragerWeighted for details. \param target Target defining the two groups \param value Vector with data values on which calculation is based \param weight Vector with weight associated to \a value @param dof double pointer in which approximation of degrees of freedom is returned: pos.n()+neg.n()-2. See AveragerWeighted. @return t-score if absolute=true absolute value of t-score is returned */ double score(const classifier::Target& target, const utility::VectorBase& value, const utility::VectorBase& weight, double* dof=0) const; /** Calcultate t-score from Averager like objects. Requirements for T1 and T2 are: double mean(), double n(), double sum_xx_centered() If \a dof is not a null pointer it is assigned to number of degrees of freedom. */ template double score(const T1& pos, const T2& neg, double* dof=0) const; private: }; template double tScore::score(const T1& pos, const T2& neg, double* dof) const { double diff = pos.mean() - neg.mean(); if (dof) *dof=pos.n()+neg.n()-2; double s2=( (pos.sum_xx_centered()+neg.sum_xx_centered())/ (pos.n()+neg.n()-2)); double t=diff/sqrt(s2/pos.n()+s2/neg.n()); if (t<0 && absolute_) return -t; return t; } }}} // of namespace statistics, yat, and theplu #endif