source: trunk/yat/statistics/SAMScore.h @ 1703

Last change on this file since 1703 was 1487, checked in by Jari Häkkinen, 13 years ago

Addresses #436. GPL license copy reference should also be updated.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 3.7 KB
Line 
1#ifndef _theplu_yat_statistics_sam_score_
2#define _theplu_yat_statistics_sam_score_
3
4// $Id: SAMScore.h 1487 2008-09-10 08:41:36Z jari $
5
6/*
7  Copyright (C) 2006, 2007 Jari Häkkinen, Peter Johansson
8  Copyright (C) 2008 Peter Johansson
9
10  This file is part of the yat library, http://dev.thep.lu.se/yat
11
12  The yat library is free software; you can redistribute it and/or
13  modify it under the terms of the GNU General Public License as
14  published by the Free Software Foundation; either version 3 of the
15  License, or (at your option) any later version.
16
17  The yat library is distributed in the hope that it will be useful,
18  but WITHOUT ANY WARRANTY; without even the implied warranty of
19  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20  General Public License for more details.
21
22  You should have received a copy of the GNU General Public License
23  along with yat. If not, see <http://www.gnu.org/licenses/>.
24*/
25
26#include "Score.h"
27
28#include <cmath>
29
30namespace theplu {
31namespace yat {
// Forward declarations of the types that only appear by reference in
// the SAMScore interface below, so their full headers need not be
// included here.
namespace utility {
  class VectorBase;
}
namespace classifier {
  // Must match the type named in the weighted score() overload.
  class DataLookupWeighted1D;
}
38namespace statistics { 
39
40  /**
41     @brief Class for score used in Significance Analysis of
42     Microarrays (SAM).
43 
44     The score is similar to the Student t-test but with an added
45     fudge factor in denominator to avoid groups with small variance
46     getting a large score. \f$ \frac{m_x-m_y}{s+s_0} \f$
47
48     see http://www.pnas.org/cgi/content/abstract/98/9/5116 for
49     details
50  */   
51  class SAMScore : public Score
52  {
53
54  public:
55    ///
56    /// @param s0 \f$ s_0 \f$ is a fudge factor
57    /// @param absolute if true max(score, -score) is used
58    ///
59    SAMScore(const double s0, bool absolute=true);
60
61    /**
62       \f$ \frac{m_x-m_y}{s+s_0} \f$ where \f$ m = \frac{1}{n_x}\sum
63       x_i \f$, \f$ s^2 = \left(\frac{1}{n_x}+\frac{1}{n_y} \right)
64       \frac{\sum (x_i-m_x)^2 + \sum(y_i-m_y)^2}{n_x+n_y-2} \f$, and
65       \f$ s_0 \f$ is the fudge factor.
66
67       @return SAM score. If absolute=true absolute value of t-score
68       is returned
69    */
70    double score(const classifier::Target& target, 
71                 const utility::VectorBase& value) const; 
72
73    /**
74       \f$ \frac{m_x-m_y}{s+s_0} \f$ where \f$ m = \frac{\sum
75       w_ix_i}{w_i} \f$, \f$ s_0 \f$ is the fudge factor, and \f$ s^2
76       = \left(\frac{1}{n_x}+\frac{1}{n_y} \right) \frac{\sum
77       w_i(x_i-m_x)^2 + \sum w_i(y_i-m_y)^2}{n_x+n_y-2} \f$ where \f$
78       n \f$ is weighted version of number of data points \f$
79       \frac{\left(\sum w_i\right)^2}{\sum w_i^2} \f$.
80
81       @return weighted version of SAM score. If absolute=true
82       absolute value is returned
83    */
84    double score(const classifier::Target& target, 
85                 const classifier::DataLookupWeighted1D& value) const;         
86
87    /**
88       \f$ \frac{m_x-m_y}{s+s_0} \f$ where \f$ m = \frac{\sum
89       w_ix_i}{w_i} \f$, \f$ s_0 \f$ is the fudge factor, and \f$ s^2
90       = \left(\frac{1}{n_x}+\frac{1}{n_y} \right) \frac{\sum
91       w_i(x_i-m_x)^2 + \sum w_i(y_i-m_y)^2}{n_x+n_y-2} \f$ where \f$
92       n \f$ is weighted version of number of data points \f$
93       \frac{\left(\sum w_i\right)^2}{\sum w_i^2} \f$.
94
95       @return weighted version of SAM score. If absolute=true
96       absolute value is returned
97    */
98    double score(const classifier::Target& target, 
99                 const utility::VectorBase& value, 
100                 const utility::VectorBase& weight) const;         
101  private:
102    double s0_;
103
104    template<class T> 
105    double score(const T& positive, const T& negative) const
106    {
107      if(positive.n()+negative.n()<=2) 
108        return 0;
109      double diff = positive.mean() - negative.mean();
110      double s2 = ( (1.0/positive.n()+1.0/negative.n()) * 
111                    (positive.sum_xx_centered()+negative.sum_xx_centered()) /
112                    (positive.n()+negative.n()-2) );
113      if (diff<0 && absolute_)
114        return -diff/(sqrt(s2)+s0_);
115      return diff/(sqrt(s2)+s0_);
116    }
117  };
118
119}}} // of namespace statistics, yat, and theplu
120
121#endif
Note: See TracBrowser for help on using the repository browser.