source: trunk/yat/statistics/SAMScore.h @ 1437

Last change on this file since 1437 was 1437, checked in by Peter, 13 years ago

merge patch release 0.4.2 to trunk. Delta 0.4.2-0.4.1

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 3.8 KB
Line 
1#ifndef _theplu_yat_statistics_sam_score_
2#define _theplu_yat_statistics_sam_score_
3
4// $Id: SAMScore.h 1437 2008-08-25 17:55:00Z peter $
5
6/*
7  Copyright (C) 2006, 2007 Jari Häkkinen, Peter Johansson
8  Copyright (C) 2008 Peter Johansson
9
10  This file is part of the yat library, http://dev.thep.lu.se/yat
11
12  The yat library is free software; you can redistribute it and/or
13  modify it under the terms of the GNU General Public License as
14  published by the Free Software Foundation; either version 2 of the
15  License, or (at your option) any later version.
16
17  The yat library is distributed in the hope that it will be useful,
18  but WITHOUT ANY WARRANTY; without even the implied warranty of
19  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20  General Public License for more details.
21
22  You should have received a copy of the GNU General Public License
23  along with this program; if not, write to the Free Software
24  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
25  02111-1307, USA.
26*/
27
28#include "Score.h"
29
30#include <cmath>
31
32namespace theplu {
33namespace yat {
34  namespace utility {
35    class VectorBase;
36  }
37  namespace classifier {
38    class DataLookWeighted1D;
39  }
40namespace statistics { 
41
42  /**
43     @brief Class for score used in Significance Analysis of
44     Microarrays (SAM).
45 
46     The score is similar to the Student t-test but with an added
47     fudge factor in denominator to avoid groups with small variance
48     getting a large score. \f$ \frac{m_x-m_y}{s+s_0} \f$
49
50     see http://www.pnas.org/cgi/content/abstract/98/9/5116 for
51     details
52  */   
53  class SAMScore : public Score
54  {
55
56  public:
57    ///
58    /// @param s0 \f$ s_0 \f$ is a fudge factor
59    /// @param absolute if true max(score, -score) is used
60    ///
61    SAMScore(const double s0, bool absolute=true);
62
63    /**
64       \f$ \frac{m_x-m_y}{s+s_0} \f$ where \f$ m = \frac{1}{n_x}\sum
65       x_i \f$, \f$ s^2 = \left(\frac{1}{n_x}+\frac{1}{n_y} \right)
66       \frac{\sum (x_i-m_x)^2 + \sum(y_i-m_y)^2}{n_x+n_y-2} \f$, and
67       \f$ s_0 \f$ is the fudge factor.
68
69       @return SAM score. If absolute=true absolute value of t-score
70       is returned
71    */
72    double score(const classifier::Target& target, 
73                 const utility::VectorBase& value) const; 
74
75    /**
76       \f$ \frac{m_x-m_y}{s+s_0} \f$ where \f$ m = \frac{\sum
77       w_ix_i}{w_i} \f$, \f$ s_0 \f$ is the fudge factor, and \f$ s^2
78       = \left(\frac{1}{n_x}+\frac{1}{n_y} \right) \frac{\sum
79       w_i(x_i-m_x)^2 + \sum w_i(y_i-m_y)^2}{n_x+n_y-2} \f$ where \f$
80       n \f$ is weighted version of number of data points \f$
81       \frac{\left(\sum w_i\right)^2}{\sum w_i^2} \f$.
82
83       @return weighted version of SAM score. If absolute=true
84       absolute value is returned
85    */
86    double score(const classifier::Target& target, 
87                 const classifier::DataLookupWeighted1D& value) const;         
88
89    /**
90       \f$ \frac{m_x-m_y}{s+s_0} \f$ where \f$ m = \frac{\sum
91       w_ix_i}{w_i} \f$, \f$ s_0 \f$ is the fudge factor, and \f$ s^2
92       = \left(\frac{1}{n_x}+\frac{1}{n_y} \right) \frac{\sum
93       w_i(x_i-m_x)^2 + \sum w_i(y_i-m_y)^2}{n_x+n_y-2} \f$ where \f$
94       n \f$ is weighted version of number of data points \f$
95       \frac{\left(\sum w_i\right)^2}{\sum w_i^2} \f$.
96
97       @return weighted version of SAM score. If absolute=true
98       absolute value is returned
99    */
100    double score(const classifier::Target& target, 
101                 const utility::VectorBase& value, 
102                 const utility::VectorBase& weight) const;         
103  private:
104    double s0_;
105
106    template<class T> 
107    double score(const T& positive, const T& negative) const
108    {
109      if(positive.n()+negative.n()<=2) 
110        return 0;
111      double diff = positive.mean() - negative.mean();
112      double s2 = ( (1.0/positive.n()+1.0/negative.n()) * 
113                    (positive.sum_xx_centered()+negative.sum_xx_centered()) /
114                    (positive.n()+negative.n()-2) );
115      if (diff<0 && absolute_)
116        return -diff/(sqrt(s2)+s0_);
117      return diff/(sqrt(s2)+s0_);
118    }
119  };
120
121}}} // of namespace statistics, yat, and theplu
122
123#endif
Note: See TracBrowser for help on using the repository browser.