source: trunk/yat/normalizer/qQuantileNormalizer.h @ 1716

Last change on this file since 1716 was 1716, checked in by Jari Häkkinen, 15 years ago

Addresses #425. Made class Partitioner private.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 4.0 KB
Line 
1#ifndef _theplu_yat_normalizer_qquantile_normalizer_
2#define _theplu_yat_normalizer_qquantile_normalizer_
3
4/*
5  Copyright (C) 2009 Jari Häkkinen
6
7  This file is part of the yat library, http://dev.thep.lu.se/yat
8
9  The yat library is free software; you can redistribute it and/or
10  modify it under the terms of the GNU General Public License as
11  published by the Free Software Foundation; either version 3 of the
12  License, or (at your option) any later version.
13
14  The yat library is distributed in the hope that it will be useful,
15  but WITHOUT ANY WARRANTY; without even the implied warranty of
16  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  General Public License for more details.
18
19  You should have received a copy of the GNU General Public License
20  along with yat. If not, see <http://www.gnu.org/licenses/>.
21*/
22
23#include "yat/utility/Vector.h"
24
25namespace theplu {
26namespace yat {
27namespace utility {
28  class Matrix; // forward declaration; used in this header only by reference (operator())
29  class VectorBase; // forward declaration; used in this header only by const reference (constructors)
30}
31namespace normalizer {
32
33  /**
34     \brief Perform Q-quantile normalization
35
36     After a Q-quantile normalization each column has approximately
37     the same distribution of data (the Q-quantiles are the
38     same). Also, within each column the rank of an element is not
39     changed.
40
41     There is currently no weighted version of qQuantileNormalizer
42
43     The normalization goes like this
44     - Data is not assumed to be sorted.
45     - Partition the target data in N parts.
46     - Calculate the arithmetic mean for each part, the mean is
47       assigned to the mid point of each part.
48     - Do the same for the data to be transformed (called source
49       here).
50     - For each part, calculate the difference between the target and
51       the source. Now we have N differences d_i with associated rank
52       (midpoint of each part).
53     - Create a cubic spline fit to this difference vector d. The
54       resulting curve is used to recalculate all column values.
55       - Use the cubic spline fit for values within the cubic spline
56         fit range [midpoint 1st part, midpoint last part].
57       - For data outside the cubic spline fit use linear
58         extrapolation, i.e., a constant shift. d_first for points
59         below fit range, and d_last for points above fit range.
60
61     \since New in yat 0.5
62   */
63  class qQuantileNormalizer
64  {
65  public:
66    /**
67       \brief Create a normalizer from \a target data and the number of parts \a Q.
68
69       \a Q is the number of parts and must be within \f$ [2,N] \f$
70       where \f$ N \f$ is the total number of data points in the
71       target. However, if \f$ N \f$ is larger than the number of points
72       in the data to be normalized the behaviour of the code is
73       undefined. Keep \f$ N \f$ equal to or less than the smallest
74       number of data points in the target or each data set to be
75       normalized against a given target. (Review note: the last two \f$ N \f$ presumably refer to \a Q; please confirm.)
76    */
77    qQuantileNormalizer(const utility::VectorBase& target, unsigned int Q);
78
79    /**
80       \brief Perform the Q-quantile normalization.
81
82       It is possible to normalize "in place"; it is permissible for
83       \a matrix and \a result to reference the same Matrix.
84
85       \note The dimensions of \a matrix and \a result must match.
86     */
87    void operator()(const utility::Matrix& matrix,
88                    utility::Matrix& result) const;
89
90  private:
91
92  /**
93     \brief Partition a vector of data into equal sizes.
94
95     The class also calculates the average of each part and assigns
96     the average to the mid point of each part. The midpoint is a
97     double, i.e., it is not forced to be an integer index.
98  */
99  class Partitioner
100  {
101  public:
102    /**
103       \brief Create the partition and perform required calculations.
104    */
105    Partitioner(const utility::VectorBase& vec, unsigned int N);
106
107    /**
108       \brief Return the averages for each part.
109
110       \return The average vector.
111    */
112    const utility::Vector& averages(void) const;
113
114    /**
115       \brief Return the mid point for each partition.
116
117       \return The index vector.
118    */
119    const utility::Vector& index(void) const;
120
121    /**
122       \return The number of parts.
123    */
124    size_t size(void) const;
125
126  private:
127    utility::Vector average_;
128    utility::Vector index_;
129  };
130
131
132    Partitioner target_;
133  };
134
135}}} // end of namespace normalizer, yat and theplu
136
137#endif
Note: See TracBrowser for help on using the repository browser.