source: trunk/yat/normalizer/qQuantileNormalizer.h @ 1712

Last change on this file since 1712 was 1712, checked in by Jari Häkkinen, 12 years ago

Addresses #425. Added linear extrapolation at ends. Added documentation. Cleanup of code.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 4.0 KB
Line 
1#ifndef _theplu_yat_normalizer_qquantile_normalizer_
2#define _theplu_yat_normalizer_qquantile_normalizer_
3
4/*
5  Copyright (C) 2009 Jari Häkkinen
6
7  This file is part of the yat library, http://dev.thep.lu.se/yat
8
9  The yat library is free software; you can redistribute it and/or
10  modify it under the terms of the GNU General Public License as
11  published by the Free Software Foundation; either version 3 of the
12  License, or (at your option) any later version.
13
14  The yat library is distributed in the hope that it will be useful,
15  but WITHOUT ANY WARRANTY; without even the implied warranty of
16  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  General Public License for more details.
18
19  You should have received a copy of the GNU General Public License
20  along with yat. If not, see <http://www.gnu.org/licenses/>.
21*/
22
23#include "yat/utility/Vector.h"
24
25namespace theplu {
26namespace yat {
27namespace utility { // forward declarations only; the full definitions are not needed here
28  class Matrix; // used only by reference in qQuantileNormalizer::operator()
29  class VectorBase; // used only by const reference in the constructors below
30}
31namespace normalizer {
32
33  /**
34     \brief Partition a vector of data into parts of equal size.
35
36     For each part the class calculates the average and assigns that
37     average to the mid point of the part. The mid point is stored as
38     a double, i.e., it is not forced to be an integer index.
39  */
40  class Partitioner
41  {
42  public:
43    /**
44       \brief Partition \a vec into \a N parts and perform the required calculations.
45    */
46    Partitioner(const utility::VectorBase& vec, unsigned int N);
47
48    /**
49       \brief Return the average of each part.
50
51       \return Vector holding one average per part.
52    */
53    const utility::Vector& averages(void) const;
54
55    /**
56       \brief Return the mid point of each part.
57
58       \return Vector of mid points; entries are doubles, not necessarily integer indices.
59    */
60    const utility::Vector& index(void) const;
61
62    /**
63       \return The number of parts.
64    */
65    size_t size(void) const;
66
67  private:
68    utility::Vector average_; // presumably the per-part averages returned by averages() - confirm in the .cc
69    utility::Vector index_; // presumably the per-part mid points returned by index() - confirm in the .cc
70  };
71
72
73  /**
74     \brief Perform Q-quantile normalization
75
76     After a Q-quantile normalization each column has approximately
77     the same distribution of data (the Q-quantiles are the
78     same). Also, within each column the rank of an element is not
79     changed.
80
81     There is currently no weighted version of qQuantileNormalizer.
82
83     The normalization goes like this
84     - Data is not assumed to be sorted.
85     - Partition the target data in N parts.
86     - Calculate the arithmetic mean for each part, the mean is
87       assigned to the mid point of each part.
88     - Do the same for the data to be transformed (called source
89       here).
90     - For each part, calculate the difference between the target and
91       the source. Now we have N differences d_i with associated rank
92       (midpoint of each part).
93     - Create a cubic spline fit to this difference vector d. The
94       resulting curve is used to recalculate all column values.
95       - Use the cubic spline fit for values within the cubic spline
96         fit range [midpoint 1st part, midpoint last part].
97       - For data outside the cubic spline fit use linear
98         extrapolation, i.e., a constant shift. d_first for points
99         below fit range, and d_last for points above fit range.
100
101     \since New in yat 0.5
102   */
103  class qQuantileNormalizer
104  {
105  public:
106    /**
107       \brief Create a normalizer that normalizes against \a target.
108
109       \a Q is the number of parts and must be within \f$ [2,N] \f$ where
110       \f$ N \f$ is the total number of data points in the target. If
111       \f$ N \f$ is larger than the number of points in the data to be
112       normalized the behaviour is undefined, so keep \f$ N \f$ equal to or
113       less than the smallest number of data points in the target or in
114       each data set to be normalized against a given target.
115       NOTE(review): these limits presumably apply to \a Q, not \f$ N \f$ - confirm.
116    */
117    qQuantileNormalizer(const utility::VectorBase& target, unsigned int Q);
118
119    /**
120       \brief Perform the Q-quantile normalization.
121
122       It is possible to normalize "in place"; it is permissible for
123       \a matrix and \a result to reference the same Matrix.
124
125       \note The dimensions of \a matrix and \a result must match.
126     */
127    void operator()(const utility::Matrix& matrix,
128                    utility::Matrix& result) const;
129
130  private:
131    Partitioner target_; // partition of the target data; presumably built in the constructor - confirm in the .cc
132  };
133
134}}} // end of namespace normalizer, yat and theplu
135
136#endif
Note: See TracBrowser for help on using the repository browser.