source: trunk/yat/normalizer/qQuantileNormalizer.cc @ 1735

Last change on this file since 1735 was 1735, checked in by Jari Häkkinen, 13 years ago

Addresses #425. Taking care of some special cases. No iterative test is need.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 4.6 KB
Line 
1// $Id: qQuantileNormalizer.cc 1735 2009-01-16 21:33:53Z jari $
2
3/*
4  Copyright (C) 2009 Jari Häkkinen
5
6  This file is part of the yat library, http://dev.thep.lu.se/yat
7
8  The yat library is free software; you can redistribute it and/or
9  modify it under the terms of the GNU General Public License as
10  published by the Free Software Foundation; either version 3 of the
11  License, or (at your option) any later version.
12
13  The yat library is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  General Public License for more details.
17
18  You should have received a copy of the GNU General Public License
19  along with yat. If not, see <http://www.gnu.org/licenses/>.
20*/
21
22#include "qQuantileNormalizer.h"
23
24#include "yat/regression/CSplineInterpolation.h"
25#include "yat/statistics/Averager.h"
26#include "yat/utility/Matrix.h"
27#include "yat/utility/Vector.h"
28#include "yat/utility/VectorBase.h"
29
30#include <algorithm>
31#include <cassert>
32
33namespace theplu {
34namespace yat {
35namespace normalizer {
36
37
38  qQuantileNormalizer::Partitioner::Partitioner(const utility::VectorBase& vec,
39                                                unsigned int N)
40    : average_(utility::Vector(N)), index_(utility::Vector(N))
41  {
42    assert(N>1);
43    assert(N<=vec.size());
44    double range=static_cast<double>(vec.size())/N;
45    assert(range);
46    utility::Vector sortedvec(vec);
47    std::sort(sortedvec.begin(),sortedvec.end());
48    unsigned int start=0;
49    for (unsigned int i=0; i<N; ++i) {
50      unsigned int end = ( i==(N-1) ? sortedvec.size() :
51                           static_cast<unsigned int>((i+1)*range) );
52      statistics::Averager av;
53      for (unsigned int r=start; r<end; ++r)
54        av.add(sortedvec(r));
55      average_(i) = av.mean();
56      index_(i)   = 0.5*(end+start-1);
57      start=end;
58    }
59  }
60
61
62  const utility::Vector& qQuantileNormalizer::Partitioner::averages(void) const
63  {
64    return average_;
65  }
66
67
68  const utility::Vector& qQuantileNormalizer::Partitioner::index(void) const
69  {
70    return index_;
71  }
72
73
74  size_t qQuantileNormalizer::Partitioner::size(void) const
75  {
76    return average_.size();
77  }
78
79
80  qQuantileNormalizer::qQuantileNormalizer(const utility::VectorBase& target,
81                                           unsigned int Q)
82    : target_(Partitioner(target,Q))
83  {
84    assert(Q>2); // required by cspline fit
85  }
86
87
88  void qQuantileNormalizer::operator()(const utility::Matrix& matrix,
89                                       utility::Matrix& result) const
90  {
91    assert(matrix.rows()    == result.rows());
92    assert(matrix.columns() == result.columns());
93    assert(matrix.rows()    >= target_.size());
94
95    std::vector<std::vector<size_t> > sorted_index(matrix.rows());
96    for (size_t column=0; column<matrix.columns(); ++column)
97      utility::sort_index(sorted_index[column],
98                          matrix.column_const_view(column));
99
100    for (size_t column=0; column<matrix.columns(); ++column) {
101      Partitioner source(matrix.column_const_view(column),target_.size());
102      utility::Vector diff(source.averages());
103      diff-=target_.averages();
104      const utility::Vector& idx=target_.index();
105      regression::CSplineInterpolation cspline(idx,diff);
106
107      // linear interpolation for first part, i.e., use first diff for
108      // all points in the first part.
109      size_t start=0;
110      size_t end=static_cast<unsigned int>(idx(0));
111      // The first condition below takes care of limiting case number
112      // of parts approximately equal to the number of matrix rows and
113      // the second condition makes sure that index is larege enough
114      // when using cspline below ... the static cast above takes the
115      // floor whereas we want to take the "roof" forcing next index
116      // range to be within interpolation range for the cspline.
117      if ((end==0) || (end<idx(0)))
118        ++end;
119      for (size_t row=start; row<end; ++row) {
120        size_t srow=sorted_index[column][row];
121        result(srow,column) = matrix(srow,column) - diff(0);
122      }
123
124      // cspline interpolation for all data between the mid points of
125      // the first and last part
126      start=end;
127      end=static_cast<unsigned int>(idx(target_.size()-1));
128      // take care of limiting case number of parts approximately
129      // equal to the number of matrix rows
130      if (end==(matrix.rows()-1))
131        --end;
132      for (size_t row=start; row<=end; ++row) {
133        size_t srow=sorted_index[column][row];
134        result(srow,column) = matrix(srow,column) - cspline.evaluate(row) ;
135      }
136
137      // linear interpolation for last part, i.e., use last diff for
138      // all points in the last part.
139      start=end+1;
140      end=result.rows();
141      for (size_t row=start; row<end; ++row) {
142        size_t srow=sorted_index[column][row];
143        result(srow,column) = matrix(srow,column) - diff(diff.size()-1);
144      }
145    }
146  }
147
148}}} // end of namespace normalizer, yat and thep
Note: See TracBrowser for help on using the repository browser.