source: trunk/yat/normalizer/qQuantileNormalizer.cc @ 1708

Last change on this file since 1708 was 1708, checked in by Jari Häkkinen, 12 years ago

Addresses #425

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 3.5 KB
Line 
1// $Id: qQuantileNormalizer.cc 1708 2009-01-13 10:09:53Z jari $
2
3/*
4  Copyright (C) 2009 Jari Häkkinen
5
6  This file is part of the yat library, http://dev.thep.lu.se/yat
7
8  The yat library is free software; you can redistribute it and/or
9  modify it under the terms of the GNU General Public License as
10  published by the Free Software Foundation; either version 3 of the
11  License, or (at your option) any later version.
12
13  The yat library is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  General Public License for more details.
17
18  You should have received a copy of the GNU General Public License
19  along with yat. If not, see <http://www.gnu.org/licenses/>.
20*/
21
22#include "qQuantileNormalizer.h"
23
24#include "yat/regression/CSplineInterpolation.h"
25#include "yat/statistics/Averager.h"
26#include "yat/utility/Matrix.h"
27#include "yat/utility/VectorConstView.h"
28
29#include <algorithm>
30#include <cassert>
31
32namespace theplu {
33namespace yat {
34namespace normalizer {
35
36
37  Partitioner::Partitioner(const utility::VectorConstView& vec,
38                           unsigned int N)
39    : average_(utility::Vector(N+1)), index_(utility::Vector(N+1))
40  {
41    assert(N>1);
42    assert(N<=vec.size());
43    double range=vec.size();
44    range/=N;
45    assert(range);
46    utility::Vector sortedvec(vec);
47    std::sort(sortedvec.begin(),sortedvec.end());
48    for (unsigned int i=0; i<=N; ++i) {
49      unsigned int first = ( i    ? (i-0.5)*range : 0 );
50      unsigned int end   = ( i!=N ? (i+0.5)*range : sortedvec.size() );
51      statistics::Averager av;
52      for (unsigned int r=first; r<end; ++r)
53        av.add(sortedvec(r));
54      average_(i)=av.mean();
55      index_(i)=end;
56    }
57  }
58
59
60  const utility::Vector& Partitioner::averages(void) const
61  {
62    return average_;
63  }
64
65
66  const utility::Vector& Partitioner::index(void) const
67  {
68    return index_;
69  }
70
71
72  size_t Partitioner::size(void) const
73  {
74    return average_.size()-1;
75  }
76
77
78  qQuantileNormalizer::qQuantileNormalizer(const
79                                           utility::VectorConstView& target,
80                                           unsigned int Q)
81    : target_(Partitioner(target,Q))
82  {
83  }
84
85
86  void qQuantileNormalizer::operator()(const utility::Matrix& matrix,
87                                       utility::Matrix& result) const
88  {
89    assert(matrix.rows()    == result.rows());
90    assert(matrix.columns() == result.columns());
91    assert(matrix.rows()    >= target_.size());
92
93    std::vector<std::vector<size_t> > sorted_index(matrix.rows());
94    for (size_t column=0; column<matrix.columns(); ++column)
95      utility::sort_index(sorted_index[column],
96                          matrix.column_const_view(column));
97
98    for (size_t column=0; column<matrix.columns(); ++column) {
99      Partitioner source(matrix.column_const_view(column),target_.size());
100      utility::Vector diff(source.averages());
101      diff-=target_.averages();
102      const utility::Vector& idx=target_.index();
103
104      // add linear interpolation for first part
105      for (size_t row=0; row<idx(0); ++row) {
106        size_t srow=sorted_index[column][row];
107        result(srow,column) = matrix(srow,column);
108      }
109
110      // cspline interpolation for all data between the first and last
111      // parts
112      regression::CSplineInterpolation cspline(idx,diff);
113      for (size_t row=idx(0); row<=idx(target_.size()-1); ++row) {
114        size_t srow=sorted_index[column][row];
115        result(srow,column) = ( matrix(srow,column) + cspline.evaluate(row) );
116      }
117
118      // add linear interpolation for last part
119      for (size_t row=idx(target_.size()-1)+1; row<result.rows(); ++row) {
120        size_t srow=sorted_index[column][row];
121        result(srow,column) = matrix(srow,column);
122      }
123    }
124  }
125
126}}} // end of namespace normalizer, yat and thep
Note: See TracBrowser for help on using the repository browser.