source: trunk/yat/normalizer/qQuantileNormalizer.cc @ 2103

Last change on this file since 2103 was 2103, checked in by Peter, 14 years ago

merging patch release 0.5.5 into trunk. Delta 0.5.5 - 0.5.4

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 3.5 KB
Line 
1// $Id: qQuantileNormalizer.cc 2103 2009-11-06 12:40:26Z peter $
2
3/*
4  Copyright (C) 2009 Jari Häkkinen, Peter Johansson
5
6  This file is part of the yat library, http://dev.thep.lu.se/yat
7
8  The yat library is free software; you can redistribute it and/or
9  modify it under the terms of the GNU General Public License as
10  published by the Free Software Foundation; either version 3 of the
11  License, or (at your option) any later version.
12
13  The yat library is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  General Public License for more details.
17
18  You should have received a copy of the GNU General Public License
19  along with yat. If not, see <http://www.gnu.org/licenses/>.
20*/
21
22#include "qQuantileNormalizer.h"
23
24#include "yat/regression/CSplineInterpolation.h"
25#include "yat/statistics/Averager.h"
26#include "yat/statistics/AveragerWeighted.h"
27#include "yat/utility/DataWeight.h"
28#include "yat/utility/Vector.h"
29#include "yat/utility/VectorBase.h"
30#include "yat/utility/WeightIterator.h"
31
32#include <algorithm>
33#include <cassert>
34#include <numeric>
35#include <sstream>
36#include <stdexcept>
37#include <string>
38#include <vector>
39
40namespace theplu {
41namespace yat {
42namespace normalizer {
43
44
45  void 
46  qQuantileNormalizer::Partitioner::init(const utility::VectorBase& sortedvec,
47                                         unsigned int N)
48  {
49    assert(N>1);
50    assert(N<=sortedvec.size());
51    double range=static_cast<double>(sortedvec.size())/N;
52    assert(range);
53    unsigned int start=0;
54    for (unsigned int i=0; i<N; ++i) {
55      unsigned int end = ( i==(N-1) ? sortedvec.size() :
56                           static_cast<unsigned int>((i+1)*range) );
57      statistics::Averager av;
58      for (unsigned int r=start; r<end; ++r)
59        av.add(sortedvec(r));
60      average_(i) = av.mean();
61      quantiles_(i)   = 0.5*(end+start);
62      start=end;
63    }
64    // rescale quantiles to be in range (0,1)
65    quantiles_ *= 1.0/sortedvec.size();
66  }
67
68
69  void qQuantileNormalizer::Partitioner::init
70  (const std::vector<utility::DataWeight>& sortedvec, unsigned int N)
71  {
72    assert(N>1);
73    assert(N<=sortedvec.size());
74    double total_w = std::accumulate(utility::weight_iterator(sortedvec.begin()),
75                                     utility::weight_iterator(sortedvec.end()), 
76                                     0.0);
77
78    assert(total_w);
79    double sum_w = 0;
80    std::vector<utility::DataWeight>::const_iterator iter(sortedvec.begin());
81    for (unsigned int i=0; i<N; ++i) {
82      statistics::AveragerWeighted av;
83      double end_sum_w = (i+1) * total_w / N - sum_w; 
84      if (i!=N-1) {
85        while(av.sum_w() + iter->weight() <= end_sum_w) {
86          av.add(iter->data(), iter->weight());
87          ++iter;
88        }
89      }
90      // use all remaining data for last bin (to avoid problems
91      // due to rounding errors)
92      else 
93        add(av, iter, sortedvec.end());
94
95      if (av.sum_w() == 0) {
96        std::stringstream ss;
97        ss << "yat::normalizer::qQuantileNormalizer: relative weight too "
98           << "large. See qQuantileNormalizer constructor documentation "
99           << "for details on weights.\n";
100        throw std::runtime_error(ss.str());
101      }
102      average_(i) = av.mean();
103      quantiles_(i)   = (sum_w + 0.5*av.sum_w())/total_w;
104      sum_w += av.sum_w();
105    }
106  }
107
108
109  const utility::Vector& qQuantileNormalizer::Partitioner::averages(void) const
110  {
111    return average_;
112  }
113
114
115  const utility::Vector& qQuantileNormalizer::Partitioner::quantiles(void) const
116  {
117    return quantiles_;
118  }
119
120
121  size_t qQuantileNormalizer::Partitioner::size(void) const
122  {
123    return average_.size();
124  }
125
126}}} // end of namespace normalizer, yat and theplu
Note: See TracBrowser for help on using the repository browser.