Changeset 1738


Ignore:
Timestamp:
Jan 20, 2009, 2:58:44 AM (12 years ago)
Author:
Peter
Message:

Added support for creating a qQuantileNormalizer from a weighted range. Addresses #478

Location:
trunk
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • trunk/test/normalization_test.cc

    r1737 r1738  
    180180  for (size_t i=0; i<100; ++i)
    181181    vec.push_back(utility::DataWeight(i, 1.0));
    182   //qQuantileNormalizer(vec.begin(), vec.end(), 5);
     182  qQuantileNormalizer(vec.begin(), vec.end(), 5);
    183183}
    184184
  • trunk/yat/normalizer/qQuantileNormalizer.cc

    r1736 r1738  
    2424#include "yat/regression/CSplineInterpolation.h"
    2525#include "yat/statistics/Averager.h"
     26#include "yat/statistics/AveragerWeighted.h"
     27#include "yat/utility/DataWeight.h"
    2628#include "yat/utility/Vector.h"
    2729#include "yat/utility/VectorBase.h"
     30#include "yat/utility/WeightIterator.h"
    2831
    2932#include <algorithm>
    3033#include <cassert>
     34#include <numeric>
     35#include <sstream>
     36#include <stdexcept>
     37#include <string>
     38#include <vector>
    3139
    3240namespace theplu {
     
    5765
    5866
     67  void qQuantileNormalizer::Partitioner::init
     68  (const std::vector<utility::DataWeight>& sortedvec, unsigned int N)
     69  {
     70    assert(N>1);
     71    assert(N<=sortedvec.size());
     72    double total_w = std::accumulate(utility::weight_iterator(sortedvec.begin()),
     73                                     utility::weight_iterator(sortedvec.end()),
     74                                     0.0);
     75
     76    assert(total_w);
     77    double sum_w = 0;
     78    std::vector<utility::DataWeight>::const_iterator iter(sortedvec.begin());
     79    for (unsigned int i=0; i<N; ++i) {
     80      statistics::AveragerWeighted av;
     81      double end_sum_w = (i+1) * total_w / N - sum_w;
     82      std::cout << "end_sum_w: " << end_sum_w << std::endl;
     83      if (i!=N-1) {
     84        while(av.sum_w() < end_sum_w) {
     85          av.add(iter->data(), iter->weight());
     86          ++iter;
     87        }
     88      }
     89      // use all remaining data for last bin (to avoid problems
     90      // due to rounding errors)
     91      else
     92        add(av, iter, sortedvec.end());
     93
     94      if (av.sum_w() == 0) {
     95        std::stringstream ss;
     96        ss << "yat::normalizer::qQuantileNormalizer: relative weight too "
     97           << "large in\n";
     98        throw std::runtime_error(ss.str());
     99      }
     100      average_(i) = av.mean();
     101      index_(i)   = sum_w + 0.5*av.sum_w();
     102      sum_w += av.sum_w();
     103    }
     104  }
     105
     106
    59107  const utility::Vector& qQuantileNormalizer::Partitioner::averages(void) const
    60108  {
  • trunk/yat/normalizer/qQuantileNormalizer.h

    r1737 r1738  
    146146    void build(Iterator first, Iterator last, unsigned int N,
    147147               utility::unweighted_iterator_tag);
     148    // weighted "constructor"
     149    template<typename Iterator>
     150    void build(Iterator first, Iterator last, unsigned int N,
     151               utility::weighted_iterator_tag);
    148152    void init(const utility::VectorBase&, unsigned int N);
     153    void init(const std::vector<utility::DataWeight>&, unsigned int N);
    149154
    150155    utility::Vector average_;
     
    256261
    257262
     263  template<typename Iterator>
     264  void qQuantileNormalizer::Partitioner::build(Iterator first,
     265                                               Iterator last, unsigned int N,
     266                                               utility::weighted_iterator_tag)
     267  {
     268    std::vector<utility::DataWeight> vec;
     269    vec.reserve(std::distance(first, last));
     270    std::back_insert_iterator<std::vector<utility::DataWeight> > inserter(vec);
     271    std::copy(first, last, inserter);
     272    std::sort(vec.begin(), vec.end());
     273    init(vec, N);
     274  }
     275
     276
    258277}}} // end of namespace normalizer, yat and theplu
    259278
Note: See TracChangeset for help on using the changeset viewer.