Changeset 1062


Ignore:
Timestamp:
May 13, 2009, 8:09:05 AM (12 years ago)
Author:
Jari Häkkinen
Message:

Addresses #118. Added support to use either mean or median in target calculation.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • plugins/base2/net.sf.basedb.normalizers/trunk/src/c++/bin/qQN.cc

    r1059 r1062  
    3535#include <yat/utility/OptionSwitch.h>
    3636#include <yat/utility/stl_utility.h>
     37#include <yat/statistics/utility.h>
    3738
    3839#include <cstdlib>
     
    4546
    4647
    47 void create_target(std::vector<double>&, const MatrixWeighted&);
     48void create_target(std::vector<double>&, const MatrixWeighted&, bool use_median);
    4849void create_target(std::vector<double>&, const MatrixWeighted&,
    49                    const std::string&);
     50                   const std::string&, bool use_median);
    5051
    5152/**
     
    6263  OptionInFile indata(cmd, "in-data", "data to be normalized");
    6364  OptionOutFile outdata(cmd, "out-data", "normalized data");
     65  OptionSwitch target_median(cmd, "median",
     66                             "use median instead of mean for targets", false);
    6467  OptionHelp help(cmd);
    6568  help.synopsis()=(std::string("See ") +
     
    103106
    104107  std::vector<double> target;
    105   ( assay.present() ? create_target(target,m,assay.value()) :
    106                       create_target(target,m) );
     108  ( assay.present() ?
     109    create_target(target, m, assay.value(), target_median.value()) :
     110    create_target(target, m, target_median.value()) );
    107111  std::transform(target.begin(), target.end(),
    108112                 target.begin(), theplu::yat::utility::Log<double>());
     
    135139
    136140void create_target(std::vector<double>& t, const MatrixWeighted& m,
    137                    const std::string& assay)
    138 {
     141                   const std::string& assay, bool use_median)
     142{
     143  std::vector< std::vector<double> > calc_support(m.rows());
     144  for (std::vector< std::vector<double> >::iterator i=calc_support.begin();
     145       i!=calc_support.end(); ++i)
     146    i->reserve(m.columns());
     147
    139148  std::ifstream is(assay.c_str());
    140149  std::string line;
    141150  size_t column=0;
    142   std::vector<size_t> column_contribs(m.rows(),0);
    143   std::vector<double> temp_target(m.rows(),0.0);
    144151  while (getline(is, line)) {
    145152    size_t found=line.find("yes");
    146     if (found!=std::string::npos) {
    147       // use this assay as a part of reference
     153    if (found!=std::string::npos) // use this assay as a part of reference
    148154      for (size_t row=0; row<m.rows(); ++row)
    149         if (m(row,column).weight()) { // weight either 0 or 1
    150           temp_target[row]+=m(row,column).data();
    151           ++column_contribs[row];
    152         }
    153     }
     155        if (m(row,column).weight())       // weight either 0 or 1
     156          calc_support[row].push_back(m(row,column).data());
    154157    ++column;
    155158    if (column>m.columns())
    156159      throw std::runtime_error("Too many annotation columns wrt data matrix");
    157160  }
     161
    158162  t.reserve(m.rows());
    159   for (size_t row=0; row<m.rows(); ++row)
    160     if (column_contribs[row])
    161       t.push_back(temp_target[row]/=column_contribs[row]);
     163  for (size_t row=0; row<m.rows(); ++row) {
     164    const std::vector<double>& cs=calc_support[row];
     165    if (!cs.empty())
     166      t.push_back( use_median ?
     167                   theplu::yat::statistics::median(cs.begin(), cs.end()) :
     168                   std::accumulate(cs.begin(), cs.end(), 0.0) / cs.size() );
     169  }
    162170  if (!t.size())
    163171    throw std::runtime_error("Not a well defined reference, aborting");
     
    165173
    166174
    167 void create_target(std::vector<double>& t, const MatrixWeighted& m)
    168 {
    169   std::vector<double> temp_target(m.rows(),0.0);
     175void create_target(std::vector<double>& t, const MatrixWeighted& m,
     176                   bool use_median)
     177{
    170178  t.reserve(m.rows());
     179  std::vector<double> cs;
     180  cs.reserve(m.columns());
    171181  for (size_t row=0; row<m.rows(); ++row) {
    172     size_t column_contribs=0;
     182    cs.clear();
    173183    for (size_t column=0; column<m.columns(); ++column)
    174       if (m(row,column).weight()) { // weight either 0 or 1
    175         temp_target[row]+=m(row,column).data();
    176         ++column_contribs;
    177       }
    178     if (column_contribs)
    179       t.push_back(temp_target[row]/=column_contribs);
     184      if (m(row,column).weight())     // weight either 0 or 1
     185        cs.push_back(m(row,column).data());
     186    if (!cs.empty())
     187      t.push_back( use_median ?
     188                   theplu::yat::statistics::median(cs.begin(), cs.end()) :
     189                   std::accumulate(cs.begin(), cs.end(), 0.0) / cs.size() );
    180190  }
    181191  if (!t.size())
Note: See TracChangeset for help on using the changeset viewer.