Changeset 1043


Ignore:
Timestamp:
Apr 23, 2009, 6:55:02 PM (14 years ago)
Author:
Jari Häkkinen
Message:

Addresses #118 and #206. This code produces the same result as the previous code but fails miserably with missing values.

Location:
plugins/base2/net.sf.basedb.normalizers/trunk/src
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • plugins/base2/net.sf.basedb.normalizers/trunk/src/c++/bin/qQN.cc

    r1037 r1043  
    2929
    3030#include <yat/utility/CommandLine.h>
    31 #include <yat/utility/Matrix.h>
     31#include <yat/utility/MatrixWeighted.h>
    3232#include <yat/utility/OptionHelp.h>
    3333#include <yat/utility/OptionInFile.h>
     
    4343using namespace theplu::yat::utility;
    4444
    45 void create_target(std::vector<double>&, const Matrix&);
    46 void create_target(std::vector<double>&, const Matrix&, const std::string&);
     45void create_target(std::vector<double>&, const MatrixWeighted&);
     46void create_target(std::vector<double>&, const MatrixWeighted&,
     47                   const std::string&);
     48/**
     49   writes the data values in the matrix ignoring the weights, i.e.,
     50   produces the same output as the Matrix output operator does.
     51 */
     52std::ostream& operator<< (std::ostream&, const MatrixWeighted&);
    4753
    4854
     
    7985    return EXIT_SUCCESS;
    8086  }
    81 
    8287  std::ifstream* infile=NULL;
    8388  std::streambuf* cin_buffer=NULL;
     
    8792    std::cin.rdbuf(infile->rdbuf());
    8893  }
    89   Matrix m(std::cin,'\t');
     94  MatrixWeighted m(std::cin,'\t');
    9095  if (indata.present()) {
    9196    std::cin.rdbuf(cin_buffer); // restore old input buffer
     
    9499  }
    95100
    96   std::vector<double> target(m.rows(),0);
     101  std::vector<double> target(m.rows());
    97102  ( assay.present() ? create_target(target,m,assay.value()) :
    98103                      create_target(target,m) );
    99104  qQuantileNormalizer qqn(target.begin(), target.end(), 100);
    100105  ColumnNormalizer<qQuantileNormalizer> cn(qqn);
    101   Matrix result(m.rows(),m.columns());
     106  MatrixWeighted result(m.rows(),m.columns());
    102107  cn(m,result);
    103108
     
    120125
    121126
    122 void create_target(std::vector<double>& t, const Matrix& m,
     127void create_target(std::vector<double>& t, const MatrixWeighted& m,
    123128                   const std::string& assay)
    124129{
     
    126131  std::string line;
    127132  size_t column=0;
    128   size_t yes=0;
     133  std::vector<size_t> yes(m.rows(),0);
    129134  for (size_t row=0; row<m.rows(); ++row)
    130135    t[row]=0;
     
    133138    if (found!=std::string::npos) {
    134139      for (size_t row=0; row<m.rows(); ++row)
    135         t[row]+=m(row,column);
    136       ++yes;
     140        if (m(row,column).weight()) { // weight either 0 or 1
     141          t[row]+=m(row,column).data();
     142          ++yes[row];
     143        }
    137144    }
    138145    ++column;
     
    140147      throw std::runtime_error("Too many annotation columns wrt data matrix");
    141148  }
    142   if (!yes)
    143     throw std::runtime_error("No columns marked as reference");
    144   for (size_t row=0; row<m.rows(); ++row)
    145     t[row]/=yes;
     149  for (size_t row=0; row<m.rows(); ++row) {
     150    if (!yes[row])
     151      throw std::runtime_error("At least one row with no valid reference");
     152    t[row]/=yes[row];
     153  }
    146154}
    147155
    148156
    149 void create_target(std::vector<double>& t, const Matrix& m)
     157void create_target(std::vector<double>& t, const MatrixWeighted& m)
    150158{
    151159  for (size_t row=0; row<m.rows(); ++row) {
    152160    t[row]=0;
    153161    for (size_t column=0; column<m.columns(); ++column)
    154       t[row]+=m(row,column);
     162      t[row]+=m(row,column).data();
    155163    t[row]/=m.columns();
    156164  }
    157165}
     166
     167
     168std::ostream& operator<< (std::ostream& s, const MatrixWeighted& m)
     169{
     170  s.setf(std::ios::dec);
     171  s.precision(12);
     172  for(size_t i=0, j=0; i<m.rows(); i++)
     173    for (j=0; j<m.columns(); j++) {
     174      s << m(i,j).data();
     175      if (j<m.columns()-1)
     176        s << s.fill();
     177      else if (i<m.rows()-1)
     178        s << "\n";
     179    }
     180  return s;
     181}
  • plugins/base2/net.sf.basedb.normalizers/trunk/src/net/sf/basedb/plugins/qQuantileNormalization.java

    r1036 r1043  
    263263          // C/C++ style string nan's (Java expects NaN) will cause a
    264264          // NumberFormatException which also an empty string
    265           // does. However, we do not expect missing values in the
    266           // resulting matrix. Postponing exception catching until
    267           // they start occuring.
    268           batcher.insert(column.get(i), position,
    269                          Float.parseFloat(lineSplit[i]));
     265          // does.
     266          try
     267          {
     268            float value=Float.parseFloat(lineSplit[i]);
     269            if (value!=Float.NaN) batcher.insert(column.get(i), position, value);
     270          }
     271          catch (NumberFormatException e)
     272          {
     273            // Assume all NumberFormatExceptions are triggered by
     274            // nan's in the result file. Nan's in the result file
     275            // represents a missing value and should not be stored in
     276            // BASE since BASE missing values are really missing in
     277            // BASE.
     278          }
    270279        }
    271280      }
Note: See TracChangeset for help on using the changeset viewer.