source: plugins/base2/net.sf.basedb.normalizers/trunk/src/c++/bin/qQN.cc @ 1067

Last change on this file since 1067 was 1067, checked in by Jari Häkkinen, 13 years ago

Addresses #118. Forcing negative and 0 intensities to be NaNs? with 0 weight.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 7.0 KB
Line 
1// $Id: qQN.cc 1067 2009-05-14 17:05:35Z jari $
2
3/*
4  Copyright (C) 2009 Jari Häkkinen
5
6  This file is part of the Normalizers plug-in package for BASE
7  (net.sf.based.normalizers). The package is available at
8  http://baseplugins.thep.lu.se/ BASE main site is
9  http://base.thep.lu.se/
10
11  This is free software; you can redistribute it and/or modify it
12  under the terms of the GNU General Public License as published by
13  the Free Software Foundation; either version 3 of the License, or
14  (at your option) any later version.
15
16  The software is distributed in the hope that it will be useful, but
17  WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  General Public License for more details.
20
21  You should have received a copy of the GNU General Public License
22  along with this program. If not, see <http://www.gnu.org/licenses/>.
23*/
24
25#include <config.h> // this header file is created by configure
26
27#include <yat/normalizer/ColumnNormalizer.h>
28#include <yat/normalizer/qQuantileNormalizer.h>
29
30#include <yat/utility/CommandLine.h>
31#include <yat/utility/MatrixWeighted.h>
32#include <yat/utility/OptionHelp.h>
33#include <yat/utility/OptionInFile.h>
34#include <yat/utility/OptionOutFile.h>
35#include <yat/utility/OptionSwitch.h>
36#include <yat/utility/stl_utility.h>
37#include <yat/statistics/utility.h>
38
39#include <cstdlib>
40#include <fstream>
41#include <functional>
42#include <iostream>
43#include <limits>
44#include <stdexcept>
45
46using namespace theplu::yat::normalizer;
47using namespace theplu::yat::utility;
48
49
50class CleanUpMatrix : std::unary_function<double, double>
51{
52public:
53  CleanUpMatrix(void) {}
54
55  inline DataWeight operator()(DataWeight x) const
56    { return ( x.data()>0 ?
57               x : DataWeight(std::numeric_limits<double>::quiet_NaN(),0.0) ); }
58};
59
60
61void create_target(std::vector<double>&, const MatrixWeighted&, bool use_median);
62void create_target(std::vector<double>&, const MatrixWeighted&,
63                   const std::string&, bool use_median);
64
65/**
66   writes the data values in the matrix ignoring the weights, i.e.,
67   produces the same output as the Matrix output operator does.
68 */
69std::ostream& operator<< (std::ostream&, const MatrixWeighted&);
70
71
72int main(int argc, char* argv[])
73{
74  CommandLine cmd;
75  OptionInFile assay(cmd, "assay-data", "assay annotations");
76  OptionInFile indata(cmd, "in-data", "data to be normalized");
77  OptionOutFile outdata(cmd, "out-data", "normalized data");
78  OptionSwitch target_median(cmd, "median",
79                             "use median instead of mean for targets", false);
80  OptionHelp help(cmd);
81  help.synopsis()=(std::string("See ") +
82                   "http://baseplugins.thep.lu.se/net.sf.basedb.normalizers " +
83                   "for\ndetails on this program\n");
84  OptionSwitch version(cmd, "version", "output version and exit");
85  std::stringstream copyright;
86  copyright << PACKAGE_STRING << '\n'
87            << "Copyright (C) 2009 Jari Häkkinen\n\n"
88            << "This is free software see the source for copying "
89            << "conditions. There is NO\nwarranty; not even for "
90            << "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n";
91  try {
92    cmd.parse(argc, argv);
93  }
94  catch (cmd_error& e) {
95    if (version.present()) {
96      std::cout << copyright.str();
97      return EXIT_SUCCESS;
98    }
99    std::cout << e.what() << std::endl;
100    return EXIT_FAILURE;
101  }
102  if (version.present()) {
103    std::cout << copyright.str();
104    return EXIT_SUCCESS;
105  }
106  std::ifstream* infile=NULL;
107  std::streambuf* cin_buffer=NULL;
108  if (indata.present()) {
109    infile=new std::ifstream(indata.value().c_str());
110    cin_buffer = std::cin.rdbuf(); // save cin's input buffer
111    std::cin.rdbuf(infile->rdbuf());
112  }
113  MatrixWeighted m(std::cin,'\t');
114  if (indata.present()) {
115    std::cin.rdbuf(cin_buffer); // restore old input buffer
116    infile->close();
117    delete infile;
118  }
119
120  std::transform(m.begin(), m.end(), m.begin(), CleanUpMatrix());
121  std::vector<double> target;
122  ( assay.present() ?
123    create_target(target, m, assay.value(), target_median.value()) :
124    create_target(target, m, target_median.value()) );
125  std::transform(target.begin(), target.end(),
126                 target.begin(), theplu::yat::utility::Log<double>());
127  std::transform(data_iterator(m.begin()), data_iterator(m.end()),
128                 data_iterator(m.begin()), theplu::yat::utility::Log<double>());
129  qQuantileNormalizer qqn(target.begin(), target.end(), 100);
130  ColumnNormalizer<qQuantileNormalizer> cn(qqn);
131  MatrixWeighted result(m.rows(),m.columns());
132  cn(m,result);
133  std::transform(data_iterator(result.begin()), data_iterator(result.end()),
134                 data_iterator(result.begin()), theplu::yat::utility::Exp<double>());
135
136  std::ofstream* outfile=NULL;
137  std::streambuf* cout_buffer = std::cout.rdbuf();
138  if (outdata.present()) {
139    outfile=new std::ofstream(outdata.value().c_str());
140    cout_buffer = std::cout.rdbuf(); // save cout's output buffer
141    std::cout.rdbuf(outfile->rdbuf());
142  }
143  std::cout << result << std::endl;
144  if (outdata.present()) {
145    std::cout.rdbuf(cout_buffer); // restore old output buffer
146    outfile->close();
147    delete outfile;
148  }
149
150  return EXIT_SUCCESS;
151}
152
153
154void create_target(std::vector<double>& t, const MatrixWeighted& m,
155                   const std::string& assay, bool use_median)
156{
157  std::vector< std::vector<double> > calc_support(m.rows());
158  for (std::vector< std::vector<double> >::iterator i=calc_support.begin();
159       i!=calc_support.end(); ++i)
160    i->reserve(m.columns());
161
162  std::ifstream is(assay.c_str());
163  std::string line;
164  size_t column=0;
165  while (getline(is, line)) {
166    size_t found=line.find("yes");
167    if (found!=std::string::npos) // use this assay as a part of reference
168      for (size_t row=0; row<m.rows(); ++row)
169        if (m(row,column).weight())       // weight either 0 or 1
170          calc_support[row].push_back(m(row,column).data());
171    ++column;
172    if (column>m.columns())
173      throw std::runtime_error("Too many annotation columns wrt data matrix");
174  }
175
176  t.reserve(m.rows());
177  for (size_t row=0; row<m.rows(); ++row) {
178    const std::vector<double>& cs=calc_support[row];
179    if (!cs.empty())
180      t.push_back( use_median ?
181                   theplu::yat::statistics::median(cs.begin(), cs.end()) :
182                   std::accumulate(cs.begin(), cs.end(), 0.0) / cs.size() );
183  }
184  if (!t.size())
185    throw std::runtime_error("Not a well defined reference, aborting");
186}
187
188
189void create_target(std::vector<double>& t, const MatrixWeighted& m,
190                   bool use_median)
191{
192  t.reserve(m.rows());
193  std::vector<double> cs;
194  cs.reserve(m.columns());
195  for (size_t row=0; row<m.rows(); ++row) {
196    cs.clear();
197    for (size_t column=0; column<m.columns(); ++column)
198      if (m(row,column).weight())      // weight either 0 or 1
199        cs.push_back(m(row,column).data());
200    if (!cs.empty())
201      t.push_back( use_median ?
202                   theplu::yat::statistics::median(cs.begin(), cs.end()) :
203                   std::accumulate(cs.begin(), cs.end(), 0.0) / cs.size() );
204  }
205  if (!t.size())
206    throw std::runtime_error("Not a well defined reference, aborting");
207}
208
209
210std::ostream& operator<< (std::ostream& s, const MatrixWeighted& m)
211{
212  s.setf(std::ios::dec);
213  s.precision(12);
214  for(size_t i=0, j=0; i<m.rows(); i++)
215    for (j=0; j<m.columns(); j++) {
216      s << m(i,j).data();
217      if (j<m.columns()-1)
218        s << s.fill();
219      else if (i<m.rows()-1)
220        s << "\n";
221    }
222  return s;
223}
Note: See TracBrowser for help on using the repository browser.