source: plugins/base2/net.sf.basedb.normalizers/trunk/src/c++/bin/qQN.cc @ 1220

Last change on this file since 1220 was 1220, checked in by Jari Häkkinen, 13 years ago

Write error messages to stderr

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 9.3 KB
Line 
1// $Id: qQN.cc 1220 2010-04-07 14:38:43Z jari $
2
3/*
4  Copyright (C) 2009 Jari Häkkinen
5
6  This file is part of the Normalizers plug-in package for BASE
7  (net.sf.based.normalizers). The package is available at
8  http://baseplugins.thep.lu.se/ BASE main site is
9  http://base.thep.lu.se/
10
11  This is free software; you can redistribute it and/or modify it
12  under the terms of the GNU General Public License as published by
13  the Free Software Foundation; either version 3 of the License, or
14  (at your option) any later version.
15
16  The software is distributed in the hope that it will be useful, but
17  WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  General Public License for more details.
20
21  You should have received a copy of the GNU General Public License
22  along with this program. If not, see <http://www.gnu.org/licenses/>.
23*/
24
25#include <config.h> // this header file is created by configure
26
27#include <yat/normalizer/ColumnNormalizer.h>
28#include <yat/normalizer/qQuantileNormalizer.h>
29
30#include <yat/utility/CommandLine.h>
31#include <yat/utility/MatrixWeighted.h>
32#include <yat/utility/OptionHelp.h>
33#include <yat/utility/OptionInFile.h>
34#include <yat/utility/OptionOutFile.h>
35#include <yat/utility/OptionSwitch.h>
36#include <yat/utility/stl_utility.h>
37#include <yat/statistics/utility.h>
38
39#include <cstdlib>
40#include <fstream>
41#include <functional>
42#include <iostream>
43#include <limits>
44#include <stdexcept>
45
46using namespace theplu::yat::normalizer;
47using namespace theplu::yat::utility;
48
49
50class CleanUpMatrix : std::unary_function<DataWeight, DataWeight>
51{
52public:
53  CleanUpMatrix(void) {}
54
55  inline DataWeight operator()(DataWeight x) const
56    { return ( x.data()>0 ?
57               x : DataWeight(std::numeric_limits<double>::quiet_NaN(),0.0) ); }
58};
59
60
61void create_matrix_until_183_fixed(MatrixWeighted**, const MatrixWeighted&,
62                                   const std::string&);
63void create_target(std::vector<double>&, const MatrixWeighted&, bool use_median);
64void create_target(std::vector<double>&, const MatrixWeighted&,
65                   const std::string&, bool use_median);
66
67/**
68   writes the data values in the matrix ignoring the weights, i.e.,
69   produces the same output as the Matrix output operator does.
70 */
71std::ostream& operator<< (std::ostream&, const MatrixWeighted&);
72
73
74int main(int argc, char* argv[])
75{
76  CommandLine cmd;
77  OptionInFile assay(cmd, "assay-data", "assay annotations");
78  OptionInFile indata(cmd, "in-data", "data to be normalized");
79  OptionOutFile outdata(cmd, "out-data", "normalized data");
80  OptionSwitch target_median(cmd, "median",
81                             "use median instead of mean for targets", false);
82  OptionHelp help(cmd);
83  help.synopsis()=(std::string("See ") +
84                   "http://baseplugins.thep.lu.se/net.sf.basedb.normalizers " +
85                   "for\ndetails on this program.\n\n" +
86                   "All assays are used to calculate the target if no assay " +
87                   "annotion file is set.\n" +
88                   "The assay file format is pair information: " +
89                   "assay_no\\tyes/no\n" +
90                   "where assay_no is the assay number starting at 1, yes " +
91                   "indicates that it should\n" +
92                   "be used in target calculation, and no indicates that it " +
93                   "should not be a part\n" +
94                   "of target.\n");
95  OptionSwitch version(cmd, "version", "output version and exit");
96  std::stringstream copyright;
97  copyright << PACKAGE_STRING << '\n'
98            << "Copyright (C) 2009 Jari Häkkinen\n\n"
99            << "This is free software see the source for copying "
100            << "conditions. There is NO\nwarranty; not even for "
101            << "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n";
102  try {
103    cmd.parse(argc, argv);
104  }
105  catch (cmd_error& e) {
106    if (version.present()) {
107      std::cout << copyright.str();
108      return EXIT_SUCCESS;
109    }
110    std::cerr << e.what() << std::endl;
111    return EXIT_FAILURE;
112  }
113  if (version.present()) {
114    std::cout << copyright.str();
115    return EXIT_SUCCESS;
116  }
117  std::ifstream* infile=NULL;
118  std::streambuf* cin_buffer=NULL;
119  if (indata.present()) {
120    infile=new std::ifstream(indata.value().c_str());
121    cin_buffer = std::cin.rdbuf(); // save cin's input buffer
122    std::cin.rdbuf(infile->rdbuf());
123  }
124
125  // The BASE core API deprecated exportPlainMatrix method of the
126  // deprecated BioAssaySetExporter does not behave well when
127  // exporting bioassaysets where assays have been filtered out in
128  // some filter step. Variables and functions with 183 in their name
129  // should be removed when http://baseplugins.thep.lu.se/ticket/183
130  // is fixed.
131
132  // m is a pointer below, should be changed to a normal variable when
133  // http://baseplugins.thep.lu.se/ticket/183 is fixed.
134
135  // Change to m when http://baseplugins.thep.lu.se/ticket/183 fixed.
136  MatrixWeighted m_tmp_until_183_fixed(std::cin,'\t');
137  if (indata.present()) {
138    std::cin.rdbuf(cin_buffer); // restore old input buffer
139    infile->close();
140    delete infile;
141  }
142  // Remove until HERE below when
143  // http://baseplugins.thep.lu.se/ticket/183 fixed.
144  MatrixWeighted* m=NULL;
145  if (assay.present())
146    create_matrix_until_183_fixed(&m, m_tmp_until_183_fixed, assay.value());
147  else
148    m = &m_tmp_until_183_fixed;
149  // HERE remove until here.
150
151  std::transform(m->begin(), m->end(), m->begin(), CleanUpMatrix());
152  std::vector<double> target;
153  ( assay.present() ?
154    create_target(target, *m, assay.value(), target_median.value()) :
155    create_target(target, *m, target_median.value()) );
156  std::transform(target.begin(), target.end(),
157                 target.begin(), theplu::yat::utility::Log<double>());
158  std::transform(data_iterator(m->begin()), data_iterator(m->end()),
159                 data_iterator(m->begin()), theplu::yat::utility::Log<double>());
160  // q = min(100,target_size/10) but no smaller than 10
161  unsigned int q=target.size()/10;
162  q = q>100 ? 100 : ( q<10 ? 10 : q );
163  qQuantileNormalizer qqn(target.begin(), target.end(), q);
164  ColumnNormalizer<qQuantileNormalizer> cn(qqn);
165  MatrixWeighted result(m->rows(),m->columns());
166  cn(*m,result);
167  std::transform(data_iterator(result.begin()), data_iterator(result.end()),
168                 data_iterator(result.begin()), theplu::yat::utility::Exp<double>());
169
170  std::ofstream* outfile=NULL;
171  std::streambuf* cout_buffer = std::cout.rdbuf();
172  if (outdata.present()) {
173    outfile=new std::ofstream(outdata.value().c_str());
174    cout_buffer = std::cout.rdbuf(); // save cout's output buffer
175    std::cout.rdbuf(outfile->rdbuf());
176  }
177  std::cout << result << std::endl;
178  if (outdata.present()) {
179    std::cout.rdbuf(cout_buffer); // restore old output buffer
180    outfile->close();
181    delete outfile;
182  }
183
184  // clean up until http://baseplugins.thep.lu.se/ticket/183 fixed.
185  if (assay.present())
186    delete m;
187
188  return EXIT_SUCCESS;
189}
190
191
192void create_matrix_until_183_fixed(MatrixWeighted** m,
193                                   const MatrixWeighted& m_tmp_183,
194                                   const std::string& assay)
195{
196  std::ifstream is(assay.c_str());
197  std::vector<size_t> idx;
198  while (is.good()) {
199    size_t i;
200    is >> i;
201    if (is.good()) idx.push_back(i);
202    std::string line;
203    getline(is, line);
204    if (idx.size()>m_tmp_183.columns())
205      throw std::runtime_error(std::string("183_tmp: Too many annotation ") +
206                               "columns wrt data matrix");
207  }
208
209  if (*m) delete *m;
210  size_t rows=m_tmp_183.rows();
211  size_t cols=idx.size();
212  *m=new MatrixWeighted(rows,cols);
213  for (size_t i=0; i<rows; ++i)
214    for (size_t j=0; j<cols; ++j)
215      (**m)(i,j)=m_tmp_183(i,idx[j]-1);
216}
217
218
219void create_target(std::vector<double>& t, const MatrixWeighted& m,
220                   const std::string& assay, bool use_median)
221{
222  std::vector< std::vector<double> > calc_support(m.rows());
223  for (std::vector< std::vector<double> >::iterator i=calc_support.begin();
224       i!=calc_support.end(); ++i)
225    i->reserve(m.columns());
226
227  std::ifstream is(assay.c_str());
228  std::string line;
229  size_t column=0;
230  while (getline(is, line)) {
231    size_t found=line.find("yes");
232    if (found!=std::string::npos) // use this assay as a part of reference
233      for (size_t row=0; row<m.rows(); ++row)
234        if (m(row,column).weight())       // weight either 0 or 1
235          calc_support[row].push_back(m(row,column).data());
236    ++column;
237    if (column>m.columns())
238      throw std::runtime_error("Too many annotation columns wrt data matrix");
239  }
240
241  t.reserve(m.rows());
242  for (size_t row=0; row<m.rows(); ++row) {
243    const std::vector<double>& cs=calc_support[row];
244    if (!cs.empty())
245      t.push_back( use_median ?
246                   theplu::yat::statistics::median(cs.begin(), cs.end()) :
247                   std::accumulate(cs.begin(), cs.end(), 0.0) / cs.size() );
248  }
249  if (!t.size())
250    throw std::runtime_error("Not a well defined reference, aborting");
251}
252
253
254void create_target(std::vector<double>& t, const MatrixWeighted& m,
255                   bool use_median)
256{
257  t.reserve(m.rows());
258  std::vector<double> cs;
259  cs.reserve(m.columns());
260  for (size_t row=0; row<m.rows(); ++row) {
261    cs.clear();
262    for (size_t column=0; column<m.columns(); ++column)
263      if (m(row,column).weight())      // weight either 0 or 1
264        cs.push_back(m(row,column).data());
265    if (!cs.empty())
266      t.push_back( use_median ?
267                   theplu::yat::statistics::median(cs.begin(), cs.end()) :
268                   std::accumulate(cs.begin(), cs.end(), 0.0) / cs.size() );
269  }
270  if (!t.size())
271    throw std::runtime_error("Not a well defined reference, aborting");
272}
273
274
275std::ostream& operator<< (std::ostream& s, const MatrixWeighted& m)
276{
277  s.setf(std::ios::dec);
278  s.precision(12);
279  for(size_t i=0, j=0; i<m.rows(); i++)
280    for (j=0; j<m.columns(); j++) {
281      s << m(i,j).data();
282      if (j<m.columns()-1)
283        s << s.fill();
284      else if (i<m.rows()-1)
285        s << "\n";
286    }
287  return s;
288}
Note: See TracBrowser for help on using the repository browser.