source: plugins/base2/net.sf.basedb.normalizers/trunk/src/c++/bin/qQN.cc @ 1050

Last change on this file since 1050 was 1050, checked in by Jari Häkkinen, 13 years ago

Addresses #118. Non-logged input values are expected, geometric mean is used for averaging.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 6.7 KB
Line 
1// $Id: qQN.cc 1050 2009-05-06 09:11:59Z jari $
2
3/*
4  Copyright (C) 2009 Jari Häkkinen
5
6  This file is part of the Normalizers plug-in package for BASE
7  (net.sf.based.normalizers). The package is available at
8  http://baseplugins.thep.lu.se/ BASE main site is
9  http://base.thep.lu.se/
10
11  This is free software; you can redistribute it and/or modify it
12  under the terms of the GNU General Public License as published by
13  the Free Software Foundation; either version 3 of the License, or
14  (at your option) any later version.
15
16  The software is distributed in the hope that it will be useful, but
17  WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  General Public License for more details.
20
21  You should have received a copy of the GNU General Public License
22  along with this program. If not, see <http://www.gnu.org/licenses/>.
23*/
24
25#include <config.h> // this header file is created by configure
26
27#include <yat/normalizer/ColumnNormalizer.h>
28#include <yat/normalizer/qQuantileNormalizer.h>
29
30#include <yat/utility/CommandLine.h>
31#include <yat/utility/MatrixWeighted.h>
32#include <yat/utility/OptionHelp.h>
33#include <yat/utility/OptionInFile.h>
34#include <yat/utility/OptionOutFile.h>
35#include <yat/utility/OptionSwitch.h>
36
37#include <cstdlib>
38#include <fstream>
39#include <iostream>
40#include <stdexcept>
41
42using namespace theplu::yat::normalizer;
43using namespace theplu::yat::utility;
44
45
46void create_target_and_fix_nans(std::vector<double>&, MatrixWeighted&);
47void create_target_and_fix_nans(std::vector<double>&, MatrixWeighted&,
48                                const std::string&);
49
50void exp_numbers(MatrixWeighted&);
51template<typename Iterator> void log_numbers(Iterator first, Iterator last);
52void log_numbers(MatrixWeighted&);
53
54/**
55   writes the data values in the matrix ignoring the weights, i.e.,
56   produces the same output as the Matrix output operator does.
57 */
58std::ostream& operator<< (std::ostream&, const MatrixWeighted&);
59
60
61int main(int argc, char* argv[])
62{
63  CommandLine cmd;
64  OptionInFile assay(cmd, "assay-data", "assay annotations");
65  OptionInFile indata(cmd, "in-data", "data to be normalized");
66  OptionOutFile outdata(cmd, "out-data", "normalized data");
67  OptionHelp help(cmd);
68  help.synopsis()=(std::string("See ") +
69                   "http://baseplugins.thep.lu.se/net.sf.basedb.normalizers " +
70                   "for\ndetails on this program\n");
71  OptionSwitch version(cmd, "version", "output version and exit");
72  std::stringstream copyright;
73  copyright << PACKAGE_STRING << '\n'
74            << "Copyright (C) 2009 Jari Häkkinen\n\n"
75            << "This is free software see the source for copying "
76            << "conditions. There is NO\nwarranty; not even for "
77            << "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n";
78  try {
79    cmd.parse(argc, argv);
80  }
81  catch (cmd_error& e) {
82    if (version.present()) {
83      std::cout << copyright.str();
84      return EXIT_SUCCESS;
85    }
86    std::cout << e.what() << std::endl;
87    return EXIT_FAILURE;
88  }
89  if (version.present()) {
90    std::cout << copyright.str();
91    return EXIT_SUCCESS;
92  }
93  std::ifstream* infile=NULL;
94  std::streambuf* cin_buffer=NULL;
95  if (indata.present()) {
96    infile=new std::ifstream(indata.value().c_str());
97    cin_buffer = std::cin.rdbuf(); // save cin's input buffer
98    std::cin.rdbuf(infile->rdbuf());
99  }
100  MatrixWeighted m(std::cin,'\t');
101  if (indata.present()) {
102    std::cin.rdbuf(cin_buffer); // restore old input buffer
103    infile->close();
104    delete infile;
105  }
106
107  std::vector<double> target;
108  ( assay.present() ? create_target_and_fix_nans(target,m,assay.value()) :
109                      create_target_and_fix_nans(target,m) );
110  log_numbers(target.begin(),target.end());
111  log_numbers(m);
112  qQuantileNormalizer qqn(target.begin(), target.end(), 100);
113  ColumnNormalizer<qQuantileNormalizer> cn(qqn);
114  MatrixWeighted result(m.rows(),m.columns());
115  cn(m,result);
116  exp_numbers(result);
117
118  std::ofstream* outfile=NULL;
119  std::streambuf* cout_buffer = std::cout.rdbuf();
120  if (outdata.present()) {
121    outfile=new std::ofstream(outdata.value().c_str());
122    cout_buffer = std::cout.rdbuf(); // save cout's output buffer
123    std::cout.rdbuf(outfile->rdbuf());
124  }
125  std::cout << result << std::endl;
126  if (outdata.present()) {
127    std::cout.rdbuf(cout_buffer); // restore old output buffer
128    outfile->close();
129    delete outfile;
130  }
131
132  return EXIT_SUCCESS;
133}
134
135
136void create_target_and_fix_nans(std::vector<double>& t, MatrixWeighted& m,
137                                const std::string& assay)
138{
139  std::ifstream is(assay.c_str());
140  std::string line;
141  size_t column=0;
142  std::vector<size_t> column_contribs(m.rows(),0);
143  std::vector<double> temp_target(m.rows(),0.0);
144  while (getline(is, line)) {
145    size_t found=line.find("yes");
146
147    if (found!=std::string::npos) {
148      // use this assay as a part of reference
149      for (size_t row=0; row<m.rows(); ++row)
150        if (m(row,column).weight()) { // weight either 0 or 1
151          temp_target[row]+=m(row,column).data();
152          ++column_contribs[row];
153        }
154        else
155          m(row,column).data()=0;
156    }
157    else
158      // this assay as not a part of the reference but fix nan
159      for (size_t row=0; row<m.rows(); ++row)
160        if (!m(row,column).weight()) // weight either 0 or 1
161          m(row,column).data()=0;
162
163    ++column;
164    if (column>m.columns())
165      throw std::runtime_error("Too many annotation columns wrt data matrix");
166  }
167  t.reserve(m.rows());
168  for (size_t row=0; row<m.rows(); ++row)
169    if (column_contribs[row])
170      t.push_back(temp_target[row]/=column_contribs[row]);
171  if (!t.size())
172    throw std::runtime_error("Not a well defined reference, aborting");
173}
174
175
176void create_target_and_fix_nans(std::vector<double>& t, MatrixWeighted& m)
177{
178  std::vector<double> temp_target(m.rows(),0.0);
179  t.reserve(m.rows());
180  for (size_t row=0; row<m.rows(); ++row) {
181    size_t column_contribs=0;
182    for (size_t column=0; column<m.columns(); ++column)
183      if (m(row,column).weight()) { // weight either 0 or 1
184        temp_target[row]+=m(row,column).data();
185        ++column_contribs;
186      }
187      else
188        m(row,column).data()=0;
189    if (column_contribs)
190      t.push_back(temp_target[row]/=column_contribs);
191  }
192  if (!t.size())
193    throw std::runtime_error("Not a well defined reference, aborting");
194}
195
196
197void exp_numbers(MatrixWeighted& m)
198{
199  MatrixWeighted::iterator i=m.begin();
200  while (i!=m.end()) {
201    i->data() =std::exp(i->data());
202    ++i;
203  }
204}
205
206
207template<typename Iterator> void log_numbers(Iterator i, Iterator last)
208{
209  while (i!=last) {
210    *i=std::log(*i);
211    ++i;
212  }
213}
214
215
216void log_numbers(MatrixWeighted& m)
217{
218  MatrixWeighted::iterator i=m.begin();
219  while (i!=m.end()) {
220    i->data() =std::log(i->data());
221    ++i;
222  }
223}
224
225
226std::ostream& operator<< (std::ostream& s, const MatrixWeighted& m)
227{
228  s.setf(std::ios::dec);
229  s.precision(12);
230  for(size_t i=0, j=0; i<m.rows(); i++)
231    for (j=0; j<m.columns(); j++) {
232      if (m(i,j).weight())
233        s << m(i,j).data();
234      else
235        s << "nan";
236      if (j<m.columns()-1)
237        s << s.fill();
238      else if (i<m.rows()-1)
239        s << "\n";
240    }
241  return s;
242}
Note: See TracBrowser for help on using the repository browser.