source: plugins/base2/net.sf.basedb.normalizers/trunk/src/c++/bin/qQN.cc @ 1044

Last change on this file since 1044 was 1044, checked in by Jari Häkkinen, 13 years ago

Addresses #118 and #206. If no assay data is given use the average of all assays as reference ... fixed create_target for this case.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 5.5 KB
Line 
1// $Id: qQN.cc 1044 2009-04-23 17:09:51Z jari $
2
3/*
4  Copyright (C) 2009 Jari Häkkinen
5
6  This file is part of the Normalizers plug-in package for BASE
7  (net.sf.based.normalizers). The package is available at
8  http://baseplugins.thep.lu.se/ BASE main site is
9  http://base.thep.lu.se/
10
11  This is free software; you can redistribute it and/or modify it
12  under the terms of the GNU General Public License as published by
13  the Free Software Foundation; either version 3 of the License, or
14  (at your option) any later version.
15
16  The software is distributed in the hope that it will be useful, but
17  WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  General Public License for more details.
20
21  You should have received a copy of the GNU General Public License
22  along with this program. If not, see <http://www.gnu.org/licenses/>.
23*/
24
25#include <config.h> // this header file is created by configure
26
27#include <yat/normalizer/ColumnNormalizer.h>
28#include <yat/normalizer/qQuantileNormalizer.h>
29
30#include <yat/utility/CommandLine.h>
31#include <yat/utility/MatrixWeighted.h>
32#include <yat/utility/OptionHelp.h>
33#include <yat/utility/OptionInFile.h>
34#include <yat/utility/OptionOutFile.h>
35#include <yat/utility/OptionSwitch.h>
36
37#include <cstdlib>
38#include <fstream>
39#include <iostream>
40#include <stdexcept>
41
42using namespace theplu::yat::normalizer;
43using namespace theplu::yat::utility;
44
45void create_target(std::vector<double>&, const MatrixWeighted&);
46void create_target(std::vector<double>&, const MatrixWeighted&,
47                   const std::string&);
48/**
49   writes the data values in the matrix ignoring the weights, i.e.,
50   produces the same output as the Matrix output operator does.
51 */
52std::ostream& operator<< (std::ostream&, const MatrixWeighted&);
53
54
55int main(int argc, char* argv[])
56{
57  CommandLine cmd;
58  OptionInFile assay(cmd, "assay-data", "assay annotations");
59  OptionInFile indata(cmd, "in-data", "data to be normalized");
60  OptionOutFile outdata(cmd, "out-data", "normalized data");
61  OptionHelp help(cmd);
62  help.synopsis()=(std::string("See ") +
63                   "http://baseplugins.thep.lu.se/net.sf.basedb.normalizers " +
64                   "for\ndetails on this program\n");
65  OptionSwitch version(cmd, "version", "output version and exit");
66  std::stringstream copyright;
67  copyright << PACKAGE_STRING << '\n'
68            << "Copyright (C) 2009 Jari Häkkinen\n\n"
69            << "This is free software see the source for copying "
70            << "conditions. There is NO\nwarranty; not even for "
71            << "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n";
72  try {
73    cmd.parse(argc, argv);
74  }
75  catch (cmd_error& e) {
76    if (version.present()) {
77      std::cout << copyright.str();
78      return EXIT_SUCCESS;
79    }
80    std::cout << e.what() << std::endl;
81    return EXIT_FAILURE;
82  }
83  if (version.present()) {
84    std::cout << copyright.str();
85    return EXIT_SUCCESS;
86  }
87  std::ifstream* infile=NULL;
88  std::streambuf* cin_buffer=NULL;
89  if (indata.present()) {
90    infile=new std::ifstream(indata.value().c_str());
91    cin_buffer = std::cin.rdbuf(); // save cin's input buffer
92    std::cin.rdbuf(infile->rdbuf());
93  }
94  MatrixWeighted m(std::cin,'\t');
95  if (indata.present()) {
96    std::cin.rdbuf(cin_buffer); // restore old input buffer
97    infile->close();
98    delete infile;
99  }
100
101  std::vector<double> target(m.rows());
102  ( assay.present() ? create_target(target,m,assay.value()) :
103                      create_target(target,m) );
104  qQuantileNormalizer qqn(target.begin(), target.end(), 100);
105  ColumnNormalizer<qQuantileNormalizer> cn(qqn);
106  MatrixWeighted result(m.rows(),m.columns());
107  cn(m,result);
108
109  std::ofstream* outfile=NULL;
110  std::streambuf* cout_buffer = std::cout.rdbuf();
111  if (outdata.present()) {
112    outfile=new std::ofstream(outdata.value().c_str());
113    cout_buffer = std::cout.rdbuf(); // save cout's output buffer
114    std::cout.rdbuf(outfile->rdbuf());
115  }
116  std::cout << result << std::endl;
117  if (outdata.present()) {
118    std::cout.rdbuf(cout_buffer); // restore old output buffer
119    outfile->close();
120    delete outfile;
121  }
122
123  return EXIT_SUCCESS;
124}
125
126
127void create_target(std::vector<double>& t, const MatrixWeighted& m,
128                   const std::string& assay)
129{
130  std::ifstream is(assay.c_str());
131  std::string line;
132  size_t column=0;
133  std::vector<size_t> yes(m.rows(),0);
134  for (size_t row=0; row<m.rows(); ++row)
135    t[row]=0;
136  while (getline(is, line)) {
137    size_t found=line.find("yes");
138    if (found!=std::string::npos) {
139      for (size_t row=0; row<m.rows(); ++row)
140        if (m(row,column).weight()) { // weight either 0 or 1
141          t[row]+=m(row,column).data();
142          ++yes[row];
143        }
144    }
145    ++column;
146    if (column>m.columns())
147      throw std::runtime_error("Too many annotation columns wrt data matrix");
148  }
149  for (size_t row=0; row<m.rows(); ++row) {
150    if (!yes[row])
151      throw std::runtime_error("At least one row with no valid reference");
152    t[row]/=yes[row];
153  }
154}
155
156
157void create_target(std::vector<double>& t, const MatrixWeighted& m)
158{
159  for (size_t row=0; row<m.rows(); ++row) {
160    t[row]=0;
161    size_t column_contribs=0;
162    for (size_t column=0; column<m.columns(); ++column)
163      if (m(row,column).weight()) { // weight either 0 or 1
164        t[row]+=m(row,column).data();
165        ++column_contribs;
166      }
167    if (!column_contribs)
168      throw std::runtime_error("At least one row with no valid reference");
169    t[row]/=column_contribs;
170  }
171}
172
173
174std::ostream& operator<< (std::ostream& s, const MatrixWeighted& m)
175{
176  s.setf(std::ios::dec);
177  s.precision(12);
178  for(size_t i=0, j=0; i<m.rows(); i++)
179    for (j=0; j<m.columns(); j++) {
180      s << m(i,j).data();
181      if (j<m.columns()-1)
182        s << s.fill();
183      else if (i<m.rows()-1)
184        s << "\n";
185    }
186  return s;
187}
Note: See TracBrowser for help on using the repository browser.