source: trunk/test/feature_selection_test.cc @ 865

Last change on this file since 865 was 865, checked in by Peter, 16 years ago

changing URL to http://trac.thep.lu.se/trac/yat

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 3.5 KB
Line 
1// $Id: feature_selection_test.cc 865 2007-09-10 19:41:04Z peter $
2
3/*
4  Copyright (C) 2006 Jari Häkkinen, Peter Johansson
5  Copyright (C) 2007 Peter Johansson
6
7  This file is part of the yat library, http://trac.thep.lu.se/trac/yat
8
9  The yat library is free software; you can redistribute it and/or
10  modify it under the terms of the GNU General Public License as
11  published by the Free Software Foundation; either version 2 of the
12  License, or (at your option) any later version.
13
14  The yat library is distributed in the hope that it will be useful,
15  but WITHOUT ANY WARRANTY; without even the implied warranty of
16  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  General Public License for more details.
18
19  You should have received a copy of the GNU General Public License
20  along with this program; if not, write to the Free Software
21  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
22  02111-1307, USA.
23*/
24
25#include "yat/classifier/DataLookupWeighted1D.h"
26#include "yat/classifier/FeatureSelectorIR.h"
27#include "yat/classifier/FeatureSelectorRandom.h"
28#include "yat/classifier/MatrixLookupWeighted.h"
29#include "yat/classifier/Target.h"
30#include "yat/statistics/SNRScore.h"
31
32#include "yat/utility/matrix.h"
33
34#include <algorithm>
35#include <cmath>
36#include <fstream>
37#include <iterator>
38#include <iostream>
39#include <string>
40
41using namespace theplu::yat;
42
43int main(const int argc,const char* argv[])
44{ 
45  std::ostream* error;
46  if (argc>1 && argv[1]==std::string("-v"))
47    error = &std::cerr;
48  else {
49    error = new std::ofstream("/dev/null");
50    if (argc>1)
51      std::cout << "feature_selection -v : for printing extra information\n";
52  }
53  *error << "testing feature_selection" << std::endl;
54  bool ok = true;
55
56  statistics::SNRScore snr;
57  classifier::FeatureSelectorIR f(snr, 12);
58  classifier::FeatureSelectorRandom f2(12);
59
60  *error << "Reading in Sorlie data to identify top gene ..." << std::endl;
61  std::ifstream is("data/sorlie_centroid_data.txt");
62  utility::matrix data(is,'\t');
63  is.close();
64
65  is.open("data/sorlie_centroid_classes.txt");
66  classifier::Target targets(is);
67  is.close();
68
69  *error << "... done" << std::endl;
70
71  // Generate weight matrix with 0 for missing values and 1 for others.
72  utility::matrix weights(data.rows(),data.columns(),0.0);
73  for(size_t i=0;i<data.rows();++i)
74    for(size_t j=0;j<data.columns();++j)
75      if(!std::isnan(data(i,j)))
76        weights(i,j)=1.0;
77 
78  classifier::MatrixLookupWeighted dataviewweighted(data,weights);
79 
80  f2.update(dataviewweighted,targets);
81  *error << "\nRandomly ordered features (top 12):\n";
82  std::vector<size_t> features=f2.features();
83  std::copy(features.begin(),features.end(),
84            std::ostream_iterator<size_t>(*error," ")); 
85  *error << std::endl;
86
87  f.update(dataviewweighted,targets);
88  *error << "\nSNR ordered ordered features (top 12):\n";
89  features=f.features();
90  std::copy(features.begin(),features.end(),
91            std::ostream_iterator<size_t>(*error," ")); 
92  *error << std::endl;
93
94  size_t best_feature=features[0];
95  if(best_feature!=69) {
96    *error << "\nERROR: Incorrect best feature found!\n" << std::endl;
97    ok=0;
98  }
99
100  classifier::DataLookupWeighted1D row(dataviewweighted,best_feature,true);
101  double score_diff=fabs(snr.score(targets,row)-1.47804);
102  if(score_diff>0.00001) {
103    *error << "\nERROR: Best score not what expected!\n" << std::endl;
104    ok=0;
105  }
106 
107  // Re-rank the best features
108  classifier::MatrixLookupWeighted ranked=f.get(dataviewweighted);
109  f.update(ranked,targets);
110  features=f.features();
111  for(size_t i=0;i<features.size();i++) {
112    if(features[i]!=i) {
113      ok=0;
114      *error << "ERROR: Problem with top-ranked feature" << std::endl;
115    }
116  }
117
118  if (ok)
119    return 0;
120  return -1;
121}
Note: See TracBrowser for help on using the repository browser.