1 | // $Id: ensemble_test.cc 2132 2009-12-24 01:13:05Z peter $ |
---|
2 | |
---|
3 | /* |
---|
4 | Copyright (C) 2006 Jari Häkkinen, Peter Johansson, Markus Ringnér |
---|
5 | Copyright (C) 2007 Jari Häkkinen, Peter Johansson |
---|
6 | Copyright (C) 2008 Jari Häkkinen, Peter Johansson, Markus Ringnér |
---|
7 | |
---|
8 | This file is part of the yat library, http://dev.thep.lu.se/yat |
---|
9 | |
---|
10 | The yat library is free software; you can redistribute it and/or |
---|
11 | modify it under the terms of the GNU General Public License as |
---|
12 | published by the Free Software Foundation; either version 3 of the |
---|
13 | License, or (at your option) any later version. |
---|
14 | |
---|
15 | The yat library is distributed in the hope that it will be useful, |
---|
16 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
17 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
---|
18 | General Public License for more details. |
---|
19 | |
---|
20 | You should have received a copy of the GNU General Public License |
---|
21 | along with yat. If not, see <http://www.gnu.org/licenses/>. |
---|
22 | */ |
---|
23 | |
---|
24 | #include "Suite.h" |
---|
25 | |
---|
26 | #include "yat/utility/Matrix.h" |
---|
27 | #include "yat/classifier/SubsetGenerator.h" |
---|
28 | #include "yat/classifier/CrossValidationSampler.h" |
---|
29 | #include "yat/classifier/EnsembleBuilder.h" |
---|
30 | #include "yat/classifier/Kernel.h" |
---|
31 | #include "yat/classifier/KernelLookup.h" |
---|
32 | #include "yat/classifier/Kernel_SEV.h" |
---|
33 | #include "yat/classifier/Kernel_MEV.h" |
---|
34 | #include "yat/classifier/MatrixLookup.h" |
---|
35 | #include "yat/classifier/MatrixLookupWeighted.h" |
---|
36 | #include "yat/classifier/NCC.h" |
---|
37 | #include "yat/classifier/PolynomialKernelFunction.h" |
---|
38 | #include "yat/classifier/SVM.h" |
---|
39 | #include "yat/statistics/AUC.h" |
---|
40 | #include "yat/statistics/EuclideanDistance.h" |
---|
41 | |
---|
42 | #include <cassert> |
---|
43 | #include <fstream> |
---|
44 | #include <iostream> |
---|
45 | #include <cstdlib> |
---|
46 | #include <limits> |
---|
47 | |
---|
48 | |
---|
49 | int main(int argc, char* argv[]) |
---|
50 | { |
---|
51 | using namespace theplu::yat; |
---|
52 | test::Suite suite(argc, argv); |
---|
53 | |
---|
54 | suite.err() << "testing ensemble" << std::endl; |
---|
55 | |
---|
56 | suite.err() << "loading data" << std::endl; |
---|
57 | std::ifstream is(test::filename("data/nm_data_centralized.txt").c_str()); |
---|
58 | utility::Matrix data_core(is); |
---|
59 | is.close(); |
---|
60 | |
---|
61 | suite.err() << "create MatrixLookup" << std::endl; |
---|
62 | classifier::MatrixLookup data(data_core); |
---|
63 | classifier::KernelFunction* kf = new classifier::PolynomialKernelFunction(); |
---|
64 | suite.err() << "Building kernel" << std::endl; |
---|
65 | classifier::Kernel_SEV kernel(data,*kf); |
---|
66 | |
---|
67 | |
---|
68 | suite.err() << "load target" << std::endl; |
---|
69 | is.open(test::filename("data/nm_target_bin.txt").c_str()); |
---|
70 | classifier::Target target(is); |
---|
71 | is.close(); |
---|
72 | assert(data.columns()==target.size()); |
---|
73 | |
---|
74 | { |
---|
75 | suite.err() << "create ensemble of ncc" << std::endl; |
---|
76 | classifier::NCC<statistics::EuclideanDistance> ncc; |
---|
77 | classifier::CrossValidationSampler sampler(target,3,3); |
---|
78 | classifier::SubsetGenerator<classifier::MatrixLookup> subdata(sampler,data); |
---|
79 | classifier::EnsembleBuilder<classifier::SupervisedClassifier, |
---|
80 | classifier::MatrixLookup> ensemble(ncc, data, sampler); |
---|
81 | suite.err() << "build ensemble" << std::endl; |
---|
82 | ensemble.build(); |
---|
83 | std::vector<std::vector<statistics::Averager> > result; |
---|
84 | ensemble.predict(data, result); |
---|
85 | } |
---|
86 | |
---|
87 | { |
---|
88 | suite.err() << "create ensemble of ncc" << std::endl; |
---|
89 | classifier::MatrixLookupWeighted data_weighted(data); |
---|
90 | classifier::NCC<statistics::EuclideanDistance> ncc; |
---|
91 | classifier::CrossValidationSampler sampler(target,3,3); |
---|
92 | classifier::SubsetGenerator<classifier::MatrixLookupWeighted> |
---|
93 | subdata(sampler,data_weighted); |
---|
94 | classifier::EnsembleBuilder<classifier::SupervisedClassifier, |
---|
95 | classifier::MatrixLookupWeighted> ensemble(ncc, data_weighted, sampler); |
---|
96 | suite.err() << "build ensemble" << std::endl; |
---|
97 | ensemble.build(); |
---|
98 | std::vector<std::vector<statistics::Averager> > result; |
---|
99 | ensemble.predict(data_weighted, result); |
---|
100 | } |
---|
101 | |
---|
102 | suite.err() << "create KernelLookup" << std::endl; |
---|
103 | classifier::KernelLookup kernel_lookup(kernel); |
---|
104 | suite.err() << "create svm" << std::endl; |
---|
105 | classifier::SVM svm; |
---|
106 | suite.err() << "create Subsets" << std::endl; |
---|
107 | classifier::CrossValidationSampler sampler(target,3,3); |
---|
108 | classifier::SubsetGenerator<classifier::KernelLookup> cv(sampler, |
---|
109 | kernel_lookup); |
---|
110 | |
---|
111 | suite.err() << "create ensemble" << std::endl; |
---|
112 | classifier::EnsembleBuilder<classifier::SVM, classifier::KernelLookup> |
---|
113 | ensemble(svm, kernel_lookup, sampler); |
---|
114 | suite.err() << "build ensemble" << std::endl; |
---|
115 | ensemble.build(); |
---|
116 | utility::Vector out(target.size(),0); |
---|
117 | for (size_t i = 0; i<out.size(); ++i) { |
---|
118 | out(i)=ensemble.validate()[0][i].mean(); |
---|
119 | } |
---|
120 | statistics::AUC roc; |
---|
121 | suite.err() << roc.score(target,out) << std::endl; |
---|
122 | |
---|
123 | std::vector<std::vector<statistics::Averager> > result; |
---|
124 | ensemble.predict(kernel_lookup, result); |
---|
125 | for (size_t i = 0; i<result.size(); ++i) { |
---|
126 | for (size_t j=0; j<result[0].size(); ++j) { |
---|
127 | if (!suite.add(result[i][j].variance() > 0)) { |
---|
128 | suite.err() << "error: element " << i << " " << j << "\n"; |
---|
129 | suite.err() << "expected finite prediction varince\n"; |
---|
130 | suite.err() << "found: " << result[i][j].variance() << "\n"; |
---|
131 | } |
---|
132 | } |
---|
133 | } |
---|
134 | |
---|
135 | { |
---|
136 | suite.err() << "create ensemble" << std::endl; |
---|
137 | classifier::EnsembleBuilder<classifier::SVM, classifier::KernelLookup> |
---|
138 | ensemble(svm, kernel_lookup, sampler); |
---|
139 | suite.err() << "test validate() before build()\n"; |
---|
140 | ensemble.validate(); |
---|
141 | std::vector<std::vector<statistics::Averager> > result; |
---|
142 | suite.err() << "test predict() before build()\n"; |
---|
143 | ensemble.predict(kernel_lookup, result); |
---|
144 | } |
---|
145 | delete kf; |
---|
146 | |
---|
147 | return suite.return_value(); |
---|
148 | } |
---|