1 | // $Id$ |
---|
2 | |
---|
3 | #include <c++_tools/classifier/NCC.h> |
---|
4 | |
---|
5 | #include <c++_tools/classifier/DataLookup1D.h> |
---|
6 | #include <c++_tools/classifier/DataLookup2D.h> |
---|
7 | #include <c++_tools/classifier/MatrixLookup.h> |
---|
8 | #include <c++_tools/classifier/InputRanker.h> |
---|
9 | #include <c++_tools/classifier/Target.h> |
---|
10 | #include <c++_tools/gslapi/vector.h> |
---|
11 | #include <c++_tools/statistics/Distance.h> |
---|
12 | #include <c++_tools/utility/stl_utility.h> |
---|
13 | |
---|
14 | #include<iostream> |
---|
15 | #include<iterator> |
---|
16 | #include <map> |
---|
17 | #include <cmath> |
---|
18 | |
---|
19 | namespace theplu { |
---|
20 | namespace classifier { |
---|
21 | |
---|
22 | NCC::NCC(const MatrixLookup& data, const Target& target, |
---|
23 | const statistics::Distance& distance) |
---|
24 | : SupervisedClassifier(target), distance_(distance), matrix_(data) |
---|
25 | { |
---|
26 | } |
---|
27 | |
---|
28 | NCC::NCC(const MatrixLookup& data, const Target& target, |
---|
29 | const statistics::Distance& distance, |
---|
30 | statistics::Score& score, size_t nof_inputs) |
---|
31 | : SupervisedClassifier(target, &score, nof_inputs), |
---|
32 | distance_(distance), matrix_(data) |
---|
33 | { |
---|
34 | } |
---|
35 | |
---|
36 | NCC::~NCC() |
---|
37 | { |
---|
38 | if(ranker_) |
---|
39 | delete ranker_; |
---|
40 | } |
---|
41 | |
---|
42 | |
---|
43 | SupervisedClassifier* |
---|
44 | NCC::make_classifier(const DataLookup2D& data, |
---|
45 | const Target& target) const |
---|
46 | { |
---|
47 | const MatrixLookup& tmp = dynamic_cast<const MatrixLookup&>(data); |
---|
48 | |
---|
49 | NCC* ncc= new NCC(tmp,target,this->distance_); |
---|
50 | ncc->score_=this->score_; |
---|
51 | ncc->nof_inputs_=this->nof_inputs_; |
---|
52 | return ncc; |
---|
53 | } |
---|
54 | |
---|
55 | |
---|
56 | bool NCC::train() |
---|
57 | { |
---|
58 | // If score is set calculate centroids only for nof_inputs_ number |
---|
59 | // of top ranked inputs. Otherwise calculate centroids based on |
---|
60 | // all inputs ( = all rows in data matrix). |
---|
61 | if(ranker_) |
---|
62 | delete ranker_; |
---|
63 | size_t rows=matrix_.rows(); |
---|
64 | if(score_) { |
---|
65 | // Markus: missing values should not be handled here, but a weight matrix |
---|
66 | // should be supported throughout the classifier class structure. |
---|
67 | gslapi::matrix weight(matrix_.rows(),matrix_.columns(),0.0); |
---|
68 | for(size_t i=0; i<matrix_.rows(); i++) |
---|
69 | for(size_t j=0; j<matrix_.columns(); j++) |
---|
70 | if(!std::isnan(matrix_(i,j))) |
---|
71 | weight(i,j)=1.0; |
---|
72 | MatrixLookup weightview(weight); |
---|
73 | ranker_=new InputRanker(matrix_, target_, *score_, weightview); |
---|
74 | rows=nof_inputs_; |
---|
75 | } |
---|
76 | centroids_=gslapi::matrix(rows, target_.nof_classes()); |
---|
77 | gslapi::matrix nof_in_class(rows, target_.nof_classes()); |
---|
78 | for(size_t i=0; i<rows; i++) { |
---|
79 | for(size_t j=0; j<matrix_.columns(); j++) { |
---|
80 | double value=matrix_(i,j); |
---|
81 | if(score_) |
---|
82 | value=matrix_(ranker_->id(i),j); |
---|
83 | if(!std::isnan(value)) { |
---|
84 | centroids_(i,target_(j)) += value; |
---|
85 | nof_in_class(i,target_(j))++; |
---|
86 | } |
---|
87 | } |
---|
88 | } |
---|
89 | centroids_.div_elements(nof_in_class); |
---|
90 | trained_=true; |
---|
91 | return trained_; |
---|
92 | } |
---|
93 | |
---|
94 | |
---|
95 | void NCC::predict(const DataLookup1D& input, |
---|
96 | gslapi::vector& prediction) const |
---|
97 | { |
---|
98 | prediction=gslapi::vector(centroids_.columns()); |
---|
99 | size_t size=input.size(); |
---|
100 | if(ranker_) |
---|
101 | size=nof_inputs_; |
---|
102 | gslapi::vector w(size,0); |
---|
103 | gslapi::vector value(size,0); |
---|
104 | for(size_t i=0; i<size; i++) { // take care of missing values |
---|
105 | value(i)=input(i); |
---|
106 | if(ranker_) |
---|
107 | value(i)=input(ranker_->id(i)); |
---|
108 | if(!std::isnan(value(i))) |
---|
109 | w(i)=1.0; |
---|
110 | } |
---|
111 | for(size_t j=0; j<centroids_.columns(); j++) |
---|
112 | prediction(j)=distance_(value,gslapi::vector(centroids_,j,false),w, w); |
---|
113 | } |
---|
114 | |
---|
115 | |
---|
116 | void NCC::predict(const DataLookup2D& input, |
---|
117 | gslapi::matrix& prediction) const |
---|
118 | { |
---|
119 | prediction=gslapi::matrix(centroids_.columns(), input.columns()); |
---|
120 | for(size_t j=0; j<input.columns();j++) { |
---|
121 | DataLookup1D in(input,j,true); |
---|
122 | gslapi::vector out; |
---|
123 | predict(in,out); |
---|
124 | prediction.set_column(j,out); |
---|
125 | } |
---|
126 | } |
---|
127 | |
---|
128 | |
---|
129 | // additional operators |
---|
130 | |
---|
131 | // std::ostream& operator<< (std::ostream& s, const NCC& ncc) { |
---|
132 | // std::copy(ncc.classes().begin(), ncc.classes().end(), |
---|
133 | // std::ostream_iterator<std::map<double, u_int>::value_type> |
---|
134 | // (s, "\n")); |
---|
135 | // s << "\n" << ncc.centroids() << "\n"; |
---|
136 | // return s; |
---|
137 | // } |
---|
138 | |
---|
139 | }} // of namespace classifier and namespace theplu |
---|