// $Id: SVM.cc 545 2006-03-06 13:35:45Z peter $

#include <c++_tools/classifier/SVM.h>

#include <c++_tools/classifier/DataLookup2D.h>
#include <c++_tools/classifier/InputRanker.h>
#include <c++_tools/gslapi/matrix.h>
#include <c++_tools/gslapi/vector.h>
#include <c++_tools/statistics/Averager.h>
#include <c++_tools/statistics/Score.h>
#include <c++_tools/random/random.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <iostream> // std::cerr in train() and calculate_bias()
#include <limits>
#include <utility>
#include <vector>


namespace theplu {
namespace classifier {

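// Construct an untrained SVM on a precalculated kernel. C_inverse_=0
// corresponds to C = infinity, i.e. a hard-margin machine with no
// upper box constraint on the alphas.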
SVM::SVM(const KernelLookup& kernel, const Target& target)
  : SupervisedClassifier(target),
    alpha_(target.size(),0),
    bias_(0),
    C_inverse_(0),
    kernel_(&kernel),
    max_epochs_(10000000),
    output_(target.size(),0),
    owner_(false),
    sample_(target.size()),
    trained_(false),
    tolerance_(0.00000001)
{
}

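// Construct an SVM that first ranks all inputs with the supplied Score
// and keeps only the nof_inputs top-ranked features when building its
// kernel.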
SVM::SVM(const KernelLookup& kernel, const Target& target,
         statistics::Score& score, const size_t nof_inputs)
  : SupervisedClassifier(target, &score, nof_inputs),
    alpha_(target.size(),0),
    bias_(0),
    C_inverse_(0),
    max_epochs_(10000000),
    output_(target.size(),0),
    sample_(target.size()),
    trained_(false),
    tolerance_(0.00000001)
{
  if (kernel.weighted()){
    const MatrixLookup* data = kernel.data();
    const MatrixLookup* weights = kernel.weights();
    ranker_ = new InputRanker(*data, target, score, *weights);
    delete data;
    delete weights;
  }
  else{
    const MatrixLookup* data = kernel.data();
    ranker_ = new InputRanker(*data, target, score);
    delete data;
  }

  std::vector<size_t> index;
  index.reserve(nof_inputs);
  for (size_t i=0; i<nof_inputs; i++)
    index.push_back(ranker_->id(i));
  kernel_ = kernel.selected(index);
  owner_ = true;
}


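// Virtual factory: create a new SVM with the same setup for another
// data/target pair (presumably used by the framework, e.g. during
// cross-validation).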
SupervisedClassifier* SVM::make_classifier(const DataLookup2D& data,
                                           const Target& target) const
{
  // Peter, should check success of dynamic_cast
  const KernelLookup& tmp = dynamic_cast<const KernelLookup&>(data);
  SVM* sc;
  if (ranker_)
    sc = new SVM(tmp,target,*score_,nof_inputs_);
  else
    sc = new SVM(tmp,target);

  // Copy those variables possible to modify from outside
  return sc;
}

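// Fill 'prediction' with one column per test sample: row 0 holds the
// decision value f(x) = sum_j alpha_j*y_j*K(x_j,x) + bias, and row 1
// its negation, one row per class.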
void SVM::predict(const DataLookup2D& input, gslapi::matrix& prediction) const
{
  // Peter, should check success of dynamic_cast
  const KernelLookup* input_kernel= &dynamic_cast<const KernelLookup&>(input);

  if (ranker_) {// feature selection
    std::vector<size_t> index;
    index.reserve(nof_inputs_);
    for (size_t i=0; i<nof_inputs_; i++)
      index.push_back(ranker_->id(i));
    input_kernel = input_kernel->selected(index);
  }

  assert(input.rows()==alpha_.size());
  prediction=gslapi::matrix(2,input.columns(),0);
  for (size_t i = 0; i<input.columns(); i++){
    // sum over training samples j
    for (size_t j = 0; j<input.rows(); j++)
      prediction(0,i) += target(j)*alpha_(j)*(*input_kernel)(j,i);
    prediction(0,i) += bias_;
  }

  for (size_t i = 0; i<prediction.columns(); i++)
    prediction(1,i) = -prediction(0,i);

  // selected() hands over ownership (cf. owner_ in the constructor),
  // so release the temporary kernel
  if (ranker_)
    delete input_kernel;
}

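// Decision value for a single sample:
// f(x) = sum_i alpha_i*y_i*K(x,x_i) + bias.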
double SVM::predict(const DataLookup1D& x) const
{
  double y=0;
  for (size_t i=0; i<alpha_.size(); i++)
    y += alpha_(i)*target_(i)*kernel_->element(x,i);

  return y+bias_;
}

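// Same decision value for a weighted sample; w presumably holds the
// weights accompanying the elements of x.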
double SVM::predict(const DataLookup1D& x, const DataLookup1D& w) const
{
  double y=0;
  for (size_t i=0; i<alpha_.size(); i++)
    y += alpha_(i)*target_(i)*kernel_->element(x,w,i);

  return y+bias_;
}

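// Train with a variant of Platt's SMO: repeatedly pick a pair of
// samples violating the KKT conditions (choose), solve the
// two-variable subproblem analytically, clip the step to the feasible
// segment (bounds), and update the error cache E. A minimal usage
// sketch (construction of KernelLookup and Target follows the
// surrounding c++_tools API):
//
//   SVM svm(kernel, target);
//   if (svm.train()) {
//     double y = svm.predict(sample);  // sample is a DataLookup1D
//     // ...
//   }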
bool SVM::train(void)
{
  // initializing variables for optimization
  assert(target_.size()==kernel_->rows());
  assert(target_.size()==alpha_.size());

  sample_.init(alpha_,tolerance_);
  gslapi::vector E(target_.size(),0);
  for (size_t i=0; i<E.size(); i++) {
    for (size_t j=0; j<E.size(); j++)
      E(i) += kernel_mod(i,j)*target(j)*alpha_(j);
    E(i)=E(i)-target(i);
  }
  assert(target_.size()==E.size());
  assert(target_.size()==sample_.n());

  unsigned long int epochs = 0;
  double alpha_new2;
  double alpha_new1;
  double u;
  double v;

  // Training loop
  while(choose(E)) {
    bounds(u,v);
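    // eta = K(1,1)+K(2,2)-2*K(1,2); the analytic SMO step for the
    // second alpha is alpha2 += y2*(E1-E2)/eta, clipped to [u,v].
    // This assumes eta > 0, which holds for a positive definite kernel
    // with distinct samples.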
    double k = ( kernel_mod(sample_.value_first(), sample_.value_first()) +
                 kernel_mod(sample_.value_second(), sample_.value_second()) -
                 2*kernel_mod(sample_.value_first(), sample_.value_second()));

    double alpha_old1=alpha_(sample_.value_first());
    double alpha_old2=alpha_(sample_.value_second());

    alpha_new2 = ( alpha_(sample_.value_second()) +
                   target(sample_.value_second())*
                   ( E(sample_.value_first())-E(sample_.value_second()) )/k );

    if (alpha_new2 > v)
      alpha_new2 = v;
    else if (alpha_new2<u)
      alpha_new2 = u;

    // Updating the alphas
    // if alpha is 'zero' make the sample a non-support vector
    if (alpha_new2 < tolerance_){
      sample_.nsv_second();
    }
    else{
      sample_.sv_second();
    }

    alpha_new1 = (alpha_(sample_.value_first()) +
                  (target(sample_.value_first()) *
                   target(sample_.value_second()) *
                   (alpha_(sample_.value_second()) - alpha_new2) ));

    // if alpha is 'zero' make the sample a non-support vector
    if (alpha_new1 < tolerance_){
      sample_.nsv_first();
    }
    else
      sample_.sv_first();

    alpha_(sample_.value_first()) = alpha_new1;
    alpha_(sample_.value_second()) = alpha_new2;

    // update E vector
    // Peter, perhaps one should only update SVs, but what happens in choose?
    for (size_t i=0; i<E.size(); i++) {
      E(i)+=( kernel_mod(i,sample_.value_first())*
              target(sample_.value_first()) *
              (alpha_new1-alpha_old1) );
      E(i)+=( kernel_mod(i,sample_.value_second())*
              target(sample_.value_second()) *
              (alpha_new2-alpha_old2) );
    }

    epochs++;
    if (epochs>max_epochs_){
      std::cerr << "WARNING: SVM: maximal number of epochs reached.\n";
      return false;
    }
  }

  trained_ = calculate_bias();
  return trained_;
}

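// Select the next working pair. At the optimum E is equal for all
// support vectors, so the SV pair with the largest spread in E is the
// worst violator; if the SVs are consistent, scan the (shuffled)
// non-support vectors for a violator. Returns false when nothing
// violates the conditions by more than 2*tolerance_, i.e. at
// convergence.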
bool SVM::choose(const theplu::gslapi::vector& E)
{
  // First check for violation among SVs
  // E should be the same for all SVs
  // Choose the pair having the largest violation/difference.
  sample_.update_second(0);
  sample_.update_first(0);
  if (sample_.nof_sv()>1){

    double max = E(sample_(0));
    double min = max;
    for (size_t i=1; i<sample_.nof_sv(); i++){
      assert(alpha_(sample_(i))>tolerance_);
      if (E(sample_(i)) > max){
        max = E(sample_(i));
        sample_.update_second(i);
      }
      else if (E(sample_(i))<min){
        min = E(sample_(i));
        sample_.update_first(i);
      }
    }
    assert(alpha_(sample_.value_first())>tolerance_);
    assert(alpha_(sample_.value_second())>tolerance_);

    if (E(sample_.value_second()) - E(sample_.value_first()) > 2*tolerance_){
      return true;
    }

    // If no violation, check among non-support vectors
    sample_.shuffle();
    for (size_t i=sample_.nof_sv(); i<sample_.n();i++){
      if (target_.binary(sample_(i))){
        if(E(sample_(i)) < E(sample_.value_first()) - 2*tolerance_){
          sample_.update_second(i);
          return true;
        }
      }
      else{
        if(E(sample_(i)) > E(sample_.value_second()) + 2*tolerance_){
          sample_.update_first(i);
          return true;
        }
      }
    }
  }

  // special case: at most one support vector
  else{
    for (size_t i=0; i<sample_.n(); i++) {
      if (target_.binary(sample_(i))){
        for (size_t j=0; j<sample_.n(); j++) {
          if ( !target_.binary(sample_(j)) &&
               E(sample_(i)) < E(sample_(j))+2*tolerance_ ){
            sample_.update_first(i);
            sample_.update_second(j);
            return true;
          }
        }
      }
    }
  }

  // If there is no violation we should stop training
  return false;
}

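// Feasible interval [u,v] for the second alpha, following the SMO box
// constraints with C replaced by std::numeric_limits<double>::max()
// (hard margin): for unequal targets the difference alpha2-alpha1 is
// conserved, for equal targets the sum alpha1+alpha2 is conserved.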
void SVM::bounds( double& u, double& v) const
{
  if (target(sample_.value_first())!=target(sample_.value_second())) {
    if (alpha_(sample_.value_second()) > alpha_(sample_.value_first())) {
      v = std::numeric_limits<double>::max();
      u = alpha_(sample_.value_second()) - alpha_(sample_.value_first());
    }
    else {
      v = (std::numeric_limits<double>::max() -
           alpha_(sample_.value_first()) +
           alpha_(sample_.value_second()));
      u = 0;
    }
  }
  else {
    if (alpha_(sample_.value_second()) + alpha_(sample_.value_first()) >
        std::numeric_limits<double>::max()) {
      u = (alpha_(sample_.value_second()) + alpha_(sample_.value_first()) -
           std::numeric_limits<double>::max());
      v = std::numeric_limits<double>::max();
    }
    else {
      u = 0;
      v = alpha_(sample_.value_first()) + alpha_(sample_.value_second());
    }
  }
}

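// The KKT conditions give target*output = 1 - alpha/C for every
// support vector, so the bias is estimated by averaging
// target_i*(1 - alpha_i*C_inverse_) - output_i over the SVs.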
bool SVM::calculate_bias(void)
{
  // calculating output without bias
  for (size_t i=0; i<output_.size(); i++) {
    output_(i)=0;
    for (size_t j=0; j<output_.size(); j++)
      output_(i)+=alpha_(j)*target(j) * (*kernel_)(i,j);
  }

  if (!sample_.nof_sv()){
    std::cerr << "SVM::train() error: "
              << "Cannot calculate bias because there is no support vector"
              << std::endl;
    return false;
  }

  // For samples with alpha>0, we have: target*output=1-alpha/C
  bias_=0;
  for (size_t i=0; i<sample_.nof_sv(); i++)
    bias_+= ( target(sample_(i)) * (1-alpha_(sample_(i))*C_inverse_) -
              output_(sample_(i)) );
  bias_=bias_/sample_.nof_sv();
  for (size_t i=0; i<output_.size(); i++)
    output_(i) += bias_;

  return true;
}

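// Index keeps a permutation of the sample indices, partitioned so that
// the first nof_sv_ entries are the current support vectors; sv_* and
// nsv_* move a sample across the boundary with a single swap.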
Index::Index(void)
  : nof_sv_(0), vec_(std::vector<size_t>(0))
{
}

Index::Index(const size_t n)
  : nof_sv_(0), vec_(std::vector<size_t>(n))
{
  for (size_t i=0; i<vec_.size(); i++)
    vec_[i]=i;
}

void Index::init(const gslapi::vector& alpha, const double tol)
{
  nof_sv_=0;
  size_t nof_nsv=0;
  for (size_t i=0; i<alpha.size(); i++)
    if (alpha(i)<tol){
      nof_nsv++;
      vec_[vec_.size()-nof_nsv]=i;
    }
    else{
      vec_[nof_sv_]=i;
      nof_sv_++;
    }
  assert(nof_sv_+nof_nsv==vec_.size());
}

void Index::sv_first(void)
{
  // if already sv, do nothing
  if (index_first_<nof_sv())
    return;

  // swap elements
  if(index_second_==nof_sv_){
    index_second_=index_first_;
  }
  vec_[index_first_]=vec_[nof_sv_];
  vec_[nof_sv_]=value_first_;
  index_first_ = nof_sv_;

  nof_sv_++;
}

void Index::sv_second(void)
{
  // if already sv, do nothing
  if (index_second_<nof_sv())
    return;

  // swap elements
  if(index_first_==nof_sv_){
    index_first_=index_second_;
  }

  vec_[index_second_]=vec_[nof_sv_];
  vec_[nof_sv_]=value_second_;
  index_second_=nof_sv_;

  nof_sv_++;
}

void Index::nsv_first(void)
{
  // if already nsv, do nothing
  if ( !(index_first_<nof_sv()) )
    return;

  if(index_second_==nof_sv_-1)
    index_second_=index_first_;
  vec_[index_first_]=vec_[nof_sv_-1];
  vec_[nof_sv_-1]=value_first_;
  index_first_=nof_sv_-1;

  nof_sv_--;
}

void Index::nsv_second(void)
{
  // if already nsv, do nothing
  if ( !(index_second_<nof_sv()) )
    return;

  if(index_first_==nof_sv_-1)
    index_first_=index_second_;
  vec_[index_second_]=vec_[nof_sv_-1];
  vec_[nof_sv_-1]=value_second_;
  index_second_ = nof_sv_-1;

  nof_sv_--;
}

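// Randomize the scan order of the non-support vectors (the tail of
// vec_) so choose() does not always favour low indices.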
void Index::shuffle(void)
{
  random::DiscreteUniform a;
  std::random_shuffle(vec_.begin()+nof_sv_, vec_.end(), a);
}

void Index::update_first(const size_t i)
{
  assert(i<n());
  index_first_=i;
  value_first_=vec_[i];
}

void Index::update_second(const size_t i)
{
  assert(i<n());
  index_second_=i;
  value_second_=vec_[i];
}

}} // of namespace classifier and namespace theplu