// $Id: SVM.cc 514 2006-02-20 09:45:34Z peter $

#include <c++_tools/classifier/SVM.h>

#include <c++_tools/classifier/DataLookup2D.h>
#include <c++_tools/gslapi/matrix.h>
#include <c++_tools/gslapi/vector.h>
#include <c++_tools/statistics/Averager.h>
#include <c++_tools/random/random.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <iostream>  // std::cerr in calculate_bias()
#include <limits>
#include <utility>
#include <vector>


namespace theplu {
namespace classifier {

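  // Index keeps the sample indices partitioned so that the indices of the
  // current support vectors occupy vec_[0 .. nof_sv_-1] and the non-support
  // vectors occupy the remaining positions. The member functions below
  // maintain this invariant.
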
  Index::Index(void)
    : nof_sv_(0), vec_(std::vector<size_t>(0))
  {
  }

  Index::Index(const size_t n)
    : nof_sv_(0), vec_(std::vector<size_t>(n))
  {
    for (size_t i=0; i<vec_.size(); i++)
      vec_[i]=i;
  }

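  // Partition the indices according to alpha: samples with alpha(i) >= tol
  // are placed in the support-vector block at the front of vec_, the rest
  // at the back.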
  void Index::init(const gslapi::vector& alpha, const double tol)
  {
    nof_sv_=0;
    size_t nof_nsv=0;
    for (size_t i=0; i<alpha.size(); i++)
      if (alpha(i)<tol){
        nof_nsv++;
        vec_[vec_.size()-nof_nsv]=i;
      }
      else{
        vec_[nof_sv_]=i;
        nof_sv_++;
      }
    assert(nof_sv_+nof_nsv==vec_.size());
  }

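  // sv_first/sv_second move the currently selected first/second sample into
  // the support-vector block; nsv_first/nsv_second move it into the
  // non-support-vector block. The displaced element is swapped into the
  // vacated slot and nof_sv_ is adjusted accordingly.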
  void Index::sv_first(void)
  {
    // if already sv, do nothing
    if (index_first_<nof_sv())
      return;

    // swap elements
    if(index_second_==nof_sv_){
      index_second_=index_first_;
    }
    vec_[index_first_]=vec_[nof_sv_];
    vec_[nof_sv_]=value_first_;
    index_first_ = nof_sv_;

    nof_sv_++;
  }

  void Index::sv_second(void)
  {
    // if already sv, do nothing
    if (index_second_<nof_sv())
      return;

    // swap elements
    if(index_first_==nof_sv_){
      index_first_=index_second_;
    }
    vec_[index_second_]=vec_[nof_sv_];
    vec_[nof_sv_]=value_second_;
    index_second_=nof_sv_;

    nof_sv_++;
  }

  void Index::nsv_first(void)
  {
    // if already nsv, do nothing
    if ( !(index_first_<nof_sv()) )
      return;

    if(index_second_==nof_sv_-1)
      index_second_=index_first_;
    vec_[index_first_]=vec_[nof_sv_-1];
    vec_[nof_sv_-1]=value_first_;
    index_first_=nof_sv_-1;

    nof_sv_--;
  }

  void Index::nsv_second(void)
  {
    // if already nsv, do nothing
    if ( !(index_second_<nof_sv()) )
      return;

    if(index_first_==nof_sv_-1)
      index_first_=index_second_;
    vec_[index_second_]=vec_[nof_sv_-1];
    vec_[nof_sv_-1]=value_second_;
    index_second_ = nof_sv_-1;

    nof_sv_--;
  }

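  // Randomize the order of the non-support vectors so that choose() scans
  // them in random order.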
  void Index::shuffle(void)
  {
    random::DiscreteUniform a;
    std::random_shuffle(vec_.begin()+nof_sv_, vec_.end(), a);
  }

  void Index::update_first(const size_t i)
  {
    assert(i<n());
    index_first_=i;
    value_first_=vec_[i];
  }

  void Index::update_second(const size_t i)
  {
    assert(i<n());
    index_second_=i;
    value_second_=vec_[i];
  }

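  // Note: C_inverse_ is initialised to zero and bounds() imposes no finite
  // upper box constraint on alpha, so by default the alphas are effectively
  // unbounded from above (hard-margin-like behaviour).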
  SVM::SVM(const DataLookup2D& kernel, const Target& target)
    : SupervisedClassifier(kernel,target),
      alpha_(target.size(),0),
      bias_(0),
      C_inverse_(0),
      kernel_(kernel),
      max_epochs_(10000000),
      output_(target.size(),0),
      sample_(target.size()),
      target_(target),
      trained_(false),
      tolerance_(0.00000001)
  {
  }

  SupervisedClassifier* SVM::make_classifier(const DataLookup2D& data,
                                             const Target& target) const
  {
    SVM* sc = new SVM(data,target);
    // Copy those parameters that can be modified from the outside
    return sc;
  }

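  // Train the SVM with an SMO-style optimisation: keep an error cache E,
  // repeatedly pick a pair of samples that violates the optimality
  // conditions (see choose()), solve the two-variable subproblem
  // analytically, clip the solution to the feasible interval given by
  // bounds(), and update alpha and E. Training stops when no violating
  // pair remains, or fails after max_epochs_ iterations.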
  bool SVM::train(void)
  {
    // initializing variables for optimization
    assert(target_.size()==kernel_.rows());
    assert(target_.size()==alpha_.size());

    sample_.init(alpha_,tolerance_);
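    // Error cache without bias: E(i) = sum_j y_j*alpha_j*kernel_mod(i,j) - y_i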
    gslapi::vector E(target_.size());
    for (size_t i=0; i<E.size(); i++) {
      E(i)=0;
      for (size_t j=0; j<E.size(); j++)
        E(i) += kernel_mod(i,j)*target(j)*alpha_(j);
      E(i)=E(i)-target(i);
    }
    assert(target_.size()==E.size());
    assert(target_.size()==sample_.n());

    unsigned long int epochs = 0;
    double alpha_new2;
    double alpha_new1;
    double u;
    double v;

    // Training loop
    while(choose(E)) {
      bounds(u,v);
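      // Two-variable update: with k = K(1,1)+K(2,2)-2*K(1,2) taken from
      // kernel_mod, the unconstrained optimum for the second alpha is
      // alpha2 + y2*(E1-E2)/k, which is then clipped to [u,v].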
      double k = ( kernel_mod(sample_.value_first(), sample_.value_first()) +
                   kernel_mod(sample_.value_second(), sample_.value_second()) -
                   2*kernel_mod(sample_.value_first(), sample_.value_second()));

      double alpha_old1=alpha_(sample_.value_first());
      double alpha_old2=alpha_(sample_.value_second());

      alpha_new2 = ( alpha_(sample_.value_second()) +
                     target(sample_.value_second())*
                     ( E(sample_.value_first())-E(sample_.value_second()) )/k );

      if (alpha_new2 > v)
        alpha_new2 = v;
      else if (alpha_new2<u)
        alpha_new2 = u;

      // Updating the alphas
      // if alpha is 'zero' make the sample a non-support vector
      if (alpha_new2 < tolerance_){
        sample_.nsv_second();
      }
      else{
        sample_.sv_second();
      }

      alpha_new1 = (alpha_(sample_.value_first()) +
                    (target(sample_.value_first()) *
                     target(sample_.value_second()) *
                     (alpha_(sample_.value_second()) - alpha_new2) ));

      // if alpha is 'zero' make the sample a non-support vector
      if (alpha_new1 < tolerance_){
        sample_.nsv_first();
      }
      else
        sample_.sv_first();

      alpha_(sample_.value_first()) = alpha_new1;
      alpha_(sample_.value_second()) = alpha_new2;

      // update E vector
      // Peter, perhaps one should only update SVs, but what happens in choose?
      for (size_t i=0; i<E.size(); i++) {
        E(i)+=( kernel_mod(i,sample_.value_first())*
                target(sample_.value_first()) *
                (alpha_new1-alpha_old1) );
        E(i)+=( kernel_mod(i,sample_.value_second())*
                target(sample_.value_second()) *
                (alpha_new2-alpha_old2) );
      }

      epochs++;
      if (epochs>max_epochs_)
        return false;
    }

    trained_ = calculate_bias();
    return trained_;
  }

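  // Select the next working pair. Among the support vectors, pick the
  // samples with the largest and smallest E; if their difference exceeds
  // 2*tolerance_ this pair is a violator and is returned. Otherwise the
  // non-support vectors are scanned in random order for a violating
  // partner. With at most one support vector, a pair with one sample from
  // each class that satisfies the selection criterion is chosen instead.
  // Returns false when no violating pair exists, which terminates training.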
  bool SVM::choose(const theplu::gslapi::vector& E)
  {
    // First check for violation among SVs
    // E should be the same for all SVs
    // Choose the pair having the largest violation/difference.
    sample_.update_second(0);
    sample_.update_first(0);
    if (sample_.nof_sv()>1){

      double max = E(sample_(0));
      double min = max;
      for (size_t i=1; i<sample_.nof_sv(); i++){
        assert(alpha_(sample_(i))>tolerance_);
        if (E(sample_(i)) > max){
          max = E(sample_(i));
          sample_.update_second(i);
        }
        else if (E(sample_(i))<min){
          min = E(sample_(i));
          sample_.update_first(i);
        }
      }
      assert(alpha_(sample_.value_first())>tolerance_);
      assert(alpha_(sample_.value_second())>tolerance_);

      if (E(sample_.value_second()) - E(sample_.value_first()) > 2*tolerance_){
        return true;
      }

      // If no violation, check among the non-support vectors
      sample_.shuffle();

      for (size_t i=sample_.nof_sv(); i<sample_.n();i++){
        //std::cout << "nr: " << i << std::endl;
        if (target_.binary(sample_(i))){
          if(E(sample_(i)) < E(sample_.value_first()) - 2*tolerance_){
            sample_.update_second(i);
            return true;
          }
        }
        else{
          if(E(sample_(i)) > E(sample_.value_second()) + 2*tolerance_){
            sample_.update_first(i);
            return true;
          }
        }
      }
    }

    // special case: at most one support vector
    else{
      for (size_t i=0; i<sample_.n(); i++) {
        if (target_.binary(sample_(i))){
          for (size_t j=0; j<sample_.n(); j++) {
            if ( !target_.binary(sample_(j)) &&
                 E(sample_(i)) < E(sample_(j))+2*tolerance_ ){
              sample_.update_first(i);
              sample_.update_second(j);
              return true;
            }
          }
        }
      }
    }

    //std::cout << "Done!" << std::endl;
    // If there is no violation, we should stop training
    return false;
  }

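  // Compute the feasible interval [u,v] for the second alpha in the
  // two-variable subproblem. The usual SMO box constraint C is replaced by
  // std::numeric_limits<double>::max(), i.e. no finite upper bound is
  // imposed.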
  void SVM::bounds( double& u, double& v) const
  {
    if (target(sample_.value_first())!=target(sample_.value_second())) {
      if (alpha_(sample_.value_second()) > alpha_(sample_.value_first())) {
        v = std::numeric_limits<double>::max();
        u = alpha_(sample_.value_second()) - alpha_(sample_.value_first());
      }
      else {
        v = (std::numeric_limits<double>::max() -
             alpha_(sample_.value_first()) +
             alpha_(sample_.value_second()));
        u = 0;
      }
    }
    else {
      if (alpha_(sample_.value_second()) + alpha_(sample_.value_first()) >
          std::numeric_limits<double>::max()) {
        u = (alpha_(sample_.value_second()) + alpha_(sample_.value_first()) -
             std::numeric_limits<double>::max());
        v = std::numeric_limits<double>::max();
      }
      else {
        u = 0;
        v = alpha_(sample_.value_first()) + alpha_(sample_.value_second());
      }
    }
  }

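  // Compute the decision values without bias, then estimate the bias as the
  // average of target*(1 - alpha*C_inverse_) - output over the support
  // vectors, and add it to the outputs. Fails if there are no support
  // vectors.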
  bool SVM::calculate_bias(void)
  {
    // calculating output without bias
    for (size_t i=0; i<output_.size(); i++) {
      output_(i)=0;
      for (size_t j=0; j<output_.size(); j++)
        output_(i)+=alpha_(j)*target(j) * kernel_(i,j);
    }

    if (!sample_.nof_sv()){
      std::cerr << "SVM::train() error: "
                << "Cannot calculate bias because there are no support vectors"
                << std::endl;
      return false;
    }

    // For samples with alpha>0, we have: target*output=1-alpha/C
    bias_=0;
    for (size_t i=0; i<sample_.nof_sv(); i++)
      bias_+= ( target(sample_(i)) * (1-alpha_(sample_(i))*C_inverse_) -
                output_(sample_(i)) );
    bias_=bias_/sample_.nof_sv();
    for (size_t i=0; i<output_.size(); i++)
      output_(i) += bias_;

    return true;
  }

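  // Illustrative usage sketch (not part of the library itself): given a
  // precomputed kernel lookup and a target, whose construction is defined
  // elsewhere in c++_tools and not shown here,
  //
  //   SVM svm(kernel, target);  // kernel : DataLookup2D, target : Target
  //   bool ok = svm.train();    // false if training did not finish within
  //                             // max_epochs_ or no support vector was found
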
}} // of namespace classifier and namespace theplu