1 | // $Id: SVM.cc 37 2004-02-13 15:46:31Z peter $ |
---|
2 | |
---|
3 | #ifndef _THEP_SVM_ |
---|
4 | #define _THEP_SVM_ |
---|
5 | |
---|
6 | // System includes |
---|
7 | #include <math.h> |
---|
8 | |
---|
9 | // Thep C++ Tools |
---|
10 | #include "SVM.h" |
---|
11 | #include "matrix.h" |
---|
12 | #include "vector.h" |
---|
13 | #include "random_singleton.h" |
---|
14 | |
---|
15 | using namespace thep_cpp_tools; |
---|
16 | using namespace std; |
---|
17 | |
---|
18 | SVM::SVM(const thep_gsl_api::matrix& kernel, |
---|
19 | const thep_gsl_api::vector& target) : trained_(false), |
---|
20 | kernel_(kernel), |
---|
21 | target_(target), |
---|
22 | alpha_(target.size(),true,true), |
---|
23 | bias_(0) |
---|
24 | |
---|
25 | { |
---|
26 | } |
---|
27 | |
---|
28 | void SVM::train() //Should be done so one can choose to not train on all the samples |
---|
29 | { |
---|
30 | thep_gsl_api::vector E = thep_gsl_api::vector(-target_); |
---|
31 | |
---|
32 | double upper_bound = pow(10., 32); |
---|
33 | u_int count = 0; |
---|
34 | double alpha_new; |
---|
35 | random_singleton* rnd=random_singleton::get_instance(); |
---|
36 | double u; |
---|
37 | double v; |
---|
38 | thep_gsl_api::vector dalpha; |
---|
39 | |
---|
40 | // Stop criteria |
---|
41 | bool stop_condition = false; |
---|
42 | while(!stop_condition) |
---|
43 | { |
---|
44 | count++; |
---|
45 | dalpha = alpha_.mul_elements(target_); |
---|
46 | E = kernel_*dalpha-target_; //should be done in another way!! |
---|
47 | |
---|
48 | // Choosing a pair of variables to modify (Should be done more clever) |
---|
49 | u_long index1 = rnd->get_uniform_int(kernel_.cols()); |
---|
50 | u_long index2 = rnd->get_uniform_int(kernel_.cols()-1); |
---|
51 | if (index2 >= index1) |
---|
52 | index2++; |
---|
53 | |
---|
54 | //Updating the two variables |
---|
55 | if (target_[index1]!=target_[index2]) |
---|
56 | { |
---|
57 | if (alpha_[index2] > alpha_[index1] ) |
---|
58 | { |
---|
59 | v = upper_bound; |
---|
60 | u = alpha_[index2] - alpha_[index1]; |
---|
61 | } |
---|
62 | else |
---|
63 | { |
---|
64 | v = upper_bound - alpha_[index1] + alpha_[index2]; |
---|
65 | u = 0; |
---|
66 | } |
---|
67 | } |
---|
68 | else |
---|
69 | { |
---|
70 | if (alpha_[index2] + alpha_[index1] > upper_bound) |
---|
71 | { |
---|
72 | u = alpha_[index2] + alpha_[index1] - upper_bound; |
---|
73 | v = upper_bound; |
---|
74 | } |
---|
75 | else |
---|
76 | { |
---|
77 | u = 0; |
---|
78 | v = alpha_[index1] + alpha_[index2]; |
---|
79 | } |
---|
80 | } |
---|
81 | |
---|
82 | double k = kernel_.get(index1, index1) + kernel_.get(index2, index2) - |
---|
83 | 2*kernel_.get(index1, index2); |
---|
84 | alpha_new = alpha_[index2] + target_[index2]* |
---|
85 | (E[index1]-E[index2])/k; |
---|
86 | if (alpha_new > v) |
---|
87 | alpha_new = v; |
---|
88 | else if (alpha_new<u) |
---|
89 | alpha_new = u; |
---|
90 | |
---|
91 | alpha_[index1]+=target_[index1]*target_[index2]*(alpha_[index2]-alpha_new); |
---|
92 | alpha_[index2]=alpha_new; |
---|
93 | stop_condition = stop( target_, kernel_, alpha_); |
---|
94 | if (count>10000000){ |
---|
95 | cerr << "SVM): " << "more than 10,000,000 epochs reached" << endl; |
---|
96 | exit(1); |
---|
97 | } |
---|
98 | } |
---|
99 | |
---|
100 | // Calculating the bias |
---|
101 | double min_output_positive = 10000; |
---|
102 | double max_output_negative = -10000; |
---|
103 | thep_gsl_api::vector output_unbiased = kernel_ * alpha_.mul_elements(target_); |
---|
104 | for (u_int i=0; i<target_.size(); i++){ |
---|
105 | if (target_[i]==1){ |
---|
106 | if (output_unbiased[i] < min_output_positive) |
---|
107 | min_output_positive = output_unbiased[i]; |
---|
108 | } |
---|
109 | else if( output_unbiased[i] > max_output_negative ) |
---|
110 | max_output_negative = output_unbiased[i]; |
---|
111 | |
---|
112 | } |
---|
113 | cout << max_output_negative << endl; |
---|
114 | cout << min_output_positive << endl; |
---|
115 | |
---|
116 | bias_ = ( -min_output_positive - max_output_negative )/2; |
---|
117 | cout << "bias:" << bias_ << endl; |
---|
118 | |
---|
119 | trained_= true; |
---|
120 | } |
---|
121 | |
---|
122 | bool SVM::stop( const thep_gsl_api::vector& target_, |
---|
123 | const thep_gsl_api::matrix& kernel_, |
---|
124 | const thep_gsl_api::vector& alpha_) |
---|
125 | { |
---|
126 | double min_output_positive = 10000; |
---|
127 | double max_output_negative = -10000; |
---|
128 | double epsilon = 0.000001; // used twice, should perhaps be two different values |
---|
129 | thep_gsl_api::vector output_unbiased = kernel_ * alpha_.mul_elements(target_); |
---|
130 | for (u_int i=0; i<target_.size(); i++){ |
---|
131 | if (target_[i]==1){ |
---|
132 | if (output_unbiased[i] < min_output_positive) |
---|
133 | min_output_positive = output_unbiased[i]; |
---|
134 | } |
---|
135 | else if( output_unbiased[i] > max_output_negative ) |
---|
136 | max_output_negative = output_unbiased[i]; |
---|
137 | |
---|
138 | } |
---|
139 | |
---|
140 | if (min_output_positive - max_output_negative < 2-epsilon){ |
---|
141 | return false; |
---|
142 | } |
---|
143 | else{ |
---|
144 | double primal = alpha_.mul_elements(target_) |
---|
145 | * ( alpha_.mul_elements(target_) ); |
---|
146 | double gap = 2 * primal - alpha_.sum(); |
---|
147 | if (gap/(primal+1)<epsilon) |
---|
148 | return true; |
---|
149 | else |
---|
150 | return false; |
---|
151 | } |
---|
152 | |
---|
153 | |
---|
154 | } |
---|
155 | |
---|
156 | |
---|
157 | |
---|
158 | #endif |
---|