Changeset 68
Legend:
  (unprefixed)  Unmodified
  +             Added
  -             Removed
trunk/src/SVM.cc
(r61 → r68)

  namespace cpptools {

- SVM::SVM(const gslapi::matrix& kernel, const gslapi::vector& target,
-          const std::vector<int> train_set)
-   : alpha_(target.size()),
+ SVM::SVM(const gslapi::matrix& kernel, const gslapi::vector& target,
+          const std::vector<size_t> train_set)
+   : alpha_(target.size(),1),
      bias_(0),
      c_(0),
  …
      target_(target),
      trained_(false),
-     train_set_(train_set)
+     train_set_(train_set),
+     tolerance_(0.001)
  {
    if (!train_set_.size())
-     for (u_int i=0; i<target_.size(); i++)
+     for (size_t i=0; i<target_.size(); i++)
        train_set_.push_back(i);
  }

  void SVM::train()
  {
    using namespace std;

-   // defining the variables for optimization
+   // initializing variables for optimization
    gslapi::matrix hessian(train_set_.size(),train_set_.size());
    gslapi::vector alpha_train(train_set_.size());
  …
                             target_(train_set_[j]));
    }
+   std::vector<size_t> support_vectors = train_set_;
+   std::vector<size_t> non_support_vectors_;

    // Modify the diagonal of the kernel matrix (soft margin)
  …
      hessian(i,i) += d;

-   gslapi::vector E = gslapi::vector(-target_train);
+   gslapi::vector E =
+     gslapi::vector(target_train.mul_elements(hessian*alpha_train)-target_train);
    double upper_bound = pow(10., 32.); //Peter, should be static...
    u_int epochs = 0;
-   double alpha_new;
+   double alpha_new2;
+   double alpha_new1;
    double u;
    double v;
-   gslapi::vector dalpha_train;
    bool stop_condition = false;
+   // gslapi::vector dalpha_train;

    // Training loop
    while(!stop_condition) {
-     epochs++;
-     dalpha_train = alpha_train.mul_elements(target_train);
-     E = target_train.mul_elements(hessian*alpha_train)-target_train; //Peter, could be done faster!!
-
      // Choosing a pair of variables to modify
-     pair<u_int, u_int> index(0,0);
-     index = choose(target_train.size());
+     pair<size_t, size_t> index(0,0);
+     index = choose(E, support_vectors, non_support_vectors_,
+                    target_train, stop_condition);

      //Updating the two variables
  …
               target_train(index.first)*alpha_(index.second) );
      // new alpha value for second alpha
-     alpha_new = (alpha_train[index.second] + target_train[index.second]*
+     alpha_new2 = (alpha_train[index.second] + target_train[index.second]*
                   (E[index.first]-E[index.second])/k);
-     if (alpha_new > v)
-       alpha_new = v;
-     else if (alpha_new < u)
-       alpha_new = u;
+     if (alpha_new2 > v)
+       alpha_new2 = v;
+     else if (alpha_new2 < u)
+       alpha_new2 = u;

-     alpha_train[index.first] += (target_train[index.first]*
-                                  target_train[index.second] *
-                                  (alpha_train[index.second]-alpha_new));
-     alpha_train[index.second] = alpha_new;
-     stop_condition = stop( target_train, hessian, alpha_train);
-     if (epochs>100000){
-       cerr << "SVM: " << "more than 100,000 epochs reached" << endl;
+     // Updating the alphas and some more
+     alpha_new1 = (alpha_train[index.first] +
+                   (target_train[index.first] *
+                    target_train[index.second] *
+                    (alpha_train[index.second] - alpha_new2)));
+
+     // dalpha_train(index.first) = (alpha_train(index.first) *
+     //                              target_train(index.first));
+     // dalpha_train(index.second) = (alpha_train(index.second) *
+     //                               target_train(index.second));
+
+     E = E + (target_train.mul_elements(
+              hessian[index.first] * (alpha_new1-alpha_train[index.first]) +
+              hessian[index.second] * (alpha_new2-alpha_train[index.second]) ));
+
+     alpha_train[index.first] = alpha_new1;
+     alpha_train[index.second] = alpha_new2;
+     epochs++;
+
+     if (epochs>100000){
+       cerr << "WARNING: SVM: " << "100,000 epochs reached" << endl;
        exit(1);
      }
    }

-   // Calculating the bias, Peter check this!
-   double min_output_positive = 10000;
-   double max_output_negative = -10000;
-   gslapi::vector output_unbiased = target_train.mul_elements(hessian * alpha_train);
-   for (u_int i=0; i<target_train.size(); i++){
-     if (target_train[i]==1) {
-       if (output_unbiased[i] < min_output_positive) {
-         min_output_positive = output_unbiased[i];
-       }
-     }
-     else if( output_unbiased[i] > max_output_negative ) {
-       max_output_negative = output_unbiased[i];
-     }
-     bias_ = ( -min_output_positive - max_output_negative )/2;
-   }
+   // Calculating the bias from support vectors. E = output - bias - target
+   bias_ = 0;
+   for (size_t i=0; i<support_vectors.size(); i++)
+     bias_ += -E(support_vectors[i]);
+   bias_ /= support_vectors.size();

    trained_ = true;
    cout << "Training finished." << endl;
+   alpha_.set_all(0);
    for (unsigned int i=0; i<train_set_.size(); i++)
      alpha_(train_set_[i]) = alpha_train(i);
  }

- std::pair<u_int,u_int> SVM::choose(const u_int nof_samples, const u_int method)
+ std::pair<size_t,size_t>
+ SVM::choose(const theplu::gslapi::vector& E,
+             const std::vector<size_t>& support_vectors,
+             const std::vector<size_t>& non_support_vectors,
+             const theplu::gslapi::vector& target_train,
+             bool& stop_condition)
  {
-   u_int index1;
-   u_int index2;
-
-   if (method == 1){
-     //(Peter, do it the Cristianini way)
-   }
-   else{
-     random_singleton* rnd=random_singleton::get_instance();
-     index1 = rnd->get_uniform_int(nof_samples);
-     index2 = rnd->get_uniform_int(nof_samples-1);
-     if (index2 >= index1)
-       index2++;
-   }
-   return std::pair<u_int,u_int>(index1,index2);
+   std::pair<size_t, size_t> index(0,0);
+   // First check for violation among support vectors
+   index = E.minmax_index(support_vectors);
+   if (index.second - index.first > 2*tolerance_)
+     return index;
+
+   // If no violation, check among non-support vectors
+   // Peter, could this be done without copying?
+   std::vector<size_t> tmp = non_support_vectors;
+   my_uniform_rng a;
+   random_shuffle(tmp.begin(), tmp.end(), a);
+   for (size_t i=0; i<tmp.size(); i++){
+     if (target_train(tmp[i])==1){
+       if ( E(tmp[i]) < E(index.first) - 2*tolerance_){
+         index.second = tmp[i];
+         return index;
+       }
+     }
+     else if (target_train(tmp[i])==-1){
+       if ( E(tmp[i]) > E(index.second) + tolerance_){
+         index.first = tmp[i];
+         return index;
+       }
+     }
+   }
+   // If there is no violation we should stop
+   stop_condition = true;
+   return index;
  }

- bool SVM::stop(const gslapi::vector& target_train,
-                const gslapi::matrix& hessian,
-                const gslapi::vector& alpha_train)
- {
-   using namespace std;
-   double min_output_positive = 10000;
-   double max_output_negative = -10000;
-   double epsilon = 0.000001; // Peter, should be static const
-   gslapi::vector output_unbiased =
-     target_train.mul_elements(hessian*alpha_train);
-   for (u_int i=0; i<target_train.size(); i++) {
-     if (target_train[i]==1) {
-       if (output_unbiased[i] < min_output_positive) {
-         min_output_positive = output_unbiased[i];
-       }
-     }
-     else if( output_unbiased[i] > max_output_negative )
-       max_output_negative = output_unbiased[i];
-   }
-
-   if (min_output_positive - max_output_negative < 2-epsilon) {
-     //double tmp = min_output_positive - max_output_negative;
-     //double tmp2 = alpha_train*(hessian*alpha_train)/2 - alpha_train.sum();
-     //cout << tmp << "\t" << tmp2 << endl;
-     return false;
-   }
-   else {
-     double primal = alpha_train.mul_elements(target_train)
-                     * ( alpha_train.mul_elements(target_train) );
-     double gap = 2 * primal - alpha_train.sum();
-     if (gap/(primal+1)<epsilon)
-       return true;
-     else
-       return false;
-   }
- }

  }} // of namespace cpptools and namespace theplu
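Note on the train() changes above: the error cache E = target .* (hessian*alpha) - target is no longer rebuilt from scratch each pass (the old "//Peter, could be done faster!!" line). Since only two alphas move per iteration, E can be refreshed from two rows of the hessian, and the bias then falls out as the mean of -E over the support vectors. Below is a minimal sketch of that rank-two update with plain std::vector and a row-major matrix standing in for the gslapi types; update_E and bias_from_E are hypothetical helper names, not part of this changeset.

    #include <cstddef>
    #include <vector>

    // E_i = y_i*(H*alpha)_i - y_i.  When only alpha_p and alpha_q move,
    // (H*alpha)_i changes by H(p,i)*dalpha_p + H(q,i)*dalpha_q (H is
    // symmetric), so the cache refresh costs O(n) instead of O(n^2).
    void update_E(std::vector<double>& E, const std::vector<double>& H,
                  const std::vector<double>& y, std::size_t n,
                  std::size_t p, std::size_t q,
                  double dalpha_p, double dalpha_q)
    {
      for (std::size_t i = 0; i < n; ++i)
        E[i] += y[i] * (H[p*n + i]*dalpha_p + H[q*n + i]*dalpha_q);
    }

    // Bias as computed in r68: on support vectors E = output - bias -
    // target, so averaging -E over them estimates the bias.
    double bias_from_E(const std::vector<double>& E,
                       const std::vector<std::size_t>& support_vectors)
    {
      double b = 0;
      for (std::size_t i = 0; i < support_vectors.size(); ++i)
        b -= E[support_vectors[i]];
      return b / support_vectors.size();
    }

With this scheme the cache is seeded once with the full product (the new E initialization before the loop) and afterwards only touched through the cheap update.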
trunk/src/SVM.h
(r61 → r68)

  namespace cpptools {
  ///
- /// Class for SVM using a modified version of the SMO algorithm
- /// (Cristianini). The difference to the original SMO is that the
- /// whole Kernel matrix is kept in memory instead of calculating each
- /// element when it's needed. If the number of samples (size of
- /// kernel matrix) is too large, the original SMO should be used
- /// instead (or buying more memory!). True prediction is not possible.
+ /// Class for SVM using Keerthi's second modification of Platt's SMO.
+ /// Also, the elements of the kernel are not computed sequentially; the
+ /// complete kernel matrix is taken as input and stored in memory. This
+ /// means that the training is faster, but also that it is not possible
+ /// to train a large number of samples N, since the memory cost for the
+ /// kernel matrix is N^2. The SVM object does not contain any data,
+ /// hence true prediction is not possible.
  ///
  class SVM
  …
    ///
    SVM(const gslapi::matrix&, const gslapi::vector&,
-       const std::vector<int> = std::vector<int>());
+       const std::vector<size_t> = std::vector<size_t>());

    ///
  …
    /// Function returns the output from SVM
    ///
-   inline gslapi::vector get_output(void) const;
+   inline gslapi::vector get_output(void) const
+   { return (kernel_ * alpha_.mul_elements(target_) +
+             gslapi::vector(target_.size(),bias_) ); }

    ///
  …
    ///
-   /// Training the SVM using a modified version of the SMO
-   /// algorithm. Instead of calculating an element in the kernel matrix
-   /// when it is needed, the whole kernel is kept in memory.
-   ///
+   /// Training the SVM following Platt's SMO, with Keerthi's
+   /// modification. However, the complete kernel is stored in memory,
+   /// for speed. When the number of samples N is large this is not
+   /// possible, since the memory cost for the kernel scales as N^2. In
+   /// that case one should follow the SMO and calculate the kernel
+   /// elements sequentially.
+   ///
    void train(void);
  …
    gslapi::vector target_;
    bool trained_;
-   std::vector<int> train_set_;
+   std::vector<size_t> train_set_;
+   double tolerance_;
    ///
    /// Private function choosing which two elements should be
-   /// updated. By random from uniform distributions.
+   /// updated. First checks for the biggest violation (output - target
+   /// != 0) among support vectors (alpha != 0). If no violation is
+   /// found, checks sequentially among the other samples. If there is
+   /// no violation there either, stop_condition is fulfilled.
    ///
-   std::pair<u_int,u_int> choose(u_int nof_samples, const u_int method = 0);
+   std::pair<u_int,u_int> choose(const theplu::gslapi::vector&,
+                                 const std::vector<size_t>&,
+                                 const std::vector<size_t>&,
+                                 const theplu::gslapi::vector&,
+                                 bool&);

-   ///
-   /// Private function that determines when to stop the training.
-   /// The test is done in two steps. First, we check that the
-   /// functional margin is at least 2 - epsilon. Second, we check
-   /// that the gap between the primal and the dual objective is less
-   /// than epsilon.
-   ///
-   bool stop(const gslapi::vector& target_,
-             const gslapi::matrix& kernel,
-             const gslapi::vector& alpha_);
  };

- gslapi::vector SVM::get_output() const
- {
-   // Peter, change to a one-liner and move up to declaration.
-   gslapi::vector bias(target_.size(),bias_);
-   return kernel_ * alpha_.mul_elements(target_) + bias;
- }
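The get_output() one-liner inlined above computes the decision values f = K*(alpha .* y) + bias for the samples the kernel was built from. A sketch of the same computation with plain std::vector (svm_output is a hypothetical name; the gslapi operator overloads do this in a single expression):

    #include <cstddef>
    #include <vector>

    // f_i = sum_j K(i,j)*alpha_j*y_j + bias, with K stored row-major.
    std::vector<double> svm_output(const std::vector<double>& K,
                                   const std::vector<double>& alpha,
                                   const std::vector<double>& y,
                                   double bias, std::size_t n)
    {
      std::vector<double> f(n, bias);
      for (std::size_t i = 0; i < n; ++i)
        for (std::size_t j = 0; j < n; ++j)
          f[i] += K[i*n + j] * alpha[j] * y[j];
      return f;
    }

Keeping the definition in the header lets the compiler inline it; since the object stores no data, the output is only defined for the training kernel, which is why the class documentation says true prediction is not possible.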
trunk/src/vector.cc
(r65 → r68)

  std::pair<size_t,size_t> vector::minmax_index(void) const
  {
    size_t min_index=0;
    size_t max_index=0;
-   void gsl_vector_minmax_index (const gsl_vector * v_, size_t * min_index, size_t * max_index);
+   void gsl_vector_minmax_index (const gsl_vector * v_, size_t *
+                                 min_index, size_t * max_index);
    return std::pair<size_t,size_t>(min_index, max_index);
  }

  …

- //Peter, add default checking all elements using GSL
  std::pair<size_t,size_t> vector::minmax_index(const std::vector<size_t>& subset ) const
  {
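Note that minmax_index(void) as committed still only declares gsl_vector_minmax_index inside the function body rather than calling it, so min_index and max_index stay 0; the removed "//Peter" comment flagged the pending GSL hookup. A sketch of what the two variants would look like, assuming v_ is the wrapped gsl_vector* member (the gslapi class internals are not shown in this changeset):

    #include <cstddef>
    #include <utility>
    #include <vector>
    #include <gsl/gsl_vector.h>

    // Full-vector version: let GSL locate the extreme elements.
    std::pair<std::size_t,std::size_t> minmax_index(const gsl_vector* v_)
    {
      std::size_t min_index = 0;
      std::size_t max_index = 0;
      gsl_vector_minmax_index(v_, &min_index, &max_index);
      return std::pair<std::size_t,std::size_t>(min_index, max_index);
    }

    // Subset version: the returned indices refer to the full vector,
    // which is what SVM::choose() needs when it scans E over the
    // support vectors only.
    std::pair<std::size_t,std::size_t>
    minmax_index(const gsl_vector* v_, const std::vector<std::size_t>& subset)
    {
      std::size_t min_i = subset[0];   // assumes subset is non-empty
      std::size_t max_i = subset[0];
      for (std::size_t k = 1; k < subset.size(); ++k) {
        double x = gsl_vector_get(v_, subset[k]);
        if (x < gsl_vector_get(v_, min_i)) min_i = subset[k];
        if (x > gsl_vector_get(v_, max_i)) max_i = subset[k];
      }
      return std::pair<std::size_t,std::size_t>(min_i, max_i);
    }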