Changeset 114


Ignore:
Timestamp:
Jul 15, 2004, 2:46:15 PM (18 years ago)
Author:
Peter
Message:

fixed tolerance problems in train

Location:
trunk/src
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/SVM.cc

    r68 r114  
    77
    88// Thep C++ Tools
     9#include "Averager.h"
     10#include "matrix.h"
     11#include "random_singleton.h"
    912#include "SVM.h"
    10 #include "matrix.h"
    1113#include "vector.h"
    12 #include "random_singleton.h"
     14
     15
    1316
    1417namespace theplu {
     
    1720  SVM::SVM(const gslapi::matrix& kernel, const gslapi::vector& target,
    1821        const std::vector<size_t> train_set)
    19   : alpha_(target.size(),1),
     22  : alpha_(target.size(),0),
    2023    bias_(0),
    2124    c_(0),
     
    2427    trained_(false),
    2528    train_set_(train_set),
    26     tolerance_(0.001)
     29    tolerance_(0.000000000001)
    2730       
    2831  {
     
    3336
    3437
     38  std::pair<size_t,size_t>
     39  SVM::choose(const theplu::gslapi::vector& E, 
     40              const theplu::gslapi::vector& target_train,
     41              const theplu::gslapi::vector& alpha_train,
     42              bool& stop_condition)
     43  {
     44    std::pair<size_t, size_t> index(0,0);     
     45   
     46    // First check for violation among support vectors
     47    double max = -10000000;
     48    double min = 10000000;
     49    // Peter, avoid looping over all alphas
     50    for (size_t i=0; i<E.size(); i++){
     51      if (alpha_train(i)){
     52        if (E(i)>max){
     53          max = E(i);
     54          index.second = i;
     55        }
     56        if (E(i)<min){
     57          min = E(i);
     58          index.first = i;
     59        }
     60      }
     61    }
     62    //index = E.minmax_index(support_vectors);
     63    if (E(index.second) - E(index.first) > 2*tolerance_)
     64      return index;
     65   
     66    // If no violation check among non-support vectors
     67    std::vector<size_t> tmp;
     68    tmp.resize(E.size());
     69    for (size_t i=0; i<E.size(); i++)
     70      tmp[i]=i;
     71    my_uniform_rng a;
     72    random_shuffle(tmp.begin(), tmp.end(), a);
     73    for (size_t i=0; i<tmp.size(); i++){
     74      if (target_train(tmp[i])==1){
     75        if ( E(tmp[i]) < E(index.first) - 2*tolerance_){
     76          index.second = tmp[i];
     77          return index;
     78        }
     79      }
     80      else if (target_train(tmp[i])==-1){
     81        if ( E(tmp[i]) > E(index.second) + 2*tolerance_){
     82          index.first = tmp[i];
     83          return index;
     84        }
     85      }
     86    }
     87    // If there is no violation then we should stop
     88    stop_condition = true;
     89    return index;
     90  }
     91 
    3592  void SVM::train()
    3693  {
     
    3895
    3996  // initializing variables for optimization
    40   gslapi::matrix hessian(train_set_.size(),train_set_.size());
     97  gslapi::matrix kernel_train(train_set_.size(),train_set_.size());
    4198  gslapi::vector alpha_train(train_set_.size());
    4299  gslapi::vector target_train(train_set_.size());
     
    45102    target_train(i) = target_(train_set_[i]);
    46103    for (unsigned int j=0; j<train_set_.size(); j++)
    47       hessian(i,j) = (target_(train_set_[i])*
    48                       kernel_(train_set_[i],train_set_[j]) *
    49                       target_(train_set_[j]));
    50   }
    51   std::vector<size_t> support_vectors = train_set_;
    52   std::vector<size_t> non_support_vectors_;
    53  
    54  
     104      kernel_train(i,j) = kernel_(train_set_[i],train_set_[j]);
     105  }
    55106
    56107  // Modify the diagonal of the kernel matrix (soft margin)
     
    64115    exit(1);
    65116  }
    66   for (unsigned int i=0; i<hessian.columns(); i++)
    67     hessian(i,i) += d;
    68  
    69   gslapi::vector E = gslapi::vector(target_train.mul_elements(hessian*alpha_train)-target_train);
     117  for (unsigned int i=0; i<kernel_train.columns(); i++)
     118    kernel_train(i,i) += d;
     119 
     120  gslapi::vector  E = ( kernel_train*target_train.mul_elements(alpha_train)
     121                        - target_train ) ;
     122
    70123  double upper_bound = pow(10., 32.); //Peter, should be static...
    71124  u_int epochs = 0;
     
    75128  double v;
    76129  bool stop_condition = false;
    77 //  gslapi::vector dalpha_train; 
    78130
    79131  // Training loop
    80132  while(!stop_condition) {
    81      // Choosing a pair of variables to modify
    82     pair<size_t, size_t> index(0,0);     
    83     index = choose(E, support_vectors, non_support_vectors_,
    84                    target_train, stop_condition);
    85    
    86     //Updating the two variables
    87     if (target_train[index.first]!=target_train[index.second]) {       
     133    pair<size_t, size_t> index = choose(E, target_train,
     134                                        alpha_train,stop_condition);
     135    if (target_train[index.first]!=target_train[index.second]) {       
    88136      if (alpha_train[index.second] > alpha_train[index.first]) {
    89137        v = upper_bound;
     
    105153      }
    106154    }
    107     // distance between the two points in feature space
    108     double k = ( hessian(index.first, index.first) +
    109                  hessian(index.second, index.second) -
    110                  2*hessian(index.first, index.second) *
    111                  target_train(index.first)*alpha_(index.second) );
    112     // new alpha value for second alpha
     155   
     156    double k = ( kernel_train(index.first, index.first) +
     157                 kernel_train(index.second, index.second) -
     158                 2*kernel_train(index.first, index.second));
    113159    alpha_new2 = (alpha_train[index.second] + target_train[index.second]*
    114                  (E[index.first]-E[index.second])/k) ;
    115     if (alpha_new2 > v)
     160                 (E[index.first]-E[index.second])/k );
     161   
     162    if (alpha_new2 > v)
    116163      alpha_new2 = v;
    117164    else if (alpha_new2<u)
    118165      alpha_new2 = u;
    119166   
    120     // Updating the alphas and some more
     167    // Updating the alphas and some more
     168    if (alpha_new2 < tolerance_)
     169      alpha_new2=0;
     170   
    121171    alpha_new1 = (alpha_train[index.first] +
    122                   (target_train[index.first] *
    123                    target_train[index.second] *
    124                    (alpha_train[index.second] - alpha_new2)));
    125    
    126 //    dalpha_train(index.first) = (alpha_train(index.first) *
    127 //                                 target_train(index.first));
    128 //    dalpha_train(index.second) = (alpha_train(index.second)*
    129 //                                 target_train(index.second));
    130 
    131    
    132     E = E + (target_train.mul_elements(
    133             hessian[index.first] * (alpha_new1-alpha_train[index.first]) +
    134             hessian[index.second] * (alpha_new2-alpha_train[index.second]) ));
     172                  (target_train[index.first] * target_train[index.second] *
     173                   (alpha_train[index.second] - alpha_new2)) );
     174    if (alpha_new1 < tolerance_){
     175      alpha_new1=0;
     176    }
    135177
    136178    alpha_train[index.first] = alpha_new1;
    137     alpha_train[index.second] = alpha_new2;
     179    alpha_train[index.second] = alpha_new2;
     180    E = kernel_train*target_train.mul_elements(alpha_train) - target_train;
     181    gslapi::vector ones(alpha_train.size(),1);
     182   
    138183    epochs++; 
    139    
    140     if (epochs>100000){
    141       cerr << "WARNING: SVM: " << "100,000 epochs reached" << endl;
     184    if (epochs>1000000){
     185      cerr << "WARNING: SVM: " << "1,000,000 epochs reached" << endl;
    142186      exit(1);
    143187    }
     
    145189
    146190  // Calculating the bias from support vectors. E = output - bias - target
    147   bias_ = 0;
    148   for (size_t i=0; i<support_vectors.size(); i++)
    149     bias_ += -E(support_vectors[i]);
    150   bias_ /= support_vectors.size();
     191  Averager bias;
     192  for (size_t i=0; i<E.size(); i++)
     193    if (alpha_train(i))
     194      bias.add(-E(i));
     195  bias_ = bias.mean();
    151196 
    152197  trained_= true;
    153   cout << "Training finished." << endl;
    154198  alpha_.set_all(0);
    155199  for (unsigned int i=0; i<train_set_.size(); i++)
    156200    alpha_(train_set_[i]) = alpha_train(i);
    157201   
    158 }
    159 
    160 std::pair<size_t,size_t>
    161 SVM::choose(const theplu::gslapi::vector& E, 
    162             const std::vector<size_t>& support_vectors,
    163             const std::vector<size_t>& non_support_vectors,
    164             const theplu::gslapi::vector& target_train,
    165             bool& stop_condition)
    166 {
    167   std::pair<size_t, size_t> index(0,0);     
    168   // First check for violation among support vectors
    169   index = E.minmax_index(support_vectors);
    170   if (index.second - index.first > 2*tolerance_)
    171     return index;
    172    
    173   // If no violation check among non-support vectors
    174   // Peter, could this be done without copying?
    175   std::vector<size_t> tmp = non_support_vectors;
    176   my_uniform_rng a;
    177   random_shuffle(tmp.begin(), tmp.end(), a);
    178   for (size_t i=0; i<tmp.size(); i++){
    179     if (target_train(tmp[i])==1){
    180       if ( E(tmp[i]) < E(index.first) - 2*tolerance_){
    181         index.second = tmp[i];
    182         return index;
    183       }
    184     }
    185     else if (target_train(tmp[i])==-1){
    186       if ( E(tmp[i]) > E(index.second) + tolerance_){
    187         index.first = tmp[i];
    188         return index;
    189       }
    190     }
    191   }
    192   // If there is no validation then we should stop
    193   stop_condition = true;
    194   return index;
     202  }
     203
    195204 
    196 }
     205   
     206   
    197207
    198208}} // of namespace cpptools and namespace theplu
  • trunk/src/SVM.h

    r80 r114  
    7676    std::vector<size_t> train_set_;
    7777    double tolerance_;
    78     ///
     78   
     79    ///
    7980    ///   Private function choosing which two elements that should be
    8081    ///   updated. First checking for the biggest violation (output - target =
     
    8384    ///   well, stop_condition is fullfilled.
    8485    ///
    85    
    86     std::pair<size_t, size_t> choose(const theplu::gslapi::vector&,
    87                                   const std::vector<size_t>&,
    88                                   const std::vector<size_t>&,
    89                                   const theplu::gslapi::vector&,
    90                                   bool&);
    91    
     86    std::pair<size_t, size_t> choose(const theplu::gslapi::vector&,
     87                                     const theplu::gslapi::vector&,
     88                                     const theplu::gslapi::vector&,
     89                                     bool&);
     90
     91       
    9292   
    9393  };
Note: See TracChangeset for help on using the changeset viewer.