Changeset 68


Timestamp: Apr 28, 2004, 4:28:25 PM
Author: Peter
Message: added a proper choose function

Location: trunk/src
Files: 3 edited

Legend: lines prefixed with '-' were removed, lines prefixed with '+' were added; unprefixed lines are unchanged context, and '…' marks elided lines.
  • trunk/src/SVM.cc (r61 → r68)

     namespace cpptools { 
     
    -SVM::SVM(const gslapi::matrix& kernel, const gslapi::vector& target,
    -         const std::vector<int> train_set)
    -  : alpha_(target.size()),
    +SVM::SVM(const gslapi::matrix& kernel, const gslapi::vector& target,
    +         const std::vector<size_t> train_set)
    +  : alpha_(target.size(),1),
         bias_(0),
         c_(0),
    …
         target_(target),
         trained_(false),
    -    train_set_(train_set)
    +    train_set_(train_set),
    +    tolerance_(0.001)
     
    -{
    -  if (!train_set_.size())
    -    for (u_int i=0; i<target_.size(); i++)
    -      train_set_.push_back(i); 
    -}
    +{
    +  if (!train_set_.size())
    +    for (size_t i=0; i<target_.size(); i++)
    +      train_set_.push_back(i); 
    +}
     
     
     void SVM::train()
     {
       using namespace std;
     
    -  // defining the variables for optimization
    +  // initializing variables for optimization
       gslapi::matrix hessian(train_set_.size(),train_set_.size());
       gslapi::vector alpha_train(train_set_.size());
    …
                           target_(train_set_[j]));
       }
    -
    +  std::vector<size_t> support_vectors = train_set_;
    +  std::vector<size_t> non_support_vectors_;
    +
    +
     
       // Modify the diagonal of the kernel matrix (soft margin)
    …
         hessian(i,i) += d;
     
    -  gslapi::vector E = gslapi::vector(-target_train);
    +  gslapi::vector E = gslapi::vector(target_train.mul_elements(hessian*alpha_train)-target_train);
       double upper_bound = pow(10., 32.); //Peter, should be static...
       u_int epochs = 0;
    -  double alpha_new;
    -  double u;
    +  double alpha_new2;
    +  double alpha_new1;
    +  double u;
       double v;
    -  gslapi::vector dalpha_train; 
    -  bool stop_condition = false;
    +  bool stop_condition = false;
    +//  gslapi::vector dalpha_train; 
     
       // Training loop
       while(!stop_condition) {
    -    epochs++; 
    -    dalpha_train = alpha_train.mul_elements(target_train);
    -    E = target_train.mul_elements(hessian*alpha_train)-target_train; //Peter, could be done faster!!
    -
    -    // Choosing a pair of variables to modify
    -    pair<u_int, u_int> index(0,0);
    -    index = choose(target_train.size());
    +    // Choosing a pair of variables to modify
    +    pair<size_t, size_t> index(0,0);
    +    index = choose(E, support_vectors, non_support_vectors_,
    +                   target_train, stop_condition);
     
        //Updating the two variables
    …
                      target_train(index.first)*alpha_(index.second) );
         // new alpha value for second alpha
    -    alpha_new = (alpha_train[index.second] + target_train[index.second]*
    +    alpha_new2 = (alpha_train[index.second] + target_train[index.second]*
                      (E[index.first]-E[index.second])/k) ;
    -    if (alpha_new > v)
    -      alpha_new = v;
    -    else if (alpha_new<u)
    -      alpha_new = u;
    +    if (alpha_new2 > v)
    +      alpha_new2 = v;
    +    else if (alpha_new2<u)
    +      alpha_new2 = u;
     
    -    alpha_train[index.first] += (target_train[index.first]*
    -                            target_train[index.second] *
    -                            (alpha_train[index.second]-alpha_new));
    -    alpha_train[index.second]=alpha_new;
    -    stop_condition = stop( target_train, hessian, alpha_train);
    -    if (epochs>100000){
    -      cerr << "SVM: " << "more than 100,000 epochs reached" << endl;
    +    // Updating the alphas and some more
    +    alpha_new1 = (alpha_train[index.first] +
    +                  (target_train[index.first] *
    +                   target_train[index.second] *
    +                   (alpha_train[index.second] - alpha_new2)));
    +
    +//    dalpha_train(index.first) = (alpha_train(index.first) *
    +//                                 target_train(index.first));
    +//    dalpha_train(index.second) = (alpha_train(index.second)*
    +//                                 target_train(index.second));
    +
    +    E = E + (target_train.mul_elements(
    +            hessian[index.first] * (alpha_new1-alpha_train[index.first]) +
    +            hessian[index.second] * (alpha_new2-alpha_train[index.second]) ));
    +
    +    alpha_train[index.first] = alpha_new1;
    +    alpha_train[index.second] = alpha_new2;
    +    epochs++; 
    +
    +    if (epochs>100000){
    +      cerr << "WARNING: SVM: " << "100,000 epochs reached" << endl;
           exit(1);
         }
       }
     
    -  // Calculating the bias, Peter Check this!
    -  double min_output_positive = 10000;
    -  double max_output_negative = -10000;
    -  gslapi::vector output_unbiased = target_train.mul_elements(hessian * alpha_train);
    -  for (u_int i=0; i<target_train.size(); i++){
    -    if (target_train[i]==1) {
    -      if (output_unbiased[i] < min_output_positive) {
    -        min_output_positive = output_unbiased[i];
    -      }
    -    }
    -    else if( output_unbiased[i] > max_output_negative ) {
    -      max_output_negative = output_unbiased[i];
    -    }
    -    bias_ = ( -min_output_positive - max_output_negative )/2;
    -  } 
    +  // Calculating the bias from support vectors. E = output - bias - target
    +  bias_ = 0;
    +  for (size_t i=0; i<support_vectors.size(); i++)
    +    bias_ += -E(support_vectors[i]);
    +  bias_ /= support_vectors.size();
    +
       trained_= true;
       cout << "Training finished." << endl;
    +  alpha_.set_all(0);
       for (unsigned int i=0; i<train_set_.size(); i++)
         alpha_(train_set_[i]) = alpha_train(i);
     
     }
     
    -std::pair<u_int,u_int> SVM::choose(const u_int nof_samples, const u_int method)
    +std::pair<size_t,size_t>
    +SVM::choose(const theplu::gslapi::vector& E, 
    +            const std::vector<size_t>& support_vectors,
    +            const std::vector<size_t>& non_support_vectors,
    +            const theplu::gslapi::vector& target_train,
    +            bool& stop_condition)
     {
    -  u_int index1;
    -  u_int index2;
    +  std::pair<size_t, size_t> index(0,0);
    +  // First check for violation among support vectors
    +  index = E.minmax_index(support_vectors);
    +  if (E(index.second) - E(index.first) > 2*tolerance_)
    +    return index;
    +
    +  // If no violation, check among the non-support vectors
    +  // Peter, could this be done without copying?
    +  std::vector<size_t> tmp = non_support_vectors;
    +  my_uniform_rng a;
    +  random_shuffle(tmp.begin(), tmp.end(), a);
    +  for (size_t i=0; i<tmp.size(); i++){
    +    if (target_train(tmp[i])==1){
    +      if ( E(tmp[i]) < E(index.first) - 2*tolerance_){
    +        index.second = tmp[i];
    +        return index;
    +      }
    +    }
    +    else if (target_train(tmp[i])==-1){
    +      if ( E(tmp[i]) > E(index.second) + tolerance_){
    +        index.first = tmp[i];
    +        return index;
    +      }
    +    }
    +  }
    +  // If there is no violation then we should stop
    +  stop_condition = true;
    +  return index;
     
    -  if (method == 1){
    -//(Peter, do it the Cristianini way)
    -
    -
    -  }
    -  else{
    -    random_singleton* rnd=random_singleton::get_instance();
    -    index1 = rnd->get_uniform_int(nof_samples);
    -    index2 = rnd->get_uniform_int(nof_samples-1);
    -    if (index2 >= index1)
    -      index2++;
    -  }
    -  return std::pair<u_int,u_int>(index1,index2);
     }
     
    -bool SVM::stop( const gslapi::vector& target_train,
    -                const gslapi::matrix& hessian,
    -                const gslapi::vector& alpha_train)
    -{
    -  using namespace std;
    -  double min_output_positive = 10000;
    -  double max_output_negative = -10000;
    -  double epsilon = 0.000001; // Peter, should be static const
    -  gslapi::vector output_unbiased =
    -    target_train.mul_elements(hessian*alpha_train);
    -  for (u_int i=0; i<target_train.size(); i++) {
    -    if (target_train[i]==1) {
    -      if (output_unbiased[i] < min_output_positive) {
    -        min_output_positive = output_unbiased[i];
    -      }
    -    }
    -    else if( output_unbiased[i] > max_output_negative )
    -      max_output_negative = output_unbiased[i];
    -  }
    -
    -  if (min_output_positive - max_output_negative < 2-epsilon) {
    -    //double tmp = min_output_positive - max_output_negative;
    -    //double tmp2 = alpha_train*(hessian*alpha_train)/2 - alpha_train.sum();
    -    //cout << tmp << "\t" << tmp2 << endl;
    -    return false;
    -  }
    -  else {
    -    double primal = alpha_train.mul_elements(target_train)
    -      * ( alpha_train.mul_elements(target_train) );
    -    double gap = 2 * primal - alpha_train.sum();
    -    if (gap/(primal+1)<epsilon)
    -      return true;
    -    else
    -      return false;
    -  }
    -}
     
     }} // of namespace cpptools and namespace theplu
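
The heart of this changeset is the new choose routine: rather than drawing the working pair at random, it first takes the maximally violating pair among the support vectors (via minmax_index over the error cache E) and only falls back to a randomized scan of the remaining samples. The following is a minimal standalone sketch of that heuristic, not code from the repository: it uses plain std::vector<double> in place of gslapi::vector, and choose_pair, others and tol are hypothetical names.

    #include <algorithm>
    #include <cstddef>
    #include <random>
    #include <utility>
    #include <vector>

    // Sketch of the selection heuristic: maximally violating pair among the
    // support vectors first, then a shuffled scan of the remaining samples.
    std::pair<std::size_t, std::size_t>
    choose_pair(const std::vector<double>& E,
                const std::vector<std::size_t>& support_vectors,
                std::vector<std::size_t> others,  // by value so we may shuffle
                const std::vector<double>& target,
                double tol,
                bool& stop_condition)
    {
      if (support_vectors.empty()) {
        stop_condition = true;
        return {0, 0};
      }
      // min and max of E restricted to the support-vector subset
      std::size_t imin = support_vectors.front();
      std::size_t imax = imin;
      for (std::size_t i : support_vectors) {
        if (E[i] < E[imin]) imin = i;
        if (E[i] > E[imax]) imax = i;
      }
      // a gap above 2*tol means the KKT conditions are violated
      if (E[imax] - E[imin] > 2 * tol)
        return {imin, imax};

      // otherwise look for a violator among the remaining samples
      static std::mt19937 rng{std::random_device{}()};
      std::shuffle(others.begin(), others.end(), rng);
      for (std::size_t i : others) {
        if (target[i] == 1 && E[i] < E[imin] - 2 * tol)
          return {imin, i};
        if (target[i] == -1 && E[i] > E[imax] + 2 * tol)
          return {i, imax};
      }
      stop_condition = true;  // no violation anywhere: training can stop
      return {imin, imax};
    }
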
  • trunk/src/SVM.h (r61 → r68)

     namespace cpptools { 
       ///
    -  /// Class for SVM using a modified version of the SMO algorithm
    -  /// (Cristianini). The difference to the original SMO is that the
    -  /// whole Kernel matrix is kept in memory instead of calculating each
    -  /// element when it's needed. If the number of samples (size of
    -  /// kernel matrix) is too large, the original SMO should be used
    -  /// instead (or buying more memory!). True prediction is not possible.
    +  /// Class for SVM using Keerthi's second modification of Platt's SMO.
    +  /// Also, the elements of the kernel are not computed sequentially; the
    +  /// complete kernel matrix is taken as input and stored in memory. This
    +  /// means that the training is faster, but also that it is not possible
    +  /// to train a large number of samples N, since the memory cost for the
    +  /// kernel matrix is N^2. The SVM object does not contain any data, hence
    +  /// true prediction is not possible.
       ///
       class SVM
    …
        ///
        SVM(const gslapi::matrix&, const gslapi::vector&,
    -        const std::vector<int> = std::vector<int>());
    +        const std::vector<size_t> = std::vector<size_t>());
     
        ///
    …
        /// Function returns the output from SVM
        ///
    -    inline gslapi::vector get_output(void) const;
    +    inline gslapi::vector get_output(void) const
    +    { return (kernel_ * alpha_.mul_elements(target_) +
    +              gslapi::vector(target_.size(),bias_)); }
     
        ///
    …
     
        ///
    -    /// Training the SVM using the a modified version of the SMO
    -    /// algorithm. Instead of calculating an element in the kernel matrix when
    -    /// it is needed, the whole kernel is kept in memory.
    -    ///
    +    /// Training the SVM following Platt's SMO, with Keerthi's
    +    /// modification. However, the complete kernel is stored in memory; the
    +    /// reason for this is speed. When the number of samples N is large this
    +    /// is not possible, since the memory cost for the kernel scales as N^2.
    +    /// In that case one should follow the SMO and calculate the kernel
    +    /// elements sequentially.
    +    ///
     
        void train(void);
    …
        gslapi::vector target_;
        bool trained_;
    -    std::vector<int> train_set_;
    -
    +    std::vector<size_t> train_set_;
    +    double tolerance_;
        ///
        ///   Private function choosing which two elements that should be
    -    ///   updated. By random from uniform distributions.
    +    ///   updated. First checking for the biggest violation (output -
    +    ///   target = 0) among support vectors (alpha!=0). If no violation was
    +    ///   found, check sequentially among the other samples. If no
    +    ///   violation there as well, stop_condition is fulfilled.
        ///
     
    -    std::pair<u_int,u_int> choose(u_int nof_samples, const u_int method = 0);
    +    std::pair<size_t,size_t> choose(const theplu::gslapi::vector&,
    +                                    const std::vector<size_t>&,
    +                                    const std::vector<size_t>&,
    +                                    const theplu::gslapi::vector&,
    +                                    bool&);
     
    -    ///
    -    /// Private function that determines when to stop the training.
    -    /// The test is done in two steps. First, we check that the
    -    /// functional margin is at least 2 - epsilon. Second, we check
    -    /// that the gap between the primal and the dual object is less
    -    /// than epsilon.
    -    ///
    -    bool stop(const gslapi::vector& target_,
    -        const gslapi::matrix& kernel,
    -        const gslapi::vector& alpha_);
    -
    +
      };
     
    -  gslapi::vector SVM::get_output() const 
    -  {
    -    // Peter, changes to a one-liner and move upto declaration.
    -    gslapi::vector bias(target_.size(),bias_);
    -    return kernel_ * alpha_.mul_elements(target_) + bias;
    -  }
     
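
Given the new class documentation, a usage sketch may help; this is hypothetical code, not from the repository. It builds the full N x N kernel up front (the N^2 memory cost the comment warns about) and hands it to the SVM. The linear_kernel helper and the data layout are assumptions, while the SVM constructor, train() and get_output() calls follow the declarations in SVM.h.

    #include <cstddef>
    #include <vector>
    // #include "SVM.h"  // plus the gslapi matrix/vector headers from this tree

    // Hypothetical kernel function, for illustration only.
    double linear_kernel(const std::vector<double>& a,
                         const std::vector<double>& b)
    {
      double s = 0;
      for (std::size_t k = 0; k < a.size(); ++k)
        s += a[k] * b[k];
      return s;
    }

    theplu::gslapi::vector
    train_and_predict(const std::vector<std::vector<double> >& data, // N samples
                      const theplu::gslapi::vector& target)  // labels in {-1,+1}
    {
      const std::size_t n = data.size();
      theplu::gslapi::matrix kernel(n, n);  // N*N doubles: the quadratic memory cost
      for (std::size_t i = 0; i < n; ++i)
        for (std::size_t j = 0; j < n; ++j)
          kernel(i, j) = linear_kernel(data[i], data[j]);

      theplu::cpptools::SVM svm(kernel, target); // empty train_set => all samples
      svm.train();
      return svm.get_output();  // output on the training samples only
    }
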
  • trunk/src/vector.cc (r65 → r68)

     
     
    +
      std::pair<size_t,size_t> vector::minmax_index(void) const
      {
        size_t min_index=0;
        size_t max_index=0;
    -    void gsl_vector_minmax_index (const gsl_vector * v_, size_t * min_index, size_t * max_index);
    +    gsl_vector_minmax_index(v_, &min_index, &max_index);
        return std::pair<size_t,size_t>(min_index, max_index);
      }
    …
     
     
    -//Peter, add default checking all elements using GSL
    +
      std::pair<size_t,size_t> vector::minmax_index(const std::vector<size_t>& subset ) const
      {
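
For reference, the GSL routine wrapped by minmax_index(void) must be called with the addresses of the two output indices. A small self-contained example, independent of this library:

    #include <cstdio>
    #include <gsl/gsl_vector.h>

    int main()
    {
      gsl_vector* v = gsl_vector_alloc(4);
      const double data[] = {3.0, -1.0, 7.0, 2.0};
      for (size_t i = 0; i < 4; ++i)
        gsl_vector_set(v, i, data[i]);

      size_t imin = 0, imax = 0;
      gsl_vector_minmax_index(v, &imin, &imax);  // here imin == 1, imax == 2
      std::printf("min at %zu, max at %zu\n", imin, imax);
      gsl_vector_free(v);
      return 0;
    }
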