source: trunk/src/SVM.h @ 227

Last change on this file since 227 was 227, checked in by Jari Häkkinen, 18 years ago

Started reimplementation of the vector class.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 3.2 KB
Line 
1// $Id: SVM.h 227 2005-02-01 00:52:27Z jari $
2
3#ifndef _theplu_cpptools_svm_
4#define _theplu_cpptools_svm_
5
6// C++ tools include
7/////////////////////
8#include "Kernel.h"
9#include "matrix.h"
10#include "vector.h"
11
12
13// Standard C++ includes
14////////////////////////
15#include <utility>
16#include <vector>
17
18
19namespace theplu {
20namespace cpptools { 
21  ///
22  /// Class for SVM using Keerthi's second modification of Platt's SMO. Also
23  /// the elements of the kernel is not computed sequentially, but the
24  /// complete kernel matrix is taken as input and stored in memory. This
25  /// means that the training is faster, but also that it is not possible to
26  /// train a large number of samples N, since the memory cost for the kernel
27  /// matrix is N^2. The SVM object does not contain any data, hence any true
28  /// prediction is not possible.
29  ///   
30  class SVM
31  {
32 
33  public:
34    ///
35    /// Constructor taking the kernel matrix and the target vector as input
36    ///
37    SVM(const Kernel&, const gslapi::vector&, 
38        const std::vector<size_t>& = std::vector<size_t>());
39         
40    ///
41    /// Function returns \f$\alpha\f$
42    ///
43    inline gslapi::vector get_alpha(void) const { return alpha_; }
44
45    ///
46    /// Function returns the C-parameter
47    ///
48    inline double get_c(void) const { return c_; }
49
50    ///
51    /// @return number of maximal epochs
52    ///
53    inline long int max_epochs(void) const {return max_epochs_;}
54   
55    ///
56    /// Changing number of maximal epochs
57    ///
58    inline void max_epochs(const unsigned long int d) {max_epochs_=d;} 
59   
60    ///
61    /// @return output
62    ///
63    inline theplu::gslapi::vector output(void) 
64    {return kernel_.get() * alpha_.mul(target_)+
65       theplu::gslapi::vector(alpha_.size(),bias_);}
66
67    ///
68    /// Changing the C-parameter
69    ///
70    inline void set_c(const double c) {c_ = c;}
71
72    ///
73    /// Training the SVM following Platt's SMO, with Keerti's
74    /// modifacation. However the complete kernel is stored in
75    /// memory. The reason for this is speed. When number of samples N
76    /// is large this is not possible since the memory cost for the
77    /// kernel scales N^2. In that case one should follow the SMO and
78    /// calculate the kernel elements sequentially. Minimizing \f$
79    /// \frac{1}{2}\sum
80    /// y_iy_j\alpha_i\alpha_j(K_{ij}+\frac{1}{C_i}\delta_{ij}) \f$,
81    /// which corresponds to minimizing \f$ \sum w_i^2+\sum
82    /// C_i\xi_i^2 \f$
83    ///
84
85    bool train(void);
86   
87     
88  private:
89    gslapi::vector alpha_;
90    double bias_;
91    double c_;
92    Kernel kernel_;
93    unsigned long int max_epochs_;
94    gslapi::vector target_;
95    bool trained_;
96    std::vector<size_t> train_set_;
97    double tolerance_;
98   
99    ///
100    ///   Private function choosing which two elements that should be
101    ///   updated. First checking for the biggest violation (output - target =
102    ///   0) among support vectors (alpha!=0). If no violation was found check
103    ///   for sequentially among the other samples. If no violation there as
104    ///   well, stop_condition is fullfilled.
105    ///
106    std::pair<size_t, size_t> choose(const theplu::gslapi::vector&, 
107                                     const theplu::gslapi::vector&,
108                                     const theplu::gslapi::vector&,
109                                     bool&);
110
111       
112   
113  };
114
115
116
117
118}} // of namespace cpptools and namespace theplu
119
120#endif
Note: See TracBrowser for help on using the repository browser.