source: trunk/lib/classifier/SVM.h @ 545

Last change on this file since 545 was 545, checked in by Peter, 17 years ago

added feature selection for SVM

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 7.5 KB
Line 
1// $Id: SVM.h 545 2006-03-06 13:35:45Z peter $
2
3#ifndef _theplu_classifier_svm_
4#define _theplu_classifier_svm_
5
6#include <c++_tools/classifier/DataLookup2D.h>
7#include <c++_tools/classifier/KernelLookup.h>
8#include <c++_tools/classifier/SupervisedClassifier.h>
9#include <c++_tools/classifier/Target.h>
10#include <c++_tools/gslapi/vector.h>
11
12#include <cassert>
13#include <utility>
14#include <vector>
15
16
17namespace theplu {
18namespace classifier { 
19
20  // @internal Class keeping track of which samples are support vectors and
21  // not. The first nof_sv elements in the vector are indices of the
22  // support vectors
23  //
24  class Index
25  {
26
27  public:
28    //Default Contructor
29    Index(); 
30
31    //
32    Index(const size_t);
33
34    // @return index_first
35    inline size_t index_first(void) const 
36    { assert(index_first_<n()); return index_first_; }
37
38    // @return index_second
39    inline size_t index_second(void) const 
40    { assert(index_second_<n()); return index_second_; }
41
42    // synch the object against alpha
43    void init(const gslapi::vector& alpha, const double);
44
45    // @return nof samples
46    inline size_t n(void) const { return vec_.size(); }
47
48    // @return nof support vectors
49    inline size_t nof_sv(void) const { return nof_sv_; }
50
51    // making first to an nsv. If already sv, nothing happens.
52    void nsv_first(void);
53
54    // making first to an nsv. If already sv, nothing happens.
55    void nsv_second(void);   
56
57    // randomizes the nsv part of vector and sets index_first to
58    // nof_sv_ (the first nsv)
59    void shuffle(void);
60
61    // making first to a sv. If already sv, nothing happens.
62    void sv_first(void);
63
64    // making first to a sv. If already sv, nothing happens.
65    void sv_second(void);
66
67    //
68    void update_first(const size_t);
69
70    //
71    void update_second(const size_t);
72
73    // @return value_first
74    inline size_t value_first(void) const 
75    { assert(value_first_<n()); return value_first_; }
76
77    // @return const ref value_second
78    inline size_t value_second(void) const 
79    { assert(value_first_<n()); return value_second_; }
80
81    inline size_t operator()(size_t i) const { 
82      assert(i<n()); assert(vec_[i]<n()); return vec_[i]; }
83
84  private:
85    size_t index_first_;
86    size_t index_second_;
87    size_t nof_sv_;
88    std::vector<size_t> vec_;
89    size_t value_first_; // vec_[index_first_] exists for fast access
90    size_t value_second_; // vec_[index_second_] exists for fast access
91   
92  };
93
  ///
  /// Class for SVM using Keerthi's second modification of Platt's
  /// Sequential Minimal Optimization. The SVM uses all data given for
  /// training. If validation or testing is wanted this should be
  /// taken care of outside (in the kernel).
  ///   
  class SVM : public SupervisedClassifier
  {
 
  public:
    ///
    /// Constructor taking the kernel and the target vector as
    /// input.
    ///
    /// @note if the @a target or @a kernel
    /// is destroyed the behaviour is undefined.
    ///
    SVM(const KernelLookup& kernel, const Target& target);

    ///
    /// Constructor taking the kernel, the target vector, the score
    /// used to rank data inputs, and the number of top ranked data
    /// inputs to use in the classification.
    ///
    /// @note if the @a target or @a kernel
    /// is destroyed the behaviour is undefined.
    ///
    /// @note the score and size arguments have no effect yet
    /// (feature selection is not implemented).
    SVM(const KernelLookup& kernel, const Target& target,
        statistics::Score&, const size_t);

    ///
    /// @todo doc
    ///
    /// NOTE(review): presumably creates a new SVM trained on the
    /// given data/target, per the SupervisedClassifier interface --
    /// confirm against SVM.cc before relying on ownership semantics.
    SupervisedClassifier* 
    make_classifier(const DataLookup2D&, const Target&) const;

    ///
    /// @return \f$\alpha\f$ (the Lagrange multipliers, one per sample)
    ///
    inline const gslapi::vector& alpha(void) const { return alpha_; }

    ///
    /// The C-parameter is the balance term (see train()). A very
    /// large C means the training will be focused on getting samples
    /// correctly classified, with risk for overfitting and poor
    /// generalisation. A too small C will result in a training where
    /// misclassifications are not penalized. C is weighted with
    /// respect to the size, so \f$ n_+C_+ = n_-C_- \f$, meaning a
    /// misclassification of the smaller group is penalized
    /// harder. This balance is equivalent to the one occurring for
    /// regression with regularisation, or ANN-training with a
    /// weight-decay term. Default is C set to infinity.
    ///
    /// @returns mean of vector \f$ C_i \f$
    ///
    inline double C(void) const { return 1/C_inverse_; }

    ///
    /// Default is max_epochs set to 10,000,000.
    ///
    /// @return number of maximal epochs
    ///
    /// NOTE(review): return type is long int but max_epochs_ is
    /// unsigned long int -- conversion is implementation-defined for
    /// values above LONG_MAX; consider unifying the types.
    inline long int max_epochs(void) const {return max_epochs_;}
   
    ///
    /// The output is calculated as \f$ o_i = \sum \alpha_j t_j K_{ij}
    /// + bias \f$, where \f$ t \f$ is the target.
    ///
    /// @return output
    ///
    inline const theplu::gslapi::vector& output(void) const { return output_; }

    ///
    /// Generate prediction @a output from @a input. The prediction is
    /// returned in @a output. The output has 2 rows. The first row is
    /// for binary target true, and the second is for binary target
    /// false. The second row is superfluous because it is the first
    /// row negated. It exists just to be aligned with multi-class
    /// SupervisedClassifiers. Each column in @a input and @a output
    /// corresponds to a sample to predict. Each row in @a input
    /// corresponds to a training sample, and more exactly row i in @a
    /// input should correspond to row i in KernelLookup that was used
    /// for training.
    ///
    void predict(const DataLookup2D& input, gslapi::matrix& output) const;

    ///
    /// @return output from data @a input
    ///
    double predict(const DataLookup1D& input) const;

    ///
    /// @return output from data @a input with corresponding @a weight
    ///
    double predict(const DataLookup1D& input, const DataLookup1D& weight) const;

    ///
    /// Function sets \f$ \alpha=0 \f$ and makes SVM untrained.
    ///
    inline void reset(void) 
    { trained_=false; alpha_=gslapi::vector(target_.size(),0); }

    ///
    /// Training the SVM following Platt's SMO, with Keerthi's
    /// modification. Minimizing \f$ \frac{1}{2}\sum
    /// y_iy_j\alpha_i\alpha_j(K_{ij}+\frac{1}{C_i}\delta_{ij}) \f$,
    /// which corresponds to minimizing \f$ \sum w_i^2+\sum C_i\xi_i^2
    /// \f$.
    ///
    bool train();

       
     
  private:
    ///
    /// Copy constructor. (not implemented)
    ///
    SVM(const SVM&);
         
    ///
    /// Calculates bounds for alpha2
    ///
    void bounds(double&, double&) const;

    ///
    /// @brief calculates the bias term
    ///
    /// @return true if successful
    ///
    bool calculate_bias(void);

    ///
    ///   Private function choosing which two elements that should be
    ///   updated. First checking for the biggest violation (output - target =
    ///   0) among support vectors (alpha!=0). If no violation was found check
    ///   sequentially among the other samples. If no violation there as
    ///   well training is completed
    ///
    ///  @return true if a pair of samples that violate the conditions
    ///  can be found
    ///
    bool choose(const theplu::gslapi::vector&);

    ///
    /// @return kernel modified with diagonal term (soft margin)
    ///
    inline double kernel_mod(const size_t i, const size_t j) const 
    { return i!=j ? (*kernel_)(i,j) : (*kernel_)(i,j) + C_inverse_; }
   
    /// @return 1 if i belong to binary target true else -1
    inline int target(size_t i) const { return target_.binary(i) ? 1 : -1; }

    gslapi::vector alpha_;        // Lagrange multipliers (see alpha())
    double bias_;                 // bias term of the output (see output())
    double C_inverse_;            // 1/C; C() returns its reciprocal
    const KernelLookup* kernel_;  // non-owning unless owner_ (see below)
    unsigned long int max_epochs_;
    gslapi::vector output_;       // cached training outputs (see output())
    bool owner_;                  // presumably: true if kernel_ must be
                                  // deleted by this object -- verify in SVM.cc
    Index sample_;                // sv/nsv bookkeeping for SMO
    bool trained_;                // cleared by reset(), set by training
    double tolerance_;            // presumably SMO convergence tolerance --
                                  // verify in SVM.cc
   

  };
260
261  ///
262  /// @todo The output operator for the SVM class.
263  ///
264  //std::ostream& operator<< (std::ostream& s, const SVM&);
265 
266 
267}} // of namespace classifier and namespace theplu
268
269#endif
Note: See TracBrowser for help on using the repository browser.