source: trunk/c++_tools/classifier/SVM.h @ 641

Last change on this file since 641 was 641, checked in by Peter, 15 years ago

added tag for Doxygen preprocessor to exclude internal Index class in SVM

#ifndef _theplu_classifier_svm_
#define _theplu_classifier_svm_

// $Id$

#include <c++_tools/classifier/KernelLookup.h>
#include <c++_tools/classifier/SupervisedClassifier.h>
#include <c++_tools/classifier/Target.h>
#include <c++_tools/utility/vector.h>

#include <cassert>
#include <utility>
#include <vector>


namespace theplu {
namespace classifier {

  class DataLookup2D;
#ifndef DOXYGEN_SHOULD_SKIP_THIS
  // @internal Class keeping track of which samples are support
  // vectors and which are not. The first nof_sv elements in the
  // vector are indices of the support vectors.
  //
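  // Illustration of the invariant (hypothetical values): with
  // size()==5 and nof_sv()==3, vec_ could be {4,0,2,1,3}, i.e.,
  // samples 4, 0 and 2 are support vectors; 1 and 3 are not.
  //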
  class Index
  {

  public:
    // Default constructor
    Index();

    // Constructor taking the number of samples
    Index(const size_t);

    // @return index_first
    inline size_t index_first(void) const
    { assert(index_first_<size()); return index_first_; }

    // @return index_second
    inline size_t index_second(void) const
    { assert(index_second_<size()); return index_second_; }

    // sync the object against alpha
    void init(const utility::vector& alpha, const double);

    // @return nof samples
    inline size_t size(void) const { return vec_.size(); }

    // @return nof support vectors
    inline size_t nof_sv(void) const { return nof_sv_; }

    // makes first a non-support vector. If it already is one,
    // nothing happens.
    void nsv_first(void);

    // makes second a non-support vector. If it already is one,
    // nothing happens.
    void nsv_second(void);

    // randomizes the nsv part of the vector and sets index_first to
    // nof_sv_ (the first nsv)
    void shuffle(void);

    // makes first a support vector. If it already is one, nothing
    // happens.
    void sv_first(void);

    // makes second a support vector. If it already is one, nothing
    // happens.
    void sv_second(void);

    // sets index_first_ to the given index and updates the cached
    // value_first_
    void update_first(const size_t);

    // sets index_second_ to the given index and updates the cached
    // value_second_
    void update_second(const size_t);

    // @return value_first
    inline size_t value_first(void) const
    { assert(value_first_<size()); return value_first_; }

    // @return value_second
    inline size_t value_second(void) const
    { assert(value_second_<size()); return value_second_; }

    inline size_t operator()(size_t i) const {
      assert(i<size()); assert(vec_[i]<size()); return vec_[i]; }

  private:
    size_t index_first_;
    size_t index_second_;
    size_t nof_sv_;
    std::vector<size_t> vec_;
    size_t value_first_; // vec_[index_first_] exists for fast access
    size_t value_second_; // vec_[index_second_] exists for fast access

  };
#endif /* DOXYGEN_SHOULD_SKIP_THIS */

  ///
  /// @brief Support Vector Machine
  ///
  /// Class for SVM using Keerthi's second modification of Platt's
  /// Sequential Minimal Optimization. The SVM uses all data given for
  /// training. If validation or testing is wanted, this should be
  /// taken care of outside (in the kernel).
  ///
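  /// A minimal usage sketch (the @c kernel and @c target objects are
  /// assumed to be constructed elsewhere; their construction is
  /// application-specific and not part of this header):
  /// \code
  /// SVM svm(kernel, target);
  /// svm.set_C(100);                            // finite C: soft margin
  /// if (svm.train()) {
  ///   const utility::vector& a = svm.alpha();  // trained multipliers
  ///   const utility::vector& o = svm.output(); // output on training data
  /// }
  /// \endcode
  ///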
  class SVM : public SupervisedClassifier
  {

  public:
    ///
    /// Constructor taking the kernel and the target vector as
    /// input.
    ///
    /// @note if @a target or @a kernel is destroyed, the behaviour
    /// is undefined.
    ///
    SVM(const KernelLookup& kernel, const Target& target);

    ///
    /// Destructor
    ///
    virtual ~SVM();

    ///
    /// If the DataLookup2D is not a KernelLookup, a bad_cast
    /// exception is thrown.
    ///
    SupervisedClassifier*
    make_classifier(const DataLookup2D&, const Target&) const;

    ///
    /// @return \f$ \alpha \f$
    ///
    inline const utility::vector& alpha(void) const { return alpha_; }

    ///
    /// The C-parameter is the balance term (see train()). A very
    /// large C means the training will be focused on getting samples
    /// correctly classified, with a risk of overfitting and poor
    /// generalisation. A too small C will result in a training where
    /// misclassifications are not penalized. C is weighted with
    /// respect to the group sizes, so \f$ n_+C_+ = n_-C_- \f$, meaning
    /// a misclassification of the smaller group is penalized
    /// harder. This balance is equivalent to the one occurring in
    /// regression with regularisation, or ANN training with a
    /// weight-decay term. Default is C set to infinity.
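    ///
    /// For example (a worked illustration): with \f$ n_+=10 \f$
    /// positive and \f$ n_-=40 \f$ negative samples, \f$
    /// n_+C_+=n_-C_- \f$ gives \f$ C_+=4C_- \f$, i.e., an error on
    /// the smaller group costs four times as much.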
    ///
    /// @return mean of vector \f$ C_i \f$
    ///
    inline double C(void) const { return 1/C_inverse_; }

    ///
    /// Default is max_epochs set to 10,000,000.
    ///
    /// @return maximal number of epochs
    ///
    inline long int max_epochs(void) const {return max_epochs_;}

    ///
    /// The output is calculated as \f$ o_i = \sum_j \alpha_j t_j K_{ij}
    /// + bias \f$, where \f$ t \f$ is the target.
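    ///
    /// A direct transcription of that sum (a sketch; element access
    /// for \c alpha, \c t and the kernel \c K is assumed):
    /// \code
    /// double o_i = bias;                     // bias term
    /// for (size_t j=0; j<alpha.size(); ++j)  // sum over training samples
    ///   o_i += alpha(j) * t(j) * K(i,j);
    /// \endcode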
    ///
    /// @return output
    ///
    inline const theplu::utility::vector& output(void) const { return output_; }

    ///
    /// Generate prediction @a predict from @a input. The prediction
    /// is calculated as the output times the margin, i.e., the geometric
    /// distance from the decision hyperplane: \f$ \frac{ \sum \alpha_j
    /// t_j K_{ij} + bias}{w} \f$. The output has 2 rows. The first row
    /// is for binary target true, and the second is for binary target
    /// false. The second row is superfluous, as it is the first row
    /// negated; it exists only to be aligned with multi-class
    /// SupervisedClassifiers. Each column in @a input and @a predict
    /// corresponds to a sample to predict. Each row in @a input
    /// corresponds to a training sample; more exactly, row i in @a
    /// input should correspond to row i in the KernelLookup that was
    /// used for training.
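    ///
    /// Expected shapes in a call (a sketch; @c test_kernel, @c n_test
    /// and the matrix constructor used here are assumptions):
    /// \code
    /// utility::matrix prediction(2, n_test); // row 0: target true,
    ///                                        // row 1: its negation
    /// svm.predict(test_kernel, prediction);  // column s = sample s
    /// \endcode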
    ///
    void predict(const DataLookup2D& input, utility::matrix& predict) const;

    ///
    /// @return output times margin (i.e., geometric distance from the
    /// decision hyperplane) from data @a input
    ///
    double predict(const DataLookup1D& input) const;

    ///
    /// @return output times margin from data @a input with
    /// corresponding @a weight
    ///
    double predict(const DataLookupWeighted1D& input) const;

    ///
    /// Function sets \f$ \alpha=0 \f$ and makes the SVM untrained.
    ///
    inline void reset(void)
    { trained_=false; alpha_=utility::vector(target_.size(),0); }

    ///
    /// @brief sets the C-parameter
    ///
    void set_C(const double);

    ///
    /// Trains the SVM following Platt's SMO, with Keerthi's
    /// modification. Minimizes \f$ \frac{1}{2}\sum
    /// y_iy_j\alpha_i\alpha_j(K_{ij}+\frac{1}{C_i}\delta_{ij}) \f$,
    /// which corresponds to minimizing \f$ \sum w_i^2+\sum C_i\xi_i^2
    /// \f$.
    ///
    bool train();


  private:
    ///
    /// Copy constructor (not implemented)
    ///
    SVM(const SVM&);

    ///
    /// Calculates bounds for alpha2
    ///
    void bounds(double&, double&) const;

    ///
    /// @brief calculates the bias term
    ///
    /// @return true if successful
    ///
    bool calculate_bias(void);

    ///
    /// Calculates the margin, i.e., the inverse of w
    ///
    void calculate_margin(void);

    ///
    /// Private function choosing which two elements should be
    /// updated. First checks for the largest violation of the
    /// condition output - target = 0 among the support vectors
    /// (alpha!=0). If no violation is found, it checks sequentially
    /// among the other samples. If there is no violation there
    /// either, training is complete.
    ///
    /// @return true if a pair of samples violating the conditions
    /// can be found
    ///
    bool choose(const theplu::utility::vector&);

    ///
    /// @return kernel modified with diagonal term (soft margin)
    ///
    inline double kernel_mod(const size_t i, const size_t j) const
    { return i!=j ? (*kernel_)(i,j) : (*kernel_)(i,j) + C_inverse_; }

    /// @return 1 if i belongs to binary target true, else -1
    inline int target(size_t i) const { return target_.binary(i) ? 1 : -1; }

    utility::vector alpha_;
    double bias_;
    double C_inverse_;
    const KernelLookup* kernel_;
    double margin_;
    unsigned long int max_epochs_;
    utility::vector output_;
    bool owner_;
    Index sample_;
    bool trained_;
    double tolerance_;

  };

}} // of namespace classifier and namespace theplu

#endif