// $Id: SVM.h 593 2006-08-25 12:59:21Z markus $

#ifndef _theplu_classifier_svm_
#define _theplu_classifier_svm_

#include <c++_tools/classifier/DataLookup2D.h>
#include <c++_tools/classifier/KernelLookup.h>
#include <c++_tools/classifier/SupervisedClassifier.h>
#include <c++_tools/classifier/Target.h>
#include <c++_tools/gslapi/vector.h>

#include <cassert>
#include <utility>
#include <vector>


namespace theplu {
namespace classifier {

  // forward declarations
  class CrossSplitter;

  // @internal Class keeping track of which samples are support vectors
  // and which are not. The first nof_sv elements in the vector are
  // indices of the support vectors.
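  //
  // Illustrative layout (hypothetical values, not from the code): with
  // vec_ = {4,1,3,0,2} and nof_sv_ = 2, samples 4 and 1 are support
  // vectors while samples 3, 0 and 2 are not.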
  //
  class Index
  {

  public:
    // Default constructor
    Index();

    // Constructor taking the number of samples
    Index(const size_t);

    // @return index_first
    inline size_t index_first(void) const
    { assert(index_first_<size()); return index_first_; }

    // @return index_second
    inline size_t index_second(void) const
    { assert(index_second_<size()); return index_second_; }

    // Synchronize the object with @a alpha
    void init(const gslapi::vector& alpha, const double);

    // @return number of samples
    inline size_t size(void) const { return vec_.size(); }

    // @return number of support vectors
    inline size_t nof_sv(void) const { return nof_sv_; }

    // Make first an nsv. If it is already an nsv, nothing happens.
    void nsv_first(void);

    // Make second an nsv. If it is already an nsv, nothing happens.
    void nsv_second(void);

    // Randomize the nsv part of the vector and set index_first to
    // nof_sv_ (the first nsv)
    void shuffle(void);

    // Make first an sv. If it is already an sv, nothing happens.
    void sv_first(void);

    // Make second an sv. If it is already an sv, nothing happens.
    void sv_second(void);

    // Set index_first to the given position (value_first is updated accordingly)
    void update_first(const size_t);

    // Set index_second to the given position (value_second is updated accordingly)
    void update_second(const size_t);

    // @return value_first
    inline size_t value_first(void) const
    { assert(value_first_<size()); return value_first_; }

    // @return value_second
    inline size_t value_second(void) const
    { assert(value_second_<size()); return value_second_; }

    inline size_t operator()(size_t i) const {
      assert(i<size()); assert(vec_[i]<size()); return vec_[i]; }

  private:
    size_t index_first_;
    size_t index_second_;
    size_t nof_sv_;
    std::vector<size_t> vec_;
    size_t value_first_;  // vec_[index_first_], cached for fast access
    size_t value_second_; // vec_[index_second_], cached for fast access

  };

  ///
  /// @brief Support Vector Machine
  ///
  /// Class for SVM using Keerthi's second modification of Platt's
  /// Sequential Minimal Optimization. The SVM uses all data given for
  /// training. If validation or testing is wanted, this should be
  /// taken care of outside (in the kernel).
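  ///
  /// Example usage (a minimal sketch; construction of the
  /// KernelLookup and Target objects is assumed and not shown):
  /// @code
  /// SVM svm(kernel, target); // kernel: KernelLookup, target: Target
  /// svm.set_C(100);          // a finite C gives a soft margin
  /// if (svm.train()) {       // the bool is assumed to signal success
  ///   const gslapi::vector& o = svm.output();
  ///   // use o ...
  /// }
  /// @endcode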
  ///
  class SVM : public SupervisedClassifier
  {

  public:
    ///
    /// Constructor taking the kernel and the target vector as
    /// input.
    ///
    /// @note if @a target or @a kernel is destroyed, the behaviour
    /// is undefined.
    ///
    SVM(const KernelLookup& kernel, const Target& target);

    ///
    /// Constructor taking the kernel, the target vector, the score
    /// used to rank data inputs, and the number of top-ranked data
    /// inputs to use in the classification.
    ///
    /// @note if @a target or @a kernel is destroyed, the behaviour
    /// is undefined.
    ///
    /// @note has no effect yet
    SVM(const KernelLookup& kernel, const Target& target,
        statistics::Score&, const size_t);

    ///
    /// Destructor
    ///
    virtual ~SVM();

    ///
    /// @todo doc
    ///
    SupervisedClassifier*
    make_classifier(const CrossSplitter&) const;

    ///
    /// @return \f$\alpha\f$
    ///
    inline const gslapi::vector& alpha(void) const { return alpha_; }

    ///
    /// The C-parameter is the balance term (see train()). A very
    /// large C means the training will focus on getting samples
    /// correctly classified, with a risk of overfitting and poor
    /// generalisation. A too small C will result in a training where
    /// misclassifications are not penalized. C is weighted with
    /// respect to the group sizes, so \f$ n_+C_+ = n_-C_- \f$,
    /// meaning a misclassification of the smaller group is penalized
    /// harder. This balance is equivalent to the one occurring in
    /// regression with regularisation, or in ANN training with a
    /// weight-decay term. By default, C is set to infinity.
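    ///
    /// For example (numbers for illustration only): with \f$ n_+=10 \f$
    /// and \f$ n_-=40 \f$ samples, the weighting gives \f$ C_+=4C_- \f$.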
    ///
    /// @return mean of the vector \f$ C_i \f$
    ///
    inline double C(void) const { return 1/C_inverse_; }

    ///
    /// Default is max_epochs set to 10,000,000.
    ///
    /// @return maximal number of epochs
    ///
    inline long int max_epochs(void) const { return max_epochs_; }

    ///
    /// The output is calculated as \f$ o_i = \sum \alpha_j t_j K_{ij}
    /// + bias \f$, where \f$ t \f$ is the target.
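    ///
    /// For instance (illustration only), with two training samples:
    /// \f$ o_1 = \alpha_1 t_1 K_{11} + \alpha_2 t_2 K_{12} + bias \f$.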
    ///
    /// @return output
    ///
    inline const theplu::gslapi::vector& output(void) const { return output_; }

    ///
    /// Generate prediction @a predict from @a input. The prediction
    /// is calculated as the output times the margin, i.e., the
    /// geometric distance from the decision hyperplane: \f$ \frac{
    /// \sum \alpha_j t_j K_{ij} + bias}{w} \f$. The output has 2
    /// rows: the first row is for binary target true, and the second
    /// is for binary target false. The second row is superfluous as
    /// it is the first row negated; it exists just to be aligned with
    /// multi-class SupervisedClassifiers. Each column in @a input and
    /// @a predict corresponds to a sample to predict. Each row in @a
    /// input corresponds to a training sample; more exactly, row i in
    /// @a input should correspond to row i in the KernelLookup that
    /// was used for training.
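    ///
    /// Example (a minimal sketch; construction of the test data and
    /// sizing of the result matrix are assumptions, not taken from
    /// this file):
    /// @code
    /// gslapi::matrix result; // assumed to be sized by predict or beforehand
    /// svm.predict(test_data, result); // test_data is a DataLookup2D
    /// // result(0,j) is the prediction for sample j (binary target true)
    /// @endcode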
    ///
    void predict(const DataLookup2D& input, gslapi::matrix& predict) const;

    ///
    /// @return output times margin (i.e. geometric distance from the
    /// decision hyperplane) from data @a input
    ///
    double predict(const DataLookup1D& input) const;

    ///
    /// @return output times margin from data @a input with
    /// corresponding @a weight
    ///
    double predict(const DataLookup1D& input, const DataLookup1D& weight) const;

    ///
    /// Function sets \f$ \alpha=0 \f$ and makes the SVM untrained.
    ///
    inline void reset(void)
    { trained_=false; alpha_=gslapi::vector(target_.size(),0); }

    ///
    /// @brief sets the C-parameter
    ///
    void set_C(const double);

    ///
    /// Train the SVM following Platt's SMO, with Keerthi's
    /// modification. Minimizes \f$ \frac{1}{2}\sum
    /// y_iy_j\alpha_i\alpha_j(K_{ij}+\frac{1}{C_i}\delta_{ij}) \f$,
    /// which corresponds to minimizing \f$ \sum w_i^2+\sum C_i\xi_i^2
    /// \f$.
    ///
    bool train();

  private:
    ///
    /// Copy constructor. (not implemented)
    ///
    SVM(const SVM&);

    ///
    /// Calculates bounds for alpha2
    ///
    void bounds(double&, double&) const;

    ///
    /// @brief calculates the bias term
    ///
    /// @return true if successful
    ///
    bool calculate_bias(void);

    ///
    /// Calculate the margin, that is, the inverse of w
    ///
    void calculate_margin(void);

    ///
    /// Private function choosing which two elements should be
    /// updated. It first checks for the biggest violation of the
    /// condition (output - target = 0) among the support vectors
    /// (alpha!=0). If no violation is found, it checks sequentially
    /// among the other samples. If there is no violation there
    /// either, training is complete.
    ///
    /// @return true if a pair of samples that violate the conditions
    /// can be found
    ///
    bool choose(const theplu::gslapi::vector&);

    ///
    /// @return kernel modified with the diagonal term (soft margin)
    ///
    inline double kernel_mod(const size_t i, const size_t j) const
    { return i!=j ? (*kernel_)(i,j) : (*kernel_)(i,j) + C_inverse_; }

    /// @return 1 if i belongs to binary target true, else -1
    inline int target(size_t i) const { return target_.binary(i) ? 1 : -1; }

    gslapi::vector alpha_;
    double bias_;
    double C_inverse_;
    const KernelLookup* kernel_;
    double margin_;
    unsigned long int max_epochs_;
    gslapi::vector output_;
    bool owner_;
    Index sample_;
    bool trained_;
    double tolerance_;

  };

  ///
  /// @todo The output operator for the SVM class.
  ///
  // std::ostream& operator<<(std::ostream& s, const SVM&);


}} // of namespace classifier and namespace theplu

#endif