source: trunk/yat/classifier/SVM.h @ 963

Last change on this file since 963 was 963, checked in by Peter, 14 years ago

fixes #268. SVM throws when train() fails

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date ID
File size: 7.4 KB
Line 
1#ifndef _theplu_yat_classifier_svm_
2#define _theplu_yat_classifier_svm_
3
4// $Id$
5
6/*
7  Copyright (C) 2004, 2005 Jari Häkkinen, Peter Johansson
8  Copyright (C) 2006 Jari Häkkinen, Markus Ringnér, Peter Johansson
9  Copyright (C) 2007 Peter Johansson
10
11  This file is part of the yat library, http://trac.thep.lu.se/trac/yat
12
13  The yat library is free software; you can redistribute it and/or
14  modify it under the terms of the GNU General Public License as
15  published by the Free Software Foundation; either version 2 of the
16  License, or (at your option) any later version.
17
18  The yat library is distributed in the hope that it will be useful,
19  but WITHOUT ANY WARRANTY; without even the implied warranty of
20  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21  General Public License for more details.
22
23  You should have received a copy of the GNU General Public License
24  along with this program; if not, write to the Free Software
25  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
26  02111-1307, USA.
27*/
28
29#include "KernelLookup.h"
30#include "SupervisedClassifier.h"
31#include "SVindex.h"
32#include "yat/utility/vector.h"
33
34#include <utility>
35#include <vector>
36
37namespace theplu {
38namespace yat {
39namespace classifier { 
40
41  class DataLookup2D;
42  class Target;
43
  ///
  /// @brief Support Vector Machine
  ///
  /// Class for SVM using Keerthi's second modification of Platt's
  /// Sequential Minimal Optimization (SMO). The SVM uses all data
  /// given for training. If validation or testing is wanted this
  /// should be taken care of outside (in the kernel).
  ///
  class SVM : public SupervisedClassifier
  {
 
  public:
    ///
    /// @brief Constructor taking the kernel and the target vector as
    /// input.
    ///
    /// @note if the @a target or @a kernel
    /// is destroyed the behaviour is undefined.
    ///
    SVM(const KernelLookup& kernel, const Target& target);

    ///
    /// @brief Destructor
    ///
    virtual ~SVM();

    ///
    /// @return the underlying data of the classifier
    ///
    /// NOTE(review): presumably this is the KernelLookup passed to
    /// the constructor — confirm against the implementation file.
    ///
    const DataLookup2D& data(void) const;

    ///
    /// @brief Create a classifier of the same type for new data.
    ///
    /// If DataLookup2D is not a KernelLookup a bad_cast exception is
    /// thrown.
    ///
    SupervisedClassifier* 
    make_classifier(const DataLookup2D&, const Target&) const;

    ///
    /// @return \f$ \alpha \f$ (the Lagrange multipliers, one per
    /// training sample)
    ///
    const utility::vector& alpha(void) const;

    ///
    /// The C-parameter is the balance term (see train()). A very
    /// large C means the training will be focused on getting samples
    /// correctly classified, with risk for overfitting and poor
    /// generalisation. A too small C will result in a training where
    /// misclassifications are not penalized. C is weighted with
    /// respect to the size, so \f$ n_+C_+ = n_-C_- \f$, meaning a
    /// misclassification of the smaller group is penalized
    /// harder. This balance is equivalent to the one occurring for
    /// regression with regularisation, or ANN-training with a
    /// weight-decay term. Default is C set to infinity.
    ///
    /// @returns mean of vector \f$ C_i \f$
    ///
    double C(void) const;

    ///
    /// Default is max_epochs set to 100,000.
    ///
    /// @return number of maximal epochs
    ///
    long int max_epochs(void) const;
   
    /**
        The output is calculated as \f$ o_i = \sum \alpha_j t_j K_{ij}
        + bias \f$, where \f$ t \f$ is the target.
   
        @return output
    */ 
    const theplu::yat::utility::vector& output(void) const;

    /**
       Generate prediction @a predict from @a input. The prediction
       is calculated as the output times the margin, i.e., geometric
       distance from decision hyperplane: \f$ \frac{ \sum \alpha_j
       t_j K_{ij} + bias}{w} \f$ The output has 2 rows. The first row
       is for binary target true, and the second is for binary target
       false. The second row is superfluous as it is the first row
       negated. It exists just to be aligned with multi-class
       SupervisedClassifiers. Each column in @a input and @a output
       corresponds to a sample to predict. Each row in @a input
       corresponds to a training sample, and more exactly row i in @a
       input should correspond to row i in KernelLookup that was used
       for training.
    */
    void predict(const DataLookup2D& input, utility::matrix& predict) const;

    ///
    /// @return output times margin (i.e. geometric distance from
    /// decision hyperplane) from data @a input
    ///
    double predict(const DataLookup1D& input) const;

    ///
    /// @return output times margin from data @a input with
    /// corresponding @a weight
    ///
    double predict(const DataLookupWeighted1D& input) const;

    ///
    /// @brief Function sets \f$ \alpha=0 \f$ and makes SVM untrained.
    ///
    void reset(void);

    ///
    /// @brief sets the C-Parameter (see C())
    ///
    void set_C(const double);

    /**
       Training the SVM following Platt's SMO, with Keerthi's
       modification. Minimizing \f$ \frac{1}{2}\sum
       y_iy_j\alpha_i\alpha_j(K_{ij}+\frac{1}{C_i}\delta_{ij}) - \sum
       \alpha_i\f$ , which corresponds to minimizing \f$ \sum
       w_i^2+\sum C_i\xi_i^2 \f$.

       @note If the training problem is not linearly separable and C
       is set to infinity, the minima will be located in the infinity,
       and thus the minimum will not be reached within the maximal
       number of epochs. More exactly, when the problem is not
       linearly separable, there exists an eigenvector to \f$
       H_{ij}=y_iy_jK_{ij} \f$ within the space defined by the
       conditions: \f$ \alpha_i>0 \f$ and \f$ \sum \alpha_i y_i = 0
       \f$. As the eigenvalue is zero in this direction the quadratic
       term does not contribute to the objective, but the objective
       only consists of the linear term and hence there is no
       minimum. This problem only occurs when \f$ C \f$ is set to
       infinity because for a finite \f$ C \f$ all eigenvalues are
       finite. However, for a large \f$ C \f$ (and training problem is
       non-linearly separable) there exists an eigenvector
       corresponding to a small eigenvalue, which means the minima has
       moved from infinity to "very far away". In practice this will
       also result in that the minima is not reached within the
       maximal number of epochs and the value of \f$ C \f$ should be
       decreased.

       \throw if the maximal number of epochs is reached.
 
       @return true if successful
    */
    bool train();

       
     
  private:
    ///
    /// Copy constructor. (not implemented)
    ///
    SVM(const SVM&);
         
    ///
    /// Calculates bounds for alpha2
    ///
    void bounds(double&, double&) const;

    ///
    /// @brief calculates the bias term
    ///
    /// @return true if successful
    ///
    bool calculate_bias(void);

    ///
    /// Calculate margin that is inverse of w
    ///
    void calculate_margin(void);

    ///
    ///   Private function choosing which two elements should be
    ///   updated. First checking for the biggest violation (output -
    ///   target = 0) among support vectors (alpha != 0). If no
    ///   violation was found check sequentially among the other
    ///   samples. If no violation there as well training is
    ///   completed.
    ///
    ///  @return true if a pair of samples that violate the conditions
    ///  can be found
    ///
    bool choose(const theplu::yat::utility::vector&);

    ///
    /// @return kernel modified with diagonal term (soft margin)
    ///
    double kernel_mod(const size_t i, const size_t j) const;

    ///
    /// @return 1 if i belong to binary target true else -1
    ///
    int target(size_t i) const;

    // \f$ \alpha \f$, one Lagrange multiplier per training sample
    // (exposed through alpha()).
    utility::vector alpha_;
    // Bias term of the decision function (see output()).
    double bias_;
    // Inverse of the C-parameter; NOTE(review): presumably a zero
    // value encodes C = infinity — confirm in implementation.
    double C_inverse_;
    // Kernel used for training. NOTE(review): presumably owned only
    // when owner_ is true — confirm in implementation.
    const KernelLookup* kernel_; 
    // Margin, i.e. inverse of w (set by calculate_margin()).
    double margin_;
    // Maximal number of training epochs (see max_epochs()).
    unsigned long int max_epochs_;
    // Cached output values \f$ o_i \f$ (see output()).
    utility::vector output_;
    // NOTE(review): presumably whether *kernel_ is owned by this
    // object — confirm in implementation.
    bool owner_;
    // Index bookkeeping over training samples used by the SMO loop.
    SVindex sample_;
    // True once training has succeeded; cleared by reset().
    bool trained_;
    // NOTE(review): presumably the numerical tolerance used in the
    // SMO optimization loop — confirm in implementation.
    double tolerance_;

  };
247
248}}} // of namespace classifier, yat, and theplu
249
250#endif
Note: See TracBrowser for help on using the repository browser.