source: trunk/yat/classifier/SVM.h @ 747

Last change on this file since 747 was 747, checked in by Peter, 15 years ago

replaced includes in header files with forward declarations when possible. Added some includes in cc files.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date ID
File size: 7.3 KB
Line 
1#ifndef _theplu_yat_classifier_svm_
2#define _theplu_yat_classifier_svm_
3
4// $Id$
5
6/*
7  Copyright (C) The authors contributing to this file.
8
9  This file is part of the yat library, http://lev.thep.lu.se/trac/yat
10
11  The yat library is free software; you can redistribute it and/or
12  modify it under the terms of the GNU General Public License as
13  published by the Free Software Foundation; either version 2 of the
14  License, or (at your option) any later version.
15
16  The yat library is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  General Public License for more details.
20
21  You should have received a copy of the GNU General Public License
22  along with this program; if not, write to the Free Software
23  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
24  02111-1307, USA.
25*/
26
27#include "KernelLookup.h"
28#include "SupervisedClassifier.h"
29#include "SVindex.h"
30#include "yat/utility/vector.h"
31
32#include <utility>
33#include <vector>
34
35namespace theplu {
36namespace yat {
37namespace classifier { 
38
39  class DataLookup2D;
40  class Target;
41
42  ///
43  /// @brief Support Vector Machine
44  ///
45  ///
46  ///
47  /// Class for SVM using Keerthi's second modification of Platt's
48  /// Sequential Minimal Optimization. The SVM uses all data given for
49  /// training. If validation or testing is wanted this should be
50  /// taken care of outside (in the kernel).
51  ///   
52  class SVM : public SupervisedClassifier
53  {
54 
55  public:
56    ///
57    /// Constructor taking the kernel and the target vector as
58    /// input.
59    ///
60    /// @note if the @a target or @a kernel
61    /// is destroyed the behaviour is undefined.
62    ///
63    SVM(const KernelLookup& kernel, const Target& target);
64
65    ///
66    /// Destructor
67    ///
68    virtual ~SVM();
69
70    const DataLookup2D& data(void) const;
71
72    ///
73    /// If DataLookup2D is not a KernelLookup a bad_cast exception is thrown.
74    ///
75    SupervisedClassifier* 
76    make_classifier(const DataLookup2D&, const Target&) const;
77
78    ///
79    /// @return \f$ \alpha \f$
80    ///
81    const utility::vector& alpha(void) const;
82
83    ///
84    /// The C-parameter is the balance term (see train()). A very
85    /// large C means the training will be focused on getting samples
86    /// correctly classified, with risk for overfitting and poor
87    /// generalisation. A too small C will result in a training where
88    /// misclassifications are not penalized. C is weighted with
89    /// respect to the size, so \f$ n_+C_+ = n_-C_- \f$, meaning a
90    /// misclassificaion of the smaller group is penalized
91    /// harder. This balance is equivalent to the one occuring for
92    /// regression with regularisation, or ANN-training with a
93    /// weight-decay term. Default is C set to infinity.
94    ///
95    /// @returns mean of vector \f$ C_i \f$
96    ///
97    double C(void) const;
98
99    ///
100    /// Default is max_epochs set to 10,000,000.
101    ///
102    /// @return number of maximal epochs
103    ///
104    long int max_epochs(void) const;
105   
106    /**
107        The output is calculated as \f$ o_i = \sum \alpha_j t_j K_{ij}
108        + bias \f$, where \f$ t \f$ is the target.
109   
110        @return output
111    */ 
112    const theplu::yat::utility::vector& output(void) const;
113
114    /**
115       Generate prediction @a predict from @a input. The prediction
116       is calculated as the output times the margin, i.e., geometric
117       distance from decision hyperplane: \f$ \frac{ \sum \alpha_j
118       t_j K_{ij} + bias}{w} \f$ The output has 2 rows. The first row
119       is for binary target true, and the second is for binary target
120       false. The second row is superfluous as it is the first row
121       negated. It exist just to be aligned with multi-class
122       SupervisedClassifiers. Each column in @a input and @a output
123       corresponds to a sample to predict. Each row in @a input
124       corresponds to a training sample, and more exactly row i in @a
125       input should correspond to row i in KernelLookup that was used
126       for training.
127    */
128    void predict(const DataLookup2D& input, utility::matrix& predict) const;
129
130    ///
131    /// @return output times margin (i.e. geometric distance from
132    /// decision hyperplane) from data @a input
133    ///
134    double predict(const DataLookup1D& input) const;
135
136    ///
137    /// @return output times margin from data @a input with
138    /// corresponding @a weight
139    ///
140    double predict(const DataLookupWeighted1D& input) const;
141
142    ///
143    /// @brief Function sets \f$ \alpha=0 \f$ and makes SVM untrained.
144    ///
145    void reset(void);
146
147    ///
148    /// @brief sets the C-Parameter
149    ///
150    void set_C(const double);
151
152    /**
153       Training the SVM following Platt's SMO, with Keerti's
154       modifacation. Minimizing \f$ \frac{1}{2}\sum
155       y_iy_j\alpha_i\alpha_j(K_{ij}+\frac{1}{C_i}\delta_{ij}) - \sum
156       alpha_i\f$ , which corresponds to minimizing \f$ \sum
157       w_i^2+\sum C_i\xi_i^2 \f$.
158
159       @note If the training problem is not linearly separable and C
160       is set to infinity, the minima will be located in the infinity,
161       and thus the minumum will not be reached within the maximal
162       number of epochs. More exactly, when the problem is not
163       linearly separable, there exists an eigenvector to \f$
164       H_{ij}=y_iy_jK_{ij} \f$ within the space defined by the
165       conditions: \f$ \alpha_i>0 \f$ and \f$ \sum \alpha_i y_i = 0
166       \f$. As the eigenvalue is zero in this direction the quadratic
167       term does not contribute to the objective, but the objective
168       only consists of the linear term and hence there is no
169       minumum. This problem only occurs when \f$ C \f$ is set to
170       infinity because for a finite \f$ C \f$ all eigenvalues are
171       finite. However, for a large \f$ C \f$ (and training problem is
172       non-linearly separable) there exists an eigenvector
173       corresponding to a small eigenvalue, which means the minima has
174       moved from infinity to "very far away". In practice this will
175       also result in that the minima is not reached withing the
176       maximal number of epochs and the of \f$ C \f$ should be
177       decreased.
178 
179       @return true if succesful
180    */
181    bool train();
182
183       
184     
185  private:
186    ///
187    /// Copy constructor. (not implemented)
188    ///
189    SVM(const SVM&);
190         
191    ///
192    /// Calculates bounds for alpha2
193    ///
194    void bounds(double&, double&) const;
195
196    ///
197    /// @brief calculates the bias term
198    ///
199    /// @return true if successful
200    ///
201    bool calculate_bias(void);
202
203    ///
204    /// Calculate margin that is inverse of w
205    ///
206    void calculate_margin(void);
207
208    ///
209    ///   Private function choosing which two elements that should be
210    ///   updated. First checking for the biggest violation (output - target =
211    ///   0) among support vectors (alpha!=0). If no violation was found check
212    ///   sequentially among the other samples. If no violation there as
213    ///   well training is completed
214    ///
215    ///  @return true if a pair of samples that violate the conditions
216    ///  can be found
217    ///
218    bool choose(const theplu::yat::utility::vector&);
219
220    ///
221    /// @return kernel modified with diagonal term (soft margin)
222    ///
223    double kernel_mod(const size_t i, const size_t j) const;
224
225    ///
226    /// @return 1 if i belong to binary target true else -1
227    ///
228    int target(size_t i) const;
229
230    utility::vector alpha_;
231    double bias_;
232    double C_inverse_;
233    const KernelLookup* kernel_; 
234    double margin_;
235    unsigned long int max_epochs_;
236    utility::vector output_;
237    bool owner_;
238    SVindex sample_;
239    bool trained_;
240    double tolerance_;
241
242  };
243
244}}} // of namespace classifier, yat, and theplu
245
246#endif
Note: See TracBrowser for help on using the repository browser.