source: branches/0.4-stable/yat/classifier/SVM.h @ 1392

Last change on this file since 1392 was 1392, checked in by Peter, 15 years ago

trac has moved

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date ID
File size: 7.2 KB
#ifndef _theplu_yat_classifier_svm_
#define _theplu_yat_classifier_svm_

// $Id$

/*
  Copyright (C) 2004, 2005 Jari Häkkinen, Peter Johansson
  Copyright (C) 2006 Jari Häkkinen, Peter Johansson, Markus Ringnér
  Copyright (C) 2007 Jari Häkkinen, Peter Johansson
  Copyright (C) 2008 Peter Johansson

  This file is part of the yat library, http://dev.thep.lu.se/yat

  The yat library is free software; you can redistribute it and/or
  modify it under the terms of the GNU General Public License as
  published by the Free Software Foundation; either version 2 of the
  License, or (at your option) any later version.

  The yat library is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
  02111-1307, USA.
*/

#include "SVindex.h"
#include "Target.h"
#include "yat/utility/Vector.h"

#include <utility>
#include <vector>

namespace theplu {
namespace yat {
namespace utility {
  class Matrix;
}

namespace classifier {

  class DataLookup1D;
  class DataLookupWeighted1D;
  class KernelLookup;

  /**
     \brief Support Vector Machine
  */
  class SVM
  {

  public:
    ///
    /// \brief Constructor
    ///
    SVM(void);

    /**
       \brief Copy constructor.
    */
    SVM(const SVM&);

    ///
    /// \brief Destructor
    ///
    virtual ~SVM();

    /**
       \brief Create an untrained copy of SVM.

       \returns A dynamically allocated SVM, which has to be deleted
       by the caller to avoid memory leaks.
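
       A minimal usage sketch (hypothetical caller code):
       \code
       SVM svm;
       SVM* copy = svm.make_classifier();
       // ... train and use *copy ...
       delete copy;   // the caller owns the returned object
       \endcode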
    */
    SVM* make_classifier(void) const;

    ///
    /// @return alpha parameters
    ///
    const utility::Vector& alpha(void) const;

    ///
    /// The C-parameter is the balance term (see train()). A very
    /// large C means the training will focus on getting samples
    /// correctly classified, with a risk of overfitting and poor
    /// generalisation. Too small a C results in a training in which
    /// misclassifications are not penalized. C is weighted with
    /// respect to the class sizes such that \f$ n_+C_+ = n_-C_- \f$,
    /// meaning a misclassification of the smaller group is penalized
    /// harder. This balance is equivalent to the one occurring in
    /// %regression with regularisation, or in ANN training with a
    /// weight-decay term. Default is C set to infinity.
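    ///
    /// For example, with \f$ n_+ = 20 \f$ and \f$ n_- = 80 \f$, the
    /// constraint \f$ n_+C_+ = n_-C_- \f$ gives \f$ C_+ = 4C_- \f$,
    /// i.e., a misclassified sample from the smaller class costs
    /// four times as much.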
    ///
    /// @returns mean of vector \f$ C_i \f$
    ///
    double C(void) const;

    ///
    /// Default is max_epochs set to 100,000.
    ///
    /// @return maximal number of epochs
    ///
    long int max_epochs(void) const;

    /**
       \brief set maximal number of epochs in training
    */
    void max_epochs(long int);

    /**
        The output is calculated as \f$ o_i = \sum \alpha_j t_j K_{ij}
        + bias \f$, where \f$ t \f$ is the target.

        @return output of training samples
    */
    const theplu::yat::utility::Vector& output(void) const;

    /**
       Generate prediction @a predict from @a input. The prediction
       is calculated as the output times the margin, i.e., the
       geometric distance from the decision hyperplane: \f$ \frac{
       \sum \alpha_j t_j K_{ij} + bias}{|w|} \f$. The output has 2
       rows. The first row is for binary target true, and the second
       is for binary target false. The second row is superfluous, as
       it is the first row negated; it exists only to be aligned with
       multi-class SupervisedClassifiers. Each column in @a input and
       @a predict corresponds to a sample to predict. Each row in @a
       input corresponds to a training sample; more exactly, row i in
       @a input should correspond to row i in the KernelLookup that
       was used for training.
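
       A minimal usage sketch (hypothetical names; assumes @a svm has
       been trained and @a test_kernel holds test-versus-training
       kernel values):
       \code
       utility::Matrix result;
       svm.predict(test_kernel, result);
       // row 0 holds the predictions; row 1 is row 0 negated
       \endcode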
    */
    void predict(const KernelLookup& input, utility::Matrix& predict) const;

    /*
    ///
    /// @return output times margin (i.e. geometric distance from
    /// decision hyperplane) from data @a input
    ///
    double predict(const DataLookup1D& input) const;

    ///
    /// @return output times margin from data @a input with
    /// corresponding @a weight
    ///
    double predict(const DataLookupWeighted1D& input) const;
    */

    ///
    /// @brief sets the C-parameter
    ///
    void set_C(const double);

    /**
       Trains the SVM following Platt's SMO, with Keerthi's
       modification. The algorithm minimizes \f$ \frac{1}{2}\sum
       y_iy_j\alpha_i\alpha_j(K_{ij}+\frac{1}{C_i}\delta_{ij}) - \sum
       \alpha_i \f$, which corresponds to minimizing \f$ \sum
       w_i^2+\sum C_i\xi_i^2 \f$.

       The training uses Keerthi's second modification of Platt's
       Sequential Minimal Optimization. All data given is used for
       training.

       @note If the training problem is not linearly separable and C
       is set to infinity, the minimum is located at infinity, and
       thus it will not be reached within the maximal number of
       epochs. More exactly, when the problem is not linearly
       separable, there exists an eigenvector of \f$
       H_{ij}=y_iy_jK_{ij} \f$ within the space defined by the
       conditions \f$ \alpha_i>0 \f$ and \f$ \sum \alpha_i y_i = 0
       \f$. As the eigenvalue is zero in this direction, the
       quadratic term does not contribute to the objective; the
       objective consists only of the linear term and hence has no
       minimum. This problem only occurs when \f$ C \f$ is set to
       infinity, because for a finite \f$ C \f$ all eigenvalues are
       finite. However, for a large \f$ C \f$ (and a training problem
       that is not linearly separable) there exists an eigenvector
       corresponding to a small eigenvalue, which means the minimum
       has moved from infinity to "very far away". In practice this
       also results in the minimum not being reached within the
       maximal number of epochs, and the value of \f$ C \f$ should
       then be decreased.

       \throw std::runtime_error if the maximal number of epochs is
       reached.
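
       A minimal usage sketch (hypothetical names; construction of
       the KernelLookup and Target is omitted):
       \code
       SVM svm;
       svm.set_C(100);              // finite C gives a soft margin
       svm.train(kernel, target);   // may throw std::runtime_error
       const utility::Vector& a = svm.alpha();
       \endcode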
    */
    void train(const KernelLookup& kernel, const Target& target);

  private:
    ///
    /// Calculates bounds for alpha2
    ///
    void bounds(double&, double&) const;

    ///
    /// @brief calculates the bias term
    ///
    void calculate_bias(void);

    ///
    /// Calculates the margin, i.e., the inverse of \f$ |w| \f$
    ///
    void calculate_margin(void);

    ///
    /// Private function choosing which two elements should be
    /// updated. It first checks for the largest violation of the
    /// condition output - target = 0 among support vectors
    /// (alpha!=0). If no violation is found, it checks sequentially
    /// among the other samples. If no violation is found there
    /// either, training is completed.
    ///
    /// @return true if a pair of samples violating the conditions
    /// can be found
    ///
    bool choose(const theplu::yat::utility::Vector&);

    ///
    /// @return kernel modified with diagonal term (soft margin)
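    /// This is \f$ K_{ij} + \delta_{ij}/C_i \f$, matching the
    /// quadratic term in the objective minimized by train().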
    ///
    double kernel_mod(const size_t i, const size_t j) const;

    ///
    /// @return 1 if i belongs to binary target true, else -1
    ///
    int target(size_t i) const;

    utility::Vector alpha_;
    double bias_;
    double C_inverse_;
    // not owned by SVM
    const KernelLookup* kernel_;
    double margin_;
    unsigned long int max_epochs_;
    utility::Vector output_;
    SVindex sample_;
    Target target_;
    double tolerance_;
    bool trained_;

  };

}}} // of namespace classifier, yat, and theplu

#endif