source: trunk/yat/classifier/SVM.h

#ifndef _theplu_yat_classifier_svm_
#define _theplu_yat_classifier_svm_

// $Id$

/*
  Copyright (C) 2004, 2005 Jari Häkkinen, Peter Johansson
  Copyright (C) 2006 Jari Häkkinen, Peter Johansson, Markus Ringnér
  Copyright (C) 2007, 2008 Jari Häkkinen, Peter Johansson
  Copyright (C) 2009, 2010 Peter Johansson

  This file is part of the yat library, http://dev.thep.lu.se/yat

  The yat library is free software; you can redistribute it and/or
  modify it under the terms of the GNU General Public License as
  published by the Free Software Foundation; either version 3 of the
  License, or (at your option) any later version.

  The yat library is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with yat. If not, see <http://www.gnu.org/licenses/>.
*/

#include "SVindex.h"
#include "Target.h"
#include "yat/utility/Vector.h"

#include <utility>
#include <vector>

namespace theplu {
namespace yat {
namespace utility {
  class Matrix;
}

namespace classifier {

  class DataLookup1D;
  class DataLookupWeighted1D;
  class KernelLookup;

  /**
     \brief Support Vector Machine
  */
  class SVM
  {

  public:
    ///
    /// \brief Constructor
    ///
    SVM(void);

    /**
       \brief Copy constructor.
    */
    SVM(const SVM&);

    ///
    /// \brief Destructor
    ///
    virtual ~SVM();

    /**
       \brief Create an untrained copy of SVM.

       \returns A dynamically allocated SVM, which has to be deleted
       by the caller to avoid memory leaks.
    */
    SVM* make_classifier(void) const;

    ///
    /// @return alpha parameters
    ///
    const utility::Vector& alpha(void) const;

    ///
    /// The C-parameter is the balance term (see train()). A very
    /// large C means the training will focus on getting samples
    /// correctly classified, with a risk of overfitting and poor
    /// generalisation. A too small C will result in a training in
    /// which misclassifications are hardly penalized. C is weighted
    /// with respect to class size such that \f$ n_+C_+ = n_-C_- \f$,
    /// meaning a misclassification in the smaller group is penalized
    /// harder. This balance is equivalent to the one occurring in
    /// %regression with regularisation, or in ANN training with a
    /// weight-decay term. By default C is set to infinity.
    ///
    /// @return mean of vector \f$ C_i \f$
    ///
    double C(void) const;
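
    // Editorial illustration of the balance above (not part of the
    // original interface): with n_+ = 10 samples in one class and
    // n_- = 40 in the other, n_+C_+ = n_-C_- gives C_+ = 4C_-, so a
    // misclassified sample from the smaller class is penalized four
    // times as hard as one from the larger class.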

    ///
    /// Default is max_epochs set to 100,000.
    ///
    /// @return maximal number of epochs
    ///
    unsigned long int max_epochs(void) const;

    /**
       \brief set maximal number of epochs in training
    */
    void max_epochs(unsigned long int);

    /**
       The output is calculated as \f$ o_i = \sum \alpha_j t_j K_{ij}
       + bias \f$, where \f$ t \f$ is the target.

       @return output of training samples
    */
    const theplu::yat::utility::Vector& output(void) const;
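
    // Hedged sketch of the formula above (editorial; member names are
    // taken from the private section below, and element access via
    // operator() is an assumption, not the actual implementation):
    //
    //   for (size_t i = 0; i < output_.size(); ++i) {
    //     double sum = bias_;
    //     for (size_t j = 0; j < alpha_.size(); ++j)
    //       sum += alpha_(j) * target(j) * (*kernel_)(i, j);
    //     output_(i) = sum;
    //   }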

    /**
       Generate prediction @a predict from @a input. The prediction
       is calculated as the output times the margin, i.e., the
       geometric distance from the decision hyperplane: \f$ \frac{
       \sum \alpha_j t_j K_{ij} + bias}{|w|} \f$. The output has two
       rows: the first row is for binary target true, and the second
       is for binary target false. The second row is superfluous, as
       it is the first row negated; it exists only to be aligned with
       multi-class SupervisedClassifiers. Each column in @a input and
       @a predict corresponds to a sample to predict. Each row in @a
       input corresponds to a training sample; more exactly, row i in
       @a input should correspond to row i in the KernelLookup that
       was used for training.
    */
    void predict(const KernelLookup& input, utility::Matrix& predict) const;
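
    // Hedged usage sketch (editorial; `svm` and `kernel_test` are
    // hypothetical names): assumes `svm` has been trained and that
    // row i of `kernel_test` matches row i of the training
    // KernelLookup, as required above.
    //
    //   theplu::yat::utility::Matrix result;
    //   svm.predict(kernel_test, result);
    //   // result(0, j): signed geometric distance of test sample j
    //   // result(1, j): the same value negated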

    /*
    ///
    /// @return output times margin (i.e. geometric distance from
    /// decision hyperplane) from data @a input
    ///
    double predict(const DataLookup1D& input) const;

    ///
    /// @return output times margin from data @a input with
    /// corresponding @a weight
    ///
    double predict(const DataLookupWeighted1D& input) const;
    */

    /**
       \brief make SVM untrained

       Sets the variable trained to false; other variables are undefined.

       \since New in yat 0.6
     */
    void reset(void);

    ///
    /// @brief sets the C-parameter
    ///
    void set_C(const double);

    /**
       Trains the SVM following Platt's SMO with Keerthi's
       modification, minimizing \f$ \frac{1}{2}\sum
       y_iy_j\alpha_i\alpha_j(K_{ij}+\frac{1}{C_i}\delta_{ij}) - \sum
       \alpha_i\f$, which corresponds to minimizing \f$ \sum
       w_i^2+\sum C_i\xi_i^2 \f$.

       @note If the training problem is not linearly separable and C
       is set to infinity, the minimum is located at infinity, and
       thus it will not be reached within the maximal number of
       epochs. More exactly, when the problem is not linearly
       separable, there exists an eigenvector of \f$
       H_{ij}=y_iy_jK_{ij} \f$ within the space defined by the
       conditions \f$ \alpha_i>0 \f$ and \f$ \sum \alpha_i y_i = 0
       \f$. As the eigenvalue is zero in this direction, the quadratic
       term does not contribute to the objective; the objective then
       consists of the linear term only and hence has no minimum. This
       problem only occurs when \f$ C \f$ is set to infinity, because
       for a finite \f$ C \f$ all eigenvalues are finite. However, for
       a large \f$ C \f$ (and a training problem that is not linearly
       separable) there exists an eigenvector corresponding to a small
       eigenvalue, which means the minimum has moved from infinity to
       "very far away". In practice this will also result in the
       minimum not being reached within the maximal number of epochs,
       and the value of \f$ C \f$ should be decreased.

       The training uses Keerthi's second modification of Platt's
       Sequential Minimal Optimization. The SVM uses all data given
       for training.

       \throw utility::runtime_error if the maximal number of epochs
       is reached.
    */
    void train(const KernelLookup& kernel, const Target& target);
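
    // Hedged usage sketch (editorial; `kernel` and `targets` are
    // hypothetical names for a KernelLookup over the training samples
    // and a Target with their binary labels):
    //
    //   theplu::yat::classifier::SVM svm;
    //   svm.set_C(100);   // finite C; see the note above
    //   try {
    //     svm.train(kernel, targets);
    //   }
    //   catch (const theplu::yat::utility::runtime_error& e) {
    //     // maximal number of epochs reached: decrease C or
    //     // increase max_epochs
    //   }
    //   if (svm.trained())
    //     const theplu::yat::utility::Vector& a = svm.alpha();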

    /**
       \return true if SVM is trained

       \since New in yat 0.6
     */
    bool trained(void) const;

  private:
    ///
    /// Calculates bounds for alpha2
    ///
    void bounds(double&, double&) const;

    ///
    /// @brief calculates the bias term
    ///
    void calculate_bias(void);

    ///
    /// Calculates the margin, which is the inverse of \f$ |w| \f$.
    ///
    void calculate_margin(void);

    ///
    /// Private function choosing which two elements should be
    /// updated. It first checks for the biggest violation of the
    /// condition output - target = 0 among support vectors
    /// (alpha != 0). If no violation is found, it checks sequentially
    /// among the other samples. If there is no violation there
    /// either, training is completed.
    ///
    /// @return true if a pair of samples that violate the conditions
    /// can be found
    ///
    bool choose(const theplu::yat::utility::Vector&);
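
    // Editorial outline of the heuristic above (not verbatim code):
    //   1. scan support vectors (alpha != 0) for the largest violation;
    //   2. if none is found, scan the remaining samples sequentially;
    //   3. if there is still no violation, training is complete and
    //      choose() returns false.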

    ///
    /// @return kernel modified with diagonal term (soft margin)
    ///
    double kernel_mod(const size_t i, const size_t j) const;
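
    // Hedged sketch (editorial, consistent with the objective
    // documented in train(); not necessarily the actual
    // implementation):
    //   kernel_mod(i, j) = K(i, j) + (i == j ? C_inverse_ : 0.0)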

    ///
    /// @return 1 if i belongs to binary target true, else -1
    ///
    int target(size_t i) const;

    utility::Vector alpha_;
    double bias_;
    double C_inverse_;
    // not owned by SVM
    const KernelLookup* kernel_;
    double margin_;
    unsigned long int max_epochs_;
    utility::Vector output_;
    SVindex sample_;
    Target target_;
    double tolerance_;
    bool trained_;

  };

}}} // of namespace classifier, yat, and theplu

#endif