source: trunk/yat/classifier/NBC.h @ 1157

Last change on this file since 1157 was 1157, checked in by Markus Ringnér, 14 years ago

Refs #318

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 3.2 KB
Line 
1#ifndef _theplu_yat_classifier_nbc_
2#define _theplu_yat_classifier_nbc_
3
4// $Id: NBC.h 1157 2008-02-26 13:25:19Z markus $
5
6/*
7  Copyright (C) 2006 Jari Häkkinen, Markus Ringnér, Peter Johansson
8  Copyright (C) 2007 Peter Johansson
9
10  This file is part of the yat library, http://trac.thep.lu.se/yat
11
12  The yat library is free software; you can redistribute it and/or
13  modify it under the terms of the GNU General Public License as
14  published by the Free Software Foundation; either version 2 of the
15  License, or (at your option) any later version.
16
17  The yat library is distributed in the hope that it will be useful,
18  but WITHOUT ANY WARRANTY; without even the implied warranty of
19  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20  General Public License for more details.
21
22  You should have received a copy of the GNU General Public License
23  along with this program; if not, write to the Free Software
24  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
25  02111-1307, USA.
26*/
27
28#include "SupervisedClassifier.h"
29#include "yat/utility/Matrix.h"
30
31namespace theplu {
32namespace yat {
33namespace classifier { 
34  // Forward declarations; this header uses these types only by reference, if at all.
35  class DataLookup1D;
36  class DataLookup2D;
37  class MatrixLookup;
38  class MatrixLookupWeighted;
39  class Target;
40
41  /**
42     @brief Naive Bayesian Classifier.
43 
44     Each class is modelled as a multinormal distribution with
45     features being independent: \f$ p(x|c) = \prod_i
46     \frac{1}{\sqrt{2\pi\sigma_i^2}} \exp \left(
47     -\frac{(x_i-m_i)^2}{2\sigma_i^2} \right)\f$
48  */
49  class NBC : public SupervisedClassifier
50  {
51 
52  public:
53    ///
54    /// @brief Default constructor
55    ///
56    NBC(void);
57   
58
59    ///
60    /// @brief Virtual destructor
61    ///
62    virtual ~NBC();
63    /// Returns a pointer to an NBC. NOTE(review): presumably a newly
64    /// allocated classifier owned by the caller - confirm in NBC.cc.
65    NBC* make_classifier(void) const;
66   
67    ///
68    /// Train the classifier using training data and targets.
69    ///
70    /// For each class, mean and variance are estimated for each
71    /// feature (see Averager and AveragerWeighted for details).
72    ///
73    /// If the variance cannot be estimated (only one valid data point)
74    /// for a feature and label, then that feature is ignored for that
75    /// specific label.
76    ///
77    void train(const MatrixLookup&, const Target&);
78
79    ///
80    /// Train the classifier using weighted training data and targets.
81    ///
82    void train(const MatrixLookupWeighted&, const Target&);
83
84
85   
86    /**
87       Each sample (column) in \a data is predicted and predictions
88       are returned in the corresponding column in passed \a res. Each
89       row in \a res corresponds to a class. The prediction is the
90       estimated probability that the sample belongs to class \f$ j \f$
91
92       \f$ P_j = \frac{1}{Z}\prod_i{\frac{1}{\sqrt{2\pi\sigma_i^2}}}
93       \exp(-\frac{w_i(x_i-\mu_i)^2}{2\sigma_i^2})\f$, where \f$ \mu_i
94       \f$ and \f$ \sigma_i^2 \f$ are the estimated mean and variance,
95       respectively. If a \f$ \sigma_i \f$ could not be estimated
96       during training, corresponding factor is set to unity, in other
97       words, that feature is ignored for the prediction of that
98       particular class. Z is chosen such that total probability, \f$
99       \sum P_j \f$, equals unity. If \a data is a MatrixLookup, this is
100       equivalent to using all weights equal to unity.
101    */
102    void predict(const DataLookup2D& data, utility::Matrix& res) const;
103
104
105  private:
106    utility::Matrix centroids_; // presumably the estimated means from train() - TODO confirm
107    utility::Matrix sigma2_; // presumably the estimated variances from train() - TODO confirm
108    // NOTE(review): name suggests a sum of log sigma for index i - confirm in NBC.cc
109    double sum_logsigma(size_t i) const;
110
111
112  };
113 
114}}} // of namespace classifier, yat, and theplu
115
116#endif
Note: See TracBrowser for help on using the repository browser.