#ifndef _theplu_yat_classifier_nbc_
#define _theplu_yat_classifier_nbc_
// $Id: NBC.h 1042 2008-02-06 18:32:30Z peter $
/*
Copyright (C) 2006 Jari Häkkinen, Markus Ringnér, Peter Johansson
Copyright (C) 2007 Peter Johansson
This file is part of the yat library, http://trac.thep.lu.se/yat
The yat library is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The yat library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.
*/
#include "SupervisedClassifier.h"
#include "yat/utility/matrix.h"
namespace theplu {
namespace yat {
namespace classifier {
// Forward declarations. The NBC interface below refers to these types
// only through references, so incomplete types are sufficient here.
// NOTE(review): DataLookup1D is not referenced anywhere in the visible
// part of this header -- confirm whether the declaration is still needed.
class DataLookup1D;
class DataLookup2D;
class MatrixLookup;
class MatrixLookupWeighted;
class Target;
/**
@brief Naive Bayesian Classification.
Each class is modelled as a multinormal distribution with
features being independent: \f$ p(x|c) = \prod_i
\frac{1}{\sqrt{2\pi\sigma_i^2}} \exp \left(
-\frac{(x_i-m_i)^2}{2\sigma_i^2} \right)\f$
*/
class NBC : public SupervisedClassifier
{
public:
  ///
  /// @brief Constructor for unweighted training data.
  ///
  /// @param data training data
  /// @param target target vector (class labels) for the training data
  ///
  NBC(const MatrixLookup& data, const Target& target);

  ///
  /// @brief Constructor for weighted training data.
  ///
  /// No separate weight matrix is passed; the only inputs are the
  /// weighted lookup and the target vector.
  ///
  /// @param data training data with weights
  /// @param target target vector (class labels) for the training data
  ///
  NBC(const MatrixLookupWeighted& data, const Target& target);

  ///
  /// @brief Destructor.
  ///
  virtual ~NBC();

  ///
  /// @return const reference to a DataLookup2D.
  ///
  /// NOTE(review): presumably the training data supplied at
  /// construction (see member data_) -- confirm in the implementation.
  ///
  const DataLookup2D& data() const;

  ///
  /// @brief Create a classifier from the passed data and target.
  ///
  /// @param data data for the new classifier
  /// @param target target vector for the new classifier
  ///
  /// @return pointer to a SupervisedClassifier.
  ///
  /// NOTE(review): presumably a dynamically allocated NBC that the
  /// caller must delete -- ownership is not visible in this header,
  /// confirm in the implementation.
  ///
  SupervisedClassifier* make_classifier(const DataLookup2D& data,
                                        const Target& target) const;

  ///
  /// @brief Train the classifier using the training data.
  ///
  /// For each class, mean and variance are estimated for each
  /// feature (see Averager and AveragerWeighted for details).
  ///
  /// If the variance cannot be estimated (too few data points or all
  /// points identical) for a feature and label, then that feature
  /// is ignored for that specific label.
  ///
  void train();

  /**
     @brief Predict class probabilities for each sample in \a data.

     Each sample (column) in \a data is predicted and the predictions
     are returned in the corresponding column of the passed \a res.
     Each row in \a res corresponds to a class. The prediction is the
     estimated probability that the sample belongs to class \f$ j \f$:
     \f$ P_j = \frac{1}{Z}\prod_i \frac{1}{\sigma_i}
     \exp\left(-\frac{w_i(x_i-\mu_i)^2}{\sigma_i^2}\right)\f$, where
     \f$ \mu_i \f$ and \f$ \sigma_i^2 \f$ are the estimated mean and
     variance, respectively. If \a data is a MatrixLookup, this is
     equivalent to using all weights equal to unity.

     NOTE(review): the exponent is written without the factor 1/2 of
     a normal density, and \f$ Z \f$ is presumably the normalisation
     over classes -- confirm both against the implementation.

     @param data samples to predict, one sample per column
     @param res output matrix receiving the predictions
  */
  void predict(const DataLookup2D& data, utility::matrix& res) const;

private:
  // NOTE(review): centroids_ and sigma2_ presumably hold the mean and
  // variance estimated by train() for each feature and class -- the
  // row/column layout is not visible in this header.
  utility::matrix centroids_;
  utility::matrix sigma2_;

  // Held by reference, not by copy: the referenced lookup must stay
  // alive for as long as this classifier is used.
  const DataLookup2D& data_;

  // NOTE(review): presumably the sum of log(sigma) terms for index i,
  // used as a helper by predict() -- confirm in the implementation.
  double sum_logsigma(size_t i) const;
};
}}} // of namespace classifier, yat, and theplu
#endif