#ifndef _theplu_yat_statistics_roc_ #define _theplu_yat_statistics_roc_ // $Id: ROC.h 1487 2008-09-10 08:41:36Z jari$ /* Copyright (C) 2004 Peter Johansson Copyright (C) 2005, 2006, 2007, 2008 Jari Häkkinen, Peter Johansson This file is part of the yat library, http://dev.thep.lu.se/yat The yat library is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. The yat library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with yat. If not, see . */ #include "yat/classifier/Target.h" #include "yat/utility/iterator_traits.h" #include #include #include namespace theplu { namespace yat { namespace statistics { /// /// @brief Class for Reciever Operating Characteristic. /// /// As the area under an ROC curve is equivalent to Mann-Whitney U /// statistica, this class can be used to perform a Mann-Whitney /// U-test (aka Wilcoxon). /// class ROC { public: /// /// @brief Default constructor /// ROC(void); /// /// @brief The destructor /// virtual ~ROC(void); /** Adding a data value to ROC. \see add(T &o, ForwardIterator first, ForwardIterator last, const classifier::Target &target) */ void add(double value, bool target, double weight=1.0); /** The area is defines as \f$\frac{\sum w^+w^-} {\sum w^+w^-}\f$, where the sum in the numerator goes over all pairs where value+ is larger than value-. The denominator goes over all pairs. @return Area under curve. */ double area(void); /// /// minimum_size is the threshold for when a normal /// approximation is used for the p-value calculation. /// /// @return reference to minimum_size /// unsigned int& minimum_size(void); /** minimum_size is the threshold for when a normal approximation is used for the p-value calculation. @return const reference to minimum_size */ const unsigned int& minimum_size(void) const; /// /// @return sum of weights /// double n(void) const; /// /// @return sum of weights with negative target /// double n_neg(void) const; /// /// @return sum of weights with positive target /// double n_pos(void) const; /// ///Calculates the p-value, i.e. the probability of observing an ///area equally or larger if the null hypothesis is true. If P is ///near zero, this casts doubt on this hypothesis. The null ///hypothesis is that the values from the 2 classes are generated ///from 2 identical distributions. The alternative is that the ///median of the first distribution is shifted from the median of ///the second distribution by a non-zero amount. If the smallest ///group size is larger than minimum_size (default = 10), then P ///is calculated using a normal approximation. /// /// \note Weights should be either zero or unity, else present /// implementation is nonsense. /// /// @return One-sided p-value. /// double p_value_one_sided(void) const; /** @brief Two-sided p-value. @return min(2*p_value_one_sided, 2-2*p_value_one_sided) */ double p_value(void) const; /** @brief Set everything to zero */ void reset(void); private: /// Implemented as in MatLab 13.1 double get_p_approx(double) const; /// Implemented as in MatLab 13.1 double get_p_exact(const double, const double, const double) const; double area_; unsigned int minimum_size_; double w_neg_; double w_pos_; // > std::multimap > multimap_; }; }}} // of namespace statistics, yat, and theplu #endif