Ignore:
Timestamp:
Mar 18, 2007, 5:00:05 PM (15 years ago)
Author:
Peter
Message:

Modified ROC class to use AUC class in calculation of ROC area. Refs #101

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/yat/statistics/ROC.h

    r779 r821  
    2525*/
    2626
     27#include <algorithm>
     28#include <map>
    2729#include <utility>
    28 #include <vector>
    2930
    3031namespace theplu {
     
    6162    virtual ~ROC(void);
    6263         
     64    /**
     65       Adding a data value to ROC.
     66    */
     67    void add(double value, bool target, double weight=1.0);
     68
     69    /**
     70       The area is defined as \f$ \frac{\sum w^+w^-} {\sum w^+w^-}\f$,
     71       where the sum in the numerator goes over all pairs where value+
     72       is larger than value-. The denominator goes over all pairs.
     73
     74       @return Area under curve.
     75    */
     76    double area(void);
     77
    6378    ///
    6479    /// minimum_size is the threshold for when a normal
     
    6984    u_int& minimum_size(void);
    7085
     86    /**
     87       minimum_size is the threshold for when a normal
     88       approximation is used for the p-value calculation.
     89       
     90       @return const reference to minimum_size
     91    */
     92    const u_int& minimum_size(void) const;
     93
    7194    ///
    72     /// @return number of samples
     95    /// @return sum of weights
    7396    ///
    7497    size_t n(void) const;
    7598
    7699    ///
    77     /// @return number of positive samples (Target.binary()==true)
     100    /// @return sum of weights with negative target
     101    ///
     102    size_t n_neg(void) const;
     103
     104    ///
     105    /// @return sum of weights with positive target
    78106    ///
    79107    size_t n_pos(void) const;
     
    88116    ///the second distribution by a non-zero amount. If the smallest
    89117    ///group size is larger than minimum_size (default = 10), then P
    90     ///is calculated using a normal approximation.  @return the
    91     ///one-sided p-value( if absolute true is used this is equivalent
    92     ///to the two-sided p-value.)
     118    ///is calculated using a normal approximation. 
     119    ///
     120    /// \note Weights should be either zero or unity; otherwise the result
     121    /// of the present implementation is meaningless.
     122    ///
     123    /// @return One-sided p-value.
    93124    ///
    94     double p_value(void) const;
     125    double p_value_one_sided(void) const;
    95126   
    96     /// Function taking \a value, \a target (+1 or -1) and vector
    97     /// defining what samples to use. The score is equivalent to
    98     /// Mann-Whitney statistics.
    99     /// @return the area under the ROC curve. If the area is less
    100     /// than 0.5 and absolute=true, 1-area is returned. Complexity is
    101     /// \f$ N\log N \f$ where \f$ N \f$ is number of samples.
    102     ///
    103     double score(const classifier::Target& target,
    104                  const utility::vector& value);
    105    
    106     /**
    107         Function taking values, target, weight and a vector defining
    108         what samples to use. The area is defines as \f$ \frac{\sum
    109         w^+w^-}{\sum w^+w^-}\f$, where the sum in the numerator goes
    110         over all pairs where value+ is larger than value-. The
    111         denominator goes over all pairs. If target is equal to 1,
    112         sample belonges to class + otherwise sample belongs to class
    113         -. @return wheighted version of area under the ROC curve. If
    114         the area is less than 0.5 and absolute=true, 1-area is
    115         returned. Complexity is \f$ N^2 \f$ where \f$ N \f$ is number
    116         of samples.
     127    /**
     128       @brief Two-sided p-value.
     129
     130       @return min(2*p_value_one_sided, 2-2*p_value_one_sided)
    117131    */
    118     double score(const classifier::Target& target,
    119                  const classifier::DataLookupWeighted1D& value);
     132    double p_value(void) const;
    120133
    121     /**
    122         Function taking values, target, weight and a vector defining
    123         what samples to use. The area is defines as \f$ \frac{\sum
    124         w^+w^-}{\sum w^+w^-}\f$, where the sum in the numerator goes
    125         over all pairs where value+ is larger than value-. The
    126         denominator goes over all pairs. If target is equal to 1,
    127         sample belonges to class + otherwise sample belongs to class
    128         -. @return wheighted version of area under the ROC curve. If
    129         the area is less than 0.5 and absolute=true, 1-area is
    130         returned. Complexity is \f$ N^2 \f$ where \f$ N \f$ is number
    131         of samples.
     134    /**
     135       @brief Set everything to zero
    132136    */
    133     double score(const classifier::Target& target,
    134                  const utility::vector& value,
    135                  const utility::vector& weight);
    136 
    137     ///
    138     /// Function returning true if target is positive (binary()) for
    139     /// the sample with ith lowest data value, so i=0 corresponds to
    140     /// the sample with the lowest data value and i=n()-1 the sample
    141     /// with highest data value.
    142     ///
    143     bool target(const size_t i) const;
     137    void reset(void);
    144138
    145139  private:
    146140   
    147141    /// Implemented as in MatLab 13.1
    148     double get_p_approx(const double) const;
     142    double get_p_approx(double) const;
    149143
    150144    /// Implemented as in MatLab 13.1
     
    153147    double area_;
    154148    u_int minimum_size_;
    155     u_int nof_pos_;
    156     std::vector<std::pair<bool, double> > vec_pair_; // class-value-pair
    157     bool weighted_;
     149    double w_neg_;
     150    double w_pos_;
     151    // <data pair<class, weight> >
     152    std::multimap<double, std::pair<bool, double> > multimap_;
    158153  };
    159 
    160   ///
    161   /// The output operator for the ROC class. The output is an Nx2
    162   /// matrix, where the first column is the sensitivity and second
    163   /// is the specificity.
    164   ///
    165   std::ostream& operator<< (std::ostream& s, const ROC&);
    166154
    167155}}} // of namespace statistics, yat, and theplu
Note: See TracChangeset for help on using the changeset viewer.