Ignore:
Timestamp:
Jan 7, 2009, 11:53:05 PM (12 years ago)
Author:
Peter
Message:

class KolmogorovSmirnov?:
add (sorted) range (fixes #462)
allow copy and assignment
add a shuffle function
p_value calculation now scales linearly with number of data points and
not N*logN as before

configure.ac: added a test to see if std::multiset::iterator is mutable

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/yat/statistics/KolmogorovSmirnov.h

    r1677 r1701  
    55
    66/*
    7   Copyright (C) 2008 Peter Johansson
     7  Copyright (C) 2008, 2009 Peter Johansson
    88
    99  This file is part of the yat library, http://dev.thep.lu.se/yat
     
    3636  {
    3737  public:
     38    /**
     39       struct used to store data in KolmogorovSmirnov.
     40       
     41       This struct is public to make usage of the add range function
     42       more convenient.
     43
     44       \since New in yat 0.5
     45     */
     46    struct Element
     47    {
     48      /**
     49         \brief default constructor
     50      */
     51      inline Element(void) {};
     52
     53      /**
     54         \brief Constructor
     55       */
     56      inline Element(double x, bool class_label, double w=1.0)
     57        : value(x), label(class_label), weight(w) {};
     58
     59      /**
     60         \brief data value
     61       */
     62      double value;
     63     
     64      /**
     65          bool telling which group the data point belong to
     66      */
     67      bool label;
     68
     69      /**
     70         weight for the data point
     71       */
     72      double weight;
     73
     74      /**
     75         \return true if value is less than rhs.value
     76       */
     77      bool operator<(const Element rhs) const;
     78    };
     79
    3880    /**
    3981       \brief Constructor
     
    4587     */
    4688    void add(double value, bool class_label, double weight=1.0);
     89
     90    /**
     91       \brief add a range
     92       
     93       value_type of \a ForwardIterator must be convertible to
     94       KolmogorovSmirnov::Element
     95       
     96       Insertion takes typically N*log(N). However, if range is
     97       sorted, insertion takes linear time. A common use case of this
     98       function is when calculating several KS statistics on a data
     99       set (values and weights) with different class labels.
     100
     101       \since New in yat 0.5
     102    */
     103    template <typename ForwardIterator>
     104    void add(ForwardIterator first, ForwardIterator last);
    47105
    48106    /**
     
    69127       and calculate how often a score equal or larger than score() is
    70128       obtained.
     129
     130       \see shuffle
    71131    */
    72132    double p_value(size_t perm) const;
     
    86146
    87147    /**
     148       \brief shuffle class labels
     149
     150       This is equivalent to reset and re-add values with shuffled
     151       class labels.
     152
     153       \since New in yat 0.5
     154     */
     155    void shuffle(void);
     156
     157    /**
    88158       Same as score() but keeping the sign, in other words,
    89159       abs(signed_score())==score()
     
    95165  private:
    96166    void scores(std::vector<double>&) const;
    97 
     167    // add weights to sum_w1 and sum_w2 respectively depending on
     168    // label in element.
     169    template <typename ForwardIterator>
     170    void add_sum_w(ForwardIterator first, ForwardIterator last);
     171   
    98172    mutable bool cached_;
    99173    mutable double score_;
    100     typedef std::pair<std::pair<double,double>, bool> trip;
    101     typedef std::multiset<trip> data_w;
     174    typedef std::multiset<Element> data_w;
    102175    data_w data_;
    103176    double sum_w1_;
     
    106179    friend std::ostream& operator<<(std::ostream&, const KolmogorovSmirnov&);
    107180
    108     // Copy not allowed
    109     KolmogorovSmirnov(const KolmogorovSmirnov&);
    110     KolmogorovSmirnov& operator=(const KolmogorovSmirnov&);
     181    // using compiler generated copy and assignment
     182    //KolmogorovSmirnov(const KolmogorovSmirnov&);
     183    //KolmogorovSmirnov& operator=(const KolmogorovSmirnov&);
    111184  };
    112185
     
    117190
    118191
     192  //  template implementations
     193
     194  template <typename ForwardIterator>
     195  void KolmogorovSmirnov::add(ForwardIterator first, ForwardIterator last)
     196  {
     197    ForwardIterator iter(first);
     198    typename data_w::const_iterator hint(data_.begin());
     199    for ( ; iter!=last; ++iter)
     200      if (iter->weight) // ignore data points with zero weight
     201        hint = data_.insert(hint, *iter);
     202    add_sum_w(first, last);
     203    cached_=false;
     204  }
     205
     206
     207  template <typename ForwardIterator>
     208  void KolmogorovSmirnov::add_sum_w(ForwardIterator first,
     209                                    ForwardIterator last)
     210  {
     211    while (first!=last) {
     212      if (first->label)
     213        sum_w1_ += first->weight;
     214      else
     215        sum_w2_ += first->weight;
     216      ++first;
     217    }
     218  }
     219
    119220}}} // of namespace theplu yat statistics
    120221
Note: See TracChangeset for help on using the changeset viewer.