Changeset 4036


Ignore:
Timestamp:
Jan 24, 2021, 2:02:57 AM (2 years ago)
Author:
Peter
Message:

closes #963. New class PercentileConfidenceInterval?

Location:
trunk
Files:
1 added
4 edited

Legend:

Unmodified
Added
Removed
  • trunk/test/Makefile.am

    r4035 r4036  
    135135
    136136# tests not passing through yet
    137 XFAIL_TESTS = test/percentile_confidence_interval.test
     137XFAIL_TESTS =
    138138
    139139DISTRIBUTED_TESTS = \
  • trunk/test/percentile_confidence_interval.cc

    r4035 r4036  
    2929#include <yat/random/copy_k_of_n.h>
    3030
     31#include <gsl/gsl_cdf.h>
     32#include <gsl/gsl_randist.h>
     33
    3134#include <set>
    3235
     
    4043  for (size_t i=0; i<x.size(); ++i)
    4144    x[i] = i;
    42   double m = statistics::median(x.begin(), x.end());
    43   suite.out() << "true median: " << m << "\n";
     45  double p = 25;
     46  double q1 = statistics::percentile2(x.begin(), x.end(), p, true);
     47  suite.out() << "true 1st quartile: " << q1 << "\n";
     48
     49  size_t N = 100;
     50  double alpha = 0.05;
     51
     52  double left_tail = 0;
     53  double right_tail = 0;
     54  for (size_t k=0; k<=N; ++k) {
     55    double cdf = gsl_cdf_binomial_P(k, 0.01*p, N);
     56    suite.out() << k << "\t" << gsl_ran_binomial_pdf(k, 0.01*p, N)
     57                << "\t" << cdf << "\n";
     58    if (cdf <= alpha/2) {
     59      left_tail = cdf;
     60    }
     61    else if (cdf >= 1.0 - alpha/2) {
     62      right_tail = cdf;
     63      break;
     64    }
     65  }
     66  suite.out() << "left tail: " << left_tail << "\n";
     67  suite.out() << "right tail: " << 1.0-right_tail << "\n";
    4468
    4569  statistics::Averager lower;
    4670  statistics::Averager inside;
    4771  statistics::Averager upper;
    48   double alpha = 0.95;
     72  statistics::PercentileConfidenceInterval interval(p, 1.0-alpha);
    4973  for (size_t epoch=0; epoch<10000; ++epoch) {
    50     std::vector<double> y(100);
    51     random::copy_k_of_n(x.begin(), y.size(), x.size(), y.begin());
    52     statistics::PercentileConfidenceInterval
    53       ci(y.begin(), y.end(), 50.0, 100*alpha);
    54     if (m < ci.lower()) {
     74    std::vector<double> y(N);
     75    random::copy_k_of_n(x.begin(), N, x.size(), y.begin());
     76    interval(y.begin(), y.end(), true);
     77    if (q1 < interval.lower()) {
    5578      lower.add(1.0);
    5679      upper.add(0.0);
    5780      inside.add(0.0);
    5881    }
    59     else if (m > ci.upper()) {
     82    else if (q1 > interval.upper()) {
    6083      lower.add(0.0);
    6184      upper.add(1.0);
     
    7194              << 100*inside.mean() << "%\t"
    7295              << 100*upper.mean() << "%\n";
    73   double slack = 0.005;
    74   if (!suite.add(suite.equal_fix(lower.mean(), (1.0-alpha)/2, slack)))
     96  double slack = 0.002;
     97  if (!suite.add(suite.equal_fix(lower.mean(), left_tail, slack)))
    7598    suite.err() << "incorrect inside mean\n";
    76   if (!suite.add(suite.equal_fix(inside.mean(), alpha, 2*slack)))
     99  if (!suite.add(suite.equal_fix(inside.mean(), right_tail-left_tail, 2*slack)))
    77100    suite.err() << "incorrect inside mean\n";
    78   if (!suite.add(suite.equal_fix(upper.mean(), (1.0-alpha)/2, slack)))
     101  if (!suite.add(suite.equal_fix(upper.mean(), 1.0-right_tail, slack)))
    79102    suite.err() << "incorrect upper mean\n";
    80103  return suite.return_value();
  • trunk/yat/statistics/Makefile.am

    r4035 r4036  
    3636  yat/statistics/LikelihoodRatioTestBinomial.cc \
    3737  yat/statistics/Pearson.cc \
    38   yat/statistics/PearsonCorrelation.cc yat/statistics/Percentiler.cc \
     38  yat/statistics/PearsonCorrelation.cc \
     39  yat/statistics/PercentileConfidenceInterval.cc \
     40  yat/statistics/Percentiler.cc \
    3941  yat/statistics/ROC.cc \
    4042  yat/statistics/SAMScore.cc yat/statistics/Score.cc \
  • trunk/yat/statistics/PercentileConfidenceInterval.h

    r4035 r4036  
    2828#include <boost/iterator/iterator_concepts.hpp>
    2929
     30#include <boost/math/distributions/binomial.hpp>
     31
    3032#include <algorithm>
    3133#include <vector>
     
    3638
    3739  /**
     40     Class calculates the confidence interval. It follows the method
     41     suggested by Martin Bland in An Introduction to Medical
     42     Statistics and <a
     43     href=\"https://www-users.york.ac.uk/~mb55/intro/cicent.htm\">here</a>,
     44     but uses the exact binomial distribution instead of a
     45     large-sample normal approximation.
     46
     47     You need to load the class with data via the operator() before
     48     the interval is available via the lower(void) and upper(void
     49     functions.
     50
     51     A weighted input is currently not supported.
     52
    3853     \since New in yat 0.19
    3954   */
     
    4257  public:
    4358    /**
     59       \param k The kth percentile, for example, 50 for median
     60       \param level Confidence level, for example, 0.95 for 95%
     61       confidence interval
     62     */
     63    PercentileConfidenceInterval(double k, double level);
     64
     65
     66    /**
    4467       \param first First element in range
    4568       \param last One past element in range
    46        \param k The kth percentile, for example, 50 for median
    47        \param alpha Level of confidence, for example, 0.95 for 95%
    48        confidence interval
    4969       \param sorted if \c true the range is assumed to be sorted;
    5070       otherwise the range is copied, the copy sorted, before the
     
    5777     */
    5878    template<typename RandomAccessIterator>
    59     PercentileConfidenceInterval(RandomAccessIterator first,
    60                                  RandomAccessIterator last,
    61                                  double k, double alpha,
    62                                  bool sorted=false)
    63       : alpha_(alpha), k_(k)
     79    void operator()(RandomAccessIterator first, RandomAccessIterator last,
     80                    bool sorted=false)
    6481    {
    6582      typedef RandomAccessIterator T;
     
    6885      BOOST_CONCEPT_ASSERT((boost_concepts::ReadableIterator<T>));
    6986      BOOST_CONCEPT_ASSERT((boost::Convertible<value_type, double>));
    70 
     87      prepare(last - first);
    7188      process(first, last, sorted);
    7289    }
     
    7592       \return alpha, for example 95 for 95% confidence interval
    7693     */
    77     double alpha(void) const { return alpha_; }
     94    double alpha(void) const;
    7895
    7996    /**
    8097       \return lower confidence interval
    8198     */
    82     double lower(void) const { return lower_; }
     99    double lower(void) const;
    83100
    84101    /**
    85102       For example, 50, for median.
    86103     */
    87     double k(void) const { return k_; }
     104    double k(void) const;
    88105
    89106    /**
    90107       \return upper confidence interval
    91108     */
    92     double upper(void) const { return upper_; }
     109    double upper(void) const;
    93110
    94111  private:
    95112    double alpha_;
    96113    double k_;
     114    size_t n_;
     115    size_t lower_index_;
    97116    double lower_;
     117    size_t upper_index_;
    98118    double upper_;
     119
     120    void prepare(size_t n);
    99121
    100122    template<typename RandomAccessIterator>
     
    106128        std::sort(v.begin(), v.end());
    107129        process(v.begin(), v.end(), true);
     130        return;
    108131      }
    109 
    110       // number of observations smaller than the percentile is an
    111       // obervatioon from a Binomal distribution, Bin(n, p), where p =
    112       // 0.01*perc_.
    113 
    114       size_t n = last - first;
    115       size_t lower_idx = 0;
    116       size_t upper_idx = n - 1;
    117       lower_ = first[lower_idx];
    118       upper_ = first[upper_idx];
     132      lower_ = first[lower_index_];
     133      upper_ = first[upper_index_];
    119134    }
    120135
Note: See TracChangeset for help on using the changeset viewer.