Changeset 718 for trunk/yat/statistics


Ignore:
Timestamp:
Dec 26, 2006, 10:56:26 AM (15 years ago)
Author:
Jari Häkkinen
Message:

Addresses #170.

Location:
trunk/yat/statistics
Files:
17 edited

Legend:

Unmodified
Added
Removed
  • trunk/yat/statistics/Averager.cc

    r703 r718  
    5050  }
    5151
     52  double Averager::cv(void) const
     53  {
     54    return x_ ? std()/mean() : 0;
     55  }
     56
     57  double Averager::mean(void) const
     58  {
     59    return n_ ? x_/n_ : 0;
     60  }
     61
     62  u_long Averager::n(void) const
     63  {
     64    return n_;
     65  }
     66
    5267  void Averager::rescale(double a)
    5368  {
     
    6075    n_=0;
    6176    x_=xx_=0.0;
     77  }
     78
     79  double Averager::standard_error(void) const
     80  {
     81    return sqrt(variance()/n_);
     82  }
     83
     84  double Averager::std(void) const
     85  {
     86    return sqrt(variance());
     87  }
     88
     89  double Averager::std(double m) const
     90  {
     91    return sqrt(variance(m));
     92  }
     93
     94  double Averager::sum_x(void)  const
     95  {
     96    return x_;
     97  }
     98
     99  double Averager::sum_xx(void) const
     100  {
     101    return xx_;
     102  }
     103
     104  double Averager::sum_xx_centered(void)  const
     105  {
     106    return xx_-x_*x_/n_;
     107  }
     108
     109  double Averager::variance(double m) const
     110  {
     111    return n_ ? (xx_ - 2*m*x_ + m*m*n()) /n_ : 0;
     112  }
     113
     114  double Averager::variance(void) const
     115  {
     116    return n_>1 ? sum_xx_centered()/n_ : 0;
     117  }
     118
     119  double Averager::variance_unbiased(void) const
     120  {
     121    return (n_>1) ? sum_xx_centered()/(n_-1) : 0;
    62122  }
    63123
  • trunk/yat/statistics/Averager.h

    r704 r718  
    7272    ///
    7373    template <typename T>
    74     void  add_values(const T& v, u_long n=1);
     74    void add_values(const T& v, u_long n=1);
    7575
    7676    /**
     
    8282       @return standard deviation divided by mean.
    8383    */
    84     inline double cv(void) const { return x_ ? std()/mean() : 0; }
     84    double cv(void) const;
    8585
    8686    ///
    8787    /// @return Mean of presented data, \f$ \frac{1}{n}\sum x_i \f$
    8888    ///
    89     inline double mean(void) const { return n_ ? x_/n_ : 0; }
     89    double mean(void) const;
    9090
    9191    ///
    9292    /// @return Number of data points
    9393    ///
    94     inline u_long n(void) const { return n_; }
     94    u_long n(void) const;
    9595
    9696    ///
     
    106106    /// \f$ \sqrt{variance()/n} \f$
    107107    ///
    108     inline double standard_error(void) const { return sqrt(variance()/n_); }
     108    double standard_error(void) const;
    109109
    110110    ///
     
    114114    /// @return The standard deviation, root of the variance().
    115115    ///
    116     inline double std(void) const { return sqrt(variance()); }
     116    double std(void) const;
    117117
    118118    ///
     
    122122    /// @return Standard deviation around \a m, root of the variance(m).
    123123    ///
    124     inline double std(double m) const { return sqrt(variance(m)); }
     124    double std(double m) const;
    125125
    126126    ///
    127127    /// @return The sum of x
    128128    ///
    129     inline double sum_x(void) const { return x_; }
     129    double sum_x(void)  const;
    130130
    131131    ///
    132132    /// @return The sum of squares
    133133    ///
    134     inline double sum_xx(void) const { return xx_; }
     134    double sum_xx(void) const;
    135135
    136136    ///
    137137    /// @return \f$ \sum_i (x_i-m)^2 \f$
    138138    ///
    139     inline double sum_xx_centered(void) const { return xx_-x_*x_/n_; }
     139    double sum_xx_centered(void)  const;
    140140
    141141    ///
     
    147147    /// @return Variance when the mean is known to be \a m.
    148148    ///
    149     inline double variance(double m) const
    150     { return n_ ? (xx_ - 2*m*x_ + m*m*n()) /n_ : 0; }
     149    double variance(double m) const;
    151150
    152151    ///
     
    158157    /// @return Estimation of variance
    159158    ///
    160     inline double variance(void) const
    161     { return n_>1 ? sum_xx_centered()/n_ : 0; }
     159    double variance(void) const;
    162160
    163161    ///
     
    169167    /// @return unbiased estimation of variance
    170168    ///
    171     inline double variance_unbiased(void) const
    172     { return (n_>1) ? sum_xx_centered()/(n_-1) : 0; }
     169    double variance_unbiased(void) const;
    173170
    174171    ///
  • trunk/yat/statistics/AveragerPair.cc

    r703 r718  
    4747  }
    4848
     49  double AveragerPair::ccc(void) const
     50  {
     51    return ( (x_.variance() && y_.variance() && (x_.mean()-y_.mean()) ) ?
     52             ((2*covariance()) /
     53              ((x_.variance()+y_.variance()) +
     54               (x_.mean()-y_.mean())*(x_.mean()-y_.mean()))) : 0);
     55  }
     56
     57  double AveragerPair::correlation(void) const
     58  { return ((x_.std()>0 && y_.std()>0) ?
     59            (covariance() / (x_.std()*y_.std()) ) : 0);
     60  }
     61
     62  double AveragerPair::covariance(void) const
     63  {
     64    return (n()>1) ? (xy_ - x_.sum_x()*y_.mean()) / n(): 0;
     65  }
     66
     67  double AveragerPair::mean_xy(void) const
     68  {
     69    return xy_/n();
     70  }
     71
     72  double AveragerPair::msd(void) const
     73  {
     74    return ( x_averager().sum_xx()+y_averager().sum_xx()-2*sum_xy() )/n();
     75  }
     76
     77  unsigned long AveragerPair::n(void) const
     78  {
     79    return x_.n();
     80  }
     81
    4982  void AveragerPair::reset(void)
    5083  {
     
    5891  }
    5992
     93  double AveragerPair::sum_xy(void) const
     94  {
     95    return xy_;
     96  }
     97
     98  double AveragerPair::sum_xy_centered(void) const
     99  {
     100    return xy_-x_.sum_x()*y_.mean();
     101  }
     102
     103  const Averager& AveragerPair::x_averager(void) const
     104  {
     105    return x_;
     106  }
     107
     108  const Averager& AveragerPair::y_averager(void) const
     109  {
     110    return y_;
     111  }
     112
    60113  const AveragerPair& AveragerPair::operator+=(const AveragerPair& a)
    61114  {
  • trunk/yat/statistics/AveragerPair.h

    r705 r718  
    8585    /// @return Concordance correlation coefficient.
    8686    ///
    87     inline double ccc(void) const
    88     { return ( (x_.variance() && y_.variance() && (x_.mean()-y_.mean()) ) ?
    89                  ((2*covariance()) /
    90                   ((x_.variance()+y_.variance()) +
    91                    (x_.mean()-y_.mean())*(x_.mean()-y_.mean()))) : 0); }
    92  
     87    double ccc(void) const;
     88
    9389    ///
    9490    /// \f$ \frac{\sum_i (x_i-m_x)(y_i-m_y)}{\sqrt{\sum_i
     
    9793    /// @return Pearson correlation coefficient.
    9894    ///
    99     inline double correlation(void) const
    100       { return ((x_.std()>0 && y_.std()>0) ?
    101                 (covariance() / (x_.std()*y_.std()) ) : 0); }
     95    double correlation(void) const;
    10296 
    10397    ///
     
    108102    /// @return The covariance.
    109103    ///
    110     inline double covariance(void) const
    111       { return (n()>1) ? (xy_ - x_.sum_x()*y_.mean()) / n(): 0; }
     104    double covariance(void) const;
    112105 
    113106    ///
    114107    /// @return The mean of xy.
    115108    ///
    116     inline double mean_xy(void) const { return xy_/n(); }
     109    double mean_xy(void) const;
    117110
    118111    ///
     
    120113    /// \frac{1}{N} \sum (x-y)^2 \f$
    121114    ///
    122     inline double msd() const
    123     {return ( x_averager().sum_xx()+y_averager().sum_xx()-2*sum_xy() )/n();}
     115    double msd(void) const;
    124116
    125117    ///
    126118    /// @return The number of pair of data points.
    127119    ///
    128     inline unsigned long n(void) const { return x_.n(); }
     120    unsigned long n(void) const;
    129121
    130122    ///
     
    136128    /// @return The sum of xy.
    137129    ///
    138     inline double sum_xy(void) const { return xy_; }
     130    double sum_xy(void) const;
    139131
    140132    ///
    141133    /// @return \f$ \sum_i (x_i-m_x)(y_i-m_y) \f$
    142134    ///
    143     inline double sum_xy_centered(void) const {return xy_-x_.sum_x()*y_.mean();}
     135    double sum_xy_centered(void) const;
    144136
    145137    ///
    146138    /// @return A const reference to the averager object for x.
    147139    ///
    148     inline const Averager& x_averager(void) const { return x_; }
     140    const Averager& x_averager(void) const;
    149141
    150142    ///
    151143    /// @return A const reference to the averager object for y
    152144    ///
    153     inline const Averager& y_averager(void) const { return y_; }
     145    const Averager& y_averager(void) const;
    154146
    155147    ///
  • trunk/yat/statistics/AveragerPairWeighted.cc

    r703 r718  
    6363
    6464  void AveragerPairWeighted::add(const classifier::DataLookupWeighted1D& x,
     65                                 const classifier::DataLookup1D& y)
     66  {
     67    add(y,x);
     68  }
     69
     70
     71  void AveragerPairWeighted::add(const classifier::DataLookupWeighted1D& x,
    6572                                 const classifier::DataLookupWeighted1D& y)
    6673  {
     
    7178
    7279
     80  double AveragerPairWeighted::correlation(void) const
     81  {
     82    return covariance() / ( x_.std()*y_.std() );
     83  }
     84
     85
     86  double AveragerPairWeighted::covariance(void) const
     87  {
     88    return sum_xy_centered()/sum_w();
     89  }
     90
     91
    7392  void AveragerPairWeighted::reset(void)
    7493  {
     
    7695  }
    7796
     97  double AveragerPairWeighted::sum_w(void) const
     98  {
     99    return w_;
     100  }
     101
     102
     103  double AveragerPairWeighted::sum_xy(void) const
     104  {
     105    return wxy_;
     106  }
     107
     108
     109  double AveragerPairWeighted::sum_xy_centered(void) const
     110  {
     111    return sum_xy() - x_.sum_wx()*y_.mean();
     112  }
     113
     114
     115  const AveragerWeighted& AveragerPairWeighted::x_averager(void) const
     116  {
     117    return x_;
     118  }
     119
     120
     121  const AveragerWeighted& AveragerPairWeighted::y_averager(void) const
     122  {
     123    return y_;
     124  }
     125
    78126}}} // of namespace statistics, yat, and theplu
  • trunk/yat/statistics/AveragerPairWeighted.h

    r703 r718  
    8181    /// @a y will be treated as having all weights equal to unity
    8282    ///
    83     inline void add(const classifier::DataLookupWeighted1D& x,
    84                     const classifier::DataLookup1D& y){ add(y,x); }
     83    void add(const classifier::DataLookupWeighted1D& x,
     84             const classifier::DataLookup1D& y);
    8585
    8686    ///
     
    112112    /// calculated as \f$ m_x = \frac {\sum w_xw_yx}{\sum w} \f$
    113113    ///
    114     inline double correlation(void) const
    115     { return covariance() / ( x_.std()*y_.std() ); }
     114    double correlation(void) const;
    116115 
    117116    ///
     
    119118    /// is calculated as \f$ m_x = \frac {\sum w_xw_yx}{\sum w} \f$
    120119    ///
    121     inline double covariance(void) const { return sum_xy_centered()/sum_w(); }
     120    double covariance(void) const;
    122121
    123122    ///
     
    129128    /// @return \f$ \sum w_xw_y \f$
    130129    ///
    131     inline double sum_w(void) const { return w_; }
     130    double sum_w(void) const;
    132131
    133132    ///
    134133    /// @return \f$ \sum w_xw_yxy \f$
    135134    ///
    136     inline double sum_xy(void) const { return wxy_; }
     135    double sum_xy(void) const;
    137136
    138137    ///
     
    140139    /// \f$ m_x = \frac {\sum w_xw_yx}{\sum w} \f$
    141140    ///
    142     inline double sum_xy_centered(void) const
    143     { return sum_xy() - x_.sum_wx()*y_.mean(); }
     141    double sum_xy_centered(void) const;
    144142
    145143    ///
     
    148146    /// @return AveragerWeighted for x
    149147    ///
    150     inline const AveragerWeighted& x_averager(void) const { return x_; }
     148    const AveragerWeighted& x_averager(void) const;
    151149
    152150    ///
     
    155153    /// @return AveragerWeighted for y
    156154    ///
    157     inline const AveragerWeighted& y_averager(void) const { return y_; }
     155    const AveragerWeighted& y_averager(void) const;
    158156
    159157  private:
  • trunk/yat/statistics/AveragerWeighted.cc

    r703 r718  
    2323
    2424#include "AveragerWeighted.h"
     25#include "Averager.h"
    2526
    2627namespace theplu {
     
    4849  }
    4950
     51  double AveragerWeighted::mean(void) const
     52  {
     53    return sum_w() ? sum_wx()/sum_w() : 0;
     54  }
     55
     56  double AveragerWeighted::n(void) const
     57  {
     58    return sum_w()*sum_w()/sum_ww();
     59  }
     60
    5061  void AveragerWeighted::rescale(double a)
    5162  {
     
    6071  }
    6172
     73  double AveragerWeighted::std(void) const
     74  {
     75    return sqrt(variance());
     76  }
     77
     78  double AveragerWeighted::standard_error(void) const
     79  {
     80    return sqrt(sum_ww()/(sum_w()*sum_w()*sum_w()) * sum_xx_centered());
     81  }
     82
     83  double AveragerWeighted::sum_w(void)  const
     84  {
     85    return w_.sum_x();
     86  }
     87
     88  double AveragerWeighted::sum_ww(void) const
     89  {
     90    return w_.sum_xx();
     91  }
     92
     93  double AveragerWeighted::sum_wwx(void) const
     94  {
     95    return wwx_;
     96  }
     97
     98  double AveragerWeighted::sum_wwxx(void) const
     99  {
     100    return wx_.sum_xx();
     101  }
     102
     103  double AveragerWeighted::sum_wx(void) const
     104  {
     105    return wx_.sum_x();
     106  }
     107
     108  double AveragerWeighted::sum_wxx(void) const
     109  {
     110    return wxx_;
     111  }
     112
     113  double AveragerWeighted::sum_xx_centered(void) const
     114  {
     115    return sum_wxx() - mean()*mean()*sum_w();
     116  }
     117
     118  double AveragerWeighted::variance(const double m) const
     119  {
     120    return (sum_wxx()-2*m*sum_wx())/sum_w()+m*m;
     121  }
     122
     123  double AveragerWeighted::variance(void) const
     124  {
     125    return sum_xx_centered()/sum_w();
     126  }
     127
     128  const Averager& AveragerWeighted::wx(void) const
     129  {
     130    return wx_;
     131  }
     132
     133  const Averager& AveragerWeighted::w(void) const
     134  {
     135    return w_;
     136  }
     137
    62138  const AveragerWeighted& AveragerWeighted::operator+=(const AveragerWeighted& a)
    63139  {
  • trunk/yat/statistics/AveragerWeighted.h

    r703 r718  
    9595    /// @return \f$ \frac{\sum w_ix_i}{\sum w_i} \f$
    9696    ///
    97     inline double mean(void) const { return sum_w() ? sum_wx()/sum_w() : 0; }
     97    double mean(void) const;
    9898
    9999    ///
     
    108108    /// @return \f$ \frac{\left(\sum w_i\right)^2}{\sum w_i^2} \f$
    109109    ///
    110     inline double n(void) const { return sum_w()*sum_w()/sum_ww(); }
     110    double n(void) const;
    111111
    112112    ///
     
    128128    /// @return The standard deviation, root of the variance().
    129129    ///
    130     inline double std(void) const { return sqrt(variance()); }
     130    double std(void) const;
    131131
    132132    ///
     
    141141    /// where \f$ m \f$ is the mean()
    142142    ///
    143     inline double standard_error(void)  const
    144     { return sqrt(sum_ww()/(sum_w()*sum_w()*sum_w()) *
    145                   sum_xx_centered()); }
     143    double standard_error(void) const;
    146144
    147145    ///
     
    150148    /// @return \f$ \sum w_i \f$
    151149    ///
    152     inline double sum_w(void) const
    153     { return w_.sum_x(); }
     150    double sum_w(void) const;
    154151
    155152    ///
    156153    /// @return \f$ \sum w_i^2 \f$
    157154    ///
    158     inline double sum_ww(void)  const
    159     { return w_.sum_xx(); }
     155    double sum_ww(void) const;
    160156
    161157    ///
     
    164160    /// @return weighted sum of x
    165161    ///
    166     inline double sum_wx(void)  const
    167     { return wx_.sum_x(); }
     162    double sum_wx(void) const;
    168163
    169164    ///
    170165    /// @return \f$ \sum_i w_i (x_i-m)^2\f$
    171166    ///
    172     inline double sum_xx_centered(void) const
    173     { return sum_wxx() - mean()*mean()*sum_w(); }
     167    double sum_xx_centered(void) const;
    174168
    175169    /**
     
    179173       @return Variance when the mean is known to be \a m.
    180174    */
    181     inline double variance(const double m) const
    182     { return (sum_wxx()-2*m*sum_wx())/sum_w()+m*m; }
     175    double variance(const double m) const;
    183176
    184177    /**
     
    191184       @return The variance.
    192185    */
    193     inline double variance(void) const
    194     { return sum_xx_centered()/sum_w(); }
     186    double variance(void) const;
    195187
    196188
    197189  private:
    198190    ///
     191    ///  @return \f$ \sum w_i^2x_i \f$
     192    ///
     193    double sum_wwx(void) const;
     194
     195    ///
    199196    ///  @return \f$ \sum w_i^2x_i^2 \f$
    200197    ///
    201     inline double sum_wwxx(void)  const
    202     { return wx_.sum_xx(); }
     198    double sum_wwxx(void) const;
    203199   
    204200    ///
    205     ///  @return \f$ \sum w_i^2x_i \f$
    206     ///
    207     inline double sum_wwx(void) const
    208     { return wwx_; }
    209 
    210     ///
    211201    ///  @return \f$ \sum w_i x_i^2 \f$
    212202    ///
    213     inline double sum_wxx(void) const { return wxx_; }
     203    double sum_wxx(void) const;
     204
     205    const Averager& wx(void) const;
     206    const Averager& w(void) const;
    214207
    215208    ///
     
    222215    double wwx_;
    223216    double wxx_;
    224    
    225     inline Averager wx(void) const { return wx_; }
    226     inline Averager w(void) const { return w_; }
    227217  };
    228218
  • trunk/yat/statistics/Fisher.cc

    r683 r718  
    5959    d =((c_+d_)*(b_+d_)) / N;
    6060  }
     61
     62
     63  u_int& Fisher::minimum_size(void)
     64  {
     65    return minimum_size_;
     66  }
     67
    6168
    6269  double Fisher::oddsratio(const double a,
     
    133140
    134141
    135     double Fisher::score(const classifier::Target& target,
    136                         const utility::vector& value)
     142  double Fisher::score(const classifier::Target& target,
     143                      const utility::vector& value)
    137144  {
    138145    weighted_=false;
     
    186193  }
    187194 
    188     double Fisher::score(const classifier::Target& target,
    189                         const utility::vector& value,
    190                         const utility::vector& weight)
     195  double Fisher::score(const classifier::Target& target,
     196                      const utility::vector& value,
     197                      const utility::vector& weight)
    191198  {
    192199    weighted_=true;
     
    224231  }
    225232
     233
     234  double& Fisher::value_cutoff(void)
     235  {
     236    return value_cutoff_;
     237  }
     238
    226239}}} // of namespace statistics, yat, and theplu
  • trunk/yat/statistics/Fisher.h

    r683 r718  
    8686
    8787    ///
    88     /// Cutoff sets the limit whether a value should go into the left
    89     /// or the right row. @see score
    90     ///
    91     /// @return reference to cutoff for row
    92     ///
    93     inline double& value_cutoff(void) { return value_cutoff_; }
    94 
    95     ///
    9688    /// Calculates the expected values under the null hypothesis.
    9789    /// a' = (a+c)(a+b)/(a+b+c+d)
     
    10597    /// @return reference to minimum_size
    10698    ///
    107     inline u_int& minimum_size(void){ return minimum_size_; } 
     99    u_int& minimum_size(void);
    108100
    109101    ///
     
    166158                 const u_int c, const u_int d);
    167159
    168          
     160    ///
     161    /// Cutoff sets the limit whether a value should go into the left
     162    /// or the right row. @see score
     163    ///
     164    /// @return reference to cutoff for row
     165    ///
     166    double& value_cutoff(void);
     167
    169168  private:
    170169    double oddsratio(const double a, const double b,
  • trunk/yat/statistics/Histogram.cc

    r680 r718  
    3232
    3333
    34 Histogram::Histogram(void)
    35   : xmax_(0), xmin_(0), sum_all_(), sum_histogram_()
    36 {
    37 }
     34  Histogram::Histogram(void)
     35    : xmax_(0), xmin_(0), sum_all_(), sum_histogram_()
     36  {
     37  }
    3838
    3939
    40 
    41 Histogram::Histogram(const Histogram& b)
    42 {
    43   *this=b;
    44 }
     40  Histogram::Histogram(const Histogram& b)
     41  {
     42    *this=b;
     43  }
    4544
    4645
    47 
    48 Histogram::Histogram(const double min, const double max, const size_t n)
    49   : histogram_(std::vector<double>(n,0.0)),
    50     xmax_(max), xmin_(min),
    51     sum_all_(), sum_histogram_()
    52 {
    53 }
     46  Histogram::Histogram(const double min, const double max, const size_t n)
     47    : histogram_(std::vector<double>(n,0.0)),
     48      xmax_(max), xmin_(min),
     49      sum_all_(), sum_histogram_()
     50  {
     51  }
    5452
    5553
    56 
    57 Histogram::~Histogram(void)
    58 {
    59 }
     54  Histogram::~Histogram(void)
     55  {
     56  }
    6057
    6158
     59  int Histogram::add(const double x, const double w)
     60  {
     61    sum_all_.add(x,w);
     62    if (x<xmin_)
     63      return -1;
     64    else if (x>=xmax_)
     65      return 1;
    6266
    63 int Histogram::add(const double x, const double w)
    64 {
    65   sum_all_.add(x,w);
    66   if (x<xmin_)
    67     return -1;
    68   else if (x>=xmax_)
    69     return 1;
    70  
    71   sum_histogram_.add(x,w);
    72   histogram_[bin(x)] += w;
    73   return 0;
    74 }
     67    sum_histogram_.add(x,w);
     68    histogram_[bin(x)] += w;
     69    return 0;
     70  }
    7571
    7672
    77 
    78 void Histogram::normalize(bool choice)
    79 {
    80   double scale_factor;
    81   if (choice)
    82     scale_factor = sum_all_.sum_w();
    83   else
    84     scale_factor = sum_all_.sum_w()*spacing();
    85   for (size_t i=0; i<histogram_.size(); i++)
    86     histogram_[i]/=scale_factor;
    87 
    88 }
     73  const statistics::AveragerWeighted& Histogram::averager_all(void) const
     74  {
     75    return sum_all_;
     76  }
    8977
    9078
    91 void Histogram::reset(void)
    92 {
    93   for (u_int i=0; i<histogram_.size(); i++)
    94     histogram_[i]=0;
    95   sum_all_.reset();
    96   sum_histogram_.reset();
    97 }
     79  const statistics::AveragerWeighted& Histogram::averager_histogram(void) const
     80  {
     81    return sum_histogram_;
     82  }
    9883
    9984
    100 
    101 const Histogram& Histogram::operator=(const Histogram& b)
    102 {
    103   if (this==&b)
    104     return *this;
    105   histogram_=b.histogram_;
    106   xmax_=b.xmax_;
    107   xmin_=b.xmin_;
    108   sum_all_=b.sum_all_;
    109   sum_histogram_=b.sum_histogram_;
    110   return *this;
    111 }
     85  size_t Histogram::bin(double d)
     86  {
     87    return (((d<xmin_) || (d>xmax_)) ? 0 :
     88            static_cast<size_t>(floor((d-xmin_)/spacing() )));
     89  }
    11290
    11391
     92  size_t Histogram::nof_bins(void) const
     93  {
     94    return histogram_.size();
     95  }
    11496
    115 std::ostream& operator<<(std::ostream& s,const Histogram& histogram)
    116 {
    117   s << "# histogram min : " << histogram.xmin() << '\n';
    118   s << "# histogram max : " << histogram.xmax() << '\n';
    119   s << "# number of bins: " << histogram.nof_bins() << '\n';
    120   s << "# nof points in histogram : "
    121     << histogram.averager_histogram().sum_w() << '\n';
    122   s << "# nof points in total:      "
    123     << histogram.averager_all().sum_w() << '\n';
    124   s << "# column 1: center of observation bin\n"
    125     << "# column 2: frequency\n";
    12697
    127   for (u_int i=0; i<histogram.nof_bins(); i++) {
    128     s.width(12);
    129     s << histogram.observation_value(i);
    130     s.width(12);
    131     s << histogram[i] << '\n';
    132   }
     98  void Histogram::normalize(bool choice)
     99  {
     100    double scale_factor;
     101    if (choice)
     102      scale_factor = sum_all_.sum_w();
     103    else
     104      scale_factor = sum_all_.sum_w()*spacing();
     105    for (size_t i=0; i<histogram_.size(); i++)
     106      histogram_[i]/=scale_factor;
     107  }
    133108
    134   return s;
    135 }
     109
     110  double Histogram::observation_value(const size_t k) const
     111  {
     112    return xmin_+spacing()*(k+0.5);
     113  }
     114
     115
     116  void Histogram::reset(void)
     117  {
     118    for (u_int i=0; i<histogram_.size(); i++)
     119      histogram_[i]=0;
     120    sum_all_.reset();
     121    sum_histogram_.reset();
     122  }
     123
     124
     125  double Histogram::spacing(void) const
     126  {
     127    return (xmax_-xmin_)/nof_bins();
     128  }
     129
     130
     131  double Histogram::xmax(void) const
     132  {
     133    return xmax_;
     134  }
     135
     136
     137  double Histogram::xmin(void) const
     138  {
     139    return xmin_;
     140  }
     141
     142
     143  double Histogram::operator[](size_t k) const
     144  {
     145    return histogram_[k];
     146  }
     147
     148
     149  const Histogram& Histogram::operator=(const Histogram& b)
     150  {
     151    if (this==&b)
     152      return *this;
     153    histogram_=b.histogram_;
     154    xmax_=b.xmax_;
     155    xmin_=b.xmin_;
     156    sum_all_=b.sum_all_;
     157    sum_histogram_=b.sum_histogram_;
     158    return *this;
     159  }
     160
     161
     162  std::ostream& operator<<(std::ostream& s,const Histogram& histogram)
     163  {
     164    s << "# histogram min : " << histogram.xmin() << '\n';
     165    s << "# histogram max : " << histogram.xmax() << '\n';
     166    s << "# number of bins: " << histogram.nof_bins() << '\n';
     167    s << "# nof points in histogram : "
     168      << histogram.averager_histogram().sum_w() << '\n';
     169    s << "# nof points in total:      "
     170      << histogram.averager_all().sum_w() << '\n';
     171    s << "# column 1: center of observation bin\n"
     172      << "# column 2: frequency\n";
     173
     174    for (u_int i=0; i<histogram.nof_bins(); i++) {
     175      s.width(12);
     176      s << histogram.observation_value(i);
     177      s.width(12);
     178      s << histogram[i] << '\n';
     179    }
     180
     181    return s;
     182  }
    136183
    137184}}} // of namespace statistics, yat, and theplu
  • trunk/yat/statistics/Histogram.h

    r683 r718  
    8686    /// @return A const reference to an AveragerWeighted object.
    8787    ///
    88     inline const statistics::AveragerWeighted& averager_all(void) const
    89       { return sum_all_; }
     88    const statistics::AveragerWeighted& averager_all(void) const;
    9089
    9190    ///
     
    9897    /// @return A const reference to an AveragerWeighted object.
    9998    ///
    100     inline const statistics::AveragerWeighted& averager_histogram(void) const
    101       { return sum_histogram_; }
     99    const statistics::AveragerWeighted& averager_histogram(void) const;
    102100
    103101    ///
    104102    /// @return The number of bins in the histogram
    105103    ///
    106     inline size_t nof_bins(void) const { return histogram_.size(); }
     104    size_t nof_bins(void) const;
    107105
    108106    ///
     
    140138    /// histogram.
    141139    ///
    142     inline double observation_value(const size_t k) const
    143       { return xmin_+spacing()*(k+0.5); }
     140    double observation_value(const size_t k) const;
    144141
    145142    ///
     
    152149    /// @return The width of the bins in the histogram.
    153150    ///
    154     inline double spacing(void) const { return (xmax_-xmin_)/nof_bins(); }
     151    double spacing(void) const;
    155152
    156153    ///
     
    159156    /// @note The upper boundary value is outside the histogram.
    160157    ///
    161     inline double xmax(void) const { return xmax_; }
     158    double xmax(void) const;
    162159
    163160    ///
     
    166163    /// @note The lower boundary value is inside the histogram.
    167164    ///
    168     inline double xmin(void) const { return xmin_; }
     165    double xmin(void) const;
    169166
    170167    ///
    171168    /// @return The count of bin \a k in the histogram.
    172169    ///
    173     inline double operator[](size_t k) const { return histogram_[k]; }
     170    double operator[](size_t k) const;
    174171
    175172    ///
     
    180177  private:
    181178    // Returns zero if outside boundaries
    182     inline size_t bin(double d)
    183     { return (((d<xmin_) || (d>xmax_)) ? 0 :
    184               static_cast<size_t>(floor((d-xmin_)/spacing() ))); }
     179    size_t bin(double d);
    185180
    186181    std::vector<double> histogram_;
  • trunk/yat/statistics/ROC.cc

    r703 r718  
    7777    }
    7878    return p;
     79  }
     80
     81  u_int& ROC::minimum_size(void)
     82  {
     83    return minimum_size_;
     84  }
     85
     86  size_t ROC::n(void) const
     87  {
     88    return vec_pair_.size();
     89  }
     90
     91  size_t ROC::n_pos(void) const
     92  {
     93    return nof_pos_;
    7994  }
    8095
  • trunk/yat/statistics/ROC.h

    r703 r718  
    5959    virtual ~ROC(void);
    6060         
     61    ///
     62    /// minimum_size is the threshold for when a normal
     63    /// approximation is used for the p-value calculation.
     64    ///
     65    /// @return reference to minimum_size
     66    ///
     67    u_int& minimum_size(void);
     68
     69    ///
     70    /// @return number of samples
     71    ///
     72    size_t n(void) const;
     73
     74    ///
     75    /// @return number of positive samples (Target.binary()==true)
     76    ///
     77    size_t n_pos(void) const;
     78
     79    ///
     80    ///Calculates the p-value, i.e. the probability of observing an
     81    ///area equally or larger if the null hypothesis is true. If P is
     82    ///near zero, this casts doubt on this hypothesis. The null
     83    ///hypothesis is that the values from the 2 classes are generated
     84    ///from 2 identical distributions. The alternative is that the
     85    ///median of the first distribution is shifted from the median of
     86    ///the second distribution by a non-zero amount. If the smallest
     87    ///group size is larger than minimum_size (default = 10), then P
     88    ///is calculated using a normal approximation.  @return the
     89    ///one-sided p-value( if absolute true is used this is equivalent
     90    ///to the two-sided p-value.)
     91    ///
     92    double p_value(void) const;
     93   
    6194    /// Function taking \a value, \a target (+1 or -1) and vector
    6295    /// defining what samples to use. The score is equivalent to
     
    83116    double score(const classifier::Target& target,
    84117                 const classifier::DataLookupWeighted1D& value);
    85        
    86118
    87119    /**
     
    100132                 const utility::vector& value,
    101133                 const utility::vector& weight);
    102        
    103 
    104     ///
    105     ///Calculates the p-value, i.e. the probability of observing an
    106     ///area equally or larger if the null hypothesis is true. If P is
    107     ///near zero, this casts doubt on this hypothesis. The null
    108     ///hypothesis is that the values from the 2 classes are generated
    109     ///from 2 identical distributions. The alternative is that the
    110     ///median of the first distribution is shifted from the median of
    111     ///the second distribution by a non-zero amount. If the smallest
    112     ///group size is larger than minimum_size (default = 10), then P
    113     ///is calculated using a normal approximation.  @return the
    114     ///one-sided p-value( if absolute true is used this is equivalent
    115     ///to the two-sided p-value.)
    116     ///
    117     double p_value(void) const;
    118    
    119     ///
    120     /// minimum_size is the threshold for when a normal
    121     /// approximation is used for the p-value calculation.
    122     ///
    123     /// @return reference to minimum_size
    124     ///
    125     inline u_int& minimum_size(void){ return minimum_size_; } 
    126134
    127135    ///
     
    132140    ///
    133141    bool target(const size_t i) const;
    134 
    135     ///
    136     /// @return number of samples
    137     ///
    138     inline size_t n(void) const { return vec_pair_.size(); }
    139 
    140     ///
    141     /// @return number of positive samples (Target.binary()==true)
    142     ///
    143     inline size_t n_pos(void) const { return nof_pos_; }
    144142
    145143  private:
  • trunk/yat/statistics/Score.cc

    r703 r718  
    4646  }
    4747
     48  void Score::absolute(bool absolute)
     49  {
     50    absolute_=absolute;
     51  }
     52
    4853  double Score::score(const classifier::Target& target,
    4954                      const classifier::DataLookup1D& value,
     
    5762  }
    5863
     64  bool Score::weighted(void) const
     65  {
     66    return weighted_;
     67  }
     68
    5969}}} // of namespace statistics, yat, and theplu
  • trunk/yat/statistics/Score.h

    r703 r718  
    6464    /// @brief Function changing mode of Score
    6565    ///
    66     inline void absolute(bool absolute) {absolute_=absolute;}
     66    void absolute(bool absolute);
    6767
    6868    ///
     
    126126  protected:
    127127    /// return true if method is weighted
    128     inline bool weighted(void) const { return weighted_; }
     128    bool weighted(void) const;
    129129
    130130    /// true if method is absolute, which means if score is below
  • trunk/yat/statistics/utility.h

    r703 r718  
    104104  ///
    105105  template <class T>
    106   inline double median(const std::vector<T>& v, const bool sorted=false)
     106  double median(const std::vector<T>& v, const bool sorted=false)
    107107  { return percentile(v, 50.0, sorted); }
    108108
Note: See TracChangeset for help on using the changeset viewer.