Changeset 589


Ignore:
Timestamp:
Aug 24, 2006, 1:08:40 PM (15 years ago)
Author:
Peter
Message:

closes #79 and cleaned up code

Location:
trunk/c++_tools/statistics
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • trunk/c++_tools/statistics/AveragerWeighted.h

    r582 r589  
    2525  /// the more precise it is assumed to be, or more formally the
    2626  /// weight is proportional to the reciprocal variance
    27   /// \f$ \sigma_i^2 = \frac{\sigma^2}{w_i} \f$. 2) Probablity weights
     27  /// \f$ \sigma_i^2 = \frac{\sigma^2}{w_i} \f$. 2) Probability weights
    2828  /// are used for the situation when calculating averages over a
    2929  /// distribution \f$ f \f$ , but sampling from a distribution \f$ f'
     
    8585 
    8686    ///
     87    /// @brief Weighted version of number of data points. If all
     88    /// weights are equal, the weighted version is identical to the
     89    /// unweighted version. Adding a data point with zero weight
     90    /// does not change n(). The calculated value is never larger
     91    /// than the actual number of data points added to the object.
     92    ///
    8793    /// @return \f$ \frac{\left(\sum w_i\right)^2}{\sum w_i^2} \f$
    8894    ///
  • trunk/c++_tools/statistics/ROC.h

    r509 r589  
    1616  /// Class for ROC (Receiver Operating Characteristic).
    1717  ///   
     18  /// As the area under an ROC curve is equivalent to the Mann-Whitney
     19  /// U statistic, this class can be used to perform a Mann-Whitney
     20  /// U-test (aka Wilcoxon rank-sum test).
     21  ///
    1822  class ROC : public Score
    1923  {
  • trunk/c++_tools/statistics/tScore.cc

    r532 r589  
    3535    double s2=(positive.sum_xx_centered()+negative.sum_xx_centered())/dof_;
    3636
    37     t_=diff/sqrt(s2*(1.0/positive.n()+1.0/negative.n()));
     37    t_=diff/sqrt(s2/positive.n()+s2/negative.n());
    3838    if (t_<0 && absolute_)
    3939      t_=-t_;
     
    5757    }
    5858    double diff = positive.mean() - negative.mean();
    59     dof_=positive.sum_w()*positive.sum_w()/positive.sum_ww() +
    60       negative.sum_w()*negative.sum_w()/negative.sum_ww();
     59    dof_=positive.n()+negative.n()-2;
    6160    double s2=(positive.sum_xx_centered()+negative.sum_xx_centered())/dof_;
    62     t_=diff/sqrt(s2*(1.0/(positive.sum_w()*positive.sum_w()/positive.sum_ww())+
    63                      1.0/(negative.sum_w()*negative.sum_w()/negative.sum_ww())));
     61    t_=diff/sqrt(s2/positive.n()+s2/(negative.n()));
    6462    if (t_<0 && absolute_)
    6563      t_=-t_;
  • trunk/c++_tools/statistics/tScore.h

    r532 r589  
    1818  /// Class for Fisher's t-test.
    1919  ///   
     20  /// See <a href="http://en.wikipedia.org/wiki/Student's_t-test">
     21  /// http://en.wikipedia.org/wiki/Student's_t-test</a> for more
     22  /// details on the t-test.
     23  ///
    2024  class tScore : public Score
    2125  {
     
    3034    /// Calculates the value of t-score, i.e. the ratio between
    3135    /// difference in mean and standard deviation of this
    32     /// difference. \f$ \frac{ \vert \frac{1}{n_x}\sum x_i -
    33     /// \frac{1}{n_y}\sum y_i \vert } {\frac{\sum (x_i-m_x)^2 + \sum
    34     /// (y_i-m_y)^2}{n_x-1+n_y-1}} \f$
     36    /// difference. \f$ t = \frac{ m_x - m_y }
     37    /// {\sqrt{\frac{s^2}{n_x}+\frac{s^2}{n_y}}} \f$ where \f$ m \f$ is the
     38    /// mean, \f$ n \f$ is the number of data points and \f$ s^2 =
     39    /// \frac{ \sum_i (x_i-m_x)^2 + \sum_i (y_i-m_y)^2 }{ n_x + n_y -
     40    /// 2 } \f$.
    3541    ///
    36     /// @return t-score if absolute=true
    37     /// absolute value of t-score is returned
     42    /// @return t-score; if absolute=true, the absolute value of the
     43    /// t-score is returned
    3844    ///
    3945    double score(const classifier::Target& target,
     
    4147
    4248    ///
    43     /// Weighted version of t-Score @return t-score if absolute=true
    44     /// absolute value of t-score is returned.
     49    /// Calculates the weighted t-score, i.e. the ratio between
     50    /// difference in mean and standard deviation of this
     51    /// difference. \f$ t = \frac{ m_x - m_y } {
     52    /// \sqrt{\frac{s^2}{n_x}+\frac{s^2}{n_y}} } \f$ where \f$ m \f$ is the
     53    /// weighted mean, \f$ n \f$ is the weighted version of number of data
     54    /// points and \f$ s^2 \f$ is an estimation of the variance \f$ s^2
     55    /// = \frac{ \sum_i w_i(x_i-m_x)^2 + \sum_i w_i(y_i-m_y)^2 }{ n_x
     56    /// + n_y - 2 } \f$. See AveragerWeighted for details.
    4557    ///
    46     /// @todo document
     58    /// @return t-score; if absolute=true, the absolute value of the
     59    /// t-score is returned
    4760    ///
    4861    double score(const classifier::Target& target,
     
    5164
    5265    ///
    53     ///Calculates the p-value, i.e. the probability of observing a
    54     ///t-score equally or larger if the null hypothesis is true. If P
    55     ///is near zero, this casts doubt on this hypothesis. The null
    56     ///hypothesis is ...  @return the one-sided p-value( if
    57     ///absolute=true is used the two-sided p-value)
     66    /// Calculates the p-value, i.e. the probability of observing a
     67    /// t-score equal to or larger if the null hypothesis is true. If P
     68    /// is near zero, this casts doubt on this hypothesis. The null
     69    /// hypothesis is that the means of the two distributions are
     70    /// equal. Assumptions for this test are that the two distributions
     71    /// are normal distributions with equal variance. The latter
     72    /// assumption is dropped in Welch's t-test.
     73    ///
     74    /// @return the one-sided p-value (if absolute=true is used,
     75    /// the two-sided p-value)
    5876    ///
    5977    double p_value() const;
Note: See TracChangeset for help on using the changeset viewer.