Changeset 179


Ignore:
Timestamp:
Oct 4, 2004, 5:00:48 PM (18 years ago)
Author:
Peter
Message:

modified all the scores to be one-sided OR two-sided

Location:
trunk/src
Files:
1 added
7 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/Pearson.cc

    r149 r179  
    6363    r_ = a.correlation();
    6464    weighted_=false;
    65     r_=abs(r_);
     65    if (r<0 && absolute_)
     66      r_=-r_;
     67     
    6668    return r_;
    6769  }
     
    9799    weighted_=true;
    98100
    99     r_=abs(r_);
     101    if (r<0 && absolute_)
     102      r_=-r_;
     103     
    100104    return r_;
    101105  }
  • trunk/src/Pearson.h

    r149 r179  
    3939   
    4040    ///
    41     /// \f$ \frac{\vert \sum_i(x_i-\bar{x})(y_i-\bar{y})\vert }{\sqrt{\sum_i
    42     /// (x_i-\bar{x})^2\sum_i (x_i-\bar{x})^2}}\f$.
    43     /// @return absolute value of Pearson correlation.
     41    /// \f$ \frac{\vert \sum_i(x_i-\bar{x})(y_i-\bar{y})\vert
     42    /// }{\sqrt{\sum_i (x_i-\bar{x})^2\sum_i (y_i-\bar{y})^2}}\f$.
     43    /// @return Pearson correlation; if absolute=true, the absolute
     44    /// value of the Pearson correlation is returned.
    4445    ///
    4546    double score(const gslapi::vector&, const gslapi::vector&, 
     
    7374                 const std::vector<size_t>& = std::vector<size_t>());
    7475   
     76    ///
     77    /// @return 1 if data correlates positively with target, otherwise -1
     78    ///
     79    inline int sign(void) {return (r_>0) ? 1 : -1; } 
     80   
     81         
    7582    ///
    7683    /// The p-value is the probability of getting a correlation as
  • trunk/src/ROC.cc

    r159 r179  
    1818
    1919  ROC::ROC()
    20     : Score(), area_(-1), data_(), minimum_size_(10), nof_pos_(0), target_(),
     20    : Score(), area_(-1), minimum_size_(10), nof_pos_(0),
    2121      train_set_(std::vector<size_t>()),
    2222      value_(std::vector<std::pair<double, double> >()),
     
    9393   
    9494    //Returning score larger 0.5 that you get by random
    95     if (area_>0.5)
    96       return area_;
    97     else
    98       return 1.0-area_;
     95    if (area_<0.5 && absolute_)
     96      area_=1.0-area_;
     97   
     98    return area_;
    9999  }
    100100
     
    130130    }
    131131    area_/=max_area;
    132     if (area_>0.5)
    133       return area_;
    134     else
    135       return 1-area_;
     132   
     133    if (area_<0.5 && absolute_)
     134      area_=1.0-area_;
     135   
     136    return area_;
    136137  }
    137138
  • trunk/src/ROC.h

    r160 r179  
    3636    /// Function taking \a value, \a target (+1 or -1) and vector
    3737    /// defining what samples to use. The score is equivalent to
    38     /// Mann-Whitney statistics. If target is equal to 1,
    39     /// sample belonges to class + otherwise sample belongs to class
    40     /// -. @return the area under the ROC
    41     /// curve. If the area is less than 0.5, is 1-area returned.
     38    /// Mann-Whitney statistics. If target is equal to 1, the sample
     39    /// belongs to class +, otherwise the sample belongs to class
     40    /// -. @return the area under the ROC curve. If the area is less
     41    /// than 0.5 and absolute=true, 1-area is returned.
    4242    ///
    4343    double score(const gslapi::vector& target, const gslapi::vector& value,
     
    5151    /// sample belongs to class +, otherwise sample belongs to class
    5252    /// -. @return weighted version of area under the ROC curve. If
    53     /// the area is less than 0.5, is 1-area returned.
     53    /// the area is less than 0.5 and absolute=true, 1-area is
     54    /// returned.
    5455    ///
    5556    double score(const gslapi::vector& target, const gslapi::vector& value,
     
    5758                 const std::vector<size_t>& = std::vector<size_t>());
    5859       
     60
    5961    ///
    60     ///Calculates the p-value, i.e. the probability of observing an area
    61     ///equally or larger if the null hypothesis is true. If P is near zero,
    62     ///this casts doubt on this hypothesis. The null hypothesis is that the
    63     ///values from the 2 classes are generated from 2 identical
    64     ///distributions. The alternative is that the median of the first
    65     ///distribution is shifted from the median of the second distribution by a
    66     ///non-zero amount. If the smallest group size is larger than minimum_size
    67     ///(default = 10), then P is calculated using a normal approximation.
    68     /// @return the one-sided p-value
     62    ///Calculates the p-value, i.e. the probability of observing an
     63    ///area equal or larger if the null hypothesis is true. If P is
     64    ///near zero, this casts doubt on this hypothesis. The null
     65    ///hypothesis is that the values from the 2 classes are generated
     66    ///from 2 identical distributions. The alternative is that the
     67    ///median of the first distribution is shifted from the median of
     68    ///the second distribution by a non-zero amount. If the smallest
     69    ///group size is larger than minimum_size (default = 10), then P
     70    ///is calculated using a normal approximation.  @return the
     71    ///one-sided p-value (if absolute=true is used, this is equivalent
     72    ///to the two-sided p-value).
    6973    ///
    70     double p_value() ;
     74    double p_value(void) ;
    7175         
    7276    ///
     
    8589  private:
    8690    double area_;
    87     gslapi::vector data_;
    8891    u_int minimum_size_;
    8992    u_int nof_pos_;
    90     gslapi::vector target_;
    91     std::vector<size_t> train_set_;
     93    std::vector<size_t> train_set_;
    9294    std::vector<std::pair<double, double> > value_;
    9395    /// pair of target and data. should always be sorted with respect to
  • trunk/src/Score.h

    r119 r179  
    1919    ///   Constructor
    2020    ///   
    21     Score(void) {};
    22    
     21    Score(bool absolute=true) ;
    2322   
    2423    ///
     
    2726    virtual ~Score(void) {};
    2827   
    29     virtual double
     28    ///
     29    ///  Function changing mode of Score
     30    ///
     31    inline void absolute(bool absolute) {absolute_=absolute;}
     32
     33    virtual double
    3034    score(const gslapi::vector&,
    3135          const gslapi::vector&,
     
    4044   
    4145   
    42   private:
     46  protected:
     47    bool absolute_;
    4348    gslapi::vector data_;   
    4449    gslapi::vector target_;
     50
    4551
    4652  }; // class Score
  • trunk/src/tScore.cc

    r148 r179  
    1414
    1515  tScore::tScore()
    16     : Score(),  t_(0), target_(), train_set_(), value_(), weight_()
     16    : Score(),  t_(0), train_set_(), weight_()
    1717  {
    1818  }
    1919
    2020  double tScore::score(const gslapi::vector& target,
    21                        const gslapi::vector& value,
     21                       const gslapi::vector& data,
    2222                       const std::vector<size_t>& train_set)
    2323  {
     
    2828      train_set_=train_set;
    2929    target_ = target;
    30     value_ = value;
     30    data_ = data;
    3131    weight_ = gslapi::vector(target.size(),1);
    3232    Averager positive;
     
    3434    for(size_t i=0; i<train_set_.size(); i++){
    3535      if (target_[train_set_[i]]==1)
    36         positive.add(value_[train_set_[i]]);
     36        positive.add(data_[train_set_[i]]);
    3737      else
    38         negative.add(value_[train_set_[i]]);
     38        negative.add(data_[train_set_[i]]);
    3939    }
    4040    double diff = positive.mean() - negative.mean();
     
    4242                  /(positive.n()-1+negative.n()-1));
    4343    t_=diff/s;
    44    
    45     if (t_>0)
    46       return t_;
    47     else
    48       return -t_;
     44    if (t_<0 && absolute_)
     45      t_=-t_;
     46     
     47    return t_;
    4948  }
    5049
     
    6059      train_set_=train_set;
    6160    target_ = target;
    62     value_ = value;
    6361    weight_ = weight;
    6462    WeightedAverager positive;
     
    6664    for(size_t i=0; i<train_set_.size(); i++){
    6765      if (target_[train_set_[i]]==1)
    68         positive.add(value_(train_set_[i]),weight_(train_set_[i]));
     66        positive.add(data_(train_set_[i]),weight_(train_set_[i]));
    6967      else
    70         negative.add(value_(train_set_[i]),weight_(train_set_[i]));
     68        negative.add(data_(train_set_[i]),weight_(train_set_[i]));
    7169    }
    7270    double diff = positive.mean() - negative.mean();
     
    7472                  (positive.sum_w()+negative.sum_w()));
    7573    t_=diff/s;
    76     if (t_>0)
    77       return t_;
    78     else
    79       return -t_;
     74    if (t_<0 && absolute_)
     75      t_=-t_;
     76   
     77    return t_;
    8078  }
    8179
  • trunk/src/tScore.h

    r148 r179  
    3737   
    3838    ///
    39     /// Calculates the absolute value of t-score, i.e. the ratio
    40     /// between difference in mean and standard deviation of this
    41     /// difference.  /// @return \f$ \frac{ \vert \frac{1}{n_x}\sum x_i
    42     /// - \frac{1}{n_y}\sum y_i \vert } {\frac{\sum x_i^2 + \sum
    43     /// y_i^2}{n_x-1+n_y-1}} \f$
     39    /// Calculates the value of t-score, i.e. the ratio between
     40    /// difference in mean and standard deviation of this
     41    /// difference. \f$ \frac{ \vert \frac{1}{n_x}\sum x_i -
     42    /// \frac{1}{n_y}\sum y_i \vert } {\frac{\sum x_i^2 + \sum
     43    /// y_i^2}{n_x-1+n_y-1}} \f$ @return t-score; if absolute=true,
     44    /// the absolute value of the t-score is returned.
    4445    ///
    4546    double score(const gslapi::vector&, const gslapi::vector&, 
     
    4748
    4849    ///
    49     /// Weighted version of t-Score
     50    /// Weighted version of t-Score. @return t-score; if absolute=true,
     51    /// the absolute value of the t-score is returned.
    5052    ///
    5153    double score(const gslapi::vector&, const gslapi::vector&, 
     
    5456
    5557    ///
    56     ///Calculates the p-value, i.e. the probability of observing a t-score
    57     ///equally or larger if the null hypothesis is true. If P is near zero,
    58     ///this casts doubt on this hypothesis. The null hypothesis is ...
    59     /// @return the one-sided p-value
     58    ///Calculates the p-value, i.e. the probability of observing a
     59    ///t-score equal or larger if the null hypothesis is true. If P
     60    ///is near zero, this casts doubt on this hypothesis. The null
     61    ///hypothesis is ...  @return the one-sided p-value (if
     62    ///absolute=true is used, the two-sided p-value).
    6063    ///
    6164    double p_value();
     65
     66         
    6267         
    6368  private:
Note: See TracChangeset for help on using the changeset viewer.