Changeset 119


Ignore:
Timestamp:
Jul 20, 2004, 6:18:02 PM (19 years ago)
Author:
Peter
Message:

modified to take weights

Location:
trunk/src
Files:
5 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/ROC.cc

    r112 r119  
    1616namespace cpptools { 
    1717
    18   ROC::ROC(const gslapi::vector& target, const gslapi::vector& data,
    19            const std::vector<size_t>& train_set)
    20  
    21     : Score(), area_(-1), data_(data), minimum_size_(10), nof_pos_(0),
    22       target_(target), train_set_(train_set),
    23       value_(std::vector<std::pair<double, double> >())
    24    
    25   {
    26     if (!train_set_.size())
    27       for (size_t i=0; i<target_.size(); i++)
    28         train_set_.push_back(i); 
    29     sort();
    30   }
    31 
    3218  ROC::ROC()
    3319    : Score(), area_(-1), data_(), minimum_size_(10), nof_pos_(0), target_(),
    3420      train_set_(std::vector<size_t>()),
    35       value_(std::vector<std::pair<double, double> >())
     21      value_(std::vector<std::pair<double, double> >()),
     22      weight_(gslapi::vector())
    3623       
    3724  {
     
    5138                    ( value_.size() - nof_pos_ ) / nof_pos_ );
    5239    double p = gsl_cdf_gaussian_Q(x, sigma);
    53        
     40   
    5441    return p;
    5542  }
     
    7360  double ROC::p_value(void)
    7461  {
    75     if (area_==-1)
    76       area_ = score();
    7762    double p;
    7863    if (nof_pos_ < minimum_size_ & value_.size()-nof_pos_ < minimum_size_)
     
    8065                          nof_pos_, value_.size()-nof_pos_);
    8166    else
    82     p = get_p_approx(area_);
     67      p = get_p_approx(area_);
    8368    return p;
    84   }
    85 
    86   double ROC::score()
    87   {
    88     if (area_==-1){
    89       double area_ = 0;
    90       for (unsigned int i=0; i<value_.size(); i++)
    91         if (value_[i].first==1)
    92           area_+=i;
    93       // Normalizing the area to 0-1
    94       area_ = (area_/nof_pos_ - (nof_pos_ - 1)/2 )/(value_.size() - nof_pos_);
    95     }
    96     return area_;
    9769  }
    9870
     
    11082      train_set_ = train_set;
    11183    sort();
    112     area_ = score();   
     84    area_ = 0;
     85    for (size_t i=0; i<value_.size(); i++)
     86      if (value_[i].first==1)
     87        area_+=i;
     88        // Normalizing the area to 0-1
     89    area_ = (area_/nof_pos_-(nof_pos_-1)/2 )/(value_.size()-nof_pos_);
     90    return area_;
     91  }
     92
     93  double ROC::score(const gslapi::vector& target, const gslapi::vector& data,
     94                    const gslapi::vector& weight,
     95                    const std::vector<size_t>& train_set)
     96  {
     97    target_ = target;
     98    data_ = data;
     99    weight_=weight;
     100    if (!train_set.size()){
     101      train_set_.resize(0);
     102      for (size_t i=0; i<target_.size(); i++)
     103        train_set_.push_back(i); 
     104    }
     105    else
     106      train_set_ = train_set;
     107    sort();
     108    area_=0;
     109    double max_area=0;
     110    //Peter, use the sort to skip some ifs and loops
     111    for (size_t i=0; i<value_.size(); i++){
     112      if (target_(train_set_[i])==1){
     113        for (size_t j=0; j<value_.size(); j++){
     114          if (target_(train_set_[j])==-1){
     115            if (data_(train_set_[i]) > data_(train_set_[j])){
     116              area_+=weight_(train_set_[i])*weight_(train_set_[j]);
     117            }
     118            max_area+=weight_(train_set_[i])*weight_(train_set_[j]);
     119          }
     120        }
     121      }
     122    }
     123    area_/=max_area;
    113124    return area_;
    114125  }
  • trunk/src/ROC.h

    r112 r119  
    3131         
    3232    ///
    33     /// Constructor taking a value vector, a target vector (+1 or -1)
    34     /// and a vector defining what samples to use.
    35     ///
    36     ROC(const gslapi::vector&, const gslapi::vector&,
    37         const std::vector<size_t>& = std::vector<size_t>());
    38          
    39     ///
    4033    /// Destructor
    4134    ///
    4235    virtual ~ROC(void) {};
    4336         
    44     /// Equivalent to the Mann-Whitney score, but normalized to be
    45     /// between zero and one.  @return the area under the ROC curve
     37    /// Function taking \a value, target (+1 or -1) and vector
     38    /// defining what samples to use. The score is equivalent to the
     39    /// Mann-Whitney score but normalized to be between zero and
     40    /// one. @return the area under the ROC curve
    4641    ///
    47     double score() ;
     42    double score(const gslapi::vector& value, const gslapi::vector& target,
     43                 const std::vector<size_t>& = std::vector<size_t>());
    4844   
    49     /// Function taking a vector of values and a vector of target (+1
    50     /// or -1). The score is equivalent to the Mann-Whitney score but
    51     /// normalized to be between zero and one. @return the area under
    52     /// the ROC curve
     45    /// Function taking values, target, weight and a vector defining
     46    /// what samples to use. The area is defined as \f$ \frac{\sum
     47    /// w^+w^-}{\sum w^+w^-}\f$, where the sum in the numerator goes
     48    /// over all pairs where value+ is larger than value-. The
     49    /// denominator goes over all pairs. @return weighted version of
     50    /// area under the ROC curve
    5351    ///
    54     double score(const gslapi::vector&, const gslapi::vector&, 
     52    double score(const gslapi::vector&, const gslapi::vector&,
     53                 const gslapi::vector&,   
    5554                 const std::vector<size_t>& = std::vector<size_t>());
    5655       
     
    8887    gslapi::vector target_;
    8988    std::vector<size_t> train_set_;
     89    std::vector<std::pair<double, double> > value_;
    9090    /// pair of target and data. should always be sorted with respect to
    9191    /// data.
    92     std::vector<std::pair<double, double> > value_;
    93    
     92    gslapi::vector weight_;
     93   
    9494    ///
    9595    ///
  • trunk/src/Score.h

    r112 r119  
    3232          const std::vector<size_t>& = std::vector<size_t>()) = 0;
    3333 
    34     virtual double p_value() = 0;
     34    virtual double
     35    score(const gslapi::vector&,
     36          const gslapi::vector&,
     37          const gslapi::vector&,
     38          const std::vector<size_t>& = std::vector<size_t>()) = 0;
     39 
     40   
    3541   
    3642  private:
  • trunk/src/tScore.cc

    r112 r119  
    66// Thep C++ Tools
    77#include "tScore.h"
     8#include "Averager.h"
    89#include "vector.h"
    9 #include "Averager.h"
     10#include "WeightedAverager.h"
    1011
    1112namespace theplu {
     
    1314
    1415  tScore::tScore()
    15     : Score(), value_(), target_(), train_set_()
     16    : Score(),  t_(0), target_(), train_set_(), value_(), weight_()
    1617  {
    1718  }
    1819
    19   tScore::tScore( const gslapi::vector& target,
    20                   const gslapi::vector& value,
    21                   const std::vector<size_t>& train_set)
    22       : Score(), value_(value), target_(target)
     20  double tScore::score(const gslapi::vector& target,
     21                       const gslapi::vector& value,
     22                       const std::vector<size_t>& train_set)
    2323  {
    2424    if (!train_set_.size())
    2525      for (size_t i=0; i<target_.size(); i++)
    2626        train_set_.push_back(i); 
    27   }
    28    
    29   double tScore::score()
    30   {
     27    else
     28      train_set_=train_set;
     29    target_ = target;
     30    value_ = value;
     31    weight_ = gslapi::vector(target.size(),1);
    3132    Averager positive;
    3233    Averager negative;
     
    4041    double s=sqrt((positive.sum_xsqr()+negative.sum_xsqr())
    4142                  /(positive.n()-1+negative.n()-1));
    42     return diff/s;
     43    t_=diff/s;
     44    return t_;
    4345  }
    4446
    4547  double tScore::score(const gslapi::vector& target,
    4648                       const gslapi::vector& value,
     49                       const gslapi::vector& weight,
    4750                       const std::vector<size_t>& train_set)
    4851  {
    49     train_set_=train_set;
    5052    if (!train_set_.size())
    5153      for (size_t i=0; i<target_.size(); i++)
    5254        train_set_.push_back(i); 
     55    else
     56      train_set_=train_set;
    5357    target_ = target;
    5458    value_ = value;
    55     return score();
     59    weight_ = weight;
     60    WeightedAverager positive;
     61    WeightedAverager negative;
     62    for(size_t i=0; i<train_set_.size(); i++){
     63      if (target_[train_set_[i]]==1)
     64        positive.add(value_(train_set_[i]),weight_(train_set_[i]));
     65      else
     66        negative.add(value_(train_set_[i]),weight_(train_set_[i]));
     67    }
     68    double diff = positive.mean() - negative.mean();
     69    double s=sqrt((positive.squared_sum()+negative.squared_sum())/
     70                  (positive.sum_w()+negative.sum_w()));
     71    t_=diff/s;
     72    return t_;
    5673  }
    5774
    5875  double tScore::p_value(void)
    5976  {
    60     double t = score();
    6177    double dof = target_.size()-2;
    62     double p = gsl_cdf_tdist_Q(t, dof);
     78    double p = gsl_cdf_tdist_Q(t_, dof);
    6379    return dof > 0 ? p : 1;
    6480  }
  • trunk/src/tScore.h

    r112 r119  
    3131
    3232    ///
    33     /// Constructor taking a value vector and a target vector (+1 or -1).
    34     ///
    35     tScore(const gslapi::vector&, const gslapi::vector&,
    36            const std::vector<size_t>& = std::vector<size_t>());
    37          
    38     ///
    3933    /// Destructor
    4034    ///
     
    4842    /// {\frac{\sum x_i^2 + \sum y_i^2}{n_x-1+n_y-1}} \f$
    4943    ///
    50     double score();
    51        
     44    double score(const gslapi::vector&, const gslapi::vector&, 
     45                 const std::vector<size_t>& = std::vector<size_t>());
     46
    5247    ///
    53     /// Calculates the t-score, i.e. the ratio between difference in
    54     /// mean and standard deviation of this difference.
    55     /// @return \f$ \frac{\frac{1}{n_x}\sum x_i - \frac{1}{n_y}\sum y_i}
    56     /// {\frac{\sum x_i^2 + \sum y_i^2}{n_x-1+n_y-1}} \f$
     48    /// Weighted version of t-Score
    5749    ///
    5850    double score(const gslapi::vector&, const gslapi::vector&, 
     51                 const gslapi::vector&,
    5952                 const std::vector<size_t>& = std::vector<size_t>());
    6053
     
    6861         
    6962  private:
    70     gslapi::vector value_;
     63    double t_;
    7164    gslapi::vector target_;
    7265    std::vector<size_t> train_set_;
     66    gslapi::vector value_;
     67    gslapi::vector weight_;
    7368       
    7469  };
Note: See TracChangeset for help on using the changeset viewer.