Changeset 623


Ignore:
Timestamp:
Sep 5, 2006, 4:13:12 AM (15 years ago)
Author:
Peter
Message:

Fixes #112 and refs #123: added an overloaded function score taking a Target and a DataLookupWeighted1D, which is needed for InputRanker.

Location:
trunk/c++_tools/statistics
Files:
15 edited

Legend:

Unmodified
Added
Removed
  • trunk/c++_tools/statistics/Fisher.cc

    r616 r623  
    44#include <c++_tools/statistics/Score.h>
    55#include <c++_tools/statistics/utility.h>
     6#include <c++_tools/classifier/DataLookupWeighted1D.h>
    67#include <c++_tools/classifier/Target.h>
    78
     
    136137  }
    137138 
    138     double Fisher::score(const classifier::Target& target,
    139                          const utility::vector& value,
    140                          const utility::vector& weight)
     139  double Fisher::score(const classifier::Target& target,
     140                       const classifier::DataLookupWeighted1D& value)
    141141  {
    142142    weighted_=true;
     
    144144    for (size_t i=0; i<target.size(); i++)
    145145      if (target.binary(i))
     146        if (value.data(i)>value_cutoff_)
     147          a_+=value.weight(i);
     148        else
     149          c_+=value.weight(i);
     150      else
     151        if (value.data(i)>value_cutoff_)
     152          b_+=value.weight(i);
     153        else
     154          d_+=value.weight(i);
     155       
     156    // If a column sum or a row sum is zero, the table is nonsense
     157    if ((a_==0 || d_==0) && (c_==0 || b_==0)){
     158      // Peter should throw an exception here
     159      std::cerr << "Warning: Fisher: Table is not valid\n";
     160      return 1;
     161    }
     162
     163    return oddsratio(a_,b_,c_,d_);
     164  }
     165 
     166    double Fisher::score(const classifier::Target& target,
     167                         const utility::vector& value,
     168                         const utility::vector& weight)
     169  {
     170    weighted_=true;
     171    a_=b_=c_=d_=0;
     172    for (size_t i=0; i<target.size(); i++)
     173      if (target.binary(i))
    146174        if (value(i)>value_cutoff_)
    147175          a_+=weight(i);
     
    156184    // If a column sum or a row sum is zero, the table is nonsense
    157185    if ((a_==0 || d_==0) && (c_==0 || b_==0)){
     186      // Peter should throw an exception
    158187      std::cerr << "Warning: Fisher: Table is not valid\n";
    159188      return 1;
  • trunk/c++_tools/statistics/Fisher.h

    r616 r623  
    118118    ///
    119119    double score(const classifier::Target& target,
     120                 const classifier::DataLookupWeighted1D& value);
     121
     122
     123    ///
     124    /// Weighted version of score. Each element in 2x2 table is
     125    /// calculated as \f$ \sum w_i \f$, so when each weight is
     126    /// unitary the same table is created as in the unweighted version
     127    ///
     128    /// @return odds ratio
     129    ///
     130    /// @see score
     131    ///
     132    double score(const classifier::Target& target,
    120133                 const utility::vector& value,
    121134                 const utility::vector& weight);
  • trunk/c++_tools/statistics/FoldChange.cc

    r616 r623  
    55#include <c++_tools/statistics/Averager.h>
    66#include <c++_tools/statistics/AveragerWeighted.h>
     7#include <c++_tools/classifier/DataLookupWeighted1D.h>
    78#include <c++_tools/classifier/Target.h>
    89
     
    5051
    5152  double FoldChange::score(const classifier::Target& target,
     53                           const classifier::DataLookupWeighted1D& value)
     54  {
     55    weighted_=true;
     56    AveragerWeighted pos;
     57    AveragerWeighted neg;
     58
     59    for (size_t i=0; i<value.size(); i++)
     60      if (target.binary(i))
     61        pos.add(value.data(i),value.weight(i));
     62      else
     63        neg.add(value.data(i),value.weight(i));
     64         
     65    if (absolute_)
     66      return fabs(pos.mean()-neg.mean());
     67    return pos.mean()-neg.mean();
     68  }
     69
     70
     71  double FoldChange::score(const classifier::Target& target,
    5272                           const utility::vector& value,
    5373                           const utility::vector& weight)
  • trunk/c++_tools/statistics/FoldChange.h

    r616 r623  
    3030    /// @return difference of the means of the two classes
    3131    ///
    32     /// @param target is +1 or -1
     32    /// @param target
    3333    /// @param value vector of the values
    3434    ///
     
    3737 
    3838    ///
     39    /// @return difference of the means of the two classes
     40    ///
     41    /// @param target
     42    /// @param value vector of the values (with weights)
     43    ///
     44    double score(const classifier::Target& target,
     45                 const classifier::DataLookupWeighted1D& value);
     46 
     47    ///
    3948    /// @return difference of the weighted means of the two classes
    4049    ///
    41     /// @param target is +1 or -1
     50    /// @param target
    4251    /// @param value vector of the values
    4352    /// @param weight vector of accompanied weight to the values
  • trunk/c++_tools/statistics/Pearson.cc

    r616 r623  
    55#include <c++_tools/statistics/AveragerPairWeighted.h>
    66#include <c++_tools/utility/vector.h>
     7#include <c++_tools/classifier/DataLookupWeighted1D.h>
    78#include <c++_tools/classifier/Target.h>
    89
     
    5960   
    6061  double Pearson::score(const classifier::Target& target,
     62                        const classifier::DataLookupWeighted1D& value)
     63  {
     64    weighted_=true;
     65    AveragerPairWeighted ap;
     66    for (size_t i=0; i<target.size(); i++){
     67      if (target.binary(i))
     68        ap.add(1, value.data(i),1,value.weight(i));
     69      else
     70        ap.add(-1, value.data(i),1,value.weight(i));
     71      nof_samples_ = target.size();
     72    }
     73    r_ = ap.correlation();
     74    if (r_<0 && absolute_)
     75      return -r_;
     76     
     77    return r_;
     78  }
     79
     80  double Pearson::score(const classifier::Target& target,
    6181                        const utility::vector& value,
    6282                        const utility::vector& weight)
  • trunk/c++_tools/statistics/Pearson.h

    r616 r623  
    5353    ///
    5454    double score(const classifier::Target& target,
     55                 const classifier::DataLookupWeighted1D& value);
     56
     57    ///
     58    /// \f$ \frac{\vert \sum_iw^2_i(x_i-\bar{x})(y_i-\bar{y})\vert }
     59    /// {\sqrt{\sum_iw^2_i(x_i-\bar{x})^2\sum_iw^2_i(y_i-\bar{y})^2}}
     60    /// \f$, where \f$ m_x = \frac{\sum w_ix_i}{\sum w_i} \f$ and \f$
     61    /// m_y = \frac{\sum w_iy_i}{\sum w_i} \f$. This expression is
     62    /// chosen to get a correlation equal to unity when \a x and \a y
     63    /// are equal. @return absolute value of weighted version of
     64    /// Pearson correlation.
     65    ///
     66    double score(const classifier::Target& target,
    5567                 const utility::vector& value,
    5668                 const utility::vector& weight);
  • trunk/c++_tools/statistics/ROC.cc

    r616 r623  
    11// $Id$
    22
    3 #include <c++_tools/statistics/ROC.h>
    4 #include <c++_tools/utility/stl_utility.h>
    5 #include <c++_tools/utility/vector.h>
     3#include "c++_tools/statistics/ROC.h"
     4
     5#include "c++_tools/classifier/DataLookupWeighted1D.h"
     6#include "c++_tools/utility/stl_utility.h"
     7#include "c++_tools/utility/vector.h"
    68
    79#include <gsl/gsl_cdf.h>
     
    9799    return area_;
    98100  }
     101
     102
     103  // Peter, should be possible to do this in NlogN
     104  double ROC::score(const classifier::Target& target,
     105                    const classifier::DataLookupWeighted1D& value)
     106  {
     107    weighted_=true;
     108
     109    vec_pair_.clear();
     110    vec_pair_.reserve(target.size());
     111    for (unsigned int i=0; i<target.size(); i++)
     112      if (value.weight(i))
     113        vec_pair_.push_back(std::make_pair(target.binary(i),value.data(i)));
     114
     115    std::sort(vec_pair_.begin(),vec_pair_.end(),
     116              utility::pair_value_compare<int, double>());
     117
     118    area_=0;
     119    nof_pos_=0;
     120    double max_area=0;
     121
     122    for (size_t i=0; i<n(); i++)
     123      if (target.binary(i))
     124        for (size_t j=0; j<n(); j++)
     125          if (!target.binary(j)){
     126            if (value.data(i)>value.data(j))
     127              area_+=value.weight(i)*value.weight(j);
     128            max_area+=value.weight(i)*value.weight(j);
     129          }
     130   
     131    area_/=max_area;
     132   
     133    if (area_<0.5 && absolute_)
     134      area_=1.0-area_;
     135   
     136    return area_;
     137  }
     138
    99139
    100140  // Peter, should be possible to do this in NlogN
  • trunk/c++_tools/statistics/ROC.h

    r616 r623  
    4747                 const utility::vector& value);
    4848   
     49    /// Function taking values, target, weight and a vector defining
     50    /// what samples to use. The area is defined as \f$ \frac{\sum
     51    /// w^+w^-}{\sum w^+w^-}\f$, where the sum in the numerator goes
     52    /// over all pairs where value+ is larger than value-. The
     53    /// denominator goes over all pairs. If target is equal to 1,
     54    /// sample belongs to class + otherwise sample belongs to class
     55    /// -. @return weighted version of area under the ROC curve. If
     56    /// the area is less than 0.5 and absolute=true, 1-area is
     57    /// returned. Complexity is \f$ N^2 \f$ where \f$ N \f$ is number
     58    /// of samples.
     59    ///
     60    double score(const classifier::Target& target,
     61                 const classifier::DataLookupWeighted1D& value);
     62       
     63
    4964    /// Function taking values, target, weight and a vector defining
    5065    /// what samples to use. The area is defined as \f$ \frac{\sum
  • trunk/c++_tools/statistics/SNR.cc

    r616 r623  
    11// $Id$
    22
    3 
    4 // Thep C++ Tools
    5 #include <c++_tools/statistics/SNR.h>
    6 #include <c++_tools/statistics/Averager.h>
    7 #include <c++_tools/statistics/AveragerWeighted.h>
    8 #include <c++_tools/classifier/Target.h>
     3#include "c++_tools/statistics/SNR.h"
     4#include "c++_tools/statistics/Averager.h"
     5#include "c++_tools/statistics/AveragerWeighted.h"
     6#include "c++_tools/classifier/DataLookupWeighted1D.h"
     7#include "c++_tools/classifier/Target.h"
    98
    109namespace theplu {
     
    3938
    4039  double SNR::score(const classifier::Target& target,
     40                    const classifier::DataLookupWeighted1D& value)
     41  {
     42    weighted_=true;
     43    statistics::AveragerWeighted positive;
     44    statistics::AveragerWeighted negative;
     45    for(size_t i=0; i<target.size(); i++){
     46      if (target.binary(i))
     47        positive.add(value.data(i),value.weight(i));
     48      else
     49        negative.add(value.data(i),value.weight(i));
     50    }
     51    double diff = positive.mean() - negative.mean();
     52    double denom=positive.std()+negative.std();
     53    assert(denom);
     54    score_=diff/denom;
     55    if(positive.sum_w()==0 || negative.sum_w()==0)
     56      score_=0;
     57    if (score_<0 && absolute_)
     58      score_=-score_;   
     59    return score_;
     60  }
     61
     62
     63
     64  double SNR::score(const classifier::Target& target,
    4165                    const utility::vector& value,
    4266                    const utility::vector& weight)
  • trunk/c++_tools/statistics/SNR.h

    r616 r623  
    1111  namespace utility {
    1212    class vector;
     13  }
     14  namespace classifier {
     15    class DataLookWeighted1D;
    1316  }
    1417namespace statistics { 
     
    4548    ///
    4649    double score(const classifier::Target& target,
     50                 const classifier::DataLookupWeighted1D& value);         
     51
     52    ///
     53    /// Weighted version of SNR. @return SNR score; if absolute=true
     54    /// the absolute value of the score is returned.
     55    ///
     56    double score(const classifier::Target& target,
    4757                 const utility::vector& value,
    4858                 const utility::vector& weight);         
  • trunk/c++_tools/statistics/Score.h

    r616 r623  
    1717  class Target;
    1818  class DataLookup1D;
     19  class DataLookupWeighted1D;
    1920}
    2021
     
    7576 
    7677    ///
     78    /// Function calculating the score in a weighted fashion. In
     79    /// absolute mode, also the score using negated class labels is
     80    /// calculated, and the largest of the two scores is
     81    /// returned. Absolute mode should be used when two-tailed test
     82    /// is wanted.
     83    ///
     84    virtual double
     85    score(const classifier::Target& target,
     86          const classifier::DataLookupWeighted1D& value) = 0;
     87 
     88    ///
    7789    /// Function calculating the weighted version of score. In
    7890    /// absolute mode, also the score using negated class labels is
  • trunk/c++_tools/statistics/WilcoxonFoldChange.cc

    r616 r623  
    3838  }
    3939
     40
     41  double WilcoxonFoldChange::score(const classifier::Target& target,
     42                                   const classifier::DataLookupWeighted1D& value)
     43  {
     44    std::cerr << " WilcoxonFoldChange::score  not implemented" << std::endl;
     45    return 0;
     46  }
     47
     48
    4049  double WilcoxonFoldChange::score(const classifier::Target& target,
    4150                                   const utility::vector& value,
  • trunk/c++_tools/statistics/WilcoxonFoldChange.h

    r616 r623  
    3838    /// @return difference of the weighted means of the two classes
    3939    ///
     40    /// @param value vector of the values (with weights)
     41    /// @train_set defining which values to use (number of values used
     42    /// in the calculation is equal to size of \a train_set)
     43    ///
     44    /// @note not implemented
     45    ///
     46    double score(const classifier::Target& target,
     47                 const classifier::DataLookupWeighted1D& value);
     48 
     49    ///
     50    /// @return difference of the weighted means of the two classes
     51    ///
    4052    /// @param value vector of the values
    4153    /// @param weight vector of accompanied weight to the values
  • trunk/c++_tools/statistics/tScore.cc

    r616 r623  
    11// $Id$
    22
    3 // System includes
     3#include "c++_tools/statistics/tScore.h"
     4#include "c++_tools/statistics/Averager.h"
     5#include "c++_tools/statistics/AveragerWeighted.h"
     6#include "c++_tools/classifier/DataLookupWeighted1D.h"
     7#include "c++_tools/classifier/Target.h"
     8
    49#include <cassert>
    510#include <cmath>
    611
    7 // Thep C++ Tools
    8 #include <c++_tools/statistics/tScore.h>
    9 #include <c++_tools/statistics/Averager.h>
    10 #include <c++_tools/statistics/AveragerWeighted.h>
    11 #include <c++_tools/classifier/Target.h>
    1212
    1313namespace theplu {
     
    4141    return t_;
    4242  }
     43
     44
     45  double tScore::score(const classifier::Target& target,
     46                       const classifier::DataLookupWeighted1D& value)
     47  {
     48    weighted_=true;
     49
     50    statistics::AveragerWeighted positive;
     51    statistics::AveragerWeighted negative;
     52    for(size_t i=0; i<target.size(); i++){
     53      if (target.binary(i))
     54        positive.add(value.data(i),value.weight(i));
     55      else
     56        negative.add(value.data(i),value.weight(i));
     57    }
     58    double diff = positive.mean() - negative.mean();
     59    dof_=positive.n()+negative.n()-2;
     60    double s2=(positive.sum_xx_centered()+negative.sum_xx_centered())/dof_;
     61    t_=diff/sqrt(s2/positive.n()+s2/(negative.n()));
     62    if (t_<0 && absolute_)
     63      t_=-t_;
     64
     65    if(positive.sum_w()==0 || negative.sum_w()==0)
     66      t_=0;
     67    return t_;
     68  }
     69
    4370
    4471  double tScore::score(const classifier::Target& target,
  • trunk/c++_tools/statistics/tScore.h

    r616 r623  
    6262    ///
    6363    double score(const classifier::Target& target,
     64                 const classifier::DataLookupWeighted1D& value);
     65
     66    ///
     67    /// Calculates the weighted t-score, i.e. the ratio between
     68    /// difference in mean and standard deviation of this
     69    /// difference. \f$ t = \frac{ m_x - m_y }{
     70    /// \sqrt{\frac{s2}{n_x}+\frac{s2}{n_y}}} \f$ where \f$ m \f$ is the
     71    /// weighted mean, n is the weighted version of number of data
     72    /// points and \f$ s2 \f$ is an estimation of the variance \f$ s^2
     73    /// = \frac{ \sum_i w_i(x_i-m_x)^2 + \sum_i w_i(y_i-m_y)^2 }{ n_x
     74    /// + n_y - 2 } \f$. See AveragerWeighted for details.
     75    ///
     76    /// @return t-score; if absolute=true, the absolute value of the
     77    /// t-score is returned.
     78    ///
     79    double score(const classifier::Target& target,
    6480                 const utility::vector& value,
    6581                 const utility::vector& weight);
Note: See TracChangeset for help on using the changeset viewer.