Ignore:
Timestamp:
Dec 19, 2005, 3:58:29 PM (17 years ago)
Author:
Peter
Message:

non compiling checking before revision after design meeting

Location:
branches/peters_vector/lib
Files:
38 edited

Legend:

Unmodified
Added
Removed
  • branches/peters_vector/lib/classifier/ConsensusInputRanker.cc

    r464 r469  
    44#include <c++_tools/classifier/ConsensusInputRanker.h>
    55
     6#include <c++_tools/classifier/CrossSplitting.h>
     7#include <c++_tools/classifier/DataView.h>
     8#include <c++_tools/classifier/InputRanker.h>
     9#include <c++_tools/classifier/Target.h>
     10#include <c++_tools/statistics/utility.h>
     11#include <c++_tools/utility/stl_utility.h>
    612#include <c++_tools/gslapi/matrix.h>
    7 #include <c++_tools/statistics/utility.h>
    8 #include <c++_tools/classifier/CrossSplitting.h>
    9 #include <c++_tools/classifier/InputRanker.h>
    10 #include <c++_tools/utility/stl_utility.h>
    1113
    1214#include <utility>
     
    1618namespace classifier { 
    1719
    18   ConsensusInputRanker::ConsensusInputRanker(const gslapi::matrix& data,
    19                                              const gslapi::vector& target,
    20                                              statistics::Score& score_object,
    21                                              const size_t rounds,
    22                                              const size_t k)
    23     :id_(std::vector<size_t>()), input_rankers_(std::vector<InputRanker>()),
    24      k_(k), nof_rankers_(k*rounds), rank_(std::vector<size_t>())
     20  ConsensusInputRanker::ConsensusInputRanker
     21  (const DataView& data, const Target& target,
     22   statistics::Score& score_object, CrossSplitting& sampler,
     23   const size_t n)
     24    : nof_rankers_(n)
    2525
    2626  {
    27     CrossSplitting cv(target, k_);
    28     for (size_t i=0; i<nof_rankers_; i++){
    29       //InputRanker ir(data, target, score_object);
    30       input_rankers_.push_back(InputRanker(data, target, score_object,
    31                                            cv.next()));
     27    for (size_t i=0; i<n; i++){
     28      std::vector<size_t> index=sampler.next();
     29      input_rankers_.push_back(InputRanker(DataView(data,index),
     30                                           Target(target,index),
     31                                           score_object)  );
    3232    }
    3333    // Sorting with respect to median rank
     
    5454  }
    5555
    56   ConsensusInputRanker::ConsensusInputRanker(const gslapi::matrix& data,
    57                                              const gslapi::vector& target,
    58                                              const gslapi::matrix& weight,
    59                                              statistics::Score& score_object,
    60                                              const size_t rounds,
    61                                              const size_t k)
    62     :id_(std::vector<size_t>()), input_rankers_(std::vector<InputRanker>()),
    63      k_(k), nof_rankers_(k*rounds), rank_(std::vector<size_t>())
     56  ConsensusInputRanker::ConsensusInputRanker
     57  (const DataView& data, const Target& target, const DataView& weight,
     58   statistics::Score& score_object, CrossSplitting& sampler,const size_t n)
     59    : nof_rankers_(n)
     60  {
    6461
    65   {
    66     CrossSplitting cv(target, k_);
    67     for (size_t i=0; i<nof_rankers_; i++)
    68       input_rankers_.push_back(InputRanker(data,target,weight,score_object,
    69                                            cv.next()));
     62    for (size_t i=0; i<n; i++){
     63      std::vector<size_t> index=sampler.next();
     64      input_rankers_.push_back(InputRanker(DataView(data,index),
     65                                           Target(target,index),
     66                                           score_object,
     67                                           DataView(weight,index)));
     68    }
    7069   
    7170    // Sorting with respect to median rank
  • branches/peters_vector/lib/classifier/ConsensusInputRanker.h

    r451 r469  
    77
    88namespace theplu {
    9   class gslapi::matrix;
    10   class gslapi::vector;
    119  class statistics::Score;
    1210namespace classifier { 
     11
     12  class CrossSplitting;
     13  class DataView;
     14  class Target;
    1315
    1416  ///
     
    2628    /// manner, and in total there is \a n times \a k ranklists.
    2729    ///
    28     ConsensusInputRanker(const gslapi::matrix& data,
    29                          const gslapi::vector& target,
    30                          statistics::Score&,
    31                          const size_t n = 1 ,
     30    ConsensusInputRanker(const DataView& data,
     31                         const Target& target,
     32                         statistics::Score&, CrossSplitting&,
    3233                         const size_t k = 3 );
    3334
     
    3738    /// manner, and in total there is \a n times \a k ranklists.
    3839    ///
    39     ConsensusInputRanker(const gslapi::matrix& data,
    40                          const gslapi::vector& target,
    41                          const gslapi::matrix& weight,
    42                          statistics::Score&,
    43                          const size_t n = 1 ,
    44                          const size_t k = 3 );
     40    ConsensusInputRanker(const DataView& data,
     41                         const Target& target,
     42                         const DataView& weight,
     43                         statistics::Score&, 
     44                         CrossSplitting&,
     45                         const size_t n = 1 );
    4546
    4647    ///
     
    6061    std::vector<size_t> id_;
    6162    std::vector<InputRanker> input_rankers_;
    62     size_t k_;
    63     size_t nof_rankers_;
     63    u_int nof_rankers_;
    6464    std::vector<size_t> rank_;
    6565  };
  • branches/peters_vector/lib/classifier/DataView.cc

    r455 r469  
    1515  }
    1616 
     17
     18
    1719  DataView::DataView(const gslapi::matrix& data,
    1820                     const std::vector<size_t>& row,
     
    2224  }
    2325 
     26
     27
     28  DataView::DataView(const DataView& data,
     29                     const std::vector<size_t>& row,
     30                     const std::vector<size_t>& col)
     31    : MatrixView(row,col), data_(data.data_)
     32  {
     33  }
     34 
     35
     36
    2437  DataView::DataView(const gslapi::matrix& data,
    25                      const std::vector<size_t>& index, bool row)
     38                     const std::vector<size_t>& index,
     39                     const bool row)
    2640    : MatrixView(index, row), data_(&data)
    2741  {
    28     if(row)
     42  }
     43 
     44
     45
     46  DataView::DataView(const DataView& data,
     47                     const std::vector<size_t>& index, bool row)
     48    : MatrixView(index, row), data_(data.data_)
     49  {
     50    if(row){
     51      row_index_=index;
     52      column_index_.reserve((*data_).columns());
    2953      for(size_t i=0;i<(*data_).columns();i++)
    3054        column_index_.push_back(i);
    31     else
     55    }
     56    else{
     57      column_index_=index;
     58      row_index_.reserve((*data_).rows());
    3259      for(size_t i=0;i<(*data_).rows();i++)
    3360        row_index_.push_back(i);
     61    }
    3462
    3563  }
    3664 
     65
     66
    3767  DataView::DataView(const DataView& dv)
    3868    : MatrixView(dv), data_(dv.data_)
  • branches/peters_vector/lib/classifier/DataView.h

    r455 r469  
    1010namespace theplu {
    1111namespace classifier { 
     12
     13 
    1214
    1315  ///
     
    4547    DataView(const DataView&);
    4648
     49    ///
     50    /// Constructor taking the row index vector and column index vector
     51    /// as input.
     52    ///
     53    DataView(const DataView&, const std::vector<size_t>&,
     54             const std::vector<size_t>&);
     55
     56    ///
     57    /// Constructor taking the column (default) or row index vector
     58    /// as input.
     59    ///
     60    DataView(const DataView&, const std::vector<size_t>&, const bool row=false);
     61
    4762    ///
    4863    /// Destructor
  • branches/peters_vector/lib/classifier/InputRanker.cc

    r451 r469  
    33#include <c++_tools/classifier/InputRanker.h>
    44
    5 #include <c++_tools/gslapi/matrix.h>
    6 #include <c++_tools/gslapi/vector.h>
     5#include <c++_tools/classifier/DataView.h>
     6#include <c++_tools/classifier/VectorView.h>
     7#include <c++_tools/classifier/Target.h>
    78#include <c++_tools/statistics/ROC.h>
    89#include <c++_tools/utility/stl_utility.h>
     
    1516
    1617
    17   InputRanker::InputRanker(const gslapi::matrix& data,
    18                            const gslapi::vector& target,
    19                            statistics::Score& score_object,
    20                            const std::vector<size_t>& train_set)
    21     :train_set_(train_set),
    22      id_(std::vector<size_t>(data.rows())),
    23      rank_(std::vector<size_t>(data.rows()))
    24  
     18  InputRanker::InputRanker(const DataView& data,
     19                           const Target& target,
     20                           statistics::Score& score_object)
    2521  {
    2622    size_t nof_genes = data.rows();
     
    3531    std::vector<std::pair<size_t, double> > score;
    3632    for (size_t i=0; i<nof_genes; i++) {
    37       double area = score_object.score(target,gslapi::vector(data,i),train_set_);
     33      double area = score_object.score(target,VectorView(data,i,false));
    3834      std::pair<size_t, double> tmp(i,area);
    3935      score.push_back(tmp);
     
    5248
    5349
    54   InputRanker::InputRanker(const gslapi::matrix& data,
    55                            const gslapi::vector& target,
    56                            const gslapi::matrix& weight,
     50  InputRanker::InputRanker(const DataView& data,
     51                           const Target& target,
    5752                           statistics::Score& score_object,
    58                            const std::vector<size_t>& train_set)
    59     :train_set_(train_set),
    60      id_(std::vector<size_t>(data.rows())),
    61      rank_(std::vector<size_t>(data.rows()))
    62  
     53                           const DataView& weight)
    6354  {
    6455    size_t nof_genes = data.rows();
     
    7263    std::vector<std::pair<size_t, double> > score;
    7364    for (size_t i=0; i<nof_genes; i++) {
    74       double area = score_object.score(target, gslapi::vector(data,i),
    75                                        gslapi::vector(weight,i), train_set_);
     65      double area = score_object.score(target, VectorView(data,i,false),
     66                                       VectorView(weight,i,false));
    7667      std::pair<size_t, double> tmp(i,area);
    7768      score.push_back(tmp);
  • branches/peters_vector/lib/classifier/InputRanker.h

    r451 r469  
    1515}
    1616namespace classifier { 
     17
     18  class DataView;
     19  class Target;
     20
    1721  ///
    1822  /// Class for ranking rows in a matrix, using a Score and a
     
    2428  public:
    2529    ///
    26     /// Default constructor.
    27     ///
    28     inline InputRanker(void) : train_set_(std::vector<size_t>()),
    29                                id_(std::vector<size_t>()),
    30                                rank_(std::vector<size_t>()) {}
    31 
    32     ///
    3330    /// Constructor taking data, target, a Score
    3431    /// object and vector defining what samples to use (default is to
    3532    /// use all samples)
    3633    ///
    37     InputRanker(const gslapi::matrix&,
    38                 const gslapi::vector&,
    39                 statistics::Score&,
    40                 const std::vector<size_t>& = std::vector<size_t>());
     34    InputRanker(const DataView&, const Target&, statistics::Score&);
     35
    4136
    4237    ///
     
    4540    /// use all samples)
    4641    ///
    47     InputRanker(const gslapi::matrix&,
    48                 const gslapi::vector&,
    49                 const gslapi::matrix&,
    50                 statistics::Score&,
    51                 const std::vector<size_t>& = std::vector<size_t>());
     42    InputRanker(const DataView&, const Target&, statistics::Score&,
     43                const DataView&);
     44
    5245
    5346    ///
  • branches/peters_vector/lib/classifier/Kernel_MEV.h

    r451 r469  
    66#include <c++_tools/classifier/Kernel.h>
    77#include <c++_tools/classifier/KernelFunction.h>
     8#include <c++_tools/gslapi/vector.h>
    89#include <c++_tools/gslapi/matrix.h>
    910
  • branches/peters_vector/lib/classifier/Makefile.am

    r467 r469  
    2121  PolynomialKernelFunction.cc \
    2222  SVM.cc \
     23  Target.cc \
    2324  VectorView.cc
     25
    2426
    2527include_classifierdir = $(includedir)/c++_tools/classifier
     
    4042  PolynomialKernelFunction.h \
    4143  SVM.h \
    42   VectorAbstract.h \
     44  Target.h \
    4345  VectorView.h
  • branches/peters_vector/lib/classifier/MatrixView.cc

    r455 r469  
    99
    1010
     11  MatrixView::MatrixView(const MatrixView& m,
     12                         const std::vector<size_t>& row,
     13                         const std::vector<size_t>& col)
     14  {
     15    for (size_t i=0; i<row.size(); i++)
     16      row_index_[i] = m.row_index_[row[i]];
     17    for (size_t i=0; i<col.size(); i++)
     18      column_index_[i] = m.column_index_[col[i]];
     19  }
     20   
     21
     22
     23  MatrixView::MatrixView(const MatrixView& m,
     24                         const std::vector<size_t>& index,
     25                         const bool row)
     26  {
     27    if (row){
     28      row_index_=index;
     29      column_index_= m.column_index_;
     30    }
     31    else{
     32      column_index_=index;
     33      row_index_= m.row_index_;
     34    }
     35  }   
     36
     37
    1138  MatrixView::MatrixView(const std::vector<size_t>& row,
    1239                         const std::vector<size_t>& col)
     
    1542  }
    1643
     44
     45
    1746  MatrixView::MatrixView(const std::vector<size_t>& index, bool row)
    1847  {
    19     if(row)
    20       row_index_=index;
    21     else
    22       column_index_=index;
    2348  }
    2449
  • branches/peters_vector/lib/classifier/MatrixView.h

    r455 r469  
    3737
    3838    ///
     39    ///
     40    ///
     41    MatrixView(const MatrixView&, const std::vector<size_t>& row,
     42               const std::vector<size_t>& col);
     43
     44    ///
     45    ///
     46    ///
     47    MatrixView(const MatrixView&, const std::vector<size_t>& index,
     48               const bool row);
     49
     50
     51    ///
    3952    /// Destructor
    4053    ///
    4154    virtual ~MatrixView() {};
    4255
     56    ///
     57    /// @return number of columns
     58    ///
    4359    inline size_t columns(void) const { return column_index_.size(); }
    4460
     61    ///
     62    /// @return number of rows
     63    ///
    4564    inline size_t rows(void) const { return row_index_.size(); }
    4665
     66    ///
     67    /// @return
     68    ///
    4769    virtual double operator()(const size_t row, const size_t column) const=0;
    4870
  • branches/peters_vector/lib/classifier/NCC.cc

    r456 r469  
    44
    55#include <c++_tools/classifier/DataView.h>
     6#include <c++_tools/classifier/Target.h>
    67#include <c++_tools/gslapi/vector.h>
    78#include <c++_tools/statistics/Averager.h>
     9#include <c++_tools/statistics/AveragerPairWeighted.h>
    810#include <c++_tools/utility/stl_utility.h>
    911
     
    1618namespace classifier {
    1719
    18   NCC::NCC(const DataView& data, const gslapi::vector& target)
     20  NCC::NCC(const DataView& data, const Target& target)
    1921  {
    20     gslapi::vector sorted_target(target);
     22    Target sorted_target(target);
    2123    sorted_target.sort(); // if targets contain NaN => infinite loop
    2224   
     
    5153  {
    5254    prediction=gslapi::vector(classes_.size());   
    53     std::vector<size_t> use;
     55    gslapi::vector w(input.size(),0);
    5456    for(size_t i=0; i<input.size(); i++)  // take care of missing values
    5557      if(!std::isnan(input(i)))
    56         use.push_back(i);
    57     for(size_t j=0; j<centroids_.columns(); j++)
    58       prediction(j)=score.score(input,gslapi::vector(centroids_,j,false),use);
     58        w(i)=1.0;
     59    statistics::AveragerPairWeighted ap;
     60    for(size_t j=0; j<centroids_.columns(); j++) {
     61      ap.add(input,gslapi::vector(centroids_,j,false),w, w);
     62      prediction(j)=ap.correlation();
     63      ap.reset();
     64    }
    5965  }
    6066
  • branches/peters_vector/lib/classifier/NCC.h

    r456 r469  
    44#define _theplu_classifier_ncc_
    55
    6 #include <c++_tools/classifier/DataView.h>
    76#include <c++_tools/gslapi/matrix.h>
    8 #include <c++_tools/statistics/Score.h>
    97
    108#include <map>
    119
    1210namespace theplu {
     11namespace gslapi {
     12  class vector;
     13}
     14namespace statistics {
     15  class Score;
     16}
     17
    1318namespace classifier { 
    1419
     20  class DataView;
    1521  class Score;
     22  class Target;
     23  class VectorView;
    1624
    1725  ///
     
    3240    /// input. Performs the training of the NCC.
    3341    ///
    34     NCC(const DataView&, const gslapi::vector&);
     42    NCC(const DataView&, const Target&);
    3543
    3644    ///
  • branches/peters_vector/lib/classifier/PolynomialKernelFunction.h

    r451 r469  
    1111
    1212namespace theplu {
    13 
    14 namespace gslapi {
    15   class vector;
    16 }
    17 
    1813namespace classifier {
    1914
  • branches/peters_vector/lib/classifier/SVM.cc

    r463 r469  
    137137  }
    138138
    139   SVM::SVM(const KernelView& kernel, const gslapi::vector& target)
     139  SVM::SVM(const KernelView& kernel, const Target& target)
    140140  : alpha_(target.size(),0),
    141141    bias_(0),
     
    149149    tolerance_(0.00000001)
    150150  {
    151     if (max_epochs_>ULONG_MAX)
    152       max_epochs_=ULONG_MAX;
    153151  }
    154152
  • branches/peters_vector/lib/classifier/SVM.h

    r461 r469  
    55
    66#include <c++_tools/classifier/KernelView.h>
     7#include <c++_tools/classifier/Target.h>
    78#include <c++_tools/gslapi/vector.h>
    89
     
    106107    /// the SVM is no longer defined.
    107108    ///
    108     SVM(const KernelView&, const gslapi::vector&);
     109    SVM(const KernelView&, const Target&);
    109110
    110111    ///
     
    209210    gslapi::vector output_;
    210211    Index sample_;
    211     const gslapi::vector& target_;
     212    const Target& target_;
    212213    bool trained_;
    213214    double tolerance_;
  • branches/peters_vector/lib/classifier/VectorAbstract.h

    r467 r469  
    3535
    3636
     37
    3738  }; 
    3839 
  • branches/peters_vector/lib/classifier/VectorView.cc

    r467 r469  
    77namespace classifier {
    88
    9   VectorView::VectorView(const gslapi::vector& vec)
    10     : VectorAbstract(), vector_(&vec)
    11   {
    12     for(size_t i=0;i<vec.size();i++)
    13       index_.push_back(i);
    14   }
    15  
    16   VectorView::VectorView(const gslapi::vector& vec,
    17                          const std::vector<size_t>& index)
    18     : VectorAbstract(), index_(index), vector_(&vec)
     9  VectorView::VectorView(const MatrixView& m, const size_t i, bool row=false)
     10    : VectorAbstract(), column_vector_(row), index_(i), matrix_(&m)
    1911  {
    2012  }
  • branches/peters_vector/lib/classifier/VectorView.h

    r467 r469  
    55
    66#include <c++_tools/classifier/VectorAbstract.h>
    7 #include <c++_tools/gslapi/vector.h>
    87
    98#include <vector>
     
    1110namespace theplu {
    1211namespace classifier { 
     12
     13  class MatrixView;
    1314
    1415  ///
     
    2122  public:
    2223
    23 
    2424    ///
    2525    /// Constructor
    2626    ///
    27     VectorView(const gslapi::vector&);
    28 
    29     ///
    30     /// Constructor taking the index vector as input.
    31     ///
    32     VectorView(const gslapi::vector&, const std::vector<size_t>&);
     27    VectorView(const MatrixView&, const size_t, const bool row);
    3328
    3429    ///
    35     /// Destructor
    3630    ///
    37     ~VectorView() {};
     31    ///
     32    size_t size(void) const;
    3833
    39     inline size_t size(void) const { return index_.size(); }
    40 
    41     inline const double& operator()(const size_t i) const
    42     { return (*vector_)(index_[i]); }
     34    ///
     35    ///
     36    ///
     37    const double& operator()(const size_t) const;
    4338
    4439  private:
    45     std::vector<size_t> index_;
    46     const gslapi::vector* vector_;
     40    VectorView();
     41
     42    const bool column_vector_;
     43    const size_t index_;
     44    const MatrixView* matrix_;
     45
    4746  }; 
    4847 
  • branches/peters_vector/lib/gslapi/vector.cc

    r439 r469  
    178178
    179179
    180   double vector::sum(void) const
     180  double VectorAbstract::sum(void) const
    181181  {
    182182    double sum = 0;
    183183    for (size_t i=0; i<size(); i++)
    184       sum += gsl_vector_get( v_, i );
     184      sum += (*this)(i);
    185185    return( sum );
    186186  } 
    187 
    188 
    189 
    190   double vector::operator*( const vector &other ) const
    191   {
    192     // Jari, check for gsl_support
    193     double res = 0.0;;
    194     for ( size_t i = 0; i < size(); ++i )
    195       res += other[i] * (*this)[i];
    196     return res;
    197   }
    198 
    199 
    200187
    201188  bool vector::operator==(const vector& a) const
     
    207194        return false;
    208195    return true;
     196  }
     197
     198
     199
     200  double VectorAbstract::operator*( const VectorAbstract &other ) const
     201  {
     202    double res = 0.0;;
     203    for ( size_t i = 0; i < size(); ++i )
     204      res += other(i) * (*this)(i);
     205    return res;
    209206  }
    210207
  • branches/peters_vector/lib/gslapi/vector.h

    r468 r469  
    44#define _theplu_gslapi_vector_
    55
    6 #include <c++_tools/classifier/VectorAbstract.h>
    76#include <c++_tools/utility/Exception.h>
    87
     
    4039  /// community.
    4140  ///
    42   class vector : public classifier::VectorAbstract
     41  class vector
    4342  {
    4443  public:
     
    337336    /// @return The sum.
    338337    ///
    339     // Jari, doxygen group as "Extends GSL".
    340338    double sum(void) const;
    341339
     
    390388    inline const double&
    391389    operator[](size_t i) const { return *gsl_vector_const_ptr(v_,i); }
    392 
    393     ///
    394     /// @return The dot product.
    395     ///
    396     double operator*( const vector &other ) const;
    397390
    398391    ///
     
    420413
    421414    ///
     415    /// @return The dot product.
     416    ///
     417    double operator*( const vector &other ) const;
     418
     419    ///
    422420    /// Addition and assign operator.
    423421    ///
  • branches/peters_vector/lib/statistics/AveragerPair.h

    r427 r469  
    1111
    1212namespace theplu{
    13 
    14   class gslapi::vector;
    15 
    1613namespace statistics{
    1714  ///
  • branches/peters_vector/lib/statistics/AveragerWeighted.h

    r420 r469  
    8888
    8989    ///
     90    /// The variance is calculated as \f$ \frac{\sum w_i (x_i - m)^2
     91    /// }{\sum w_i} \f$.
     92    ///
     93    /// @return The variance.
     94    ///
     95    inline double variance(void) const
     96    { return sum_xx_centered()/sum_w(); }
     97
     98    ///
    9099    /// The variance is calculated as \f$ \frac{\sum w_i}{\left(\sum
    91100    /// w_i\right)^2 - \sum w_i^2} \sum w_i (x_i - m)^2 \f$. This
     
    94103    /// @return The variance.
    95104    ///
    96     inline double variance(void) const
     105    inline double variance_unbiased(void) const
    97106    { return sum_ww() / (sum_w()*sum_w()-sum_ww()) * sum_xx_centered(); }
    98107
  • branches/peters_vector/lib/statistics/Fisher.cc

    r449 r469  
    44#include <c++_tools/statistics/Score.h>
    55#include <c++_tools/statistics/utility.h>
     6#include <c++_tools/classifier/Target.h>
    67
    78#include <gsl/gsl_cdf.h>
     
    1314 
    1415  Fisher::Fisher(bool b)
    15     : Score(b), a_(0), b_(0), c_(0), d_(0), cutoff_column_(0), cutoff_row_(0),
    16       oddsratio_(1.0)
     16    : Score(b), a_(0), b_(0), c_(0), d_(0), oddsratio_(1.0), value_cutoff_(0)
    1717  {
    1818  }
     
    4444    // If a column sum or a row sum is zero, the table is nonsense
    4545    if ((a==0 || d==0) && (c==0 || b==0)){
    46       //Peter, should through exception
     46      //Peter, should throw exception
    4747      std::cerr << "Warning: Fisher: Table is not valid\n";
    4848      return oddsratio_ = 1.0;
     
    5959  {
    6060    double p=1;
    61     if (a_<minimum_size_ || b_<minimum_size_ ||
    62         c_<minimum_size_ || d_<minimum_size_){
     61    if ( ( a_<minimum_size_ || b_<minimum_size_ ||
     62           c_<minimum_size_ || d_<minimum_size_) && !weighted_ ){
    6363
    6464      p=p_value_exact();
     
    7171      if (oddsratio_<0.5){
    7272        // last term because >= not equal to !(<=)
    73         return 1-p+gsl_ran_hypergeometric_pdf(a_, a_+b_, c_+d_, a_+c_);
     73        u_int a = static_cast<u_int>(a_);
     74        u_int b = static_cast<u_int>(b_);
     75        u_int c = static_cast<u_int>(c_);
     76        u_int d = static_cast<u_int>(d_);
     77        return 1-p+gsl_ran_hypergeometric_pdf(a, a+b, c+d, a+c);
    7478      }
    7579    }
     
    8488
    8589  double Fisher::p_value_exact() const
    86   {
     90  {
     91    u_int a = static_cast<u_int>(a_);
     92    u_int b = static_cast<u_int>(b_);
     93    u_int c = static_cast<u_int>(c_);
     94    u_int d = static_cast<u_int>(d_);
     95   
    8796    // Since the calculation is symmetric and cdf_hypergeometric_P
    8897    // loops up to k we choose the smallest number to be k and mirror
    8998    // the matrix. This choice makes the p-value two-sided.
    90     if (a_<b_ && a_<c_ && a_<d_)
    91       return statistics::cdf_hypergeometric_P(a_,a_+b_,c_+d_,a_+c_);
    92     else if (b_<a_ && b_<c_ && b_<d_)
    93       return  statistics::cdf_hypergeometric_P(b_,a_+b_,c_+d_,b_+d_);
    94     else if (c_<a_ && c_<b_ && c_<d_)
    95       return  statistics::cdf_hypergeometric_P(c_,c_+d_,a_+b_,a_+c_);
    9699
    97     return statistics::cdf_hypergeometric_P(d_,c_+d_,a_+b_,b_+d_);
     100    if (a<b && a<c && a<d)
     101      return statistics::cdf_hypergeometric_P(a,a+b,c+d,a+c);
     102    else if (b<a && b<c && b<d)
     103      return  statistics::cdf_hypergeometric_P(b,a+b,c+d,b+d);
     104    else if (c<a && c<b && c<d)
     105      return  statistics::cdf_hypergeometric_P(c,c+d,a+b,a+c);
     106
     107    return statistics::cdf_hypergeometric_P(d,c+d,a+b,b+d);
    98108
    99109  }
    100110
    101111
    102   double Fisher::score(const gslapi::vector& x,
    103                        const gslapi::vector& y,
    104                        const std::vector<size_t>& train_set)
     112    double Fisher::score(const classifier::Target& target,
     113                         const classifier::VectorAbstract& value)
    105114  {
    106     if (!train_set.size()){
    107       train_set_.resize(0);
    108       for (size_t i=0; i<target_.size(); i++)
    109         train_set_.push_back(i); 
    110     }
    111     else
    112       train_set_ = train_set;
     115    weighted_=false;
    113116    a_=b_=c_=d_=0;
    114     for (size_t i=0; i<train_set_.size(); i++)
    115       if (x(train_set_[i])<cutoff_column_)
    116         if (y(train_set_[i])<cutoff_row_)
     117    for (size_t i=0; i<target.size(); i++)
     118      if (class_one(target(i)))
     119        if (value(i)>value_cutoff_)
    117120          a_++;
    118121        else
    119122          c_++;
    120123      else
    121         if (y(train_set_[i])<cutoff_row_)
     124        if (value(i)>value_cutoff_)
    122125          b_++;
    123126        else
    124127          d_++;
    125128       
    126     // If a column sum or a row sum is zero, the table is nonsense
     129    // If a column sum or a row sum is zero, the table is non-sense
    127130    if ((a_==0 || d_==0) && (c_==0 || b_==0)){
    128131      std::cerr << "Warning: Fisher: Table is not valid\n";
    129       return 0;
     132      return 1;
    130133    }
    131134
     
    133136  }
    134137 
    135   double Fisher::score(const gslapi::vector& x,
    136                        const gslapi::vector& y,             
    137                        const gslapi::vector& w,
    138                        const std::vector<size_t>& train_set)
     138    double Fisher::score(const classifier::Target& target,
     139                         const classifier::VectorAbstract& value,
     140                         const classifier::VectorAbstract& weighted)
    139141  {
    140     if (!train_set.size()){
    141       train_set_.resize(0);
    142       for (size_t i=0; i<target_.size(); i++)
    143         train_set_.push_back(i); 
     142    weighted_=true;
     143    a_=b_=c_=d_=0;
     144    for (size_t i=0; i<target.size(); i++)
     145      if (class_one(target(i)))
     146        if (value(i)>value_cutoff_)
     147          a_+=value(i);
     148        else
     149          c_+=value(i);
     150      else
     151        if (value(i)>value_cutoff_)
     152          b_+=value(i);
     153        else
     154          d_+=value(i);
     155       
     156    // If a column sum or a row sum is zero, the table is non-sense
     157    if ((a_==0 || d_==0) && (c_==0 || b_==0)){
     158      std::cerr << "Warning: Fisher: Table is not valid\n";
     159      return 1;
    144160    }
    145     else
    146       train_set_ = train_set;
    147 
    148     double a=0;
    149     double b=0;
    150     double c=0;
    151     double d=0;
    152 
    153     for (size_t i=0; i<train_set_.size(); i++)
    154       if (x(train_set_[i]) < cutoff_column_)
    155         if (y(train_set_[i]) < cutoff_row_)
    156           a+=w(train_set_[i]);
    157         else
    158           c+=w(train_set_[i]);
    159       else
    160         if (y(train_set_[i]) < cutoff_column_)
    161           b+=w(train_set_[i]);
    162         else
    163           d+=w(train_set_[i]);
    164161
    165162    return oddsratio(a_,b_,c_,d_);
    166163  }
    167 
     164 
    168165  double Fisher::score(const u_int a, const u_int b,
    169166                       const u_int c, const u_int d)
  • branches/peters_vector/lib/statistics/Fisher.h

    r449 r469  
    6464    ///
    6565    /// Cutoff sets the limit whether a value should go into the left
    66     /// or the right column. @see score
    67     ///
    68     /// @return reference to cutoff for column
    69     ///
    70     inline double& cutoff_column(void) { return cutoff_column_; }
    71 
    72     ///
    73     /// Cutoff sets the limit whether a value should go into the left
    7466    /// or the right row. @see score
    7567    ///
    7668    /// @return reference to cutoff for row
    7769    ///
    78     inline double& cutoff_row(void) { return cutoff_row_; }
     70    inline double& value_cutoff(void) { return value_cutoff_; }
    7971
    8072    ///
     
    9991    /// @return p-value
    10092    ///
     93    /// @note in weighted case, approximation Chi2 is always used.
     94    ///
    10195    double p_value() const;
    10296   
     
    10498    /// Function calculating score from 2x2 table for which the
    10599    /// elements are calculated as follows \n
    106     /// a: #data \f$ x=1 \f$ AND \f$ y=1 \f$ \n
    107     /// b: #data \f$ x=-1 \f$ AND \f$ y=1 \f$ \n
    108     /// c: #data \f$ x=1 \f$ AND \f$ y=-1 \f$ \n
    109     /// d: #data \f$ x=-1 \f$ AND \f$ y=1 \f$ \n
     100    /// target=positive_label (a,c)
     101    /// \f$ value > \f$ value_cutoff() a,b \n
    110102    ///
    111103    /// @return odds ratio. If absolute_ is true and odds ratio is
    112104    /// less than unity 1 divided by odds ratio is returned
    113105    ///
    114     double score(const gslapi::vector& x, const gslapi::vector& y, 
    115                  const std::vector<size_t>& = std::vector<size_t>());
     106    double score(const classifier::Target& target,
     107                 const classifier::VectorAbstract& value);
    116108
    117109    ///
     
    122114    /// @return odds ratio
    123115    ///
    124     /// @note
     116    /// @see score
    125117    ///
    126     double score(const gslapi::vector& x, const gslapi::vector& y, 
    127                  const gslapi::vector& w,
    128                  const std::vector<size_t>& = std::vector<size_t>());
     118    double score(const classifier::Target& target,
     119                 const classifier::VectorAbstract& value,
     120                 const classifier::VectorAbstract& weight);
    129121
    130122    ///
     
    148140    double p_value_exact(void) const;
    149141
    150     std::vector<size_t> train_set_;
    151     gslapi::vector weight_;
    152     u_int a_;
    153     u_int b_;
    154     u_int c_;
    155     u_int d_;
    156     double cutoff_column_;
    157     double cutoff_row_;
     142    double a_;
     143    double b_;
     144    double c_;
     145    double d_;
    158146    u_int minimum_size_;
    159147    double oddsratio_;
     148    double value_cutoff_;
    160149  };
    161150
  • branches/peters_vector/lib/statistics/FoldChange.cc

    r414 r469  
    11// $Id$
    22
    3 #include "FoldChange.h"
    4 
    5 #include "Score.h"
    6 #include "Averager.h"
    7 #include "AveragerWeighted.h"
     3#include <c++_tools/statistics/FoldChange.h>
     4#include <c++_tools/statistics/Score.h>
     5#include <c++_tools/statistics/Averager.h>
     6#include <c++_tools/statistics/AveragerWeighted.h>
     7#include <c++_tools/classifier/Target.h>
    88
    99namespace theplu {
     
    3131  }
    3232
    33   double FoldChange::score(const gslapi::vector& target,
    34                            const gslapi::vector& value,
    35                            const std::vector<size_t>& train_set)
     33  double FoldChange::score(const classifier::Target& target,
     34                           const classifier::VectorAbstract& value)
    3635  {
    37     if (!train_set_.size())
    38       for (size_t i=0; i<target_.size(); i++)
    39         train_set_.push_back(i); 
    40     else
    41       train_set_=train_set;
    42 
    4336    weighted_=false;
    4437    Averager pos;
    4538    Averager neg;
    4639
    47     for (size_t i=0; i<train_set.size(); i++)
    48       if (target(train_set[i])==1)
    49         pos.add(value(train_set[i]));
     40    for (size_t i=0; i<value.size(); i++)
     41      if (class_one(target(i)))
     42        pos.add(value(i));
    5043      else
    51         neg.add(value(train_set[i]));
     44        neg.add(value(i));
    5245         
    5346    if (absolute_)
     
    5649  }
    5750
    58   double FoldChange::score(const gslapi::vector& target,
    59                            const gslapi::vector& value,
    60                            const gslapi::vector& weight,
    61                            const std::vector<size_t>& train_set)
     51  double FoldChange::score(const classifier::Target& target,
     52                           const classifier::VectorAbstract& value,
     53                           const classifier::VectorAbstract& weight)
    6254  {
    63     if (!train_set_.size())
    64       for (size_t i=0; i<target_.size(); i++)
    65         train_set_.push_back(i); 
    66     else
    67       train_set_=train_set;
    68 
    6955    weighted_=true;
    7056    AveragerWeighted pos;
    7157    AveragerWeighted neg;
    7258
    73     for (size_t i=0; i<train_set.size(); i++)
    74       if (target(train_set[i])==1)
    75         pos.add(value(train_set[i]),weight(train_set[i]));
     59    for (size_t i=0; i<value.size(); i++)
     60      if (class_one(target(i)))
     61        pos.add(value(i),weight(i));
    7662      else
    77         neg.add(value(train_set[i]),weight(train_set[i]));
     63        neg.add(value(i),weight(i));
    7864         
    7965    if (absolute_)
  • branches/peters_vector/lib/statistics/FoldChange.h

    r414 r469  
    77
    88namespace theplu {
     9
     10  class classifier::VectorAbstract;
     11
     12
    913namespace statistics {
    1014
     
    2832    /// @param target is +1 or -1
    2933    /// @param value vector of the values
    30     /// @train_set defining which values to use (number of values used
    31     /// in the calculation is equal to size of \a train_set)
    3234    ///
    33     double
    34     score(const gslapi::vector& target,
    35           const gslapi::vector& value,
    36           const std::vector<size_t>& train_set = std::vector<size_t>());
     35    double score(const classifier::Target& target,
     36                 const classifier::VectorAbstract& value);
    3737 
    3838    ///
     
    4545    /// in the calculation is equal to size of \a train_set)
    4646    ///
    47     double
    48     score(const gslapi::vector& target,
    49           const gslapi::vector& value,
    50           const gslapi::vector& weight,
    51           const std::vector<size_t>& train_set = std::vector<size_t>());
     47    double score(const classifier::Target& target,
     48                 const classifier::VectorAbstract& value,
     49                 const classifier::VectorAbstract& weight);
    5250 
    5351  private:
  • branches/peters_vector/lib/statistics/Makefile.am

    r461 r469  
    88noinst_LTLIBRARIES = libstatistics.la
    99libstatistics_la_SOURCES = \
    10   Averager.cc AveragerPair.cc AveragerWeighted.cc Fisher.cc FoldChange.cc \
     10  Averager.cc AveragerPair.cc AveragerWeighted.cc \
     11  AveragerPairWeighted.cc \
     12  Fisher.cc FoldChange.cc \
    1113  Histogram.cc Kernel.cc KernelBox.cc KernelTriCube.cc Linear.cc \
    1214  LinearWeighted.cc Local.cc \
     
    1820
    1921include_statistics_HEADERS = \
    20   Averager.h AveragerPair.h AveragerWeighted.h Fisher.h FoldChange.h \
     22  Averager.h AveragerPair.h AveragerWeighted.h \
     23  AveragerPairWeighted.h \
     24  Fisher.h FoldChange.h \
    2125  Histogram.h Kernel.h KernelBox.h KernelTriCube.h Linear.h LinearWeighted.h \
    2226  Local.h MultiDimensional.h Naive.h NaiveWeighted.h OneDimensional.h \
  • branches/peters_vector/lib/statistics/Pearson.cc

    r295 r469  
    33
    44#include <c++_tools/statistics/Pearson.h>
    5 
    65#include <c++_tools/statistics/AveragerPair.h>
     6#include <c++_tools/statistics/AveragerPairWeighted.h>
    77#include <c++_tools/gslapi/vector.h>
     8#include <c++_tools/classifier/Target.h>
    89
    910#include <cmath>
     
    1920  }
    2021
    21   void Pearson::centralize(gslapi::vector& x, const gslapi::vector& w)
    22   {
    23     double m = (x*w)/w.sum();
    24     for (size_t i=0; i<x.size(); i++)
    25       x(i)=x(i)-m;
    26   }
    27 
    2822  double Pearson::p_value() const
    2923  {
    3024    if(weighted_)
    3125      return 1;
    32     else if(nof_samples_<3){
     26    if(nof_samples_<2){
    3327      std::cerr << "Warning: Only " << nof_samples_ << "samples. "
    3428                << "Need at lest 3.\n";
    3529      return 1;
    3630    }
    37     else{
    38       double t = sqrt(nof_samples_ - 2)*fabs(r_) /sqrt(1-r_*r_);
    39       return 2*gsl_cdf_tdist_Q (t, nof_samples_ -2 );
    40     }
     31
     32    double t = sqrt(nof_samples_ - 2)*fabs(r_) /sqrt(1-r_*r_);
     33    double p = gsl_cdf_tdist_Q(t, nof_samples_ -2 );
     34    if (absolute_)
     35      return 2*p;
     36    if (r_<0)
     37      return 1-p;
     38    return p;
     39
    4140  }
    4241
    43   double Pearson::score(const gslapi::vector& vec1, const gslapi::vector& vec2,
    44                         const std::vector<size_t>& train_set)
     42  double Pearson::score(const classifier::Target& target,
     43                        const classifier::VectorAbstract& value)
    4544  {
    4645    weighted_=false;
    4746    AveragerPair ap;
    48     if (!train_set.size()){
    49       ap = AveragerPair(vec1.sum(),vec1*vec1,vec2.sum(),vec2*vec2,
    50                         vec1*vec2,vec1.size());
    51       nof_samples_ = vec1.size();
    52     }
    53     else{
    54       for (size_t i=0; i<train_set.size(); i++){
    55         ap.add(vec1(train_set[i]), vec2(train_set[i]));
    56       }
    57       nof_samples_ = train_set.size();
     47    for (size_t i=0; i<target.size(); i++){
     48      if (class_one(target(i)))
     49        ap.add(1, value(i));
     50      else
     51        ap.add(-1, value(i));
     52      nof_samples_ = target.size();
    5853    }
    5954    r_ = ap.correlation();
    6055    if (r_<0 && absolute_)
    61       r_=-r_;
     56      return -r_;
    6257     
    6358    return r_;
    6459  }
    6560   
    66   double Pearson::score(const gslapi::vector& vec1, const gslapi::vector& vec2,
    67                         const gslapi::vector& w1, const gslapi::vector& w2,
    68                         const std::vector<size_t>& train_set =
    69                         std::vector<size_t>())
     61  double Pearson::score(const classifier::Target& target,
     62                        const classifier::VectorAbstract& value,
     63                        const classifier::VectorAbstract& weight)
    7064  {
    7165    weighted_=true;
    72     gslapi::vector x;
    73     gslapi::vector y;
    74     gslapi::vector wx;
    75     gslapi::vector wy;
    76     if (!train_set.size()){
    77       x = vec1;
    78       y = vec2;
    79       wx = w1;
    80       wy = w2;
    81       nof_samples_ = vec1.size();
     66    AveragerPairWeighted ap;
     67    for (size_t i=0; i<target.size(); i++){
     68      if (class_one(target(i)))
     69        ap.add(1, value(i),1,weight(i));
     70      else
     71        ap.add(-1, value(i),1,weight(i));
     72      nof_samples_ = target.size();
    8273    }
    83     else{
    84       nof_samples_ = train_set.size();
    85       x = gslapi::vector(nof_samples_);
    86       y = gslapi::vector(nof_samples_);
    87       wx = gslapi::vector(nof_samples_);
    88       wy = gslapi::vector(nof_samples_);
    89       for (size_t i=0; i<train_set.size(); i++){
    90         x(i)=vec1(train_set[i]);
    91         y(i)=vec2(train_set[i]);
    92         wx(i)=w1(train_set[i]);
    93         wy(i)=w2(train_set[i]);
    94       }
    95     }
    96 
    97     centralize(x,wx);
    98     centralize(y,wy);
    99 
    100     x.mul(wx);
    101     y.mul(wy);
    102 
    103     r_ = ( (x*y) / sqrt( (x*x)*(y*y)) );
    104     weighted_=true;
    105 
     74    r_ = ap.correlation();
    10675    if (r_<0 && absolute_)
    107       r_=-r_;
     76      return -r_;
    10877     
    10978    return r_;
  • branches/peters_vector/lib/statistics/Pearson.h

    r414 r469  
    77
    88namespace theplu {
    9   class gslapi::vector;
     9namespace classifier {
     10  class VectorAbstract;
     11}
    1012
    1113namespace statistics { 
     
    3638    /// of Pearson is used.
    3739    ///
    38     double score(const gslapi::vector&, const gslapi::vector&, 
    39                  const std::vector<size_t>& = std::vector<size_t>());
     40    double score(const classifier::Target& target,
     41                 const classifier::VectorAbstract& value);
    4042
    4143    ///
     
    4850    /// correlation.
    4951    ///
    50     inline double score(const gslapi::vector& x, const gslapi::vector& y,
    51                         const gslapi::vector& w,
    52                         const std::vector<size_t>& train_set =
    53                         std::vector<size_t>())
    54     { return score(x,y,w,w,train_set); }
     52    double score(const classifier::Target& target,
     53                 const classifier::VectorAbstract& value,
     54                 const classifier::VectorAbstract& weight);
    5555
    56     ///
    57     /// \f$ \frac{\vert \sum_iw^x_iw^y_i(x_i-m_x)(y_i-m_y)\vert }
    58     /// {\sqrt{\sum_iw^x_iw^y_i(x_i-m_x)^2 ///
    59     /// \sum_iw^x_iw^y_i(y_i-m_y)^2}}\f$, where \f$m_x = \frac{\sum
    60     /// w^x_iw^y_ix_i}{\sum w^x_iw^y_i}\f$ and \f$m_y = \frac{\sum
    61     /// w_ix_i}{\sum w_i}\f$. This expression is chosen to get a
    62     /// correlation equal to unity when \a x and \a y are
    63     /// equal. @return absolute value of weighted version of Pearson
    64     /// correlation.
    65     ///
    66     double score(const gslapi::vector& x, const gslapi::vector& y, 
    67                  const gslapi::vector& wx, const gslapi::vector& wy,
    68                  const std::vector<size_t>&);
    69    
    7056    ///
    7157    /// The p-value is the probability of getting a correlation as
     
    8268
    8369
    84     void centralize(gslapi::vector&, const gslapi::vector&);
     70    //    void centralize(gslapi::vector&, const gslapi::vector&);
    8571  };
    8672
  • branches/peters_vector/lib/statistics/ROC.cc

    r303 r469  
    44
    55#include <c++_tools/utility/stl_utility.h>
    6 #include <c++_tools/gslapi/vector.h>
     6#include <c++_tools/classifier/VectorAbstract.h>
    77
    88#include <gsl/gsl_cdf.h>
     
    1515
    1616namespace theplu {
     17  class classifier::VectorAbstract;
    1718namespace statistics { 
    1819
    1920  ROC::ROC(bool b)
    20     : Score(b), area_(-1), minimum_size_(10), nof_pos_(0), 
    21       value_(std::vector<std::pair<double, double> >())
    22      
    23        
     21    : Score(b), area_(0.5), minimum_size_(10), nof_pos_(0) 
    2422  {
    2523  }
     
    3028    // Not integrating from the middle of the bin, but from the inner edge.
    3129    if (x>0)
    32       x -= 0.5/nof_pos_/(value_.size()-nof_pos_);
     30      x -= 0.5/nof_pos_/(n()-nof_pos_);
    3331    else if(x<0)
    34       x += 0.5/nof_pos_/(value_.size()-nof_pos_);
     32      x += 0.5/nof_pos_/(n()-nof_pos_);
    3533
    36     double sigma = (std::sqrt( (value_.size()-nof_pos_)*nof_pos_*
    37                                (value_.size()+1.0)/12 ) /
    38                     ( value_.size() - nof_pos_ ) / nof_pos_ );
     34    double sigma = (std::sqrt( (n()-nof_pos_)*nof_pos_*
     35                               (n()+1.0)/12 ) /
     36                    ( n() - nof_pos_ ) / nof_pos_ );
    3937    double p = gsl_cdf_gaussian_Q(x, sigma);
    4038   
     
    6260    if (weighted_)
    6361      return 1.0;
    64     else if (nof_pos_ < minimum_size_ & value_.size()-nof_pos_ < minimum_size_)
    65       return get_p_exact(area_*nof_pos_*(value_.size()-nof_pos_),
    66                           nof_pos_, value_.size()-nof_pos_);
     62    else if (nof_pos_ < minimum_size_ & n()-nof_pos_ < minimum_size_)
     63      return get_p_exact(area_*nof_pos_*(n()-nof_pos_),
     64                          nof_pos_, n()-nof_pos_);
    6765    else
    6866      return get_p_approx(area_);
     
    7068  }
    7169
    72   double ROC::score(const gslapi::vector& target, const gslapi::vector& data,
    73                     const std::vector<size_t>& train_set)
     70  double ROC::score(const classifier::Target& target,
     71                    const classifier::VectorAbstract& value)
    7472  {
    7573    weighted_=false;
    76     target_ = target;
    77     data_ = data;
    78     if (!train_set.size()){
    79       train_set_.resize(0);
    80       for (size_t i=0; i<target_.size(); i++)
    81         train_set_.push_back(i); 
     74
     75    vec_pair_.clear();
     76    vec_pair_.reserve(target.size());
     77    for (unsigned int i=0; i<target.size(); i++)
     78      vec_pair_.push_back(std::make_pair(target(i),value(i)));
     79    std::sort(vec_pair_.begin(),vec_pair_.end(),
     80              utility::pair_value_compare<int, double>());
     81
     82    area_ = 0;
     83    nof_pos_=0;
     84    for (size_t i=0; i<n(); i++){
     85      if (class_one(target(i))){
     86        area_+=i;
     87        nof_pos_++;
     88      }
    8289    }
    83     else
    84       train_set_ = train_set;
    85     sort();
    86     area_ = 0;
    87     for (size_t i=0; i<value_.size(); i++)
    88       if (value_[i].first==1)
    89         area_+=i;
    9090
    9191    // Normalizing the area to [0,1]
    9292    area_ = ( (area_-nof_pos_*(nof_pos_-1)/2 ) /
    93               (nof_pos_*(value_.size()-nof_pos_)) );
     93              (nof_pos_*(n()-nof_pos_)) );
    9494   
    9595    //Returning score larger 0.5 that you get by random
     
    100100  }
    101101
    102   double ROC::score(const gslapi::vector& target, const gslapi::vector& data,
    103                     const gslapi::vector& weight,
    104                     const std::vector<size_t>& train_set)
     102  double ROC::score(const classifier::Target& target,
     103                    const classifier::VectorAbstract& value,
     104                    const classifier::VectorAbstract& weight)
    105105  {
    106106    weighted_=true;
    107     target_ = target;
    108     data_ = data;
    109     weight_=weight;
    110     if (!train_set.size()){
    111       train_set_.resize(0);
    112       for (size_t i=0; i<target_.size(); i++)
    113         train_set_.push_back(i); 
    114     }
    115     else
    116       train_set_ = train_set;
    117     sort();
     107
     108    vec_pair_.clear();
     109    vec_pair_.reserve(target.size());
     110    for (unsigned int i=0; i<target.size(); i++)
     111      vec_pair_.push_back(std::make_pair(target(i),value(i)));
     112    std::sort(vec_pair_.begin(),vec_pair_.end(),
     113              utility::pair_value_compare<int, double>());
     114
    118115    area_=0;
     116    nof_pos_=0;
    119117    double max_area=0;
    120     //Peter, use the sort to skip some ifs and loops
    121     for (size_t i=0; i<value_.size(); i++){
    122       if (target_(train_set_[i])==1){
    123         for (size_t j=0; j<value_.size(); j++){
    124           if (target_(train_set_[j])!=1){
    125             if (data_(train_set_[i]) > data_(train_set_[j])){
    126               area_+=weight_(train_set_[i])*weight_(train_set_[j]);
     118
     119    for (size_t i=0; i<n(); i++){
     120      if (class_one(target(i))){
     121        for (size_t j=0; j<n(); j++){
     122          if (!class_one(target(j))){
     123            if (value(i) > value(j)){
     124              area_+=weight(i)*weight(j);
    127125            }
    128             max_area+=weight_(train_set_[i])*weight_(train_set_[j]);
     126            max_area+=weight(i)*weight(j);
    129127          }
    130128        }
     
    139137  }
    140138
    141   void ROC::sort()
     139  int ROC::target(const size_t i) const
    142140  {
    143     value_.resize(0);
    144     nof_pos_=0;
    145     for (unsigned int i=0; i<train_set_.size(); i++){
    146       std::pair<double, double> tmp(target_(train_set_[i]),
    147                                     data_(train_set_[i]));
    148       value_.push_back(tmp);
    149       if (target_(train_set_[i])==1)
    150         nof_pos_++;
    151     }
    152     std::sort(value_.begin(),value_.end(),
    153          utility::pair_value_compare<double, double>());
    154   }
    155 
    156   gslapi::vector ROC::target(void) const
    157   {
    158     gslapi::vector target(value_.size());
    159     for(size_t i=0; i<target.size(); ++i)
    160       target(i) = value_[i].first;
    161     return target;
     141    return vec_pair_[i].first;
    162142  }
    163143
     
    168148    double sens = 1;
    169149    double spec = 0;
    170     gslapi::vector target = r.target();
    171     size_t n = target.size();
    172     double nof_pos = (target.sum()+n)/2;
     150    size_t n = r.n();
     151    double nof_pos = r.n_pos();
    173152    for(size_t i=0; i<n-1; ++i) {
    174153      s << sens << "\t";
    175154      s << spec << "\n";
    176       if (target(i)==1)
     155      if (r.class_one(r.target(i)))
    177156        spec -= 1/(n-nof_pos);
    178157      else
  • branches/peters_vector/lib/statistics/ROC.h

    r465 r469  
    44#define _theplu_statistics_roc_
    55
    6 #include <c++_tools/gslapi/vector.h>
     6#include <c++_tools/classifier/Target.h>
    77#include <c++_tools/statistics/Score.h>
    88
     
    3232    /// Function taking \a value, \a target (+1 or -1) and vector
    3333    /// defining what samples to use. The score is equivalent to
    34     /// Mann-Whitney statistics. If target is equal to 1, sample
    35     /// belonges to class + otherwise sample belongs to class
    36     /// -. @return the area under the ROC curve. If the area is less
     34    /// Mann-Whitney statistics.
     35    /// @return the area under the ROC curve. If the area is less
    3736    /// than 0.5 and absolute=true, 1-area is returned. Complexity is
    3837    /// \f$ N\log N \f$ where \f$ N \f$ is number of samples.
    3938    ///
    40     double score(const gslapi::vector& target, const gslapi::vector& value,
    41                  const std::vector<size_t>& = std::vector<size_t>());
     39    double score(const classifier::Target& target,
     40                 const classifier::VectorAbstract& value);
    4241   
    4342    /// Function taking values, target, weight and a vector defining
     
    5251    /// of samples.
    5352    ///
    54     double score(const gslapi::vector& target, const gslapi::vector& value,
    55                  const gslapi::vector& weight, 
    56                  const std::vector<size_t>& = std::vector<size_t>());
     53    double score(const classifier::Target& target,
     54                 const classifier::VectorAbstract& value,
     55                 const classifier::VectorAbstract& weight);
    5756       
    5857
     
    7170    ///
    7271    double p_value(void) const;
    73          
    74     ///
    75     /// @return the targets in train_set sorted with respect to the
    76     /// corresponding data
    77     ///
    78     gslapi::vector target(void) const;
    79 
     72   
    8073    ///
    8174    /// minimum_size is the threshold for when a normal
     
    8679    inline u_int& minimum_size(void){ return minimum_size_; } 
    8780
    88   private:
    89     double area_;
    90     u_int minimum_size_;
    91     u_int nof_pos_;
    92     /// pair of target and data. should always be sorted with respect to
    93     /// data.
    94     std::vector<std::pair<double, double> > value_;
    95    
     81    int target(const size_t i) const;
     82    inline size_t n(void) const { return vec_pair_.size(); }
     83    inline size_t n_pos(void) const { return nof_pos_; }
     84
     85  private:
     86   
    9687    /// Implemented as in MatLab 13.1
    9788    double get_p_approx(const double) const;
     
    10091    double get_p_exact(const double, const double, const double) const;
    10192
    102     /// sorting value_, should always be done when changing train_set_
    103     void sort(void);
    104        
     93    double area_;
     94    u_int minimum_size_;
     95    u_int nof_pos_;
     96    std::vector<std::pair<int, double> > vec_pair_;
    10597  };
    10698
  • branches/peters_vector/lib/statistics/Score.cc

    r303 r469  
    77
    88  Score::Score(bool absolute)
    9     : absolute_(absolute), train_set_(std::vector<size_t>()), weighted_(true)
     9    : absolute_(absolute), positive_label_(1)
    1010  {
    1111  }
  • branches/peters_vector/lib/statistics/Score.h

    r428 r469  
    44#define _theplu_statistics_score_
    55
    6 #include <c++_tools/gslapi/vector.h>
     6namespace theplu {
     7namespace classifier {
     8  class Target;
     9  class VectorAbstract;
     10}
    711
    8 namespace theplu {
    912namespace statistics {
    1013
     
    3235
    3336    ///
     37    /// Targets with this label are considered to be in positive
     38    /// group. All others are considered to be in negative
     39    /// group. Default is 1.
     40    ///
     41    /// @return label for positive class
     42    ///
     43    inline int& positive_label(void) { return positive_label_; }
     44
     45    ///
    3446    /// Function calculating the score. In absolute mode, also the
    3547    /// score using negated class labels is calculated, and the
     
    4153    /// @param target vector of targets (most often +1 -1)
    4254    /// @param value vector of the values
    43     /// @train_set defining which values to use (number of values used
    44     /// in the calculation is equal to size of \a train_set)
    4555    ///
    4656    virtual double
    47     score(const gslapi::vector& target,
    48           const gslapi::vector& value,
    49           const std::vector<size_t>& train_set = std::vector<size_t>()) = 0;
     57    score(const classifier::Target& target,
     58          const classifier::VectorAbstract& value) = 0;
    5059 
    5160    ///
     
    6574    ///
    6675    virtual double
    67     score(const gslapi::vector& target,
    68           const gslapi::vector& value,
    69           const gslapi::vector& weight,
    70           const std::vector<size_t>& = std::vector<size_t>()) = 0;
     76    score(const classifier::Target& target,
     77          const classifier::VectorAbstract& value,
     78          const classifier::VectorAbstract& weight) = 0;
    7179
    72     ///
    73     /// @return the one-sided p-value( if absolute true is used
    74     /// this is equivalent to the two-sided p-value.)
    75     ///
    76     /// virtual double p_value(void) const = 0;
    77  
    78    
     80    inline bool class_one(int i) const { return i==positive_label_; }
    7981   
    8082  protected:
     83    inline bool weighted(void) const { return weighted_; }
     84
    8185    bool absolute_;
    82     gslapi::vector data_;   
    83     gslapi::vector target_;
    84     std::vector<size_t> train_set_;
    85     gslapi::vector weight_;
     86    int positive_label_;
    8687    bool weighted_;
    8788
  • branches/peters_vector/lib/statistics/WilcoxonFoldChange.cc

    r465 r469  
    22
    33#include <c++_tools/statistics/WilcoxonFoldChange.h>
     4#include <c++_tools/classifier/VectorAbstract.h>
    45#include <c++_tools/statistics/utility.h>
     6#include <c++_tools/classifier/Target.h>
    57
    68#include <cmath>
     9#include <vector>
    710
    811namespace theplu {
     
    1720
    1821
    19   double WilcoxonFoldChange::score(const gslapi::vector& target,
    20                                    const gslapi::vector& value,
    21                                    const std::vector<size_t>& train_set)
     22  double WilcoxonFoldChange::score(const classifier::Target& target,
     23                                   const classifier::VectorAbstract& value)
    2224  {
    23     if (!train_set_.size())
    24       for (size_t i=0; i<target_.size(); i++)
    25         train_set_.push_back(i); 
    26     else
    27       train_set_=train_set;
    28 
    2925    std::vector<double> distance;
    3026    //Peter, should reserve the vector to avoid reallocations   
    3127    weighted_=false;
    32     for (size_t i=0; i<train_set_.size(); i++) {
     28    for (size_t i=0; i<target.size(); i++) {
    3329      for (size_t j=0; i<j; j++) {
    34         if (target(train_set_[i])==target(train_set_[j])) continue;
    35         distance.push_back(value(train_set_[i])-value(train_set_[j]));
     30        if (target(i)==target(j)) continue;
     31        distance.push_back(value(i)-value(j));
    3632      }
    3733    }
     
    4137  }
    4238
    43   double WilcoxonFoldChange::score(const gslapi::vector& target,
    44                                    const gslapi::vector& value,
    45                                    const gslapi::vector& weight,
    46                                    const std::vector<size_t>& train_set)
     39  double WilcoxonFoldChange::score(const classifier::Target& target,
     40                                   const classifier::VectorAbstract& value,
     41                                   const classifier::VectorAbstract& weight)
    4742  {
    4843    return 0;
  • branches/peters_vector/lib/statistics/WilcoxonFoldChange.h

    r465 r469  
    2929    /// in the calculation is equal to size of \a train_set)
    3030    ///
    31     double
    32     score(const gslapi::vector& target,
    33           const gslapi::vector& value,
    34           const std::vector<size_t>& train_set = std::vector<size_t>());
     31    double score(const classifier::Target& target,
     32                 const classifier::VectorAbstract& value);
    3533 
    36     /// @todo
     34    ///
    3735    /// @return difference of the weighted means of the two classes
    3836    ///
    39     /// @param target is +1 or -1
    4037    /// @param value vector of the values
    4138    /// @param weight vector of accompanied weight to the values
     
    4542    /// @note not implemented
    4643    ///
    47     double
    48     score(const gslapi::vector& target,
    49           const gslapi::vector& value,
    50           const gslapi::vector& weight,
    51           const std::vector<size_t>& train_set = std::vector<size_t>());
     44    double score(const classifier::Target& target,
     45                 const classifier::VectorAbstract& value,
     46                 const classifier::VectorAbstract& weight);
    5247 
    5348  private:
  • branches/peters_vector/lib/statistics/tScore.cc

    r414 r469  
    77#include <c++_tools/statistics/tScore.h>
    88#include <c++_tools/statistics/Averager.h>
    9 #include <c++_tools/gslapi/vector.h>
    109#include <c++_tools/statistics/AveragerWeighted.h>
     10#include <c++_tools/classifier/Target.h>
     11#include <c++_tools/classifier/VectorAbstract.h>
    1112
    1213namespace theplu {
     
    1415
    1516  tScore::tScore(bool b)
    16     : Score(b),  t_(0), train_set_(), weight_()
     17    : Score(b),  t_(0)
    1718  {
    1819  }
    1920
    20   double tScore::score(const gslapi::vector& target,
    21                        const gslapi::vector& data,
    22                        const std::vector<size_t>& train_set)
     21  double tScore::score(const classifier::Target& target,
     22                       const classifier::VectorAbstract& value)
    2323  {
    2424    weighted_=false;
    25     if (!train_set_.size())
    26       for (size_t i=0; i<target_.size(); i++)
    27         train_set_.push_back(i); 
    28     else
    29       train_set_=train_set;
    30     target_ = target;
    31     data_ = data;
    32     weight_ = gslapi::vector(target.size(),1);
    3325    statistics::Averager positive;
    3426    statistics::Averager negative;
    35     for(size_t i=0; i<train_set_.size(); i++){
    36       if (target_[train_set_[i]]==1)
    37         positive.add(data_[train_set_[i]]);
     27    dof_=target.size()-2;
     28    for(size_t i=0; i<target.size(); i++){
     29      if (class_one(target(i)))
     30        positive.add(value(i));
    3831      else
    39         negative.add(data_[train_set_[i]]);
     32        negative.add(value(i));
    4033    }
    4134    double diff = positive.mean() - negative.mean();
     
    4942  }
    5043
    51   double tScore::score(const gslapi::vector& target,
    52                        const gslapi::vector& data,
    53                        const gslapi::vector& weight,
    54                        const std::vector<size_t>& train_set)
     44  double tScore::score(const classifier::Target& target,
     45                       const classifier::VectorAbstract& value,
     46                       const classifier::VectorAbstract& weight)
    5547  {
    5648    weighted_=true;
    57     if (!train_set_.size())
    58       for (size_t i=0; i<target_.size(); i++)
    59         train_set_.push_back(i); 
    60     else
    61       train_set_=train_set;
    62     target_ = target;
    63     weight_ = weight;
     49
    6450    statistics::AveragerWeighted positive;
    6551    statistics::AveragerWeighted negative;
    66     for(size_t i=0; i<train_set_.size(); i++){
    67       if (target_[train_set_[i]]==1)
    68         positive.add(data_(train_set_[i]),weight_(train_set_[i]));
     52    dof_=target.size()-2;
     53    for(size_t i=0; i<target.size(); i++){
     54      if (class_one(target(i)))
     55        positive.add(value(i),weight(i));
    6956      else
    70         negative.add(data_(train_set_[i]),weight_(train_set_[i]));
     57        negative.add(value(i),weight(i));
    7158    }
    7259    double diff = positive.mean() - negative.mean();
     
    8269  double tScore::p_value(void) const
    8370  {
    84     double dof = target_.size()-2;
    85     double p = gsl_cdf_tdist_Q(t_, dof);
    86     return (dof > 0 && !weighted_) ? p : 1;
     71    double p = gsl_cdf_tdist_Q(t_, dof_);
     72    return (dof_ > 0 && !weighted_) ? p : 1;
    8773  }
    8874
  • branches/peters_vector/lib/statistics/tScore.h

    r414 r469  
    3737    /// absolute value of t-score is returned
    3838    ///
    39     double score(const gslapi::vector&, const gslapi::vector&, 
    40                  const std::vector<size_t>& = std::vector<size_t>());
     39    double score(const classifier::Target& target,
     40                 const classifier::VectorAbstract& value);
    4141
    4242    ///
     
    4444    /// absolute value of t-score is returned
    4545    ///
    46     double score(const gslapi::vector&, const gslapi::vector&, 
    47                  const gslapi::vector&,
    48                  const std::vector<size_t>& = std::vector<size_t>());
     46    double score(const classifier::Target& target,
     47                 const classifier::VectorAbstract& value,
     48                 const classifier::VectorAbstract& weight);
    4949
    5050    ///
     
    6161  private:
    6262    double t_;
    63     gslapi::vector target_;
    64     std::vector<size_t> train_set_;
    65     gslapi::vector value_;
    66     gslapi::vector weight_;
     63    int dof_;
    6764       
    6865  };
  • branches/peters_vector/lib/utility/stl_utility.h

    r437 r469  
    4141  template <class T1,class T2>
    4242  struct pair_value_compare
     43  {
     44    ///
     45    /// @return true if x.second<y.second or (x.second==y.second and
     46    /// x.first<y.first)
     47    ///
     48    inline bool operator()(const std::pair<T1,T2>& x,
     49                           const std::pair<T1,T2>& y) {
     50      return ((x.second<y.second) ||
     51              (!(y.second<x.second) && (x.first<y.first)));
     52    }
     53  };
     54
     55  ///
     56  ///
     57  template <class T1,class T2>
     58  struct pointer_compare
    4359  {
    4460    ///
Note: See TracChangeset for help on using the changeset viewer.