Changeset 475


Ignore:
Timestamp:
Dec 22, 2005, 4:03:51 PM (16 years ago)
Author:
Peter
Message:

I dont know what happened, but everything is changed...

Location:
trunk
Files:
5 deleted
40 edited
12 copied

Legend:

Unmodified
Added
Removed
  • trunk/lib/classifier/ConsensusInputRanker.cc

    r464 r475  
    44#include <c++_tools/classifier/ConsensusInputRanker.h>
    55
     6#include <c++_tools/classifier/CrossSplitting.h>
     7#include <c++_tools/classifier/MatrixLookup.h>
     8#include <c++_tools/classifier/InputRanker.h>
     9#include <c++_tools/classifier/Target.h>
     10#include <c++_tools/statistics/utility.h>
     11#include <c++_tools/utility/stl_utility.h>
    612#include <c++_tools/gslapi/matrix.h>
    7 #include <c++_tools/statistics/utility.h>
    8 #include <c++_tools/classifier/CrossSplitting.h>
    9 #include <c++_tools/classifier/InputRanker.h>
    10 #include <c++_tools/utility/stl_utility.h>
    1113
    1214#include <utility>
    1315#include <vector>
     16#include <iostream>
    1417
    1518namespace theplu {
    1619namespace classifier { 
    1720
    18   ConsensusInputRanker::ConsensusInputRanker(const gslapi::matrix& data,
    19                                              const gslapi::vector& target,
    20                                              statistics::Score& score_object,
    21                                              const size_t rounds,
    22                                              const size_t k)
    23     :id_(std::vector<size_t>()), input_rankers_(std::vector<InputRanker>()),
    24      k_(k), nof_rankers_(k*rounds), rank_(std::vector<size_t>())
     21  ConsensusInputRanker::ConsensusInputRanker
     22  (const MatrixLookup& data, const Target& target,
     23   statistics::Score& score_object, CrossSplitting& sampler,
     24   const size_t n)
     25    : nof_rankers_(n)
    2526
    2627  {
    27     CrossSplitting cv(target, k_);
    2828    for (size_t i=0; i<nof_rankers_; i++){
    29       //InputRanker ir(data, target, score_object);
    30       input_rankers_.push_back(InputRanker(data, target, score_object,
    31                                            cv.next()));
     29      std::vector<size_t> index=sampler.next();
     30      input_rankers_.push_back(InputRanker(MatrixLookup(data,index),
     31                                           Target(target,index),
     32                                           score_object)  );
    3233    }
    3334    // Sorting with respect to median rank
    34     std::vector<std::pair<size_t,double> > medians(data.rows());
     35    std::vector<std::pair<double,size_t> > medians(data.rows());
     36    for (size_t i=0; i<data.rows(); i++){
     37      std::vector<size_t> ranks(nof_rankers_);
     38      for (size_t j=0; j<nof_rankers_; j++) {
     39        ranks[j]=input_rankers_[j].rank(i);
     40      }
     41      medians[i].first = statistics::median(ranks);
     42      medians[i].second = i;
     43    }
     44   
     45    //sort medians and assign id_ and rank_
     46    sort(medians.begin(), medians.end());
     47    id_.resize(data.rows());
     48    rank_.resize(data.rows());
     49    for (size_t i=0; i<data.rows(); i++){
     50      id_[i]=medians[i].second;
     51      rank_[id_[i]]=i;
     52    }
     53
     54  }
     55
     56  ConsensusInputRanker::ConsensusInputRanker
     57  (const MatrixLookup& data, const Target& target, const MatrixLookup& weight,
     58   statistics::Score& score_object, CrossSplitting& sampler,const size_t n)
     59    : nof_rankers_(n)
     60  {
     61    for (size_t i=0; i<nof_rankers_; i++){
     62      std::vector<size_t> index = sampler.next();
     63      input_rankers_.push_back(InputRanker(MatrixLookup(data,index),
     64                                           Target(target,index),
     65                                           score_object,
     66                                           MatrixLookup(weight,index)));
     67    }
     68   
     69    // Sorting with respect to median rank
     70    std::vector<std::pair<double, size_t> > median(data.rows());
    3571    for (size_t i=0; i<data.rows(); i++){
    3672      std::vector<size_t> ranks(nof_rankers_);
    3773      for (size_t j=0; j<nof_rankers_; j++)
    3874        ranks[j]=input_rankers_[j].rank(i);
    39      
    40       medians[i].first = i;
    41       medians[i].second = statistics::median(ranks);
     75      median[i].first = statistics::median(ranks);
     76      median[i].second = i;
    4277    }
    4378   
    4479    //sort medians and assign id_ and rank_
    45     sort(medians.begin(), medians.end(),
    46          utility::pair_value_compare<size_t, double>());
     80    sort(median.begin(), median.end());
    4781    id_.resize(data.rows());
    4882    rank_.resize(data.rows());
    4983    for (size_t i=0; i<data.rows(); i++){
    50       id_[i]=medians[i].first;
    51       rank_[id_[i]]=i;           
    52     }
    53 
    54   }
    55 
    56   ConsensusInputRanker::ConsensusInputRanker(const gslapi::matrix& data,
    57                                              const gslapi::vector& target,
    58                                              const gslapi::matrix& weight,
    59                                              statistics::Score& score_object,
    60                                              const size_t rounds,
    61                                              const size_t k)
    62     :id_(std::vector<size_t>()), input_rankers_(std::vector<InputRanker>()),
    63      k_(k), nof_rankers_(k*rounds), rank_(std::vector<size_t>())
    64 
    65   {
    66     CrossSplitting cv(target, k_);
    67     for (size_t i=0; i<nof_rankers_; i++)
    68       input_rankers_.push_back(InputRanker(data,target,weight,score_object,
    69                                            cv.next()));
    70    
    71     // Sorting with respect to median rank
    72     std::vector<std::pair<size_t,double> > median(data.rows());
    73     for (size_t i=0; i<data.rows(); i++){
    74       std::vector<size_t> ranks(nof_rankers_);
    75       for (size_t j=0; j<nof_rankers_; j++)
    76         ranks[j]=input_rankers_[j].rank(i);
    77       median[i].first = i;
    78       median[i].second = statistics::median(ranks);
    79     }
    80    
    81     //sort medians and assign id_ and rank_
    82     sort(median.begin(), median.end(),
    83          utility::pair_value_compare<size_t, double>());
    84     id_.resize(data.rows());
    85     rank_.resize(data.rows());
    86     for (size_t i=0; i<data.rows(); i++){
    87       id_[i]=median[i].first;
     84      id_[i]=median[i].second;
    8885      rank_[id_[i]]=i;           
    8986    }
  • trunk/lib/classifier/ConsensusInputRanker.h

    r451 r475  
    77
    88namespace theplu {
    9   class gslapi::matrix;
    10   class gslapi::vector;
    119  class statistics::Score;
    1210namespace classifier { 
     11
     12  class CrossSplitting;
     13  class MatrixLookup;
     14  class Target;
    1315
    1416  ///
     
    2628    /// manner, and in total there is \a n times \a k ranklists.
    2729    ///
    28     ConsensusInputRanker(const gslapi::matrix& data,
    29                          const gslapi::vector& target,
    30                          statistics::Score&,
    31                          const size_t n = 1 ,
    32                          const size_t k = 3 );
     30    ConsensusInputRanker(const MatrixLookup& data,
     31                         const Target& target,
     32                         statistics::Score&, 
     33                         CrossSplitting&,
     34                         const size_t n);
    3335
    3436    ///
     
    3739    /// manner, and in total there is \a n times \a k ranklists.
    3840    ///
    39     ConsensusInputRanker(const gslapi::matrix& data,
    40                          const gslapi::vector& target,
    41                          const gslapi::matrix& weight,
    42                          statistics::Score&,
    43                          const size_t n = 1 ,
    44                          const size_t k = 3 );
     41    ConsensusInputRanker(const MatrixLookup& data,
     42                         const Target& target,
     43                         const MatrixLookup& weight,
     44                         statistics::Score&, 
     45                         CrossSplitting&,
     46                         const size_t n);
    4547
    4648    ///
    47     /// Highest ranked row is ranked as number zero @return index of
    48     /// row ranked as number \a i
     49    /// Row with lowest rank (highest score) is ranked as number zero
     50    /// @return index of row ranked as number \a i
    4951    ///
    5052    inline size_t id(const size_t i) const {return id_[i];}
    5153   
    5254    ///
    53     /// Highest ranked row is ranked as number zero @return rank for
    54     /// row \a i
     55    /// Row with lowest rank (highest score) is ranked as number zero
     56    /// @return rank for row \a i
    5557    ///
    5658    inline size_t rank(const size_t i) const {return rank_[i];}
     
    6062    std::vector<size_t> id_;
    6163    std::vector<InputRanker> input_rankers_;
    62     size_t k_;
    63     size_t nof_rankers_;
     64    u_int nof_rankers_;
    6465    std::vector<size_t> rank_;
    6566  };
  • trunk/lib/classifier/CrossSplitting.cc

    r460 r475  
    22
    33
    4 #include <c++_tools/gslapi/vector.h>
     4#include <c++_tools/classifier/Target.h>
    55#include <c++_tools/classifier/CrossSplitting.h>
    66#include <c++_tools/random/random.h>
     
    1111namespace classifier { 
    1212
    13   CrossSplitting::CrossSplitting(const theplu::gslapi::vector& target,
    14                                    const size_t k)
    15     :count_(0),index_negative_(std::vector<size_t>()),
    16      index_positive_(std::vector<size_t>()), k_(k)
     13  CrossSplitting::CrossSplitting(const Target& target, const size_t k)
     14    :count_(0), k_(k)
    1715 
    1816  {
     
    4442    size_t end = int(index_positive_.size()*count_/k_);
    4543    for (size_t i=0; i<index_positive_.size(); i++)
    46       if (i<begin || i>=end)
     44      if (i<begin || i>=end){
    4745        training_set.push_back(index_positive_[i]);
     46      }
    4847   
    4948    begin = int(index_negative_.size()*(count_-1)/k_);
  • trunk/lib/classifier/CrossSplitting.h

    r460 r475  
    77
    88namespace theplu {
    9 namespace gslapi {
    10   class vector;
    11 }
    129namespace classifier { 
     10  class Target;
    1311
    1412  ///
     
    1614  /// crossvalidation manner.
    1715  ///   
     16  /// @note The interface of this class will most likely change pretty soon.
     17  ///
    1818  class CrossSplitting
    1919  {
     
    2323    /// Constructor taking \a target and \a k for k-fold cross validation
    2424    ///
    25     CrossSplitting(const theplu::gslapi::vector& target, const size_t k = 3);
     25    CrossSplitting(const theplu::classifier::Target& target, const size_t k);
    2626
    2727    ///
  • trunk/lib/classifier/InputRanker.cc

    r451 r475  
    33#include <c++_tools/classifier/InputRanker.h>
    44
    5 #include <c++_tools/gslapi/matrix.h>
    6 #include <c++_tools/gslapi/vector.h>
     5#include <c++_tools/classifier/MatrixLookup.h>
     6#include <c++_tools/classifier/DataLookup1D.h>
     7#include <c++_tools/classifier/Target.h>
    78#include <c++_tools/statistics/ROC.h>
    89#include <c++_tools/utility/stl_utility.h>
    910
     11#include <functional>
     12#include <utility>
    1013#include <vector>
    11 #include <utility>
     14
    1215
    1316namespace theplu {
     
    1518
    1619
    17   InputRanker::InputRanker(const gslapi::matrix& data,
    18                            const gslapi::vector& target,
    19                            statistics::Score& score_object,
    20                            const std::vector<size_t>& train_set)
    21     :train_set_(train_set),
    22      id_(std::vector<size_t>(data.rows())),
    23      rank_(std::vector<size_t>(data.rows()))
    24  
     20  InputRanker::InputRanker(const MatrixLookup& data,
     21                           const Target& target,
     22                           statistics::Score& score_object)
    2523  {
    2624    size_t nof_genes = data.rows();
    27     size_t nof_samples = data.columns();
    28     if (!train_set_.size()){
    29       train_set_.resize(nof_samples);
    30       for (size_t i=0; i<nof_samples; i++)
    31         train_set_[i]=i; 
    32     }
     25    //    size_t nof_samples = data.columns();
    3326
    3427    //scoring each input
    35     std::vector<std::pair<size_t, double> > score;
     28    std::vector<std::pair<double, size_t> > score;
    3629    for (size_t i=0; i<nof_genes; i++) {
    37       double area = score_object.score(target,gslapi::vector(data,i),train_set_);
    38       std::pair<size_t, double> tmp(i,area);
     30      DataLookup1D vector_value(data,i,false);
     31      double area = score_object.score(target,vector_value);
     32      //double area = score_object.score(target,DataLookup1D(data,i,false));
     33      std::pair<double, size_t> tmp(area,i);
    3934      score.push_back(tmp);
    4035    }
    4136
    4237    //sort the scores and assign id_ and rank_
    43     sort(score.begin(), score.end(),
    44          utility::pair_value_compare<size_t, double>());
     38    sort(score.begin(), score.end(), std::greater<std::pair<double,size_t> >());
    4539   
     40    id_.resize(nof_genes);
     41    rank_.resize(nof_genes);
    4642    for (size_t i=0; i<nof_genes; i++){
    47       id_[i]=score[nof_genes-i-1].first;
     43      id_[i]=score[i].second;
    4844      rank_[id_[i]]=i;           
    4945    }
     
    5248
    5349
    54   InputRanker::InputRanker(const gslapi::matrix& data,
    55                            const gslapi::vector& target,
    56                            const gslapi::matrix& weight,
     50  InputRanker::InputRanker(const MatrixLookup& data,
     51                           const Target& target,
    5752                           statistics::Score& score_object,
    58                            const std::vector<size_t>& train_set)
    59     :train_set_(train_set),
    60      id_(std::vector<size_t>(data.rows())),
    61      rank_(std::vector<size_t>(data.rows()))
    62  
     53                           const MatrixLookup& weight)
    6354  {
    6455    size_t nof_genes = data.rows();
    65     size_t nof_samples = data.columns();
    66     if (!train_set_.size()){
    67       train_set_.resize(nof_samples);
    68       for (size_t i=0; i<nof_samples; i++)
    69         train_set_[i]=i; 
    70     }
     56
    7157    //scoring each input
    72     std::vector<std::pair<size_t, double> > score;
     58    std::vector<std::pair<double, size_t> > score;
    7359    for (size_t i=0; i<nof_genes; i++) {
    74       double area = score_object.score(target, gslapi::vector(data,i),
    75                                        gslapi::vector(weight,i), train_set_);
    76       std::pair<size_t, double> tmp(i,area);
     60      double area = score_object.score(target, DataLookup1D(data,i,false),
     61                                       DataLookup1D(weight,i,false));
     62      std::pair<double, size_t> tmp(area,i);
    7763      score.push_back(tmp);
    7864    }
     65
    7966    //sort the scores and assign id_ and rank_
    80     sort(score.begin(), score.end(),
    81          utility::pair_value_compare<size_t, double>());
     67    sort(score.begin(), score.end(), std::greater<std::pair<double,size_t> >());
    8268
     69    id_.resize(nof_genes);
     70    rank_.resize(nof_genes);
    8371    for (size_t i=0; i<nof_genes; i++){
    84       id_[i]=score[nof_genes-i-1].first;
     72      id_[i]=score[i].second;
    8573      rank_[id_[i]]=i;           
    8674    }
     75
    8776  }
    8877
  • trunk/lib/classifier/InputRanker.h

    r451 r475  
    1515}
    1616namespace classifier { 
     17
     18  class MatrixLookup;
     19  class Target;
     20
    1721  ///
    1822  /// Class for ranking rows in a matrix, using a Score and a
     
    2428  public:
    2529    ///
    26     /// Default constructor.
    27     ///
    28     inline InputRanker(void) : train_set_(std::vector<size_t>()),
    29                                id_(std::vector<size_t>()),
    30                                rank_(std::vector<size_t>()) {}
    31 
    32     ///
    3330    /// Constructor taking data, target, a Score
    3431    /// object and vector defining what samples to use (default is to
    3532    /// use all samples)
    3633    ///
    37     InputRanker(const gslapi::matrix&,
    38                 const gslapi::vector&,
    39                 statistics::Score&,
    40                 const std::vector<size_t>& = std::vector<size_t>());
     34    InputRanker(const MatrixLookup&, const Target&, statistics::Score&);
     35
    4136
    4237    ///
     
    4540    /// use all samples)
    4641    ///
    47     InputRanker(const gslapi::matrix&,
    48                 const gslapi::vector&,
    49                 const gslapi::matrix&,
    50                 statistics::Score&,
    51                 const std::vector<size_t>& = std::vector<size_t>());
     42    InputRanker(const MatrixLookup&, const Target&, statistics::Score&,
     43                const MatrixLookup&);
     44
    5245
    5346    ///
     
    6558
    6659  private:
    67     std::vector<size_t> train_set_;
    6860    std::vector<size_t> id_;
    6961    std::vector<size_t> rank_;
  • trunk/lib/classifier/KernelView.h

    r463 r475  
    1313  class KernelFunction;
    1414
    15   ///
    16   ///   @brief View into sub Kernel
    17   ///
    18   class KernelView : public MatrixView
    19   {
     15  ///
     16  ///   @brief View into Kernel
     17  ///
     18  class KernelView : public MatrixView
     19  {
    2020   
    21   public:
    22    
     21  public:
     22
    2323    ///
    2424    /// Constructor
  • trunk/lib/classifier/Kernel_MEV.h

    r451 r475  
    66#include <c++_tools/classifier/Kernel.h>
    77#include <c++_tools/classifier/KernelFunction.h>
     8#include <c++_tools/gslapi/vector.h>
    89#include <c++_tools/gslapi/matrix.h>
    910
  • trunk/lib/classifier/Makefile.am

    r459 r475  
    1111  ConsensusInputRanker.cc \
    1212  CrossSplitting.cc \
    13   DataView.cc \
     13  DataLookup1D.cc \
     14  DataLookup2D.cc \
    1415  GaussianKernelFunction.cc \
    1516  InputRanker.cc \
    1617  Kernel_MEV.cc \
    1718  Kernel_SEV.cc \
    18   KernelView.cc \
    19   MatrixView.cc \
     19  KernelLookup.cc \
     20  MatrixLookup.cc \
    2021  NCC.cc \
    2122  PolynomialKernelFunction.cc \
    22   SVM.cc
     23  SVM.cc \
     24  Target.cc
     25
    2326
    2427include_classifierdir = $(includedir)/c++_tools/classifier
     
    2730  ConsensusInputRanker.h \
    2831  CrossSplitting.h \
    29   DataView.h \
     32  DataLookup2D.h \
    3033  GaussianKernelFunction.h \
    3134  InputRanker.h \
     
    3437  Kernel_MEV.h \
    3538  Kernel_SEV.h \
    36   KernelView.h \
    37   MatrixView.h \
     39  KernelLookup.h \
     40  MatrixLookup.h \
    3841  NCC.h \
    3942  PolynomialKernelFunction.h \
    40   SVM.h
     43  SVM.h \
     44  Target.h \
     45  DataLookup1D.h
  • trunk/lib/classifier/NCC.cc

    r456 r475  
    33#include <c++_tools/classifier/NCC.h>
    44
    5 #include <c++_tools/classifier/DataView.h>
     5#include <c++_tools/classifier/MatrixLookup.h>
     6#include <c++_tools/classifier/Target.h>
    67#include <c++_tools/gslapi/vector.h>
    78#include <c++_tools/statistics/Averager.h>
     9#include <c++_tools/statistics/AveragerPairWeighted.h>
    810#include <c++_tools/utility/stl_utility.h>
    911
     
    1618namespace classifier {
    1719
    18   NCC::NCC(const DataView& data, const gslapi::vector& target)
     20  NCC::NCC(const MatrixLookup& data, const Target& target)
    1921  {
    20     gslapi::vector sorted_target(target);
    21     sorted_target.sort(); // if targets contain NaN => infinite loop
     22    Target sorted_target(target);
     23    sorted_target.sort();
    2224   
    2325    // Find the classes of targets
     
    5153  {
    5254    prediction=gslapi::vector(classes_.size());   
    53     std::vector<size_t> use;
     55    gslapi::vector w(input.size(),0);
    5456    for(size_t i=0; i<input.size(); i++)  // take care of missing values
    5557      if(!std::isnan(input(i)))
    56         use.push_back(i);
    57     for(size_t j=0; j<centroids_.columns(); j++)
    58       prediction(j)=score.score(input,gslapi::vector(centroids_,j,false),use);
     58        w(i)=1.0;
     59    statistics::AveragerPairWeighted ap;
     60    for(size_t j=0; j<centroids_.columns(); j++) {
     61      ap.add(input,gslapi::vector(centroids_,j,false),w, w);
     62      prediction(j)=ap.correlation();
     63      ap.reset();
     64    }
    5965  }
    6066
  • trunk/lib/classifier/NCC.h

    r456 r475  
    44#define _theplu_classifier_ncc_
    55
    6 #include <c++_tools/classifier/DataView.h>
    76#include <c++_tools/gslapi/matrix.h>
    8 #include <c++_tools/statistics/Score.h>
    97
    108#include <map>
    119
    1210namespace theplu {
     11namespace gslapi {
     12  class vector;
     13}
     14namespace statistics {
     15  class Score;
     16}
     17
    1318namespace classifier { 
    1419
     20  class MatrixLookup;
    1521  class Score;
     22  class Target;
     23  class DataLookup1D;
    1624
    1725  ///
     
    3240    /// input. Performs the training of the NCC.
    3341    ///
    34     NCC(const DataView&, const gslapi::vector&);
     42    NCC(const MatrixLookup&, const Target&);
    3543
    3644    ///
  • trunk/lib/classifier/PolynomialKernelFunction.h

    r451 r475  
    1111
    1212namespace theplu {
    13 
    14 namespace gslapi {
    15   class vector;
    16 }
    17 
    1813namespace classifier {
    1914
  • trunk/lib/classifier/SVM.cc

    r463 r475  
    44#include <c++_tools/classifier/SVM.h>
    55
    6 #include <c++_tools/classifier/KernelView.h>
     6#include <c++_tools/classifier/KernelLookup.h>
    77#include <c++_tools/gslapi/matrix.h>
    88#include <c++_tools/gslapi/vector.h>
     
    137137  }
    138138
    139   SVM::SVM(const KernelView& kernel, const gslapi::vector& target)
     139  SVM::SVM(const KernelLookup& kernel, const Target& target)
    140140  : alpha_(target.size(),0),
    141141    bias_(0),
     
    149149    tolerance_(0.00000001)
    150150  {
    151     if (max_epochs_>ULONG_MAX)
    152       max_epochs_=ULONG_MAX;
    153151  }
    154152
  • trunk/lib/classifier/SVM.h

    r461 r475  
    44#define _theplu_classifier_svm_
    55
    6 #include <c++_tools/classifier/KernelView.h>
     6#include <c++_tools/classifier/KernelLookup.h>
     7#include <c++_tools/classifier/Target.h>
    78#include <c++_tools/gslapi/vector.h>
    89
     
    106107    /// the SVM is no longer defined.
    107108    ///
    108     SVM(const KernelView&, const gslapi::vector&);
     109    SVM(const KernelLookup&, const Target&);
    109110
    110111    ///
     
    205206    double bias_;
    206207    double C_inverse_;
    207     const KernelView& kernel_;
     208    const KernelLookup& kernel_;
    208209    unsigned long int max_epochs_;
    209210    gslapi::vector output_;
    210211    Index sample_;
    211     const gslapi::vector& target_;
     212    const Target& target_;
    212213    bool trained_;
    213214    double tolerance_;
  • trunk/lib/gslapi/vector.cc

    r439 r475  
    55#include <c++_tools/utility/stl_utility.h>
    66#include <c++_tools/utility/utility.h>
     7#include <c++_tools/classifier/DataLookup1D.h>
    78
    89#include <sstream>
     
    138139
    139140
     141  vector::vector(const classifier::DataLookup1D& data)
     142    : view_(NULL), const_view_(NULL)
     143  {
     144    v_ = gsl_vector_alloc(data.size());
     145    size_t n=0;
     146    for (size_t i=0; i<data.size(); i++)
     147      gsl_vector_set( v_, n++, data(i) );
     148   
     149  }
     150
     151
     152
    140153  vector::~vector(void)
    141154  {
     
    182195    double sum = 0;
    183196    for (size_t i=0; i<size(); i++)
    184       sum += gsl_vector_get( v_, i );
     197      sum += (*this)(i);
    185198    return( sum );
    186199  } 
    187 
    188 
    189 
    190   double vector::operator*( const vector &other ) const
    191   {
    192     // Jari, check for gsl_support
    193     double res = 0.0;;
    194     for ( size_t i = 0; i < size(); ++i )
    195       res += other[i] * (*this)[i];
    196     return res;
    197   }
    198 
    199 
    200200
    201201  bool vector::operator==(const vector& a) const
     
    207207        return false;
    208208    return true;
     209  }
     210
     211
     212
     213  double vector::operator*( const vector &other ) const
     214  {
     215    double res = 0.0;;
     216    for ( size_t i = 0; i < size(); ++i )
     217      res += other(i) * (*this)(i);
     218    return res;
    209219  }
    210220
  • trunk/lib/gslapi/vector.h

    r439 r475  
    1414
    1515namespace theplu {
     16namespace classifier {
     17  class DataLookup1D;
     18}
    1619namespace gslapi {
    1720
     
    156159
    157160    ///
     161    ///
     162    ///
     163    explicit vector(const classifier::DataLookup1D&);
     164
     165    ///
    158166    /// The destructor.
    159167    ///
     
    336344    /// @return The sum.
    337345    ///
    338     // Jari, doxygen group as "Extends GSL".
    339346    double sum(void) const;
    340347
     
    389396    inline const double&
    390397    operator[](size_t i) const { return *gsl_vector_const_ptr(v_,i); }
    391 
    392     ///
    393     /// @return The dot product.
    394     ///
    395     double operator*( const vector &other ) const;
    396398
    397399    ///
     
    419421
    420422    ///
     423    /// @return The dot product.
     424    ///
     425    double operator*( const vector &other ) const;
     426
     427    ///
    421428    /// Addition and assign operator.
    422429    ///
  • trunk/lib/statistics/AveragerPair.h

    r427 r475  
    1111
    1212namespace theplu{
    13 
    14   class gslapi::vector;
    15 
    1613namespace statistics{
    1714  ///
  • trunk/lib/statistics/AveragerWeighted.h

    r420 r475  
    8888
    8989    ///
     90    /// The variance is calculated as \f$ \frac{\sum w_i (x_i - m)^2
     91    /// }{\sum w_i} \f$.
     92    ///
     93    /// @return The variance.
     94    ///
     95    inline double variance(void) const
     96    { return sum_xx_centered()/sum_w(); }
     97
     98    ///
    9099    /// The variance is calculated as \f$ \frac{\sum w_i}{\left(\sum
    91100    /// w_i\right)^2 - \sum w_i^2} \sum w_i (x_i - m)^2 \f$. This
     
    94103    /// @return The variance.
    95104    ///
    96     inline double variance(void) const
     105    inline double variance_unbiased(void) const
    97106    { return sum_ww() / (sum_w()*sum_w()-sum_ww()) * sum_xx_centered(); }
    98107
  • trunk/lib/statistics/Fisher.cc

    r449 r475  
    44#include <c++_tools/statistics/Score.h>
    55#include <c++_tools/statistics/utility.h>
     6#include <c++_tools/classifier/Target.h>
    67
    78#include <gsl/gsl_cdf.h>
     
    1314 
    1415  Fisher::Fisher(bool b)
    15     : Score(b), a_(0), b_(0), c_(0), d_(0), cutoff_column_(0), cutoff_row_(0),
    16       oddsratio_(1.0)
     16    : Score(b), a_(0), b_(0), c_(0), d_(0), oddsratio_(1.0), value_cutoff_(0)
    1717  {
    1818  }
     
    4444    // If a column sum or a row sum is zero, the table is nonsense
    4545    if ((a==0 || d==0) && (c==0 || b==0)){
    46       //Peter, should through exception
     46      //Peter, should throw exception
    4747      std::cerr << "Warning: Fisher: Table is not valid\n";
    4848      return oddsratio_ = 1.0;
     
    5959  {
    6060    double p=1;
    61     if (a_<minimum_size_ || b_<minimum_size_ ||
    62         c_<minimum_size_ || d_<minimum_size_){
     61    if ( ( a_<minimum_size_ || b_<minimum_size_ ||
     62           c_<minimum_size_ || d_<minimum_size_) && !weighted_ ){
    6363
    6464      p=p_value_exact();
     
    7171      if (oddsratio_<0.5){
    7272        // last term because >= not equal to !(<=)
    73         return 1-p+gsl_ran_hypergeometric_pdf(a_, a_+b_, c_+d_, a_+c_);
     73        u_int a = static_cast<u_int>(a_);
     74        u_int b = static_cast<u_int>(b_);
     75        u_int c = static_cast<u_int>(c_);
     76        u_int d = static_cast<u_int>(d_);
     77        return 1-p+gsl_ran_hypergeometric_pdf(a, a+b, c+d, a+c);
    7478      }
    7579    }
     
    8488
    8589  double Fisher::p_value_exact() const
    86   {
     90  {
     91    u_int a = static_cast<u_int>(a_);
     92    u_int b = static_cast<u_int>(b_);
     93    u_int c = static_cast<u_int>(c_);
     94    u_int d = static_cast<u_int>(d_);
     95   
    8796    // Since the calculation is symmetric and cdf_hypergeometric_P
    8897    // loops up to k we choose the smallest number to be k and mirror
    8998    // the matrix. This choice makes the p-value two-sided.
    90     if (a_<b_ && a_<c_ && a_<d_)
    91       return statistics::cdf_hypergeometric_P(a_,a_+b_,c_+d_,a_+c_);
    92     else if (b_<a_ && b_<c_ && b_<d_)
    93       return  statistics::cdf_hypergeometric_P(b_,a_+b_,c_+d_,b_+d_);
    94     else if (c_<a_ && c_<b_ && c_<d_)
    95       return  statistics::cdf_hypergeometric_P(c_,c_+d_,a_+b_,a_+c_);
    9699
    97     return statistics::cdf_hypergeometric_P(d_,c_+d_,a_+b_,b_+d_);
     100    if (a<b && a<c && a<d)
     101      return statistics::cdf_hypergeometric_P(a,a+b,c+d,a+c);
     102    else if (b<a && b<c && b<d)
     103      return  statistics::cdf_hypergeometric_P(b,a+b,c+d,b+d);
     104    else if (c<a && c<b && c<d)
     105      return  statistics::cdf_hypergeometric_P(c,c+d,a+b,a+c);
     106
     107    return statistics::cdf_hypergeometric_P(d,c+d,a+b,b+d);
    98108
    99109  }
    100110
    101111
    102   double Fisher::score(const gslapi::vector& x,
    103                        const gslapi::vector& y,
    104                        const std::vector<size_t>& train_set)
     112    double Fisher::score(const classifier::Target& target,
     113                         const gslapi::vector& value)
    105114  {
    106     if (!train_set.size()){
    107       train_set_.resize(0);
    108       for (size_t i=0; i<target_.size(); i++)
    109         train_set_.push_back(i); 
    110     }
    111     else
    112       train_set_ = train_set;
     115    weighted_=false;
    113116    a_=b_=c_=d_=0;
    114     for (size_t i=0; i<train_set_.size(); i++)
    115       if (x(train_set_[i])<cutoff_column_)
    116         if (y(train_set_[i])<cutoff_row_)
     117    for (size_t i=0; i<target.size(); i++)
     118      if (class_one(target(i)))
     119        if (value(i)>value_cutoff_)
    117120          a_++;
    118121        else
    119122          c_++;
    120123      else
    121         if (y(train_set_[i])<cutoff_row_)
     124        if (value(i)>value_cutoff_)
    122125          b_++;
    123126        else
    124127          d_++;
    125128       
    126     // If a column sum or a row sum is zero, the table is nonsense
     129    // If a column sum or a row sum is zero, the table is non-sense
    127130    if ((a_==0 || d_==0) && (c_==0 || b_==0)){
    128131      std::cerr << "Warning: Fisher: Table is not valid\n";
    129       return 0;
     132      return 1;
    130133    }
    131134
     
    133136  }
    134137 
    135   double Fisher::score(const gslapi::vector& x,
    136                        const gslapi::vector& y,             
    137                        const gslapi::vector& w,
    138                        const std::vector<size_t>& train_set)
     138    double Fisher::score(const classifier::Target& target,
     139                         const gslapi::vector& value,
     140                         const gslapi::vector& weighted)
    139141  {
    140     if (!train_set.size()){
    141       train_set_.resize(0);
    142       for (size_t i=0; i<target_.size(); i++)
    143         train_set_.push_back(i); 
     142    weighted_=true;
     143    a_=b_=c_=d_=0;
     144    for (size_t i=0; i<target.size(); i++)
     145      if (class_one(target(i)))
     146        if (value(i)>value_cutoff_)
     147          a_+=value(i);
     148        else
     149          c_+=value(i);
     150      else
     151        if (value(i)>value_cutoff_)
     152          b_+=value(i);
     153        else
     154          d_+=value(i);
     155       
     156    // If a column sum or a row sum is zero, the table is non-sense
     157    if ((a_==0 || d_==0) && (c_==0 || b_==0)){
     158      std::cerr << "Warning: Fisher: Table is not valid\n";
     159      return 1;
    144160    }
    145     else
    146       train_set_ = train_set;
    147 
    148     double a=0;
    149     double b=0;
    150     double c=0;
    151     double d=0;
    152 
    153     for (size_t i=0; i<train_set_.size(); i++)
    154       if (x(train_set_[i]) < cutoff_column_)
    155         if (y(train_set_[i]) < cutoff_row_)
    156           a+=w(train_set_[i]);
    157         else
    158           c+=w(train_set_[i]);
    159       else
    160         if (y(train_set_[i]) < cutoff_column_)
    161           b+=w(train_set_[i]);
    162         else
    163           d+=w(train_set_[i]);
    164161
    165162    return oddsratio(a_,b_,c_,d_);
    166163  }
    167 
     164 
    168165  double Fisher::score(const u_int a, const u_int b,
    169166                       const u_int c, const u_int d)
  • trunk/lib/statistics/Fisher.h

    r449 r475  
    6464    ///
    6565    /// Cutoff sets the limit whether a value should go into the left
    66     /// or the right column. @see score
    67     ///
    68     /// @return reference to cutoff for column
    69     ///
    70     inline double& cutoff_column(void) { return cutoff_column_; }
    71 
    72     ///
    73     /// Cutoff sets the limit whether a value should go into the left
    7466    /// or the right row. @see score
    7567    ///
    7668    /// @return reference to cutoff for row
    7769    ///
    78     inline double& cutoff_row(void) { return cutoff_row_; }
     70    inline double& value_cutoff(void) { return value_cutoff_; }
    7971
    8072    ///
     
    9991    /// @return p-value
    10092    ///
     93    /// @note in weighted case, approximation Chi2 is always used.
     94    ///
    10195    double p_value() const;
    10296   
     
    10498    /// Function calculating score from 2x2 table for which the
    10599    /// elements are calculated as follows \n
    106     /// a: #data \f$ x=1 \f$ AND \f$ y=1 \f$ \n
    107     /// b: #data \f$ x=-1 \f$ AND \f$ y=1 \f$ \n
    108     /// c: #data \f$ x=1 \f$ AND \f$ y=-1 \f$ \n
    109     /// d: #data \f$ x=-1 \f$ AND \f$ y=1 \f$ \n
     100    /// target=positive_label (a,c)
     101    /// \f$ value > \f$ value_cutoff() a,b \n
    110102    ///
    111103    /// @return odds ratio. If absolute_ is true and odds ratio is
    112104    /// less than unity 1 divided by odds ratio is returned
    113105    ///
    114     double score(const gslapi::vector& x, const gslapi::vector& y, 
    115                  const std::vector<size_t>& = std::vector<size_t>());
     106    double score(const classifier::Target& target,
     107                 const gslapi::vector& value);
    116108
    117109    ///
     
    122114    /// @return odds ratio
    123115    ///
    124     /// @note
     116    /// @see score
    125117    ///
    126     double score(const gslapi::vector& x, const gslapi::vector& y, 
    127                  const gslapi::vector& w,
    128                  const std::vector<size_t>& = std::vector<size_t>());
     118    double score(const classifier::Target& target,
     119                 const gslapi::vector& value,
     120                 const gslapi::vector& weight);
    129121
    130122    ///
     
    148140    double p_value_exact(void) const;
    149141
    150     std::vector<size_t> train_set_;
    151     gslapi::vector weight_;
    152     u_int a_;
    153     u_int b_;
    154     u_int c_;
    155     u_int d_;
    156     double cutoff_column_;
    157     double cutoff_row_;
     142    double a_;
     143    double b_;
     144    double c_;
     145    double d_;
    158146    u_int minimum_size_;
    159147    double oddsratio_;
     148    double value_cutoff_;
    160149  };
    161150
  • trunk/lib/statistics/FoldChange.cc

    r414 r475  
    11// $Id$
    22
    3 #include "FoldChange.h"
    4 
    5 #include "Score.h"
    6 #include "Averager.h"
    7 #include "AveragerWeighted.h"
     3#include <c++_tools/statistics/FoldChange.h>
     4#include <c++_tools/statistics/Score.h>
     5#include <c++_tools/statistics/Averager.h>
     6#include <c++_tools/statistics/AveragerWeighted.h>
     7#include <c++_tools/classifier/Target.h>
    88
    99namespace theplu {
     
    3131  }
    3232
    33   double FoldChange::score(const gslapi::vector& target,
    34                            const gslapi::vector& value,
    35                            const std::vector<size_t>& train_set)
     33  double FoldChange::score(const classifier::Target& target,
     34                           const gslapi::vector& value)
    3635  {
    37     if (!train_set_.size())
    38       for (size_t i=0; i<target_.size(); i++)
    39         train_set_.push_back(i); 
    40     else
    41       train_set_=train_set;
    42 
    4336    weighted_=false;
    4437    Averager pos;
    4538    Averager neg;
    4639
    47     for (size_t i=0; i<train_set.size(); i++)
    48       if (target(train_set[i])==1)
    49         pos.add(value(train_set[i]));
     40    for (size_t i=0; i<value.size(); i++)
     41      if (class_one(target(i)))
     42        pos.add(value(i));
    5043      else
    51         neg.add(value(train_set[i]));
     44        neg.add(value(i));
    5245         
    5346    if (absolute_)
     
    5649  }
    5750
    58   double FoldChange::score(const gslapi::vector& target,
     51  double FoldChange::score(const classifier::Target& target,
    5952                           const gslapi::vector& value,
    60                            const gslapi::vector& weight,
    61                            const std::vector<size_t>& train_set)
     53                           const gslapi::vector& weight)
    6254  {
    63     if (!train_set_.size())
    64       for (size_t i=0; i<target_.size(); i++)
    65         train_set_.push_back(i); 
    66     else
    67       train_set_=train_set;
    68 
    6955    weighted_=true;
    7056    AveragerWeighted pos;
    7157    AveragerWeighted neg;
    7258
    73     for (size_t i=0; i<train_set.size(); i++)
    74       if (target(train_set[i])==1)
    75         pos.add(value(train_set[i]),weight(train_set[i]));
     59    for (size_t i=0; i<value.size(); i++)
     60      if (class_one(target(i)))
     61        pos.add(value(i),weight(i));
    7662      else
    77         neg.add(value(train_set[i]),weight(train_set[i]));
     63        neg.add(value(i),weight(i));
    7864         
    7965    if (absolute_)
  • trunk/lib/statistics/FoldChange.h

    r414 r475  
    77
    88namespace theplu {
     9
     10  class gslapi::vector;
     11
     12
    913namespace statistics {
    1014
     
    2832    /// @param target is +1 or -1
    2933    /// @param value vector of the values
    30     /// @train_set defining which values to use (number of values used
    31     /// in the calculation is equal to size of \a train_set)
    3234    ///
    33     double
    34     score(const gslapi::vector& target,
    35           const gslapi::vector& value,
    36           const std::vector<size_t>& train_set = std::vector<size_t>());
     35    double score(const classifier::Target& target,
     36                 const gslapi::vector& value);
    3737 
    3838    ///
     
    4545    /// in the calculation is equal to size of \a train_set)
    4646    ///
    47     double
    48     score(const gslapi::vector& target,
    49           const gslapi::vector& value,
    50           const gslapi::vector& weight,
    51           const std::vector<size_t>& train_set = std::vector<size_t>());
     47    double score(const classifier::Target& target,
     48                 const gslapi::vector& value,
     49                 const gslapi::vector& weight);
    5250 
    5351  private:
  • trunk/lib/statistics/Makefile.am

    r461 r475  
    88noinst_LTLIBRARIES = libstatistics.la
    99libstatistics_la_SOURCES = \
    10   Averager.cc AveragerPair.cc AveragerWeighted.cc Fisher.cc FoldChange.cc \
     10  Averager.cc AveragerPair.cc AveragerWeighted.cc \
     11  AveragerPairWeighted.cc \
     12  Fisher.cc FoldChange.cc \
    1113  Histogram.cc Kernel.cc KernelBox.cc KernelTriCube.cc Linear.cc \
    1214  LinearWeighted.cc Local.cc \
     
    1820
    1921include_statistics_HEADERS = \
    20   Averager.h AveragerPair.h AveragerWeighted.h Fisher.h FoldChange.h \
     22  Averager.h AveragerPair.h AveragerWeighted.h \
     23  AveragerPairWeighted.h \
     24  Fisher.h FoldChange.h \
    2125  Histogram.h Kernel.h KernelBox.h KernelTriCube.h Linear.h LinearWeighted.h \
    2226  Local.h MultiDimensional.h Naive.h NaiveWeighted.h OneDimensional.h \
  • trunk/lib/statistics/Pearson.cc

    r295 r475  
    33
    44#include <c++_tools/statistics/Pearson.h>
    5 
    65#include <c++_tools/statistics/AveragerPair.h>
     6#include <c++_tools/statistics/AveragerPairWeighted.h>
    77#include <c++_tools/gslapi/vector.h>
     8#include <c++_tools/classifier/Target.h>
    89
    910#include <cmath>
     
    1920  }
    2021
    21   void Pearson::centralize(gslapi::vector& x, const gslapi::vector& w)
    22   {
    23     double m = (x*w)/w.sum();
    24     for (size_t i=0; i<x.size(); i++)
    25       x(i)=x(i)-m;
    26   }
    27 
    2822  double Pearson::p_value() const
    2923  {
    3024    if(weighted_)
    3125      return 1;
    32     else if(nof_samples_<3){
     26    if(nof_samples_<2){
    3327      std::cerr << "Warning: Only " << nof_samples_ << "samples. "
    3428                << "Need at lest 3.\n";
    3529      return 1;
    3630    }
    37     else{
    38       double t = sqrt(nof_samples_ - 2)*fabs(r_) /sqrt(1-r_*r_);
    39       return 2*gsl_cdf_tdist_Q (t, nof_samples_ -2 );
    40     }
     31
     32    double t = sqrt(nof_samples_ - 2)*fabs(r_) /sqrt(1-r_*r_);
     33    double p = gsl_cdf_tdist_Q(t, nof_samples_ -2 );
     34    if (absolute_)
     35      return 2*p;
     36    if (r_<0)
     37      return 1-p;
     38    return p;
     39
    4140  }
    4241
    43   double Pearson::score(const gslapi::vector& vec1, const gslapi::vector& vec2,
    44                         const std::vector<size_t>& train_set)
     42  double Pearson::score(const classifier::Target& target,
     43                        const gslapi::vector& value)
    4544  {
    4645    weighted_=false;
    4746    AveragerPair ap;
    48     if (!train_set.size()){
    49       ap = AveragerPair(vec1.sum(),vec1*vec1,vec2.sum(),vec2*vec2,
    50                         vec1*vec2,vec1.size());
    51       nof_samples_ = vec1.size();
    52     }
    53     else{
    54       for (size_t i=0; i<train_set.size(); i++){
    55         ap.add(vec1(train_set[i]), vec2(train_set[i]));
    56       }
    57       nof_samples_ = train_set.size();
     47    for (size_t i=0; i<target.size(); i++){
     48      if (class_one(target(i)))
     49        ap.add(1, value(i));
     50      else
     51        ap.add(-1, value(i));
     52      nof_samples_ = target.size();
    5853    }
    5954    r_ = ap.correlation();
    6055    if (r_<0 && absolute_)
    61       r_=-r_;
     56      return -r_;
    6257     
    6358    return r_;
    6459  }
    6560   
    66   double Pearson::score(const gslapi::vector& vec1, const gslapi::vector& vec2,
    67                         const gslapi::vector& w1, const gslapi::vector& w2,
    68                         const std::vector<size_t>& train_set =
    69                         std::vector<size_t>())
     61  double Pearson::score(const classifier::Target& target,
     62                        const gslapi::vector& value,
     63                        const gslapi::vector& weight)
    7064  {
    7165    weighted_=true;
    72     gslapi::vector x;
    73     gslapi::vector y;
    74     gslapi::vector wx;
    75     gslapi::vector wy;
    76     if (!train_set.size()){
    77       x = vec1;
    78       y = vec2;
    79       wx = w1;
    80       wy = w2;
    81       nof_samples_ = vec1.size();
     66    AveragerPairWeighted ap;
     67    for (size_t i=0; i<target.size(); i++){
     68      if (class_one(target(i)))
     69        ap.add(1, value(i),1,weight(i));
     70      else
     71        ap.add(-1, value(i),1,weight(i));
     72      nof_samples_ = target.size();
    8273    }
    83     else{
    84       nof_samples_ = train_set.size();
    85       x = gslapi::vector(nof_samples_);
    86       y = gslapi::vector(nof_samples_);
    87       wx = gslapi::vector(nof_samples_);
    88       wy = gslapi::vector(nof_samples_);
    89       for (size_t i=0; i<train_set.size(); i++){
    90         x(i)=vec1(train_set[i]);
    91         y(i)=vec2(train_set[i]);
    92         wx(i)=w1(train_set[i]);
    93         wy(i)=w2(train_set[i]);
    94       }
    95     }
    96 
    97     centralize(x,wx);
    98     centralize(y,wy);
    99 
    100     x.mul(wx);
    101     y.mul(wy);
    102 
    103     r_ = ( (x*y) / sqrt( (x*x)*(y*y)) );
    104     weighted_=true;
    105 
     74    r_ = ap.correlation();
    10675    if (r_<0 && absolute_)
    107       r_=-r_;
     76      return -r_;
    10877     
    10978    return r_;
  • trunk/lib/statistics/Pearson.h

    r414 r475  
    77
    88namespace theplu {
    9   class gslapi::vector;
     9namespace classifier {
     10  class VectorAbstract;
     11}
    1012
    1113namespace statistics { 
     
    3638    /// of Pearson is used.
    3739    ///
    38     double score(const gslapi::vector&, const gslapi::vector&, 
    39                  const std::vector<size_t>& = std::vector<size_t>());
     40    double score(const classifier::Target& target,
     41                 const gslapi::vector& value);
    4042
    4143    ///
     
    4850    /// correlation.
    4951    ///
    50     inline double score(const gslapi::vector& x, const gslapi::vector& y,
    51                         const gslapi::vector& w,
    52                         const std::vector<size_t>& train_set =
    53                         std::vector<size_t>())
    54     { return score(x,y,w,w,train_set); }
     52    double score(const classifier::Target& target,
     53                 const gslapi::vector& value,
     54                 const gslapi::vector& weight);
    5555
    56     ///
    57     /// \f$ \frac{\vert \sum_iw^x_iw^y_i(x_i-m_x)(y_i-m_y)\vert }
    58     /// {\sqrt{\sum_iw^x_iw^y_i(x_i-m_x)^2 ///
    59     /// \sum_iw^x_iw^y_i(y_i-m_y)^2}}\f$, where \f$m_x = \frac{\sum
    60     /// w^x_iw^y_ix_i}{\sum w^x_iw^y_i}\f$ and \f$m_y = \frac{\sum
    61     /// w_ix_i}{\sum w_i}\f$. This expression is chosen to get a
    62     /// correlation equal to unity when \a x and \a y are
    63     /// equal. @return absolute value of weighted version of Pearson
    64     /// correlation.
    65     ///
    66     double score(const gslapi::vector& x, const gslapi::vector& y, 
    67                  const gslapi::vector& wx, const gslapi::vector& wy,
    68                  const std::vector<size_t>&);
    69    
    7056    ///
    7157    /// The p-value is the probability of getting a correlation as
     
    8268
    8369
    84     void centralize(gslapi::vector&, const gslapi::vector&);
     70    //    void centralize(gslapi::vector&, const gslapi::vector&);
    8571  };
    8672
  • trunk/lib/statistics/ROC.cc

    r303 r475  
    22
    33#include <c++_tools/statistics/ROC.h>
    4 
    54#include <c++_tools/utility/stl_utility.h>
    65#include <c++_tools/gslapi/vector.h>
     
    98
    109#include <cmath>
    11 #include <iostream>
    1210#include <utility>
    1311#include <vector>
     
    1513
    1614namespace theplu {
     15  class gslapi::vector;
    1716namespace statistics { 
    1817
    1918  ROC::ROC(bool b)
    20     : Score(b), area_(-1), minimum_size_(10), nof_pos_(0), 
    21       value_(std::vector<std::pair<double, double> >())
    22      
    23        
     19    : Score(b), area_(0.5), minimum_size_(10), nof_pos_(0) 
    2420  {
    2521  }
     
    3026    // Not integrating from the middle of the bin, but from the inner edge.
    3127    if (x>0)
    32       x -= 0.5/nof_pos_/(value_.size()-nof_pos_);
     28      x -= 0.5/nof_pos_/(n()-nof_pos_);
    3329    else if(x<0)
    34       x += 0.5/nof_pos_/(value_.size()-nof_pos_);
     30      x += 0.5/nof_pos_/(n()-nof_pos_);
    3531
    36     double sigma = (std::sqrt( (value_.size()-nof_pos_)*nof_pos_*
    37                                (value_.size()+1.0)/12 ) /
    38                     ( value_.size() - nof_pos_ ) / nof_pos_ );
     32    double sigma = (std::sqrt( (n()-nof_pos_)*nof_pos_*
     33                               (n()+1.0)/12 ) /
     34                    ( n() - nof_pos_ ) / nof_pos_ );
    3935    double p = gsl_cdf_gaussian_Q(x, sigma);
    4036   
     
    6258    if (weighted_)
    6359      return 1.0;
    64     else if (nof_pos_ < minimum_size_ & value_.size()-nof_pos_ < minimum_size_)
    65       return get_p_exact(area_*nof_pos_*(value_.size()-nof_pos_),
    66                           nof_pos_, value_.size()-nof_pos_);
     60    else if (nof_pos_ < minimum_size_ & n()-nof_pos_ < minimum_size_)
     61      return get_p_exact(area_*nof_pos_*(n()-nof_pos_),
     62                          nof_pos_, n()-nof_pos_);
    6763    else
    6864      return get_p_approx(area_);
     
    7066  }
    7167
    72   double ROC::score(const gslapi::vector& target, const gslapi::vector& data,
    73                     const std::vector<size_t>& train_set)
     68  double ROC::score(const classifier::Target& target,
     69                    const gslapi::vector& value)
    7470  {
    7571    weighted_=false;
    76     target_ = target;
    77     data_ = data;
    78     if (!train_set.size()){
    79       train_set_.resize(0);
    80       for (size_t i=0; i<target_.size(); i++)
    81         train_set_.push_back(i); 
     72
     73    vec_pair_.clear();
     74    vec_pair_.reserve(target.size());
     75    for (unsigned int i=0; i<target.size(); i++){
     76      int target_tmp = target(i);
     77      double value_tmp = value(i);
     78      vec_pair_.push_back(std::make_pair(target_tmp,value_tmp));
     79      //      vec_pair_.push_back(std::make_pair(target(i),value(i)));
    8280    }
    83     else
    84       train_set_ = train_set;
    85     sort();
     81    std::sort(vec_pair_.begin(),vec_pair_.end(),
     82              utility::pair_value_compare<int, double>());
     83
     84
    8685    area_ = 0;
    87     for (size_t i=0; i<value_.size(); i++)
    88       if (value_[i].first==1)
     86    nof_pos_=0;
     87    for (size_t i=0; i<n(); i++){
     88      if (class_one(vec_pair_[i].first)){
    8989        area_+=i;
     90        nof_pos_++;
     91      }
     92    }
    9093
    9194    // Normalizing the area to [0,1]
    9295    area_ = ( (area_-nof_pos_*(nof_pos_-1)/2 ) /
    93               (nof_pos_*(value_.size()-nof_pos_)) );
     96              (nof_pos_*(n()-nof_pos_)) );
    9497   
    9598    //Returning score larger 0.5 that you get by random
    9699    if (area_<0.5 && absolute_)
    97100      area_=1.0-area_;
    98    
     101
    99102    return area_;
    100103  }
    101104
    102   double ROC::score(const gslapi::vector& target, const gslapi::vector& data,
    103                     const gslapi::vector& weight,
    104                     const std::vector<size_t>& train_set)
     105  // Peter, should be possible to do this in NlogN
     106  double ROC::score(const classifier::Target& target,
     107                    const gslapi::vector& value,
     108                    const gslapi::vector& weight)
    105109  {
    106110    weighted_=true;
    107     target_ = target;
    108     data_ = data;
    109     weight_=weight;
    110     if (!train_set.size()){
    111       train_set_.resize(0);
    112       for (size_t i=0; i<target_.size(); i++)
    113         train_set_.push_back(i); 
    114     }
    115     else
    116       train_set_ = train_set;
    117     sort();
     111
     112    vec_pair_.clear();
     113    vec_pair_.reserve(target.size());
     114    for (unsigned int i=0; i<target.size(); i++)
     115      vec_pair_.push_back(std::make_pair(target(i),value(i)));
     116    std::sort(vec_pair_.begin(),vec_pair_.end(),
     117              utility::pair_value_compare<int, double>());
     118
    118119    area_=0;
     120    nof_pos_=0;
    119121    double max_area=0;
    120     //Peter, use the sort to skip some ifs and loops
    121     for (size_t i=0; i<value_.size(); i++){
    122       if (target_(train_set_[i])==1){
    123         for (size_t j=0; j<value_.size(); j++){
    124           if (target_(train_set_[j])!=1){
    125             if (data_(train_set_[i]) > data_(train_set_[j])){
    126               area_+=weight_(train_set_[i])*weight_(train_set_[j]);
    127             }
    128             max_area+=weight_(train_set_[i])*weight_(train_set_[j]);
     122
     123    for (size_t i=0; i<n(); i++)
     124      if (class_one(target(i)))
     125        for (size_t j=0; j<n(); j++)
     126          if (!class_one(target(j))){
     127            if (value(i)>value(j))
     128              area_+=weight(i)*weight(j);
     129            max_area+=weight(i)*weight(j);
    129130          }
    130         }
    131       }
    132     }
     131   
    133132    area_/=max_area;
    134133   
     
    139138  }
    140139
    141   void ROC::sort()
     140  int ROC::target(const size_t i) const
    142141  {
    143     value_.resize(0);
    144     nof_pos_=0;
    145     for (unsigned int i=0; i<train_set_.size(); i++){
    146       std::pair<double, double> tmp(target_(train_set_[i]),
    147                                     data_(train_set_[i]));
    148       value_.push_back(tmp);
    149       if (target_(train_set_[i])==1)
    150         nof_pos_++;
    151     }
    152     std::sort(value_.begin(),value_.end(),
    153          utility::pair_value_compare<double, double>());
    154   }
    155 
    156   gslapi::vector ROC::target(void) const
    157   {
    158     gslapi::vector target(value_.size());
    159     for(size_t i=0; i<target.size(); ++i)
    160       target(i) = value_[i].first;
    161     return target;
     142    return vec_pair_[i].first;
    162143  }
    163144
     
    168149    double sens = 1;
    169150    double spec = 0;
    170     gslapi::vector target = r.target();
    171     size_t n = target.size();
    172     double nof_pos = (target.sum()+n)/2;
     151    size_t n = r.n();
     152    double nof_pos = r.n_pos();
    173153    for(size_t i=0; i<n-1; ++i) {
    174154      s << sens << "\t";
    175155      s << spec << "\n";
    176       if (target(i)==1)
     156      if (r.class_one(r.target(i)))
    177157        spec -= 1/(n-nof_pos);
    178158      else
  • trunk/lib/statistics/ROC.h

    r465 r475  
    44#define _theplu_statistics_roc_
    55
    6 #include <c++_tools/gslapi/vector.h>
     6#include <c++_tools/classifier/Target.h>
    77#include <c++_tools/statistics/Score.h>
    88
     
    3232    /// Function taking \a value, \a target (+1 or -1) and vector
    3333    /// defining what samples to use. The score is equivalent to
    34     /// Mann-Whitney statistics. If target is equal to 1, sample
    35     /// belonges to class + otherwise sample belongs to class
    36     /// -. @return the area under the ROC curve. If the area is less
     34    /// Mann-Whitney statistics.
     35    /// @return the area under the ROC curve. If the area is less
    3736    /// than 0.5 and absolute=true, 1-area is returned. Complexity is
    3837    /// \f$ N\log N \f$ where \f$ N \f$ is number of samples.
    3938    ///
    40     double score(const gslapi::vector& target, const gslapi::vector& value,
    41                  const std::vector<size_t>& = std::vector<size_t>());
     39    double score(const classifier::Target& target,
     40                 const gslapi::vector& value);
    4241   
    4342    /// Function taking values, target, weight and a vector defining
     
    5251    /// of samples.
    5352    ///
    54     double score(const gslapi::vector& target, const gslapi::vector& value,
    55                  const gslapi::vector& weight, 
    56                  const std::vector<size_t>& = std::vector<size_t>());
     53    double score(const classifier::Target& target,
     54                 const gslapi::vector& value,
     55                 const gslapi::vector& weight);
    5756       
    5857
     
    7170    ///
    7271    double p_value(void) const;
    73          
    74     ///
    75     /// @return the targets in train_set sorted with respect to the
    76     /// corresponding data
    77     ///
    78     gslapi::vector target(void) const;
    79 
     72   
    8073    ///
    8174    /// minimum_size is the threshold for when a normal
     
    8679    inline u_int& minimum_size(void){ return minimum_size_; } 
    8780
    88   private:
    89     double area_;
    90     u_int minimum_size_;
    91     u_int nof_pos_;
    92     /// pair of target and data. should always be sorted with respect to
    93     /// data.
    94     std::vector<std::pair<double, double> > value_;
    95    
     81    int target(const size_t i) const;
     82    inline size_t n(void) const { return vec_pair_.size(); }
     83    inline size_t n_pos(void) const { return nof_pos_; }
     84
     85  private:
     86   
    9687    /// Implemented as in MatLab 13.1
    9788    double get_p_approx(const double) const;
     
    10091    double get_p_exact(const double, const double, const double) const;
    10192
    102     /// sorting value_, should always be done when changing train_set_
    103     void sort(void);
    104        
     93    double area_;
     94    u_int minimum_size_;
     95    u_int nof_pos_;
     96    std::vector<std::pair<int, double> > vec_pair_;
    10597  };
    10698
  • trunk/lib/statistics/Score.cc

    r303 r475  
    77
    88  Score::Score(bool absolute)
    9     : absolute_(absolute), train_set_(std::vector<size_t>()), weighted_(true)
     9    : absolute_(absolute), positive_label_(1)
    1010  {
    1111  }
  • trunk/lib/statistics/Score.h

    r428 r475  
    77
    88namespace theplu {
     9namespace classifier {
     10  class Target;
     11  class DataLookup1D;
     12}
     13
    914namespace statistics {
    1015
     
    3237
    3338    ///
     39    /// Targets with this label are considered to be in positive
     40    /// group. All others are considered to be in negative
     41    /// group. Default is 1.
     42    ///
     43    /// @return label for positive class
     44    ///
     45    inline int& positive_label(void) { return positive_label_; }
     46
     47    ///
    3448    /// Function calculating the score. In absolute mode, also the
    3549    /// score using negated class labels is calculated, and the
     
    4155    /// @param target vector of targets (most often +1 -1)
    4256    /// @param value vector of the values
    43     /// @train_set defining which values to use (number of values used
    44     /// in the calculation is equal to size of \a train_set)
    4557    ///
    4658    virtual double
    47     score(const gslapi::vector& target,
    48           const gslapi::vector& value,
    49           const std::vector<size_t>& train_set = std::vector<size_t>()) = 0;
     59    score(const classifier::Target& target,
     60          const gslapi::vector& value) = 0;
     61 
     62    ///
     63    /// Function calculating the score. In absolute mode, also the
     64    /// score using negated class labels is calculated, and the
     65    /// largest of the two scores are calculated. Absolute mode should
     66    /// be used when two-tailed test is wanted.
     67    ///
     68    /// @return statistica.
     69    ///
     70    /// @param target vector of targets (most often +1 -1)
     71    /// @param value vector of the values
     72    ///
     73    inline double
     74    score(const classifier::Target& target,
     75          const classifier::DataLookup1D& value)
     76    { return score(target,gslapi::vector(value)); }
    5077 
    5178    ///
     
    6592    ///
    6693    virtual double
    67     score(const gslapi::vector& target,
     94    score(const classifier::Target& target,
    6895          const gslapi::vector& value,
    69           const gslapi::vector& weight,
    70           const std::vector<size_t>& = std::vector<size_t>()) = 0;
     96          const gslapi::vector& weight) = 0;
    7197
    7298    ///
    73     /// @return the one-sided p-value( if absolute true is used
    74     /// this is equivalent to the two-sided p-value.)
     99    /// Function calculating the weighted version of score. In
     100    /// absolute mode, also the score using negated class labels is
     101    /// calculated, and the largest of the two scores are
     102    /// calculated. Absolute mode should be used when two-tailed test
     103    /// is wanted.
    75104    ///
    76     /// virtual double p_value(void) const = 0;
    77  
    78    
     105    /// @return statistica (weighted version)
     106    ///
     107    /// @param target is +1 or -1
     108    /// @param value vector of the values
     109    /// @param weight vector of accompanied weight to the values
     110    /// @train_set defining which values to use (number of values used
     111    /// in the calculation is equal to size of \a train_set)
     112    ///
     113    inline double
     114    score(const classifier::Target& target,
     115          const classifier::DataLookup1D& value,
     116          const classifier::DataLookup1D& weight)
     117    { return score(target, gslapi::vector(value), gslapi::vector(weight)); }
     118
     119    inline bool class_one(int i) const { return i==positive_label_; }
    79120   
    80121  protected:
     122    inline bool weighted(void) const { return weighted_; }
     123
    81124    bool absolute_;
    82     gslapi::vector data_;   
    83     gslapi::vector target_;
    84     std::vector<size_t> train_set_;
    85     gslapi::vector weight_;
     125    int positive_label_;
    86126    bool weighted_;
    87127
  • trunk/lib/statistics/WilcoxonFoldChange.cc

    r465 r475  
    33#include <c++_tools/statistics/WilcoxonFoldChange.h>
    44#include <c++_tools/statistics/utility.h>
     5#include <c++_tools/classifier/Target.h>
     6#include <c++_tools/gslapi/vector.h>
    57
    68#include <cmath>
     9#include <vector>
    710
    811namespace theplu {
     
    1720
    1821
    19   double WilcoxonFoldChange::score(const gslapi::vector& target,
    20                                    const gslapi::vector& value,
    21                                    const std::vector<size_t>& train_set)
     22  double WilcoxonFoldChange::score(const classifier::Target& target,
     23                                   const gslapi::vector& value)
    2224  {
    23     if (!train_set_.size())
    24       for (size_t i=0; i<target_.size(); i++)
    25         train_set_.push_back(i); 
    26     else
    27       train_set_=train_set;
    28 
    2925    std::vector<double> distance;
    3026    //Peter, should reserve the vector to avoid reallocations   
    3127    weighted_=false;
    32     for (size_t i=0; i<train_set_.size(); i++) {
     28    for (size_t i=0; i<target.size(); i++) {
    3329      for (size_t j=0; i<j; j++) {
    34         if (target(train_set_[i])==target(train_set_[j])) continue;
    35         distance.push_back(value(train_set_[i])-value(train_set_[j]));
     30        if (target(i)==target(j)) continue;
     31        distance.push_back(value(i)-value(j));
    3632      }
    3733    }
     
    4137  }
    4238
    43   double WilcoxonFoldChange::score(const gslapi::vector& target,
     39  double WilcoxonFoldChange::score(const classifier::Target& target,
    4440                                   const gslapi::vector& value,
    45                                    const gslapi::vector& weight,
    46                                    const std::vector<size_t>& train_set)
     41                                   const gslapi::vector& weight)
    4742  {
    4843    return 0;
  • trunk/lib/statistics/WilcoxonFoldChange.h

    r465 r475  
    2929    /// in the calculation is equal to size of \a train_set)
    3030    ///
    31     double
    32     score(const gslapi::vector& target,
    33           const gslapi::vector& value,
    34           const std::vector<size_t>& train_set = std::vector<size_t>());
     31    double score(const classifier::Target& target,
     32                 const gslapi::vector& value);
    3533 
    36     /// @todo
     34    ///
    3735    /// @return difference of the weighted means of the two classes
    3836    ///
    39     /// @param target is +1 or -1
    4037    /// @param value vector of the values
    4138    /// @param weight vector of accompanied weight to the values
     
    4542    /// @note not implemented
    4643    ///
    47     double
    48     score(const gslapi::vector& target,
    49           const gslapi::vector& value,
    50           const gslapi::vector& weight,
    51           const std::vector<size_t>& train_set = std::vector<size_t>());
     44    double score(const classifier::Target& target,
     45                 const gslapi::vector& value,
     46                 const gslapi::vector& weight);
    5247 
    5348  private:
  • trunk/lib/statistics/tScore.cc

    r414 r475  
    77#include <c++_tools/statistics/tScore.h>
    88#include <c++_tools/statistics/Averager.h>
    9 #include <c++_tools/gslapi/vector.h>
    109#include <c++_tools/statistics/AveragerWeighted.h>
     10#include <c++_tools/classifier/Target.h>
    1111
    1212namespace theplu {
     
    1414
    1515  tScore::tScore(bool b)
    16     : Score(b),  t_(0), train_set_(), weight_()
     16    : Score(b),  t_(0)
    1717  {
    1818  }
    1919
    20   double tScore::score(const gslapi::vector& target,
    21                        const gslapi::vector& data,
    22                        const std::vector<size_t>& train_set)
     20  double tScore::score(const classifier::Target& target,
     21                       const gslapi::vector& value)
    2322  {
    2423    weighted_=false;
    25     if (!train_set_.size())
    26       for (size_t i=0; i<target_.size(); i++)
    27         train_set_.push_back(i); 
    28     else
    29       train_set_=train_set;
    30     target_ = target;
    31     data_ = data;
    32     weight_ = gslapi::vector(target.size(),1);
    3324    statistics::Averager positive;
    3425    statistics::Averager negative;
    35     for(size_t i=0; i<train_set_.size(); i++){
    36       if (target_[train_set_[i]]==1)
    37         positive.add(data_[train_set_[i]]);
     26    dof_=target.size()-2;
     27    for(size_t i=0; i<target.size(); i++){
     28      if (class_one(target(i)))
     29        positive.add(value(i));
    3830      else
    39         negative.add(data_[train_set_[i]]);
     31        negative.add(value(i));
    4032    }
    4133    double diff = positive.mean() - negative.mean();
     
    4941  }
    5042
    51   double tScore::score(const gslapi::vector& target,
    52                        const gslapi::vector& data,
    53                        const gslapi::vector& weight,
    54                        const std::vector<size_t>& train_set)
     43  double tScore::score(const classifier::Target& target,
     44                       const gslapi::vector& value,
     45                       const gslapi::vector& weight)
    5546  {
    5647    weighted_=true;
    57     if (!train_set_.size())
    58       for (size_t i=0; i<target_.size(); i++)
    59         train_set_.push_back(i); 
    60     else
    61       train_set_=train_set;
    62     target_ = target;
    63     weight_ = weight;
     48
    6449    statistics::AveragerWeighted positive;
    6550    statistics::AveragerWeighted negative;
    66     for(size_t i=0; i<train_set_.size(); i++){
    67       if (target_[train_set_[i]]==1)
    68         positive.add(data_(train_set_[i]),weight_(train_set_[i]));
     51    dof_=target.size()-2;
     52    for(size_t i=0; i<target.size(); i++){
     53      if (class_one(target(i)))
     54        positive.add(value(i),weight(i));
    6955      else
    70         negative.add(data_(train_set_[i]),weight_(train_set_[i]));
     56        negative.add(value(i),weight(i));
    7157    }
    7258    double diff = positive.mean() - negative.mean();
     
    8268  double tScore::p_value(void) const
    8369  {
    84     double dof = target_.size()-2;
    85     double p = gsl_cdf_tdist_Q(t_, dof);
    86     return (dof > 0 && !weighted_) ? p : 1;
     70    double p = gsl_cdf_tdist_Q(t_, dof_);
     71    return (dof_ > 0 && !weighted_) ? p : 1;
    8772  }
    8873
  • trunk/lib/statistics/tScore.h

    r414 r475  
    3737    /// absolute value of t-score is returned
    3838    ///
    39     double score(const gslapi::vector&, const gslapi::vector&, 
    40                  const std::vector<size_t>& = std::vector<size_t>());
     39    double score(const classifier::Target& target,
     40                 const gslapi::vector& value);
    4141
    4242    ///
     
    4444    /// absolute value of t-score is returned
    4545    ///
    46     double score(const gslapi::vector&, const gslapi::vector&, 
    47                  const gslapi::vector&,
    48                  const std::vector<size_t>& = std::vector<size_t>());
     46    double score(const classifier::Target& target,
     47                 const gslapi::vector& value,
     48                 const gslapi::vector& weight);
    4949
    5050    ///
     
    6161  private:
    6262    double t_;
    63     gslapi::vector target_;
    64     std::vector<size_t> train_set_;
    65     gslapi::vector value_;
    66     gslapi::vector weight_;
     63    int dof_;
    6764       
    6865  };
  • trunk/lib/utility/stl_utility.h

    r437 r475  
    4141  template <class T1,class T2>
    4242  struct pair_value_compare
     43  {
     44    ///
     45    /// @return true if x.second<y.second or (x.second==y.second and
     46    /// x.first<y.first)
     47    ///
     48    inline bool operator()(const std::pair<T1,T2>& x,
     49                           const std::pair<T1,T2>& y) {
     50      return ((x.second<y.second) ||
     51              (!(y.second<x.second) && (x.first<y.first)));
     52    }
     53  };
     54
     55  ///
     56  ///
     57  template <class T1,class T2>
     58  struct pointer_compare
    4359  {
    4460    ///
  • trunk/test/consensus_inputranker_test.cc

    r453 r475  
    66#include <c++_tools/statistics/ROC.h>
    77#include <c++_tools/gslapi/matrix.h>
     8#include <c++_tools/classifier/MatrixLookup.h>
     9#include <c++_tools/classifier/CrossSplitting.h>
    810
    911#include <cstdlib>
     
    1416using namespace std;
    1517
    16 int main()
     18int main(const int argc,const char* argv[])
    1719
     20  std::ostream* error;
     21  if (argc>1 && argv[1]==std::string("-v"))
     22    error = &std::cerr;
     23  else {
     24    error = new std::ofstream("/dev/null");
     25    if (argc>1)
     26      std::cout << "consensus_inputranker_test -v : for printing extra information\n";
     27  }
     28  *error << "testing consensus_inputranker" << std::endl;
     29  bool ok = true;
     30
    1831  ifstream is("data/rank_data.txt");
    19   theplu::gslapi::matrix data(is);
     32  theplu::gslapi::matrix data_tmp(is);
     33  theplu::classifier::MatrixLookup data(data_tmp);
    2034  is.close();
    2135
    2236  is.open("data/rank_target.txt");
    23   theplu::gslapi::vector target(is);
     37  theplu::classifier::Target target(is);
    2438  is.close();
    2539
     40 
    2641  theplu::statistics::ROC roc;
    27   theplu::classifier::ConsensusInputRanker cir(data,target,roc,10,3);
     42  theplu::classifier::CrossSplitting sampler(target,3);
     43  theplu::classifier::ConsensusInputRanker cir(data,target,roc,sampler,30);
    2844
    2945  if (cir.id(0)!=2 || cir.id(1)!=0 || cir.id(2)!=1){
    30     cerr << "wrong id" << endl;
    31     return -1;
     46    *error << "incorrect id" << endl;
     47    ok = false;
    3248  }
    3349 
    3450  if (cir.rank(0)!=1 || cir.rank(1)!=2 || cir.rank(2)!=0){
    35     cerr << "wrong rank" << endl;
    36     return -1;
     51    *error << "incorrect rank" << endl;
     52    ok=false;
    3753  }
    3854
    3955  theplu::gslapi::matrix flag(data.rows(),data.columns(),1);
    40   theplu::classifier::ConsensusInputRanker cir2(data,target,flag,roc,10,3);
     56  theplu::classifier::ConsensusInputRanker
     57    cir2(data,target,flag,roc,sampler,30);
    4158
    4259  if (cir2.id(0)!=2 || cir2.id(1)!=0 || cir2.id(2)!=1){
    43     cerr << "wrong id" << endl;
    44     return -1;
     60    *error << "incorrect id for weighted" << endl;
     61    ok=false;
    4562  }
    4663 
    4764  if (cir2.rank(0)!=1 || cir2.rank(1)!=2 || cir2.rank(2)!=0){
    48     cerr << "wrong rank" << endl;
    49     return -1;
     65    *error << "incorrect rank for weighted" << endl;
     66    ok=false;
    5067  }
    5168
    5269 
    53   return 0;
     70  if (error!=&std::cerr)
     71    delete error;
     72
     73  if(ok)
     74    return 0;
     75  return -1;
    5476}
  • trunk/test/crossvalidation_test.cc

    r463 r475  
    22
    33#include <c++_tools/classifier/CrossSplitting.h>
    4 #include <c++_tools/gslapi/vector.h>
     4#include <c++_tools/classifier/Target.h>
     5//#include <c++_tools/gslapi/vector.h>
    56
    67#include <cstdlib>
     
    2425  *error << "testing crosssplitting" << std::endl;
    2526  bool ok = true;
    26   gslapi::vector target(10,1);
     27  classifier::Target target(10,1);
    2728  for (size_t i=0; i<5; i++)
    2829    target(i)=-1;
    2930
    3031  classifier::CrossSplitting cv(target,3);
    31 
    32 
    3332  std::vector<size_t> training_set;
    3433  std::vector<size_t> count(10);
    35 
    3634  training_set = cv.next();
    3735  for (unsigned int i=0; i<training_set.size(); i++)
    3836    count[training_set[i]]++;
    39  
    4037  training_set = cv.next();
    4138  for (unsigned int i=0; i<training_set.size(); i++)
    4239    count[training_set[i]]++;
    43 
    4440  training_set = cv.next();
    4541  for (unsigned int i=0; i<training_set.size(); i++)
    4642    count[training_set[i]]++;
    47 
    4843  for (unsigned int i=0; i<10 ; i++)
    4944    ok = ok && (count[i]==2);
     45  if (!ok)
     46    *error << "Each sample did not occur twice in 3-fold cross-splitting"
     47           << std::endl;
    5048
    51   if (!ok)
    52     *error << "crossvalidation failed" << std::endl;
     49  classifier::CrossSplitting cv2(target,3);
     50  training_set = cv2.next();
     51  if (target(training_set[0])*target(training_set[1]) == 1){
     52    *error << "Error: Two samples from same class"
     53           << std::endl;
     54    ok=false;
     55  }
    5356
    5457  if (error!=&std::cerr)
  • trunk/test/inputranker_test.cc

    r453 r475  
    44#include <c++_tools/statistics/ROC.h>
    55#include <c++_tools/gslapi/matrix.h>
     6#include <c++_tools/gslapi/matrix.h>
     7#include <c++_tools/classifier/MatrixLookup.h>
     8#include <c++_tools/classifier/Target.h>
    69
    710#include <cstdlib>
     
    912#include <iostream>
    1013
    11 using namespace theplu;
    1214
    13 int main()
    14 
     15
     16int main(const int argc,const char* argv[])
     17{
     18  using namespace theplu;
     19  std::ostream* error;
     20  if (argc>1 && argv[1]==std::string("-v"))
     21    error = &std::cerr;
     22  else {
     23    error = new std::ofstream("/dev/null");
     24    if (argc>1)
     25      std::cout << "inputranker_test -v : for printing extra information\n";
     26  }
     27  *error << "testing inputranker" << std::endl;
     28  bool ok = true;
     29
    1530  std::ifstream is("data/rank_data.txt");
    16   gslapi::matrix data(is);
     31  theplu::gslapi::matrix data_tmp(is);
     32  theplu::classifier::MatrixLookup data(data_tmp);
    1733  is.close();
    1834
    1935  is.open("data/rank_target.txt");
    20   theplu::gslapi::vector target(is);
     36  classifier::Target target(is);
    2137  is.close();
    2238
    2339  statistics::ROC roc;
    2440  classifier::InputRanker ir(data,target,roc);
    25 
    2641  if (ir.id(0)!=2 || ir.id(1)!=0 || ir.id(2)!=1){
    27     std::cerr << "wrong id" << std::endl;
    28     return -1;
     42    *error << "wrong id" << std::endl;
     43    ok=false;
    2944  }
    3045 
     46  *error << "second test" << std::endl;
    3147  if (ir.rank(0)!=1 || ir.rank(1)!=2 || ir.rank(2)!=0){
    32     std::cerr << "wrong rank" << std::endl;
    33     return -1;
     48    *error << "wrong rank" << std::endl;
     49    ok=false;
    3450  }
    3551 
    36   return 0;
     52  if (error!=&std::cerr)
     53    delete error;
     54
     55  return (ok ? 0 : -1);
     56
    3757}
  • trunk/test/kernel_test.cc

    r463 r475  
    99#include <c++_tools/classifier/PolynomialKernelFunction.h>
    1010#include <c++_tools/classifier/GaussianKernelFunction.h>
    11 #include <c++_tools/classifier/KernelView.h>
     11#include <c++_tools/classifier/KernelLookup.h>
    1212#include <c++_tools/classifier/Kernel_MEV.h>
    1313#include <c++_tools/classifier/Kernel_SEV.h>
     
    3636  index[1]=2;
    3737  index[2]=3;
    38   classifier::KernelView kv(kernel,index);
     38  classifier::KernelLookup kv(kernel,index,index);
    3939  if (kv.rows()!=index.size()){
    40     *error << "Error: KernelView(kernel, index)\n" << std::endl
    41            << "Size of KernelView is " << kv.rows() << std::endl
     40    *error << "Error: KernelLookup(kernel, index)\n" << std::endl
     41           << "Size of KernelLookup is " << kv.rows() << std::endl
    4242           << "expected " << index.size() << std::endl;
    4343   
    4444    return false;
    4545  }
    46   classifier::KernelView kv2(kernel);
     46  classifier::KernelLookup kv2(kernel);
    4747  if (kv2.rows()!=kernel.size()){
    48     *error << "Error: KernelView(kernel)\n" << std::endl
    49            << "Size of KernelView is " << kv.rows() << std::endl
     48    *error << "Error: KernelLookup(kernel)\n" << std::endl
     49           << "Size of KernelLookup is " << kv.rows() << std::endl
    5050           << "expected " << kernel.size() << std::endl;
    5151   
     
    7171  index[1]=2;
    7272  index[2]=3;
    73   classifier::KernelView kv(kernel,index);
     73  classifier::KernelLookup kv(kernel,index, index);
    7474  if (kv.rows()!=index.size()){
    75     *error << "Error: KernelView(kernel, index)\n" << std::endl
    76            << "Size of KernelView is " << kv.rows() << std::endl
     75    *error << "Error: KernelLookup(kernel, index)\n" << std::endl
     76           << "Size of KernelLookup is " << kv.rows() << std::endl
    7777           << "expected " << index.size() << std::endl;
    7878   
    7979    return false;
    8080  }
    81   classifier::KernelView kv2(kernel);
     81  classifier::KernelLookup kv2(kernel);
    8282  if (kv2.rows()!=kernel.size()){
    83     *error << "Error: KernelView(kernel)\n" << std::endl
    84            << "Size of KernelView is " << kv.rows() << std::endl
     83    *error << "Error: KernelLookup(kernel)\n" << std::endl
     84           << "Size of KernelLookup is " << kv.rows() << std::endl
    8585           << "expected " << kernel.size() << std::endl;
    8686   
  • trunk/test/score_test.cc

    r463 r475  
    3232  *error << "testing ROC" << std::endl;
    3333  gslapi::vector value(31);
    34   gslapi::vector target(31,-1);
     34  classifier::Target target(31,-1);
    3535  for (unsigned int i=0; i<16; i++)
    3636    target(i) = 1;
     
    4040  double area = roc.score(target, value);
    4141  if (area!=1.0){
    42     *error << "test_roc a: area is " << area << " should be 1.0"
     42    *error << "test_roc: area is " << area << " should be 1.0"
    4343           << std::endl;
    4444    ok = false;
     
    7070
    7171  is.open("data/rank_target.txt");
    72   gslapi::vector target2(is);
     72  classifier::Target target2(is);
    7373  is.close();
    7474 
  • trunk/test/svm_test.cc

    r463 r475  
    55#include <c++_tools/classifier/SVM.h>
    66#include <c++_tools/classifier/Kernel.h>
     7#include <c++_tools/classifier/KernelLookup.h>
    78#include <c++_tools/classifier/Kernel_SEV.h>
    89#include <c++_tools/classifier/Kernel_MEV.h>
     
    3839  data2(0,2)=1;
    3940  data2(1,2)=0;
    40   gslapi::vector target2(3);
     41  classifier::Target target2(3);
    4142  target2(0)=-1;
    4243  target2(1)=1;
     
    4647  assert(kernel2.size()==3);
    4748  assert(target2.size()==3);
    48   classifier::KernelView kv2(kernel2);
     49  classifier::KernelLookup kv2(kernel2);
    4950  *error << "testing with linear kernel" << std::endl;
    5051  assert(kv2.rows()==target2.size());
     
    5455  *error << " done." << std::endl;
    5556
    56   if (classifier2.alpha()*target2){
     57  double tmp=0;
     58  for (size_t i=0; i<target2.size(); i++)
     59    tmp += classifier2.alpha()(i)*target2(i);
     60 
     61  if (tmp){
    5762    *error << "condition not fullfilled" << std::endl;
    5863    return -1;
     
    8085
    8186  is.open("data/nm_target_bin.txt");
    82   theplu::gslapi::vector target(is);
     87  classifier::Target target(is);
    8388  is.close();
    8489
     
    8792  is.close();
    8893
    89   classifier::KernelView kv(kernel);
     94  classifier::KernelLookup kv(kernel);
    9095  theplu::classifier::SVM svm(kv, target);
    9196  if (!svm.train()){
     
    108113  double slack = 0;
    109114  for (unsigned int i=0; i<target.size(); i++){
    110     if (output[i]*target[i] < 1){
    111       slack += 1 - output[i]*target[i];
     115    if (output(i)*target(i) < 1){
     116      slack += 1 - output(i)*target(i);
    112117    }
    113118  }
Note: See TracChangeset for help on using the changeset viewer.