Changeset 473


Ignore:
Timestamp:
Dec 22, 2005, 3:44:54 PM (16 years ago)
Author:
Peter
Message:

fixed the bugs!

Location:
branches/peters_vector
Files:
15 edited

Legend:

Unmodified
Added
Removed
  • branches/peters_vector/lib/classifier/ConsensusInputRanker.cc

    r470 r473  
    1414#include <utility>
    1515#include <vector>
     16#include <iostream>
    1617
    1718namespace theplu {
     
    2526
    2627  {
    27     for (size_t i=0; i<n; i++){
     28    for (size_t i=0; i<nof_rankers_; i++){
    2829      std::vector<size_t> index=sampler.next();
    2930      input_rankers_.push_back(InputRanker(MatrixLookup(data,index),
     
    3233    }
    3334    // Sorting with respect to median rank
    34     std::vector<std::pair<size_t,double> > medians(data.rows());
     35    std::vector<std::pair<double,size_t> > medians(data.rows());
    3536    for (size_t i=0; i<data.rows(); i++){
    3637      std::vector<size_t> ranks(nof_rankers_);
    37       for (size_t j=0; j<nof_rankers_; j++)
     38      for (size_t j=0; j<nof_rankers_; j++) {
    3839        ranks[j]=input_rankers_[j].rank(i);
    39      
    40       medians[i].first = i;
    41       medians[i].second = statistics::median(ranks);
     40      }
     41      medians[i].first = statistics::median(ranks);
     42      medians[i].second = i;
    4243    }
    4344   
    4445    //sort medians and assign id_ and rank_
    45     sort(medians.begin(), medians.end(),
    46          utility::pair_value_compare<size_t, double>());
     46    sort(medians.begin(), medians.end());
    4747    id_.resize(data.rows());
    4848    rank_.resize(data.rows());
    4949    for (size_t i=0; i<data.rows(); i++){
    50       id_[i]=medians[i].first;
    51       rank_[id_[i]]=i;           
     50      id_[i]=medians[i].second;
     51      rank_[id_[i]]=i;
    5252    }
    5353
     
    5959    : nof_rankers_(n)
    6060  {
    61 
    62     for (size_t i=0; i<n; i++){
    63       std::vector<size_t> index=sampler.next();
     61    for (size_t i=0; i<nof_rankers_; i++){
     62      std::vector<size_t> index = sampler.next();
    6463      input_rankers_.push_back(InputRanker(MatrixLookup(data,index),
    6564                                           Target(target,index),
     
    6968   
    7069    // Sorting with respect to median rank
    71     std::vector<std::pair<size_t,double> > median(data.rows());
     70    std::vector<std::pair<double, size_t> > median(data.rows());
    7271    for (size_t i=0; i<data.rows(); i++){
    7372      std::vector<size_t> ranks(nof_rankers_);
    7473      for (size_t j=0; j<nof_rankers_; j++)
    7574        ranks[j]=input_rankers_[j].rank(i);
    76       median[i].first = i;
    77       median[i].second = statistics::median(ranks);
     75      median[i].first = statistics::median(ranks);
     76      median[i].second = i;
    7877    }
    7978   
    8079    //sort medians and assign id_ and rank_
    81     sort(median.begin(), median.end(),
    82          utility::pair_value_compare<size_t, double>());
     80    sort(median.begin(), median.end());
    8381    id_.resize(data.rows());
    8482    rank_.resize(data.rows());
    8583    for (size_t i=0; i<data.rows(); i++){
    86       id_[i]=median[i].first;
     84      id_[i]=median[i].second;
    8785      rank_[id_[i]]=i;           
    8886    }
  • branches/peters_vector/lib/classifier/ConsensusInputRanker.h

    r470 r473  
    4747
    4848    ///
    49     /// Highest ranked row is ranked as number zero @return index of
    50     /// row ranked as number \a i
     49    /// Row with lowest rank (highest score) is ranked as number zero
     50    /// @return index of row ranked as number \a i
    5151    ///
    5252    inline size_t id(const size_t i) const {return id_[i];}
    5353   
    5454    ///
    55     /// Highest ranked row is ranked as number zero @return rank for
    56     /// row \a i
     55    /// Row with lowest rank (highest score) is ranked as number zero
     56    /// @return rank for row \a i
    5757    ///
    5858    inline size_t rank(const size_t i) const {return rank_[i];}
  • branches/peters_vector/lib/classifier/CrossSplitting.cc

    r460 r473  
    22
    33
    4 #include <c++_tools/gslapi/vector.h>
     4#include <c++_tools/classifier/Target.h>
    55#include <c++_tools/classifier/CrossSplitting.h>
    66#include <c++_tools/random/random.h>
     
    1111namespace classifier { 
    1212
    13   CrossSplitting::CrossSplitting(const theplu::gslapi::vector& target,
    14                                    const size_t k)
    15     :count_(0),index_negative_(std::vector<size_t>()),
    16      index_positive_(std::vector<size_t>()), k_(k)
     13  CrossSplitting::CrossSplitting(const Target& target, const size_t k)
     14    :count_(0), k_(k)
    1715 
    1816  {
     
    4442    size_t end = int(index_positive_.size()*count_/k_);
    4543    for (size_t i=0; i<index_positive_.size(); i++)
    46       if (i<begin || i>=end)
     44      if (i<begin || i>=end){
    4745        training_set.push_back(index_positive_[i]);
     46      }
    4847   
    4948    begin = int(index_negative_.size()*(count_-1)/k_);
  • branches/peters_vector/lib/classifier/CrossSplitting.h

    r460 r473  
    77
    88namespace theplu {
    9 namespace gslapi {
    10   class vector;
    11 }
    129namespace classifier { 
     10  class Target;
    1311
    1412  ///
     
    1614  /// crossvalidation manner.
    1715  ///   
     16  /// @note The interface of this class will most likely change pretty soon.
     17  ///
    1818  class CrossSplitting
    1919  {
     
    2323    /// Constructor taking \a target and \a k for k-fold cross validation
    2424    ///
    25     CrossSplitting(const theplu::gslapi::vector& target, const size_t k = 3);
     25    CrossSplitting(const theplu::classifier::Target& target, const size_t k);
    2626
    2727    ///
  • branches/peters_vector/lib/classifier/DataLookup1D.cc

    r470 r473  
    22
    33#include <c++_tools/classifier/DataLookup1D.h>
     4#include <iostream>
    45
    56namespace theplu {
     
    1213  }
    1314 
     15  DataLookup1D::~DataLookup1D()
     16  {
     17  }
    1418
    1519}} // of namespace classifier and namespace theplu
  • branches/peters_vector/lib/classifier/DataLookup1D.h

    r470 r473  
    3030    ///
    3131    ///
     32    ~DataLookup1D();
     33
     34    ///
     35    ///
     36    ///
    3237    inline size_t size(void) const
    3338      { return column_vector_ ? matrix_->rows() : matrix_->columns(); }
     
    3641    ///
    3742    ///
    38     inline double operator()(const size_t i) const
    39     { return column_vector_ ? (*matrix_)(i,index_) : (*matrix_)(index_,i); }
     43    inline double operator()(const size_t i) const
     44    { assert(i<size());
     45    return column_vector_ ? (*matrix_)(i,index_) : (*matrix_)(index_,i); }
    4046
    4147  private:
  • branches/peters_vector/lib/classifier/DataLookup2D.h

    r470 r473  
    55
    66#include <vector>
     7#include <iostream>
    78
    89namespace theplu {
  • branches/peters_vector/lib/classifier/InputRanker.cc

    r470 r473  
    99#include <c++_tools/utility/stl_utility.h>
    1010
     11#include <functional>
     12#include <utility>
    1113#include <vector>
    12 #include <utility>
     14
    1315
    1416namespace theplu {
     
    2426
    2527    //scoring each input
    26     std::vector<std::pair<size_t, double> > score;
     28    std::vector<std::pair<double, size_t> > score;
    2729    for (size_t i=0; i<nof_genes; i++) {
    28       double area = score_object.score(target,DataLookup1D(data,i,false));
    29       std::pair<size_t, double> tmp(i,area);
     30      DataLookup1D vector_value(data,i,false);
     31      double area = score_object.score(target,vector_value);
     32      //double area = score_object.score(target,DataLookup1D(data,i,false));
     33      std::pair<double, size_t> tmp(area,i);
    3034      score.push_back(tmp);
    3135    }
    3236
    3337    //sort the scores and assign id_ and rank_
    34     sort(score.begin(), score.end(),
    35          utility::pair_value_compare<size_t, double>());
     38    sort(score.begin(), score.end(), std::greater<std::pair<double,size_t> >());
    3639   
     40    id_.resize(nof_genes);
     41    rank_.resize(nof_genes);
    3742    for (size_t i=0; i<nof_genes; i++){
    38       id_[i]=score[nof_genes-i-1].first;
     43      id_[i]=score[i].second;
    3944      rank_[id_[i]]=i;           
    4045    }
     
    4954  {
    5055    size_t nof_genes = data.rows();
    51     //    size_t nof_samples = data.columns();
    5256
    5357    //scoring each input
    54     std::vector<std::pair<size_t, double> > score;
     58    std::vector<std::pair<double, size_t> > score;
    5559    for (size_t i=0; i<nof_genes; i++) {
    5660      double area = score_object.score(target, DataLookup1D(data,i,false),
    5761                                       DataLookup1D(weight,i,false));
    58       std::pair<size_t, double> tmp(i,area);
     62      std::pair<double, size_t> tmp(area,i);
    5963      score.push_back(tmp);
    6064    }
     65
    6166    //sort the scores and assign id_ and rank_
    62     sort(score.begin(), score.end(),
    63          utility::pair_value_compare<size_t, double>());
     67    sort(score.begin(), score.end(), std::greater<std::pair<double,size_t> >());
    6468
     69    id_.resize(nof_genes);
     70    rank_.resize(nof_genes);
    6571    for (size_t i=0; i<nof_genes; i++){
    66       id_[i]=score[nof_genes-i-1].first;
     72      id_[i]=score[i].second;
    6773      rank_[id_[i]]=i;           
    6874    }
     75
    6976  }
    7077
  • branches/peters_vector/lib/classifier/MatrixLookup.cc

    r470 r473  
    7171
    7272
     73
     74  std::ostream& operator<<(std::ostream& s, const MatrixLookup& m)
     75  {
     76    s.setf(std::ios::dec);
     77    s.precision(12);
     78    for(size_t i=0, j=0; i<m.rows(); i++)
     79      for (j=0; j<m.columns(); j++) {
     80        s << m(i,j);
     81        if (j<m.columns()-1)
     82          s << "\t";
     83        else if (i<m.rows()-1)
     84          s << "\n";
     85      }
     86    return s;
     87  }
     88
     89
     90
    7391}} // of namespace classifier and namespace theplu
  • branches/peters_vector/lib/classifier/MatrixLookup.h

    r470 r473  
    7373  }; 
    7474 
     75  ///
     76  /// The output operator MatrixLookup
     77  ///
     78  std::ostream& operator<< (std::ostream& s, const MatrixLookup&);
     79
    7580}} // of namespace classifier and namespace theplu
    7681
  • branches/peters_vector/lib/gslapi/vector.cc

    r470 r473  
    140140
    141141  vector::vector(const classifier::DataLookup1D& data)
     142    : view_(NULL), const_view_(NULL)
    142143  {
    143144    v_ = gsl_vector_alloc(data.size());
  • branches/peters_vector/lib/statistics/ROC.cc

    r471 r473  
    88
    99#include <cmath>
    10 #include <iostream>
    1110#include <utility>
    1211#include <vector>
     
    7473    vec_pair_.clear();
    7574    vec_pair_.reserve(target.size());
    76     for (unsigned int i=0; i<target.size(); i++)
    77       vec_pair_.push_back(std::make_pair(target(i),value(i)));
     75    for (unsigned int i=0; i<target.size(); i++){
     76      int target_tmp = target(i);
     77      double value_tmp = value(i);
     78      vec_pair_.push_back(std::make_pair(target_tmp,value_tmp));
     79      //      vec_pair_.push_back(std::make_pair(target(i),value(i)));
     80    }
    7881    std::sort(vec_pair_.begin(),vec_pair_.end(),
    7982              utility::pair_value_compare<int, double>());
     83
    8084
    8185    area_ = 0;
     
    9599    if (area_<0.5 && absolute_)
    96100      area_=1.0-area_;
    97    
     101
    98102    return area_;
    99103  }
    100104
     105  // Peter, should be possible to do this in NlogN
    101106  double ROC::score(const classifier::Target& target,
    102107                    const gslapi::vector& value,
  • branches/peters_vector/test/consensus_inputranker_test.cc

    r470 r473  
    1616using namespace std;
    1717
    18 int main()
     18int main(const int argc,const char* argv[])
    1919
     20  std::ostream* error;
     21  if (argc>1 && argv[1]==std::string("-v"))
     22    error = &std::cerr;
     23  else {
     24    error = new std::ofstream("/dev/null");
     25    if (argc>1)
     26      std::cout << "consensus_inputranker_test -v : for printing extra information\n";
     27  }
     28  *error << "testing consensus_inputranker" << std::endl;
     29  bool ok = true;
     30
    2031  ifstream is("data/rank_data.txt");
    2132  theplu::gslapi::matrix data_tmp(is);
     
    2940 
    3041  theplu::statistics::ROC roc;
    31   theplu::classifier::CrossSplitting sampler(3);
     42  theplu::classifier::CrossSplitting sampler(target,3);
    3243  theplu::classifier::ConsensusInputRanker cir(data,target,roc,sampler,30);
    3344
    3445  if (cir.id(0)!=2 || cir.id(1)!=0 || cir.id(2)!=1){
    35     cerr << "wrong id" << endl;
    36     return -1;
     46    *error << "incorrect id" << endl;
     47    ok = false;
    3748  }
    3849 
    3950  if (cir.rank(0)!=1 || cir.rank(1)!=2 || cir.rank(2)!=0){
    40     cerr << "wrong rank" << endl;
    41     return -1;
     51    *error << "incorrect rank" << endl;
     52    ok=false;
    4253  }
    4354
     
    4758
    4859  if (cir2.id(0)!=2 || cir2.id(1)!=0 || cir2.id(2)!=1){
    49     cerr << "wrong id" << endl;
    50     return -1;
     60    *error << "incorrect id for weighted" << endl;
     61    ok=false;
    5162  }
    5263 
    5364  if (cir2.rank(0)!=1 || cir2.rank(1)!=2 || cir2.rank(2)!=0){
    54     cerr << "wrong rank" << endl;
    55     return -1;
     65    *error << "incorrect rank for weighted" << endl;
     66    ok=false;
    5667  }
    5768
    5869 
    59   return 0;
     70  if (error!=&std::cerr)
     71    delete error;
     72
     73  if(ok)
     74    return 0;
     75  return -1;
    6076}
  • branches/peters_vector/test/crossvalidation_test.cc

    r463 r473  
    22
    33#include <c++_tools/classifier/CrossSplitting.h>
    4 #include <c++_tools/gslapi/vector.h>
     4#include <c++_tools/classifier/Target.h>
     5//#include <c++_tools/gslapi/vector.h>
    56
    67#include <cstdlib>
     
    2425  *error << "testing crosssplitting" << std::endl;
    2526  bool ok = true;
    26   gslapi::vector target(10,1);
     27  classifier::Target target(10,1);
    2728  for (size_t i=0; i<5; i++)
    2829    target(i)=-1;
    2930
    3031  classifier::CrossSplitting cv(target,3);
    31 
    32 
    3332  std::vector<size_t> training_set;
    3433  std::vector<size_t> count(10);
    35 
    3634  training_set = cv.next();
    3735  for (unsigned int i=0; i<training_set.size(); i++)
    3836    count[training_set[i]]++;
    39  
    4037  training_set = cv.next();
    4138  for (unsigned int i=0; i<training_set.size(); i++)
    4239    count[training_set[i]]++;
    43 
    4440  training_set = cv.next();
    4541  for (unsigned int i=0; i<training_set.size(); i++)
    4642    count[training_set[i]]++;
    47 
    4843  for (unsigned int i=0; i<10 ; i++)
    4944    ok = ok && (count[i]==2);
     45  if (!ok)
     46    *error << "Each sample did not occur twice in 3-fold cross-splitting"
     47           << std::endl;
    5048
    51   if (!ok)
    52     *error << "crossvalidation failed" << std::endl;
     49  classifier::CrossSplitting cv2(target,3);
     50  training_set = cv2.next();
     51  if (target(training_set[0])*target(training_set[1]) == 1){
     52    *error << "Error: Two samples from same class"
     53           << std::endl;
     54    ok=false;
     55  }
    5356
    5457  if (error!=&std::cerr)
  • branches/peters_vector/test/inputranker_test.cc

    r470 r473  
    66#include <c++_tools/gslapi/matrix.h>
    77#include <c++_tools/classifier/MatrixLookup.h>
     8#include <c++_tools/classifier/Target.h>
    89
    910#include <cstdlib>
     
    2829
    2930  std::ifstream is("data/rank_data.txt");
    30   *error << "load matrix" << std::endl;
    3131  theplu::gslapi::matrix data_tmp(is);
    32   *error << "copy matrix" << std::endl;
    3332  theplu::classifier::MatrixLookup data(data_tmp);
    34   *error << "Done" << std::endl;
    3533  is.close();
    3634
    37   *error << "Load target" << std::endl;
    3835  is.open("data/rank_target.txt");
    3936  classifier::Target target(is);
    4037  is.close();
    41   *error << "Done" << std::endl;
    4238
    4339  statistics::ROC roc;
    44   *error << "Performing ranking" << std::endl;
    4540  classifier::InputRanker ir(data,target,roc);
    46   *error << "first test" << std::endl;
    4741  if (ir.id(0)!=2 || ir.id(1)!=0 || ir.id(2)!=1){
    4842    *error << "wrong id" << std::endl;
Note: See TracChangeset for help on using the changeset viewer.