Changeset 473 for branches/peters_vector
- Timestamp: Dec 22, 2005, 3:44:54 PM (18 years ago)
- Location: branches/peters_vector
- Files: 15 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/peters_vector/lib/classifier/ConsensusInputRanker.cc
r470 r473 14 14 #include <utility> 15 15 #include <vector> 16 #include <iostream> 16 17 17 18 namespace theplu { … … 25 26 26 27 { 27 for (size_t i=0; i<n ; i++){28 for (size_t i=0; i<nof_rankers_; i++){ 28 29 std::vector<size_t> index=sampler.next(); 29 30 input_rankers_.push_back(InputRanker(MatrixLookup(data,index), … … 32 33 } 33 34 // Sorting with respect to median rank 34 std::vector<std::pair< size_t,double> > medians(data.rows());35 std::vector<std::pair<double,size_t> > medians(data.rows()); 35 36 for (size_t i=0; i<data.rows(); i++){ 36 37 std::vector<size_t> ranks(nof_rankers_); 37 for (size_t j=0; j<nof_rankers_; j++) 38 for (size_t j=0; j<nof_rankers_; j++) { 38 39 ranks[j]=input_rankers_[j].rank(i); 39 40 medians[i].first = i;41 medians[i].second = statistics::median(ranks);40 } 41 medians[i].first = statistics::median(ranks); 42 medians[i].second = i; 42 43 } 43 44 44 45 //sort medians and assign id_ and rank_ 45 sort(medians.begin(), medians.end(), 46 utility::pair_value_compare<size_t, double>()); 46 sort(medians.begin(), medians.end()); 47 47 id_.resize(data.rows()); 48 48 rank_.resize(data.rows()); 49 49 for (size_t i=0; i<data.rows(); i++){ 50 id_[i]=medians[i]. 
first;51 rank_[id_[i]]=i; 50 id_[i]=medians[i].second; 51 rank_[id_[i]]=i; 52 52 } 53 53 … … 59 59 : nof_rankers_(n) 60 60 { 61 62 for (size_t i=0; i<n; i++){ 63 std::vector<size_t> index=sampler.next(); 61 for (size_t i=0; i<nof_rankers_; i++){ 62 std::vector<size_t> index = sampler.next(); 64 63 input_rankers_.push_back(InputRanker(MatrixLookup(data,index), 65 64 Target(target,index), … … 69 68 70 69 // Sorting with respect to median rank 71 std::vector<std::pair< size_t,double> > median(data.rows());70 std::vector<std::pair<double, size_t> > median(data.rows()); 72 71 for (size_t i=0; i<data.rows(); i++){ 73 72 std::vector<size_t> ranks(nof_rankers_); 74 73 for (size_t j=0; j<nof_rankers_; j++) 75 74 ranks[j]=input_rankers_[j].rank(i); 76 median[i].first = i;77 median[i].second = statistics::median(ranks);75 median[i].first = statistics::median(ranks); 76 median[i].second = i; 78 77 } 79 78 80 79 //sort medians and assign id_ and rank_ 81 sort(median.begin(), median.end(), 82 utility::pair_value_compare<size_t, double>()); 80 sort(median.begin(), median.end()); 83 81 id_.resize(data.rows()); 84 82 rank_.resize(data.rows()); 85 83 for (size_t i=0; i<data.rows(); i++){ 86 id_[i]=median[i]. first;84 id_[i]=median[i].second; 87 85 rank_[id_[i]]=i; 88 86 } -
branches/peters_vector/lib/classifier/ConsensusInputRanker.h
r470 r473 47 47 48 48 /// 49 /// Highest ranked row is ranked as number zero @return index of50 /// row ranked as number \a i49 /// Row with lowest rank (highest score) is ranked as number zero 50 /// @return index of row ranked as number \a i 51 51 /// 52 52 inline size_t id(const size_t i) const {return id_[i];} 53 53 54 54 /// 55 /// Highest ranked row is ranked as number zero @return rank for56 /// row \a i55 /// Row with lowest rank (highest score) is ranked as number zero 56 /// @return rank for row \a i 57 57 /// 58 58 inline size_t rank(const size_t i) const {return rank_[i];} -
branches/peters_vector/lib/classifier/CrossSplitting.cc
r460 r473 2 2 3 3 4 #include <c++_tools/ gslapi/vector.h>4 #include <c++_tools/classifier/Target.h> 5 5 #include <c++_tools/classifier/CrossSplitting.h> 6 6 #include <c++_tools/random/random.h> … … 11 11 namespace classifier { 12 12 13 CrossSplitting::CrossSplitting(const theplu::gslapi::vector& target, 14 const size_t k) 15 :count_(0),index_negative_(std::vector<size_t>()), 16 index_positive_(std::vector<size_t>()), k_(k) 13 CrossSplitting::CrossSplitting(const Target& target, const size_t k) 14 :count_(0), k_(k) 17 15 18 16 { … … 44 42 size_t end = int(index_positive_.size()*count_/k_); 45 43 for (size_t i=0; i<index_positive_.size(); i++) 46 if (i<begin || i>=end) 44 if (i<begin || i>=end){ 47 45 training_set.push_back(index_positive_[i]); 46 } 48 47 49 48 begin = int(index_negative_.size()*(count_-1)/k_); -
branches/peters_vector/lib/classifier/CrossSplitting.h
r460 r473 7 7 8 8 namespace theplu { 9 namespace gslapi {10 class vector;11 }12 9 namespace classifier { 10 class Target; 13 11 14 12 /// … … 16 14 /// crossvalidation manner. 17 15 /// 16 /// @note The interface of this class will most likely change pretty soon. 17 /// 18 18 class CrossSplitting 19 19 { … … 23 23 /// Constructor taking \a target and \a k for k-fold cross validation 24 24 /// 25 CrossSplitting(const theplu:: gslapi::vector& target, const size_t k = 3);25 CrossSplitting(const theplu::classifier::Target& target, const size_t k); 26 26 27 27 /// -
branches/peters_vector/lib/classifier/DataLookup1D.cc
r470 r473 2 2 3 3 #include <c++_tools/classifier/DataLookup1D.h> 4 #include <iostream> 4 5 5 6 namespace theplu { … … 12 13 } 13 14 15 DataLookup1D::~DataLookup1D() 16 { 17 } 14 18 15 19 }} // of namespace classifier and namespace theplu -
branches/peters_vector/lib/classifier/DataLookup1D.h
r470 r473 30 30 /// 31 31 /// 32 ~DataLookup1D(); 33 34 /// 35 /// 36 /// 32 37 inline size_t size(void) const 33 38 { return column_vector_ ? matrix_->rows() : matrix_->columns(); } … … 36 41 /// 37 42 /// 38 inline double operator()(const size_t i) const 39 { return column_vector_ ? (*matrix_)(i,index_) : (*matrix_)(index_,i); } 43 inline double operator()(const size_t i) const 44 { assert(i<size()); 45 return column_vector_ ? (*matrix_)(i,index_) : (*matrix_)(index_,i); } 40 46 41 47 private: -
branches/peters_vector/lib/classifier/DataLookup2D.h
r470 r473 5 5 6 6 #include <vector> 7 #include <iostream> 7 8 8 9 namespace theplu { -
branches/peters_vector/lib/classifier/InputRanker.cc
r470 r473 9 9 #include <c++_tools/utility/stl_utility.h> 10 10 11 #include <functional> 12 #include <utility> 11 13 #include <vector> 12 #include <utility> 14 13 15 14 16 namespace theplu { … … 24 26 25 27 //scoring each input 26 std::vector<std::pair< size_t, double> > score;28 std::vector<std::pair<double, size_t> > score; 27 29 for (size_t i=0; i<nof_genes; i++) { 28 double area = score_object.score(target,DataLookup1D(data,i,false)); 29 std::pair<size_t, double> tmp(i,area); 30 DataLookup1D vector_value(data,i,false); 31 double area = score_object.score(target,vector_value); 32 //double area = score_object.score(target,DataLookup1D(data,i,false)); 33 std::pair<double, size_t> tmp(area,i); 30 34 score.push_back(tmp); 31 35 } 32 36 33 37 //sort the scores and assign id_ and rank_ 34 sort(score.begin(), score.end(), 35 utility::pair_value_compare<size_t, double>()); 38 sort(score.begin(), score.end(), std::greater<std::pair<double,size_t> >()); 36 39 40 id_.resize(nof_genes); 41 rank_.resize(nof_genes); 37 42 for (size_t i=0; i<nof_genes; i++){ 38 id_[i]=score[ nof_genes-i-1].first;43 id_[i]=score[i].second; 39 44 rank_[id_[i]]=i; 40 45 } … … 49 54 { 50 55 size_t nof_genes = data.rows(); 51 // size_t nof_samples = data.columns();52 56 53 57 //scoring each input 54 std::vector<std::pair< size_t, double> > score;58 std::vector<std::pair<double, size_t> > score; 55 59 for (size_t i=0; i<nof_genes; i++) { 56 60 double area = score_object.score(target, DataLookup1D(data,i,false), 57 61 DataLookup1D(weight,i,false)); 58 std::pair< size_t, double> tmp(i,area);62 std::pair<double, size_t> tmp(area,i); 59 63 score.push_back(tmp); 60 64 } 65 61 66 //sort the scores and assign id_ and rank_ 62 sort(score.begin(), score.end(), 63 utility::pair_value_compare<size_t, double>()); 67 sort(score.begin(), score.end(), std::greater<std::pair<double,size_t> >()); 64 68 69 id_.resize(nof_genes); 70 rank_.resize(nof_genes); 65 71 for (size_t i=0; i<nof_genes; i++){ 66 id_[i]=score[ 
nof_genes-i-1].first;72 id_[i]=score[i].second; 67 73 rank_[id_[i]]=i; 68 74 } 75 69 76 } 70 77 -
branches/peters_vector/lib/classifier/MatrixLookup.cc
r470 r473 71 71 72 72 73 74 std::ostream& operator<<(std::ostream& s, const MatrixLookup& m) 75 { 76 s.setf(std::ios::dec); 77 s.precision(12); 78 for(size_t i=0, j=0; i<m.rows(); i++) 79 for (j=0; j<m.columns(); j++) { 80 s << m(i,j); 81 if (j<m.columns()-1) 82 s << "\t"; 83 else if (i<m.rows()-1) 84 s << "\n"; 85 } 86 return s; 87 } 88 89 90 73 91 }} // of namespace classifier and namespace theplu -
branches/peters_vector/lib/classifier/MatrixLookup.h
r470 r473 73 73 }; 74 74 75 /// 76 /// The output operator MatrixLookup 77 /// 78 std::ostream& operator<< (std::ostream& s, const MatrixLookup&); 79 75 80 }} // of namespace classifier and namespace theplu 76 81 -
branches/peters_vector/lib/gslapi/vector.cc
r470 r473 140 140 141 141 vector::vector(const classifier::DataLookup1D& data) 142 : view_(NULL), const_view_(NULL) 142 143 { 143 144 v_ = gsl_vector_alloc(data.size()); -
branches/peters_vector/lib/statistics/ROC.cc
r471 r473 8 8 9 9 #include <cmath> 10 #include <iostream>11 10 #include <utility> 12 11 #include <vector> … … 74 73 vec_pair_.clear(); 75 74 vec_pair_.reserve(target.size()); 76 for (unsigned int i=0; i<target.size(); i++) 77 vec_pair_.push_back(std::make_pair(target(i),value(i))); 75 for (unsigned int i=0; i<target.size(); i++){ 76 int target_tmp = target(i); 77 double value_tmp = value(i); 78 vec_pair_.push_back(std::make_pair(target_tmp,value_tmp)); 79 // vec_pair_.push_back(std::make_pair(target(i),value(i))); 80 } 78 81 std::sort(vec_pair_.begin(),vec_pair_.end(), 79 82 utility::pair_value_compare<int, double>()); 83 80 84 81 85 area_ = 0; … … 95 99 if (area_<0.5 && absolute_) 96 100 area_=1.0-area_; 97 101 98 102 return area_; 99 103 } 100 104 105 // Peter, should be possible to do this in NlogN 101 106 double ROC::score(const classifier::Target& target, 102 107 const gslapi::vector& value, -
branches/peters_vector/test/consensus_inputranker_test.cc
r470 r473 16 16 using namespace std; 17 17 18 int main( )18 int main(const int argc,const char* argv[]) 19 19 { 20 std::ostream* error; 21 if (argc>1 && argv[1]==std::string("-v")) 22 error = &std::cerr; 23 else { 24 error = new std::ofstream("/dev/null"); 25 if (argc>1) 26 std::cout << "consensus_inputranker_test -v : for printing extra information\n"; 27 } 28 *error << "testing consensus_inputranker" << std::endl; 29 bool ok = true; 30 20 31 ifstream is("data/rank_data.txt"); 21 32 theplu::gslapi::matrix data_tmp(is); … … 29 40 30 41 theplu::statistics::ROC roc; 31 theplu::classifier::CrossSplitting sampler( 3);42 theplu::classifier::CrossSplitting sampler(target,3); 32 43 theplu::classifier::ConsensusInputRanker cir(data,target,roc,sampler,30); 33 44 34 45 if (cir.id(0)!=2 || cir.id(1)!=0 || cir.id(2)!=1){ 35 cerr << "wrongid" << endl;36 return -1;46 *error << "incorrect id" << endl; 47 ok = false; 37 48 } 38 49 39 50 if (cir.rank(0)!=1 || cir.rank(1)!=2 || cir.rank(2)!=0){ 40 cerr << "wrongrank" << endl;41 return -1;51 *error << "incorrect rank" << endl; 52 ok=false; 42 53 } 43 54 … … 47 58 48 59 if (cir2.id(0)!=2 || cir2.id(1)!=0 || cir2.id(2)!=1){ 49 cerr << "wrong id" << endl;50 return -1;60 *error << "incorrect id for weighted" << endl; 61 ok=false; 51 62 } 52 63 53 64 if (cir2.rank(0)!=1 || cir2.rank(1)!=2 || cir2.rank(2)!=0){ 54 cerr << "wrong rank" << endl;55 return -1;65 *error << "incorrect rank for weighted" << endl; 66 ok=false; 56 67 } 57 68 58 69 59 return 0; 70 if (error!=&std::cerr) 71 delete error; 72 73 if(ok) 74 return 0; 75 return -1; 60 76 } -
branches/peters_vector/test/crossvalidation_test.cc
r463 r473 2 2 3 3 #include <c++_tools/classifier/CrossSplitting.h> 4 #include <c++_tools/gslapi/vector.h> 4 #include <c++_tools/classifier/Target.h> 5 //#include <c++_tools/gslapi/vector.h> 5 6 6 7 #include <cstdlib> … … 24 25 *error << "testing crosssplitting" << std::endl; 25 26 bool ok = true; 26 gslapi::vectortarget(10,1);27 classifier::Target target(10,1); 27 28 for (size_t i=0; i<5; i++) 28 29 target(i)=-1; 29 30 30 31 classifier::CrossSplitting cv(target,3); 31 32 33 32 std::vector<size_t> training_set; 34 33 std::vector<size_t> count(10); 35 36 34 training_set = cv.next(); 37 35 for (unsigned int i=0; i<training_set.size(); i++) 38 36 count[training_set[i]]++; 39 40 37 training_set = cv.next(); 41 38 for (unsigned int i=0; i<training_set.size(); i++) 42 39 count[training_set[i]]++; 43 44 40 training_set = cv.next(); 45 41 for (unsigned int i=0; i<training_set.size(); i++) 46 42 count[training_set[i]]++; 47 48 43 for (unsigned int i=0; i<10 ; i++) 49 44 ok = ok && (count[i]==2); 45 if (!ok) 46 *error << "Each sample did not occur twice in 3-fold cross-splitting" 47 << std::endl; 50 48 51 if (!ok) 52 *error << "crossvalidation failed" << std::endl; 49 classifier::CrossSplitting cv2(target,3); 50 training_set = cv2.next(); 51 if (target(training_set[0])*target(training_set[1]) == 1){ 52 *error << "Error: Two samples from same class" 53 << std::endl; 54 ok=false; 55 } 53 56 54 57 if (error!=&std::cerr) -
branches/peters_vector/test/inputranker_test.cc
r470 r473 6 6 #include <c++_tools/gslapi/matrix.h> 7 7 #include <c++_tools/classifier/MatrixLookup.h> 8 #include <c++_tools/classifier/Target.h> 8 9 9 10 #include <cstdlib> … … 28 29 29 30 std::ifstream is("data/rank_data.txt"); 30 *error << "load matrix" << std::endl;31 31 theplu::gslapi::matrix data_tmp(is); 32 *error << "copy matrix" << std::endl;33 32 theplu::classifier::MatrixLookup data(data_tmp); 34 *error << "Done" << std::endl;35 33 is.close(); 36 34 37 *error << "Load target" << std::endl;38 35 is.open("data/rank_target.txt"); 39 36 classifier::Target target(is); 40 37 is.close(); 41 *error << "Done" << std::endl;42 38 43 39 statistics::ROC roc; 44 *error << "Performing ranking" << std::endl;45 40 classifier::InputRanker ir(data,target,roc); 46 *error << "first test" << std::endl;47 41 if (ir.id(0)!=2 || ir.id(1)!=0 || ir.id(2)!=1){ 48 42 *error << "wrong id" << std::endl;
Note: See TracChangeset for help on using the changeset viewer.