Changeset 469 for branches/peters_vector/lib
- Timestamp: Dec 19, 2005, 3:58:29 PM (17 years ago)
- Location: branches/peters_vector/lib
- Files: 38 edited
branches/peters_vector/lib/classifier/ConsensusInputRanker.cc
r464 r469

  #include <c++_tools/classifier/ConsensusInputRanker.h>

+ #include <c++_tools/classifier/CrossSplitting.h>
+ #include <c++_tools/classifier/DataView.h>
+ #include <c++_tools/classifier/InputRanker.h>
+ #include <c++_tools/classifier/Target.h>
+ #include <c++_tools/statistics/utility.h>
+ #include <c++_tools/utility/stl_utility.h>
  #include <c++_tools/gslapi/matrix.h>
- #include <c++_tools/statistics/utility.h>
- #include <c++_tools/classifier/CrossSplitting.h>
- #include <c++_tools/classifier/InputRanker.h>
- #include <c++_tools/utility/stl_utility.h>

  #include <utility>
…
  namespace classifier {

- ConsensusInputRanker::ConsensusInputRanker(const gslapi::matrix& data,
-                                            const gslapi::vector& target,
-                                            statistics::Score& score_object,
-                                            const size_t rounds,
-                                            const size_t k)
-   :id_(std::vector<size_t>()), input_rankers_(std::vector<InputRanker>()),
-    k_(k), nof_rankers_(k*rounds), rank_(std::vector<size_t>())
+ ConsensusInputRanker::ConsensusInputRanker
+ (const DataView& data, const Target& target,
+  statistics::Score& score_object, CrossSplitting& sampler,
+  const size_t n)
+   : nof_rankers_(n)
  {
-   CrossSplitting cv(target, k_);
-   for (size_t i=0; i<nof_rankers_; i++){
-     //InputRanker ir(data, target, score_object);
-     input_rankers_.push_back(InputRanker(data, target, score_object,
-                                          cv.next()));
+   for (size_t i=0; i<n; i++){
+     std::vector<size_t> index=sampler.next();
+     input_rankers_.push_back(InputRanker(DataView(data,index),
+                                          Target(target,index),
+                                          score_object) );
    }
    // Sorting with respect to median rank
…
- ConsensusInputRanker::ConsensusInputRanker(const gslapi::matrix& data,
-                                            const gslapi::vector& target,
-                                            const gslapi::matrix& weight,
-                                            statistics::Score& score_object,
-                                            const size_t rounds,
-                                            const size_t k)
-   :id_(std::vector<size_t>()), input_rankers_(std::vector<InputRanker>()),
-    k_(k), nof_rankers_(k*rounds), rank_(std::vector<size_t>())
- {
-   CrossSplitting cv(target, k_);
-   for (size_t i=0; i<nof_rankers_; i++)
-     input_rankers_.push_back(InputRanker(data,target,weight,score_object,
-                                          cv.next()));
+ ConsensusInputRanker::ConsensusInputRanker
+ (const DataView& data, const Target& target, const DataView& weight,
+  statistics::Score& score_object, CrossSplitting& sampler,const size_t n)
+   : nof_rankers_(n)
+ {
+   for (size_t i=0; i<n; i++){
+     std::vector<size_t> index=sampler.next();
+     input_rankers_.push_back(InputRanker(DataView(data,index),
+                                          Target(target,index),
+                                          score_object,
+                                          DataView(weight,index)));
+   }

    // Sorting with respect to median rank
branches/peters_vector/lib/classifier/ConsensusInputRanker.h
r451 r469

  namespace theplu {
-   class gslapi::matrix;
-   class gslapi::vector;
    class statistics::Score;
    namespace classifier {
+
+   class CrossSplitting;
+   class DataView;
+   class Target;

    ///
…
    /// manner, and in total there is \a n times \a k ranklists.
    ///
-   ConsensusInputRanker(const gslapi::matrix& data,
-                        const gslapi::vector& target,
-                        statistics::Score&,
-                        const size_t n = 1,
+   ConsensusInputRanker(const DataView& data,
+                        const Target& target,
+                        statistics::Score&, CrossSplitting&,
                         const size_t k = 3 );
…
    /// manner, and in total there is \a n times \a k ranklists.
    ///
-   ConsensusInputRanker(const gslapi::matrix& data,
-                        const gslapi::vector& target,
-                        const gslapi::matrix& weight,
-                        statistics::Score&,
-                        const size_t n = 1,
-                        const size_t k = 3);
+   ConsensusInputRanker(const DataView& data,
+                        const Target& target,
+                        const DataView& weight,
+                        statistics::Score&,
+                        CrossSplitting&,
+                        const size_t n = 1 );
…
    std::vector<size_t> id_;
    std::vector<InputRanker> input_rankers_;
-   size_t k_;
-   size_t nof_rankers_;
+   u_int nof_rankers_;
    std::vector<size_t> rank_;
  };
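For orientation, a minimal usage sketch of the new ConsensusInputRanker interface. It is not part of the changeset; the constructor signature is read from the declaration above, and the CrossSplitting constructor (Target plus fold count, as in the removed .cc code) is an assumption that may differ elsewhere in the branch.

  #include <c++_tools/classifier/ConsensusInputRanker.h>
  #include <c++_tools/classifier/CrossSplitting.h>
  #include <c++_tools/classifier/DataView.h>
  #include <c++_tools/classifier/Target.h>
  #include <c++_tools/statistics/ROC.h>

  using namespace theplu;

  // Rank the rows of 'data' by consensus over 30 cross-splitting rounds.
  void rank_rows(const classifier::DataView& data,
                 const classifier::Target& target)
  {
    classifier::CrossSplitting sampler(target, 3); // assumed ctor: Target + k
    statistics::ROC roc(true);                     // any statistics::Score works
    classifier::ConsensusInputRanker cir(data, target, roc, sampler, 30);
    // cir now holds the consensus ordering (id_/rank_ in the class above).
  }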
branches/peters_vector/lib/classifier/DataView.cc
r455 r469 15 15 } 16 16 17 18 17 19 DataView::DataView(const gslapi::matrix& data, 18 20 const std::vector<size_t>& row, … … 22 24 } 23 25 26 27 28 DataView::DataView(const DataView& data, 29 const std::vector<size_t>& row, 30 const std::vector<size_t>& col) 31 : MatrixView(row,col), data_(data.data_) 32 { 33 } 34 35 36 24 37 DataView::DataView(const gslapi::matrix& data, 25 const std::vector<size_t>& index, bool row) 38 const std::vector<size_t>& index, 39 const bool row) 26 40 : MatrixView(index, row), data_(&data) 27 41 { 28 if(row) 42 } 43 44 45 46 DataView::DataView(const DataView& data, 47 const std::vector<size_t>& index, bool row) 48 : MatrixView(index, row), data_(data.data_) 49 { 50 if(row){ 51 row_index_=index; 52 column_index_.reserve((*data_).columns()); 29 53 for(size_t i=0;i<(*data_).columns();i++) 30 54 column_index_.push_back(i); 31 else 55 } 56 else{ 57 column_index_=index; 58 row_index_.reserve((*data_).rows()); 32 59 for(size_t i=0;i<(*data_).rows();i++) 33 60 row_index_.push_back(i); 61 } 34 62 35 63 } 36 64 65 66 37 67 DataView::DataView(const DataView& dv) 38 68 : MatrixView(dv), data_(dv.data_) -
branches/peters_vector/lib/classifier/DataView.h
r455 r469

  namespace theplu {
  namespace classifier {
…
    DataView(const DataView&);

+   ///
+   /// Constructor taking the row index vector and column index vector
+   /// as input.
+   ///
+   DataView(const DataView&, const std::vector<size_t>&,
+            const std::vector<size_t>&);
+
+   ///
+   /// Constructor taking the column (default) or row index vector
+   /// as input.
+   ///
+   DataView(const DataView&, const std::vector<size_t>&, const bool row=false);
+
    ///
    /// Destructor
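A small sketch of how the new DataView constructors can be used to carve sub-views out of an existing view without copying data. The signatures are taken from the declarations above; the example itself is illustrative only.

  #include <c++_tools/classifier/DataView.h>
  #include <vector>

  using namespace theplu::classifier;

  // Sub-view containing every second row and all columns of 'data'.
  DataView every_second_row(const DataView& data)
  {
    std::vector<size_t> rows;
    for (size_t i=0; i<data.rows(); i+=2)
      rows.push_back(i);
    return DataView(data, rows, true);  // bool flag: index vector selects rows
  }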
branches/peters_vector/lib/classifier/InputRanker.cc
r451 r469 3 3 #include <c++_tools/classifier/InputRanker.h> 4 4 5 #include <c++_tools/gslapi/matrix.h> 6 #include <c++_tools/gslapi/vector.h> 5 #include <c++_tools/classifier/DataView.h> 6 #include <c++_tools/classifier/VectorView.h> 7 #include <c++_tools/classifier/Target.h> 7 8 #include <c++_tools/statistics/ROC.h> 8 9 #include <c++_tools/utility/stl_utility.h> … … 15 16 16 17 17 InputRanker::InputRanker(const gslapi::matrix& data, 18 const gslapi::vector& target, 19 statistics::Score& score_object, 20 const std::vector<size_t>& train_set) 21 :train_set_(train_set), 22 id_(std::vector<size_t>(data.rows())), 23 rank_(std::vector<size_t>(data.rows())) 24 18 InputRanker::InputRanker(const DataView& data, 19 const Target& target, 20 statistics::Score& score_object) 25 21 { 26 22 size_t nof_genes = data.rows(); … … 35 31 std::vector<std::pair<size_t, double> > score; 36 32 for (size_t i=0; i<nof_genes; i++) { 37 double area = score_object.score(target, gslapi::vector(data,i),train_set_);33 double area = score_object.score(target,VectorView(data,i,false)); 38 34 std::pair<size_t, double> tmp(i,area); 39 35 score.push_back(tmp); … … 52 48 53 49 54 InputRanker::InputRanker(const gslapi::matrix& data, 55 const gslapi::vector& target, 56 const gslapi::matrix& weight, 50 InputRanker::InputRanker(const DataView& data, 51 const Target& target, 57 52 statistics::Score& score_object, 58 const std::vector<size_t>& train_set) 59 :train_set_(train_set), 60 id_(std::vector<size_t>(data.rows())), 61 rank_(std::vector<size_t>(data.rows())) 62 53 const DataView& weight) 63 54 { 64 55 size_t nof_genes = data.rows(); … … 72 63 std::vector<std::pair<size_t, double> > score; 73 64 for (size_t i=0; i<nof_genes; i++) { 74 double area = score_object.score(target, gslapi::vector(data,i),75 gslapi::vector(weight,i), train_set_);65 double area = score_object.score(target, VectorView(data,i,false), 66 VectorView(weight,i,false)); 76 67 std::pair<size_t, double> tmp(i,area); 77 68 score.push_back(tmp); -
branches/peters_vector/lib/classifier/InputRanker.h
r451 r469 15 15 } 16 16 namespace classifier { 17 18 class DataView; 19 class Target; 20 17 21 /// 18 22 /// Class for ranking rows in a matrix, using a Score and a … … 24 28 public: 25 29 /// 26 /// Default constructor.27 ///28 inline InputRanker(void) : train_set_(std::vector<size_t>()),29 id_(std::vector<size_t>()),30 rank_(std::vector<size_t>()) {}31 32 ///33 30 /// Constructor taking data, target, a Score 34 31 /// object and vector defining what samples to use (default is to 35 32 /// use all samples) 36 33 /// 37 InputRanker(const gslapi::matrix&, 38 const gslapi::vector&, 39 statistics::Score&, 40 const std::vector<size_t>& = std::vector<size_t>()); 34 InputRanker(const DataView&, const Target&, statistics::Score&); 35 41 36 42 37 /// … … 45 40 /// use all samples) 46 41 /// 47 InputRanker(const gslapi::matrix&, 48 const gslapi::vector&, 49 const gslapi::matrix&, 50 statistics::Score&, 51 const std::vector<size_t>& = std::vector<size_t>()); 42 InputRanker(const DataView&, const Target&, statistics::Score&, 43 const DataView&); 44 52 45 53 46 /// -
branches/peters_vector/lib/classifier/Kernel_MEV.h
r451 r469

  #include <c++_tools/classifier/Kernel.h>
  #include <c++_tools/classifier/KernelFunction.h>
+ #include <c++_tools/gslapi/vector.h>
  #include <c++_tools/gslapi/matrix.h>
branches/peters_vector/lib/classifier/Makefile.am
r467 r469

  	PolynomialKernelFunction.cc \
  	SVM.cc \
+ 	Target.cc \
  	VectorView.cc
+

  include_classifierdir = $(includedir)/c++_tools/classifier
…
  	PolynomialKernelFunction.h \
  	SVM.h \
- 	VectorAbstract.h \
+ 	Target.h \
  	VectorView.h
branches/peters_vector/lib/classifier/MatrixView.cc
r455 r469 9 9 10 10 11 MatrixView::MatrixView(const MatrixView& m, 12 const std::vector<size_t>& row, 13 const std::vector<size_t>& col) 14 { 15 for (size_t i=0; i<row.size(); i++) 16 row_index_[i] = m.row_index_[row[i]]; 17 for (size_t i=0; i<col.size(); i++) 18 column_index_[i] = m.column_index_[col[i]]; 19 } 20 21 22 23 MatrixView::MatrixView(const MatrixView& m, 24 const std::vector<size_t>& index, 25 const bool row) 26 { 27 if (row){ 28 row_index_=index; 29 column_index_= m.column_index_; 30 } 31 else{ 32 column_index_=index; 33 row_index_= m.row_index_; 34 } 35 } 36 37 11 38 MatrixView::MatrixView(const std::vector<size_t>& row, 12 39 const std::vector<size_t>& col) … … 15 42 } 16 43 44 45 17 46 MatrixView::MatrixView(const std::vector<size_t>& index, bool row) 18 47 { 19 if(row)20 row_index_=index;21 else22 column_index_=index;23 48 } 24 49 -
branches/peters_vector/lib/classifier/MatrixView.h
r455 r469 37 37 38 38 /// 39 /// 40 /// 41 MatrixView(const MatrixView&, const std::vector<size_t>& row, 42 const std::vector<size_t>& col); 43 44 /// 45 /// 46 /// 47 MatrixView(const MatrixView&, const std::vector<size_t>& index, 48 const bool row); 49 50 51 /// 39 52 /// Destructor 40 53 /// 41 54 virtual ~MatrixView() {}; 42 55 56 /// 57 /// @return number of columns 58 /// 43 59 inline size_t columns(void) const { return column_index_.size(); } 44 60 61 /// 62 /// @return number of rows 63 /// 45 64 inline size_t rows(void) const { return row_index_.size(); } 46 65 66 /// 67 /// @return 68 /// 47 69 virtual double operator()(const size_t row, const size_t column) const=0; 48 70 -
branches/peters_vector/lib/classifier/NCC.cc
r456 r469 4 4 5 5 #include <c++_tools/classifier/DataView.h> 6 #include <c++_tools/classifier/Target.h> 6 7 #include <c++_tools/gslapi/vector.h> 7 8 #include <c++_tools/statistics/Averager.h> 9 #include <c++_tools/statistics/AveragerPairWeighted.h> 8 10 #include <c++_tools/utility/stl_utility.h> 9 11 … … 16 18 namespace classifier { 17 19 18 NCC::NCC(const DataView& data, const gslapi::vector& target)20 NCC::NCC(const DataView& data, const Target& target) 19 21 { 20 gslapi::vectorsorted_target(target);22 Target sorted_target(target); 21 23 sorted_target.sort(); // if targets contain NaN => infinite loop 22 24 … … 51 53 { 52 54 prediction=gslapi::vector(classes_.size()); 53 std::vector<size_t> use;55 gslapi::vector w(input.size(),0); 54 56 for(size_t i=0; i<input.size(); i++) // take care of missing values 55 57 if(!std::isnan(input(i))) 56 use.push_back(i); 57 for(size_t j=0; j<centroids_.columns(); j++) 58 prediction(j)=score.score(input,gslapi::vector(centroids_,j,false),use); 58 w(i)=1.0; 59 statistics::AveragerPairWeighted ap; 60 for(size_t j=0; j<centroids_.columns(); j++) { 61 ap.add(input,gslapi::vector(centroids_,j,false),w, w); 62 prediction(j)=ap.correlation(); 63 ap.reset(); 64 } 59 65 } 60 66 -
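The predict() change above replaces the explicit index vector of usable samples with a weight vector (zero weight for NaN entries) fed to AveragerPairWeighted. Below is a stand-alone sketch of that idea, using the same add()/correlation() calls as the new code; the helper name is hypothetical.

  #include <c++_tools/statistics/AveragerPairWeighted.h>
  #include <c++_tools/gslapi/vector.h>
  #include <cmath>

  using namespace theplu;

  // Correlation between a sample and a centroid where NaN:s in the sample
  // are masked out by zero weight, mirroring NCC::predict above.
  double masked_correlation(const gslapi::vector& sample,
                            const gslapi::vector& centroid)
  {
    gslapi::vector w(sample.size(), 0);
    for (size_t i=0; i<sample.size(); i++)
      if (!std::isnan(sample(i)))
        w(i) = 1.0;
    statistics::AveragerPairWeighted ap;
    ap.add(sample, centroid, w, w);   // add(x, y, wx, wy) as in predict()
    return ap.correlation();
  }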
branches/peters_vector/lib/classifier/NCC.h
r456 r469

  #define _theplu_classifier_ncc_

- #include <c++_tools/classifier/DataView.h>
  #include <c++_tools/gslapi/matrix.h>
- #include <c++_tools/statistics/Score.h>

  #include <map>

  namespace theplu {
+ namespace gslapi {
+   class vector;
+ }
+ namespace statistics {
+   class Score;
+ }
+
  namespace classifier {

+   class DataView;
    class Score;
+   class Target;
+   class VectorView;

    ///
…
    /// input. Performs the training of the NCC.
    ///
-   NCC(const DataView&, const gslapi::vector&);
+   NCC(const DataView&, const Target&);

    ///
branches/peters_vector/lib/classifier/PolynomialKernelFunction.h
r451 r469

  namespace theplu {
-
- namespace gslapi {
-   class vector;
- }
-
  namespace classifier {
branches/peters_vector/lib/classifier/SVM.cc
r463 r469

  }

- SVM::SVM(const KernelView& kernel, const gslapi::vector& target)
+ SVM::SVM(const KernelView& kernel, const Target& target)
    : alpha_(target.size(),0),
      bias_(0),
…
      tolerance_(0.00000001)
  {
-   if (max_epochs_>ULONG_MAX)
-     max_epochs_=ULONG_MAX;
  }
branches/peters_vector/lib/classifier/SVM.h
r461 r469

  #include <c++_tools/classifier/KernelView.h>
+ #include <c++_tools/classifier/Target.h>
  #include <c++_tools/gslapi/vector.h>
…
    /// the SVM is no longer defined.
    ///
-   SVM(const KernelView&, const gslapi::vector&);
+   SVM(const KernelView&, const Target&);

    ///
…
    gslapi::vector output_;
    Index sample_;
-   const gslapi::vector& target_;
+   const Target& target_;
    bool trained_;
    double tolerance_;
branches/peters_vector/lib/classifier/VectorAbstract.h
r467 r469

+
  };

(whitespace-only change: a blank line added before the closing brace)
branches/peters_vector/lib/classifier/VectorView.cc
r467 r469

  namespace classifier {

- VectorView::VectorView(const gslapi::vector& vec)
-   : VectorAbstract(), vector_(&vec)
- {
-   for(size_t i=0;i<vec.size();i++)
-     index_.push_back(i);
- }
-
- VectorView::VectorView(const gslapi::vector& vec,
-                        const std::vector<size_t>& index)
-   : VectorAbstract(), index_(index), vector_(&vec)
+ VectorView::VectorView(const MatrixView& m, const size_t i, bool row=false)
+   : VectorAbstract(), column_vector_(row), index_(i), matrix_(&m)
  {
  }
branches/peters_vector/lib/classifier/VectorView.h
r467 r469

  #include <c++_tools/classifier/VectorAbstract.h>
- #include <c++_tools/gslapi/vector.h>

  #include <vector>
…
  namespace theplu {
  namespace classifier {
+
+   class MatrixView;

  ///
…
  public:

    ///
    /// Constructor
    ///
-   VectorView(const gslapi::vector&);
-
-   ///
-   /// Constructor taking the index vector as input.
-   ///
-   VectorView(const gslapi::vector&, const std::vector<size_t>&);
+   VectorView(const MatrixView&, const size_t, const bool row);

    ///
-   /// Destructor
    ///
-   ~VectorView() {};
+   ///
+   size_t size(void) const;

-   inline size_t size(void) const { return index_.size(); }
-
-   inline const double& operator()(const size_t i) const
-     { return (*vector_)(index_[i]); }
+   ///
+   ///
+   ///
+   const double& operator()(const size_t) const;

  private:
-   std::vector<size_t> index_;
-   const gslapi::vector* vector_;
+   VectorView();
+
+   const bool column_vector_;
+   const size_t index_;
+   const MatrixView* matrix_;

  };
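With VectorView now acting as a row/column proxy over a MatrixView, and sum() and operator* moved to VectorAbstract (see gslapi/vector.cc below), vector algebra can be written directly on views. A sketch under those assumptions; the meaning of the bool row flag is inferred from the parameter name and may be the opposite convention in the branch.

  #include <c++_tools/classifier/DataView.h>
  #include <c++_tools/classifier/VectorView.h>

  using namespace theplu::classifier;

  // Dot product between two rows of a DataView, computed through the
  // VectorAbstract interface without copying any data.
  double row_dot(const DataView& data, size_t i, size_t j)
  {
    VectorView a(data, i, true);   // row i (per the 'row' flag)
    VectorView b(data, j, true);   // row j
    return a * b;                  // VectorAbstract::operator*
  }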
branches/peters_vector/lib/gslapi/vector.cc
r439 r469 178 178 179 179 180 double vector::sum(void) const180 double VectorAbstract::sum(void) const 181 181 { 182 182 double sum = 0; 183 183 for (size_t i=0; i<size(); i++) 184 sum += gsl_vector_get( v_, i);184 sum += (*this)(i); 185 185 return( sum ); 186 186 } 187 188 189 190 double vector::operator*( const vector &other ) const191 {192 // Jari, check for gsl_support193 double res = 0.0;;194 for ( size_t i = 0; i < size(); ++i )195 res += other[i] * (*this)[i];196 return res;197 }198 199 200 187 201 188 bool vector::operator==(const vector& a) const … … 207 194 return false; 208 195 return true; 196 } 197 198 199 200 double VectorAbstract::operator*( const VectorAbstract &other ) const 201 { 202 double res = 0.0;; 203 for ( size_t i = 0; i < size(); ++i ) 204 res += other(i) * (*this)(i); 205 return res; 209 206 } 210 207 -
branches/peters_vector/lib/gslapi/vector.h
r468 r469 4 4 #define _theplu_gslapi_vector_ 5 5 6 #include <c++_tools/classifier/VectorAbstract.h>7 6 #include <c++_tools/utility/Exception.h> 8 7 … … 40 39 /// community. 41 40 /// 42 class vector : public classifier::VectorAbstract41 class vector 43 42 { 44 43 public: … … 337 336 /// @return The sum. 338 337 /// 339 // Jari, doxygen group as "Extends GSL".340 338 double sum(void) const; 341 339 … … 390 388 inline const double& 391 389 operator[](size_t i) const { return *gsl_vector_const_ptr(v_,i); } 392 393 ///394 /// @return The dot product.395 ///396 double operator*( const vector &other ) const;397 390 398 391 /// … … 420 413 421 414 /// 415 /// @return The dot product. 416 /// 417 double operator*( const vector &other ) const; 418 419 /// 422 420 /// Addition and assign operator. 423 421 /// -
branches/peters_vector/lib/statistics/AveragerPair.h
r427 r469

  namespace theplu{
-
-   class gslapi::vector;
-
  namespace statistics{
    ///
branches/peters_vector/lib/statistics/AveragerWeighted.h
r420 r469

  ///
+ /// The variance is calculated as \f$ \frac{\sum w_i (x_i - m)^2
+ /// }{\sum w_i} \f$.
+ ///
+ /// @return The variance.
+ ///
+ inline double variance(void) const
+   { return sum_xx_centered()/sum_w(); }
+
+ ///
  /// The variance is calculated as \f$ \frac{\sum w_i}{\left(\sum
  /// w_i\right)^2 - \sum w_i^2} \sum w_i (x_i - m)^2 \f$. This
…
  /// @return The variance.
  ///
- inline double variance(void) const
+ inline double variance_unbiased(void) const
    { return sum_ww() / (sum_w()*sum_w()-sum_ww()) * sum_xx_centered(); }
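The class now exposes both the plain weighted variance and the unbiased estimator. A stand-alone loop version of the two formulas above, only to make them concrete; the class accumulates the same sums incrementally.

  #include <cstddef>
  #include <vector>

  // variance()          = sum w_i (x_i - m)^2 / sum w_i
  // variance_unbiased() = sum w_i / ((sum w_i)^2 - sum w_i^2) * sum w_i (x_i - m)^2
  void weighted_variances(const std::vector<double>& x,
                          const std::vector<double>& w,
                          double& biased, double& unbiased)
  {
    double sw=0, sww=0, swx=0;
    for (std::size_t i=0; i<x.size(); ++i) {
      sw  += w[i];
      sww += w[i]*w[i];
      swx += w[i]*x[i];
    }
    const double m = swx/sw;          // weighted mean
    double sxx=0;                     // sum w_i (x_i - m)^2
    for (std::size_t i=0; i<x.size(); ++i)
      sxx += w[i]*(x[i]-m)*(x[i]-m);
    biased   = sxx/sw;
    unbiased = sw/(sw*sw - sww) * sxx;
  }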
branches/peters_vector/lib/statistics/Fisher.cc
r449 r469 4 4 #include <c++_tools/statistics/Score.h> 5 5 #include <c++_tools/statistics/utility.h> 6 #include <c++_tools/classifier/Target.h> 6 7 7 8 #include <gsl/gsl_cdf.h> … … 13 14 14 15 Fisher::Fisher(bool b) 15 : Score(b), a_(0), b_(0), c_(0), d_(0), cutoff_column_(0), cutoff_row_(0), 16 oddsratio_(1.0) 16 : Score(b), a_(0), b_(0), c_(0), d_(0), oddsratio_(1.0), value_cutoff_(0) 17 17 { 18 18 } … … 44 44 // If a column sum or a row sum is zero, the table is nonsense 45 45 if ((a==0 || d==0) && (c==0 || b==0)){ 46 //Peter, should thro ughexception46 //Peter, should throw exception 47 47 std::cerr << "Warning: Fisher: Table is not valid\n"; 48 48 return oddsratio_ = 1.0; … … 59 59 { 60 60 double p=1; 61 if ( a_<minimum_size_ || b_<minimum_size_ ||62 c_<minimum_size_ || d_<minimum_size_){61 if ( ( a_<minimum_size_ || b_<minimum_size_ || 62 c_<minimum_size_ || d_<minimum_size_) && !weighted_ ){ 63 63 64 64 p=p_value_exact(); … … 71 71 if (oddsratio_<0.5){ 72 72 // last term because >= not equal to !(<=) 73 return 1-p+gsl_ran_hypergeometric_pdf(a_, a_+b_, c_+d_, a_+c_); 73 u_int a = static_cast<u_int>(a_); 74 u_int b = static_cast<u_int>(b_); 75 u_int c = static_cast<u_int>(c_); 76 u_int d = static_cast<u_int>(d_); 77 return 1-p+gsl_ran_hypergeometric_pdf(a, a+b, c+d, a+c); 74 78 } 75 79 } … … 84 88 85 89 double Fisher::p_value_exact() const 86 { 90 { 91 u_int a = static_cast<u_int>(a_); 92 u_int b = static_cast<u_int>(b_); 93 u_int c = static_cast<u_int>(c_); 94 u_int d = static_cast<u_int>(d_); 95 87 96 // Since the calculation is symmetric and cdf_hypergeometric_P 88 97 // loops up to k we choose the smallest number to be k and mirror 89 98 // the matrix. This choice makes the p-value two-sided. 90 if (a_<b_ && a_<c_ && a_<d_)91 return statistics::cdf_hypergeometric_P(a_,a_+b_,c_+d_,a_+c_);92 else if (b_<a_ && b_<c_ && b_<d_)93 return statistics::cdf_hypergeometric_P(b_,a_+b_,c_+d_,b_+d_);94 else if (c_<a_ && c_<b_ && c_<d_)95 return statistics::cdf_hypergeometric_P(c_,c_+d_,a_+b_,a_+c_);96 99 97 return statistics::cdf_hypergeometric_P(d_,c_+d_,a_+b_,b_+d_); 100 if (a<b && a<c && a<d) 101 return statistics::cdf_hypergeometric_P(a,a+b,c+d,a+c); 102 else if (b<a && b<c && b<d) 103 return statistics::cdf_hypergeometric_P(b,a+b,c+d,b+d); 104 else if (c<a && c<b && c<d) 105 return statistics::cdf_hypergeometric_P(c,c+d,a+b,a+c); 106 107 return statistics::cdf_hypergeometric_P(d,c+d,a+b,b+d); 98 108 99 109 } 100 110 101 111 102 double Fisher::score(const gslapi::vector& x, 103 const gslapi::vector& y, 104 const std::vector<size_t>& train_set) 112 double Fisher::score(const classifier::Target& target, 113 const classifier::VectorAbstract& value) 105 114 { 106 if (!train_set.size()){ 107 train_set_.resize(0); 108 for (size_t i=0; i<target_.size(); i++) 109 train_set_.push_back(i); 110 } 111 else 112 train_set_ = train_set; 115 weighted_=false; 113 116 a_=b_=c_=d_=0; 114 for (size_t i=0; i<t rain_set_.size(); i++)115 if ( x(train_set_[i])<cutoff_column_)116 if ( y(train_set_[i])<cutoff_row_)117 for (size_t i=0; i<target.size(); i++) 118 if (class_one(target(i))) 119 if (value(i)>value_cutoff_) 117 120 a_++; 118 121 else 119 122 c_++; 120 123 else 121 if ( y(train_set_[i])<cutoff_row_)124 if (value(i)>value_cutoff_) 122 125 b_++; 123 126 else 124 127 d_++; 125 128 126 // If a column sum or a row sum is zero, the table is non sense129 // If a column sum or a row sum is zero, the table is non-sense 127 130 if ((a_==0 || d_==0) && (c_==0 || b_==0)){ 128 131 std::cerr << "Warning: Fisher: Table is 
not valid\n"; 129 return 0;132 return 1; 130 133 } 131 134 … … 133 136 } 134 137 135 double Fisher::score(const gslapi::vector& x, 136 const gslapi::vector& y, 137 const gslapi::vector& w, 138 const std::vector<size_t>& train_set) 138 double Fisher::score(const classifier::Target& target, 139 const classifier::VectorAbstract& value, 140 const classifier::VectorAbstract& weighted) 139 141 { 140 if (!train_set.size()){ 141 train_set_.resize(0); 142 for (size_t i=0; i<target_.size(); i++) 143 train_set_.push_back(i); 142 weighted_=true; 143 a_=b_=c_=d_=0; 144 for (size_t i=0; i<target.size(); i++) 145 if (class_one(target(i))) 146 if (value(i)>value_cutoff_) 147 a_+=value(i); 148 else 149 c_+=value(i); 150 else 151 if (value(i)>value_cutoff_) 152 b_+=value(i); 153 else 154 d_+=value(i); 155 156 // If a column sum or a row sum is zero, the table is non-sense 157 if ((a_==0 || d_==0) && (c_==0 || b_==0)){ 158 std::cerr << "Warning: Fisher: Table is not valid\n"; 159 return 1; 144 160 } 145 else146 train_set_ = train_set;147 148 double a=0;149 double b=0;150 double c=0;151 double d=0;152 153 for (size_t i=0; i<train_set_.size(); i++)154 if (x(train_set_[i]) < cutoff_column_)155 if (y(train_set_[i]) < cutoff_row_)156 a+=w(train_set_[i]);157 else158 c+=w(train_set_[i]);159 else160 if (y(train_set_[i]) < cutoff_column_)161 b+=w(train_set_[i]);162 else163 d+=w(train_set_[i]);164 161 165 162 return oddsratio(a_,b_,c_,d_); 166 163 } 167 164 168 165 double Fisher::score(const u_int a, const u_int b, 169 166 const u_int c, const u_int d) -
branches/peters_vector/lib/statistics/Fisher.h
r449 r469 64 64 /// 65 65 /// Cutoff sets the limit whether a value should go into the left 66 /// or the right column. @see score67 ///68 /// @return reference to cutoff for column69 ///70 inline double& cutoff_column(void) { return cutoff_column_; }71 72 ///73 /// Cutoff sets the limit whether a value should go into the left74 66 /// or the right row. @see score 75 67 /// 76 68 /// @return reference to cutoff for row 77 69 /// 78 inline double& cutoff_row(void) { return cutoff_row_; }70 inline double& value_cutoff(void) { return value_cutoff_; } 79 71 80 72 /// … … 99 91 /// @return p-value 100 92 /// 93 /// @note in weighted case, approximation Chi2 is always used. 94 /// 101 95 double p_value() const; 102 96 … … 104 98 /// Function calculating score from 2x2 table for which the 105 99 /// elements are calculated as follows \n 106 /// a: #data \f$ x=1 \f$ AND \f$ y=1 \f$ \n 107 /// b: #data \f$ x=-1 \f$ AND \f$ y=1 \f$ \n 108 /// c: #data \f$ x=1 \f$ AND \f$ y=-1 \f$ \n 109 /// d: #data \f$ x=-1 \f$ AND \f$ y=1 \f$ \n 100 /// target=positive_label (a,c) 101 /// \f$ value > \f$ value_cutoff() a,b \n 110 102 /// 111 103 /// @return odds ratio. If absolute_ is true and odds ratio is 112 104 /// less than unity 1 divided by odds ratio is returned 113 105 /// 114 double score(const gslapi::vector& x, const gslapi::vector& y, 115 const std::vector<size_t>& = std::vector<size_t>());106 double score(const classifier::Target& target, 107 const classifier::VectorAbstract& value); 116 108 117 109 /// … … 122 114 /// @return odds ratio 123 115 /// 124 /// @ note116 /// @see score 125 117 /// 126 double score(const gslapi::vector& x, const gslapi::vector& y, 127 const gslapi::vector& w,128 const std::vector<size_t>& = std::vector<size_t>());118 double score(const classifier::Target& target, 119 const classifier::VectorAbstract& value, 120 const classifier::VectorAbstract& weight); 129 121 130 122 /// … … 148 140 double p_value_exact(void) const; 149 141 150 std::vector<size_t> train_set_; 151 gslapi::vector weight_; 152 u_int a_; 153 u_int b_; 154 u_int c_; 155 u_int d_; 156 double cutoff_column_; 157 double cutoff_row_; 142 double a_; 143 double b_; 144 double c_; 145 double d_; 158 146 u_int minimum_size_; 159 147 double oddsratio_; 148 double value_cutoff_; 160 149 }; 161 150 -
branches/peters_vector/lib/statistics/FoldChange.cc
r414 r469 1 1 // $Id$ 2 2 3 #include "FoldChange.h"4 5 #include "Score.h"6 #include "Averager.h"7 #include "AveragerWeighted.h"3 #include <c++_tools/statistics/FoldChange.h> 4 #include <c++_tools/statistics/Score.h> 5 #include <c++_tools/statistics/Averager.h> 6 #include <c++_tools/statistics/AveragerWeighted.h> 7 #include <c++_tools/classifier/Target.h> 8 8 9 9 namespace theplu { … … 31 31 } 32 32 33 double FoldChange::score(const gslapi::vector& target, 34 const gslapi::vector& value, 35 const std::vector<size_t>& train_set) 33 double FoldChange::score(const classifier::Target& target, 34 const classifier::VectorAbstract& value) 36 35 { 37 if (!train_set_.size())38 for (size_t i=0; i<target_.size(); i++)39 train_set_.push_back(i);40 else41 train_set_=train_set;42 43 36 weighted_=false; 44 37 Averager pos; 45 38 Averager neg; 46 39 47 for (size_t i=0; i< train_set.size(); i++)48 if ( target(train_set[i])==1)49 pos.add(value( train_set[i]));40 for (size_t i=0; i<value.size(); i++) 41 if (class_one(target(i))) 42 pos.add(value(i)); 50 43 else 51 neg.add(value( train_set[i]));44 neg.add(value(i)); 52 45 53 46 if (absolute_) … … 56 49 } 57 50 58 double FoldChange::score(const gslapi::vector& target, 59 const gslapi::vector& value, 60 const gslapi::vector& weight, 61 const std::vector<size_t>& train_set) 51 double FoldChange::score(const classifier::Target& target, 52 const classifier::VectorAbstract& value, 53 const classifier::VectorAbstract& weight) 62 54 { 63 if (!train_set_.size())64 for (size_t i=0; i<target_.size(); i++)65 train_set_.push_back(i);66 else67 train_set_=train_set;68 69 55 weighted_=true; 70 56 AveragerWeighted pos; 71 57 AveragerWeighted neg; 72 58 73 for (size_t i=0; i< train_set.size(); i++)74 if ( target(train_set[i])==1)75 pos.add(value( train_set[i]),weight(train_set[i]));59 for (size_t i=0; i<value.size(); i++) 60 if (class_one(target(i))) 61 pos.add(value(i),weight(i)); 76 62 else 77 neg.add(value( train_set[i]),weight(train_set[i]));63 neg.add(value(i),weight(i)); 78 64 79 65 if (absolute_) -
branches/peters_vector/lib/statistics/FoldChange.h
r414 r469 7 7 8 8 namespace theplu { 9 10 class classifier::VectorAbstract; 11 12 9 13 namespace statistics { 10 14 … … 28 32 /// @param target is +1 or -1 29 33 /// @param value vector of the values 30 /// @train_set defining which values to use (number of values used31 /// in the calculation is equal to size of \a train_set)32 34 /// 33 double 34 score(const gslapi::vector& target, 35 const gslapi::vector& value, 36 const std::vector<size_t>& train_set = std::vector<size_t>()); 35 double score(const classifier::Target& target, 36 const classifier::VectorAbstract& value); 37 37 38 38 /// … … 45 45 /// in the calculation is equal to size of \a train_set) 46 46 /// 47 double 48 score(const gslapi::vector& target, 49 const gslapi::vector& value, 50 const gslapi::vector& weight, 51 const std::vector<size_t>& train_set = std::vector<size_t>()); 47 double score(const classifier::Target& target, 48 const classifier::VectorAbstract& value, 49 const classifier::VectorAbstract& weight); 52 50 53 51 private: -
branches/peters_vector/lib/statistics/Makefile.am
r461 r469

  noinst_LTLIBRARIES = libstatistics.la
  libstatistics_la_SOURCES = \
- 	Averager.cc AveragerPair.cc AveragerWeighted.cc Fisher.cc FoldChange.cc \
+ 	Averager.cc AveragerPair.cc AveragerWeighted.cc \
+ 	AveragerPairWeighted.cc \
+ 	Fisher.cc FoldChange.cc \
  	Histogram.cc Kernel.cc KernelBox.cc KernelTriCube.cc Linear.cc \
  	LinearWeighted.cc Local.cc \
…
  include_statistics_HEADERS = \
- 	Averager.h AveragerPair.h AveragerWeighted.h Fisher.h FoldChange.h \
+ 	Averager.h AveragerPair.h AveragerWeighted.h \
+ 	AveragerPairWeighted.h \
+ 	Fisher.h FoldChange.h \
  	Histogram.h Kernel.h KernelBox.h KernelTriCube.h Linear.h LinearWeighted.h \
  	Local.h MultiDimensional.h Naive.h NaiveWeighted.h OneDimensional.h \
branches/peters_vector/lib/statistics/Pearson.cc
r295 r469 3 3 4 4 #include <c++_tools/statistics/Pearson.h> 5 6 5 #include <c++_tools/statistics/AveragerPair.h> 6 #include <c++_tools/statistics/AveragerPairWeighted.h> 7 7 #include <c++_tools/gslapi/vector.h> 8 #include <c++_tools/classifier/Target.h> 8 9 9 10 #include <cmath> … … 19 20 } 20 21 21 void Pearson::centralize(gslapi::vector& x, const gslapi::vector& w)22 {23 double m = (x*w)/w.sum();24 for (size_t i=0; i<x.size(); i++)25 x(i)=x(i)-m;26 }27 28 22 double Pearson::p_value() const 29 23 { 30 24 if(weighted_) 31 25 return 1; 32 else if(nof_samples_<3){26 if(nof_samples_<2){ 33 27 std::cerr << "Warning: Only " << nof_samples_ << "samples. " 34 28 << "Need at lest 3.\n"; 35 29 return 1; 36 30 } 37 else{ 38 double t = sqrt(nof_samples_ - 2)*fabs(r_) /sqrt(1-r_*r_); 39 return 2*gsl_cdf_tdist_Q (t, nof_samples_ -2 ); 40 } 31 32 double t = sqrt(nof_samples_ - 2)*fabs(r_) /sqrt(1-r_*r_); 33 double p = gsl_cdf_tdist_Q(t, nof_samples_ -2 ); 34 if (absolute_) 35 return 2*p; 36 if (r_<0) 37 return 1-p; 38 return p; 39 41 40 } 42 41 43 double Pearson::score(const gslapi::vector& vec1, const gslapi::vector& vec2,44 const std::vector<size_t>& train_set)42 double Pearson::score(const classifier::Target& target, 43 const classifier::VectorAbstract& value) 45 44 { 46 45 weighted_=false; 47 46 AveragerPair ap; 48 if (!train_set.size()){ 49 ap = AveragerPair(vec1.sum(),vec1*vec1,vec2.sum(),vec2*vec2, 50 vec1*vec2,vec1.size()); 51 nof_samples_ = vec1.size(); 52 } 53 else{ 54 for (size_t i=0; i<train_set.size(); i++){ 55 ap.add(vec1(train_set[i]), vec2(train_set[i])); 56 } 57 nof_samples_ = train_set.size(); 47 for (size_t i=0; i<target.size(); i++){ 48 if (class_one(target(i))) 49 ap.add(1, value(i)); 50 else 51 ap.add(-1, value(i)); 52 nof_samples_ = target.size(); 58 53 } 59 54 r_ = ap.correlation(); 60 55 if (r_<0 && absolute_) 61 r _=-r_;56 return -r_; 62 57 63 58 return r_; 64 59 } 65 60 66 double Pearson::score(const gslapi::vector& vec1, const gslapi::vector& vec2, 67 const gslapi::vector& w1, const gslapi::vector& w2, 68 const std::vector<size_t>& train_set = 69 std::vector<size_t>()) 61 double Pearson::score(const classifier::Target& target, 62 const classifier::VectorAbstract& value, 63 const classifier::VectorAbstract& weight) 70 64 { 71 65 weighted_=true; 72 gslapi::vector x; 73 gslapi::vector y; 74 gslapi::vector wx; 75 gslapi::vector wy; 76 if (!train_set.size()){ 77 x = vec1; 78 y = vec2; 79 wx = w1; 80 wy = w2; 81 nof_samples_ = vec1.size(); 66 AveragerPairWeighted ap; 67 for (size_t i=0; i<target.size(); i++){ 68 if (class_one(target(i))) 69 ap.add(1, value(i),1,weight(i)); 70 else 71 ap.add(-1, value(i),1,weight(i)); 72 nof_samples_ = target.size(); 82 73 } 83 else{ 84 nof_samples_ = train_set.size(); 85 x = gslapi::vector(nof_samples_); 86 y = gslapi::vector(nof_samples_); 87 wx = gslapi::vector(nof_samples_); 88 wy = gslapi::vector(nof_samples_); 89 for (size_t i=0; i<train_set.size(); i++){ 90 x(i)=vec1(train_set[i]); 91 y(i)=vec2(train_set[i]); 92 wx(i)=w1(train_set[i]); 93 wy(i)=w2(train_set[i]); 94 } 95 } 96 97 centralize(x,wx); 98 centralize(y,wy); 99 100 x.mul(wx); 101 y.mul(wy); 102 103 r_ = ( (x*y) / sqrt( (x*x)*(y*y)) ); 104 weighted_=true; 105 74 r_ = ap.correlation(); 106 75 if (r_<0 && absolute_) 107 r _=-r_;76 return -r_; 108 77 109 78 return r_; -
branches/peters_vector/lib/statistics/Pearson.h
r414 r469 7 7 8 8 namespace theplu { 9 class gslapi::vector; 9 namespace classifier { 10 class VectorAbstract; 11 } 10 12 11 13 namespace statistics { … … 36 38 /// of Pearson is used. 37 39 /// 38 double score(const gslapi::vector&, const gslapi::vector&, 39 const std::vector<size_t>& = std::vector<size_t>());40 double score(const classifier::Target& target, 41 const classifier::VectorAbstract& value); 40 42 41 43 /// … … 48 50 /// correlation. 49 51 /// 50 inline double score(const gslapi::vector& x, const gslapi::vector& y, 51 const gslapi::vector& w, 52 const std::vector<size_t>& train_set = 53 std::vector<size_t>()) 54 { return score(x,y,w,w,train_set); } 52 double score(const classifier::Target& target, 53 const classifier::VectorAbstract& value, 54 const classifier::VectorAbstract& weight); 55 55 56 ///57 /// \f$ \frac{\vert \sum_iw^x_iw^y_i(x_i-m_x)(y_i-m_y)\vert }58 /// {\sqrt{\sum_iw^x_iw^y_i(x_i-m_x)^2 ///59 /// \sum_iw^x_iw^y_i(y_i-m_y)^2}}\f$, where \f$m_x = \frac{\sum60 /// w^x_iw^y_ix_i}{\sum w^x_iw^y_i}\f$ and \f$m_y = \frac{\sum61 /// w_ix_i}{\sum w_i}\f$. This expression is chosen to get a62 /// correlation equal to unity when \a x and \a y are63 /// equal. @return absolute value of weighted version of Pearson64 /// correlation.65 ///66 double score(const gslapi::vector& x, const gslapi::vector& y,67 const gslapi::vector& wx, const gslapi::vector& wy,68 const std::vector<size_t>&);69 70 56 /// 71 57 /// The p-value is the probability of getting a correlation as … … 82 68 83 69 84 void centralize(gslapi::vector&, const gslapi::vector&);70 // void centralize(gslapi::vector&, const gslapi::vector&); 85 71 }; 86 72 -
branches/peters_vector/lib/statistics/ROC.cc
r303 r469 4 4 5 5 #include <c++_tools/utility/stl_utility.h> 6 #include <c++_tools/ gslapi/vector.h>6 #include <c++_tools/classifier/VectorAbstract.h> 7 7 8 8 #include <gsl/gsl_cdf.h> … … 15 15 16 16 namespace theplu { 17 class classifier::VectorAbstract; 17 18 namespace statistics { 18 19 19 20 ROC::ROC(bool b) 20 : Score(b), area_(-1), minimum_size_(10), nof_pos_(0), 21 value_(std::vector<std::pair<double, double> >()) 22 23 21 : Score(b), area_(0.5), minimum_size_(10), nof_pos_(0) 24 22 { 25 23 } … … 30 28 // Not integrating from the middle of the bin, but from the inner edge. 31 29 if (x>0) 32 x -= 0.5/nof_pos_/( value_.size()-nof_pos_);30 x -= 0.5/nof_pos_/(n()-nof_pos_); 33 31 else if(x<0) 34 x += 0.5/nof_pos_/( value_.size()-nof_pos_);32 x += 0.5/nof_pos_/(n()-nof_pos_); 35 33 36 double sigma = (std::sqrt( ( value_.size()-nof_pos_)*nof_pos_*37 ( value_.size()+1.0)/12 ) /38 ( value_.size() - nof_pos_ ) / nof_pos_ );34 double sigma = (std::sqrt( (n()-nof_pos_)*nof_pos_* 35 (n()+1.0)/12 ) / 36 ( n() - nof_pos_ ) / nof_pos_ ); 39 37 double p = gsl_cdf_gaussian_Q(x, sigma); 40 38 … … 62 60 if (weighted_) 63 61 return 1.0; 64 else if (nof_pos_ < minimum_size_ & value_.size()-nof_pos_ < minimum_size_)65 return get_p_exact(area_*nof_pos_*( value_.size()-nof_pos_),66 nof_pos_, value_.size()-nof_pos_);62 else if (nof_pos_ < minimum_size_ & n()-nof_pos_ < minimum_size_) 63 return get_p_exact(area_*nof_pos_*(n()-nof_pos_), 64 nof_pos_, n()-nof_pos_); 67 65 else 68 66 return get_p_approx(area_); … … 70 68 } 71 69 72 double ROC::score(const gslapi::vector& target, const gslapi::vector& data,73 const std::vector<size_t>& train_set)70 double ROC::score(const classifier::Target& target, 71 const classifier::VectorAbstract& value) 74 72 { 75 73 weighted_=false; 76 target_ = target; 77 data_ = data; 78 if (!train_set.size()){ 79 train_set_.resize(0); 80 for (size_t i=0; i<target_.size(); i++) 81 train_set_.push_back(i); 74 75 vec_pair_.clear(); 76 vec_pair_.reserve(target.size()); 77 for (unsigned int i=0; i<target.size(); i++) 78 vec_pair_.push_back(std::make_pair(target(i),value(i))); 79 std::sort(vec_pair_.begin(),vec_pair_.end(), 80 utility::pair_value_compare<int, double>()); 81 82 area_ = 0; 83 nof_pos_=0; 84 for (size_t i=0; i<n(); i++){ 85 if (class_one(target(i))){ 86 area_+=i; 87 nof_pos_++; 88 } 82 89 } 83 else84 train_set_ = train_set;85 sort();86 area_ = 0;87 for (size_t i=0; i<value_.size(); i++)88 if (value_[i].first==1)89 area_+=i;90 90 91 91 // Normalizing the area to [0,1] 92 92 area_ = ( (area_-nof_pos_*(nof_pos_-1)/2 ) / 93 (nof_pos_*( value_.size()-nof_pos_)) );93 (nof_pos_*(n()-nof_pos_)) ); 94 94 95 95 //Returning score larger 0.5 that you get by random … … 100 100 } 101 101 102 double ROC::score(const gslapi::vector& target, const gslapi::vector& data,103 const gslapi::vector& weight,104 const std::vector<size_t>& train_set)102 double ROC::score(const classifier::Target& target, 103 const classifier::VectorAbstract& value, 104 const classifier::VectorAbstract& weight) 105 105 { 106 106 weighted_=true; 107 target_ = target; 108 data_ = data; 109 weight_=weight; 110 if (!train_set.size()){ 111 train_set_.resize(0); 112 for (size_t i=0; i<target_.size(); i++) 113 train_set_.push_back(i); 114 } 115 else 116 train_set_ = train_set; 117 sort(); 107 108 vec_pair_.clear(); 109 vec_pair_.reserve(target.size()); 110 for (unsigned int i=0; i<target.size(); i++) 111 vec_pair_.push_back(std::make_pair(target(i),value(i))); 112 std::sort(vec_pair_.begin(),vec_pair_.end(), 113 
utility::pair_value_compare<int, double>()); 114 118 115 area_=0; 116 nof_pos_=0; 119 117 double max_area=0; 120 //Peter, use the sort to skip some ifs and loops 121 for (size_t i=0; i< value_.size(); i++){122 if ( target_(train_set_[i])==1){123 for (size_t j=0; j< value_.size(); j++){124 if ( target_(train_set_[j])!=1){125 if ( data_(train_set_[i]) > data_(train_set_[j])){126 area_+=weight _(train_set_[i])*weight_(train_set_[j]);118 119 for (size_t i=0; i<n(); i++){ 120 if (class_one(target(i))){ 121 for (size_t j=0; j<n(); j++){ 122 if (!class_one(target(j))){ 123 if (value(i) > value(j)){ 124 area_+=weight(i)*weight(j); 127 125 } 128 max_area+=weight _(train_set_[i])*weight_(train_set_[j]);126 max_area+=weight(i)*weight(j); 129 127 } 130 128 } … … 139 137 } 140 138 141 void ROC::sort()139 int ROC::target(const size_t i) const 142 140 { 143 value_.resize(0); 144 nof_pos_=0; 145 for (unsigned int i=0; i<train_set_.size(); i++){ 146 std::pair<double, double> tmp(target_(train_set_[i]), 147 data_(train_set_[i])); 148 value_.push_back(tmp); 149 if (target_(train_set_[i])==1) 150 nof_pos_++; 151 } 152 std::sort(value_.begin(),value_.end(), 153 utility::pair_value_compare<double, double>()); 154 } 155 156 gslapi::vector ROC::target(void) const 157 { 158 gslapi::vector target(value_.size()); 159 for(size_t i=0; i<target.size(); ++i) 160 target(i) = value_[i].first; 161 return target; 141 return vec_pair_[i].first; 162 142 } 163 143 … … 168 148 double sens = 1; 169 149 double spec = 0; 170 gslapi::vector target = r.target(); 171 size_t n = target.size(); 172 double nof_pos = (target.sum()+n)/2; 150 size_t n = r.n(); 151 double nof_pos = r.n_pos(); 173 152 for(size_t i=0; i<n-1; ++i) { 174 153 s << sens << "\t"; 175 154 s << spec << "\n"; 176 if ( target(i)==1)155 if (r.class_one(r.target(i))) 177 156 spec -= 1/(n-nof_pos); 178 157 else -
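ROC::score above is the Mann-Whitney statistic normalized to [0,1]: sort the (target, value) pairs by value, sum the ranks of the positive class and rescale by n_pos times n_neg. A stand-alone sketch with plain int labels and doubles (the class itself works on Target and VectorAbstract, and this sketch ignores ties):

  #include <algorithm>
  #include <cstddef>
  #include <utility>
  #include <vector>

  // Unweighted ROC area, as in ROC::score: rank-sum of the positives
  // converted to the Mann-Whitney U and divided by n_pos * n_neg.
  double roc_area(const std::vector<int>& label,      // 1 = positive class
                  const std::vector<double>& value)
  {
    std::vector<std::pair<double,int> > v;
    for (std::size_t i=0; i<label.size(); ++i)
      v.push_back(std::make_pair(value[i], label[i]));
    std::sort(v.begin(), v.end());                    // ascending by value
    double rank_sum=0, npos=0;
    for (std::size_t i=0; i<v.size(); ++i)
      if (v[i].second==1) { rank_sum += i; ++npos; }
    const double nneg = v.size()-npos;
    return (rank_sum - npos*(npos-1)/2) / (npos*nneg);
  }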
branches/peters_vector/lib/statistics/ROC.h
r465 r469 4 4 #define _theplu_statistics_roc_ 5 5 6 #include <c++_tools/ gslapi/vector.h>6 #include <c++_tools/classifier/Target.h> 7 7 #include <c++_tools/statistics/Score.h> 8 8 … … 32 32 /// Function taking \a value, \a target (+1 or -1) and vector 33 33 /// defining what samples to use. The score is equivalent to 34 /// Mann-Whitney statistics. If target is equal to 1, sample 35 /// belonges to class + otherwise sample belongs to class 36 /// -. @return the area under the ROC curve. If the area is less 34 /// Mann-Whitney statistics. 35 /// @return the area under the ROC curve. If the area is less 37 36 /// than 0.5 and absolute=true, 1-area is returned. Complexity is 38 37 /// \f$ N\log N \f$ where \f$ N \f$ is number of samples. 39 38 /// 40 double score(const gslapi::vector& target, const gslapi::vector& value, 41 const std::vector<size_t>& = std::vector<size_t>());39 double score(const classifier::Target& target, 40 const classifier::VectorAbstract& value); 42 41 43 42 /// Function taking values, target, weight and a vector defining … … 52 51 /// of samples. 53 52 /// 54 double score(const gslapi::vector& target, const gslapi::vector& value, 55 const gslapi::vector& weight,56 const std::vector<size_t>& = std::vector<size_t>());53 double score(const classifier::Target& target, 54 const classifier::VectorAbstract& value, 55 const classifier::VectorAbstract& weight); 57 56 58 57 … … 71 70 /// 72 71 double p_value(void) const; 73 74 /// 75 /// @return the targets in train_set sorted with respect to the 76 /// corresponding data 77 /// 78 gslapi::vector target(void) const; 79 72 80 73 /// 81 74 /// minimum_size is the threshold for when a normal … … 86 79 inline u_int& minimum_size(void){ return minimum_size_; } 87 80 88 private: 89 double area_; 90 u_int minimum_size_; 91 u_int nof_pos_; 92 /// pair of target and data. should always be sorted with respect to 93 /// data. 94 std::vector<std::pair<double, double> > value_; 95 81 int target(const size_t i) const; 82 inline size_t n(void) const { return vec_pair_.size(); } 83 inline size_t n_pos(void) const { return nof_pos_; } 84 85 private: 86 96 87 /// Implemented as in MatLab 13.1 97 88 double get_p_approx(const double) const; … … 100 91 double get_p_exact(const double, const double, const double) const; 101 92 102 /// sorting value_, should always be done when changing train_set_ 103 void sort(void); 104 93 double area_; 94 u_int minimum_size_; 95 u_int nof_pos_; 96 std::vector<std::pair<int, double> > vec_pair_; 105 97 }; 106 98 -
branches/peters_vector/lib/statistics/Score.cc
r303 r469

  Score::Score(bool absolute)
-   : absolute_(absolute), train_set_(std::vector<size_t>()), weighted_(true)
+   : absolute_(absolute), positive_label_(1)
  {
  }
branches/peters_vector/lib/statistics/Score.h
r428 r469 4 4 #define _theplu_statistics_score_ 5 5 6 #include <c++_tools/gslapi/vector.h> 6 namespace theplu { 7 namespace classifier { 8 class Target; 9 class VectorAbstract; 10 } 7 11 8 namespace theplu {9 12 namespace statistics { 10 13 … … 32 35 33 36 /// 37 /// Targets with this label are considered to be in positive 38 /// group. All others are considered to be in negative 39 /// group. Default is 1. 40 /// 41 /// @return label for positive class 42 /// 43 inline int& positive_label(void) { return positive_label_; } 44 45 /// 34 46 /// Function calculating the score. In absolute mode, also the 35 47 /// score using negated class labels is calculated, and the … … 41 53 /// @param target vector of targets (most often +1 -1) 42 54 /// @param value vector of the values 43 /// @train_set defining which values to use (number of values used44 /// in the calculation is equal to size of \a train_set)45 55 /// 46 56 virtual double 47 score(const gslapi::vector& target, 48 const gslapi::vector& value, 49 const std::vector<size_t>& train_set = std::vector<size_t>()) = 0; 57 score(const classifier::Target& target, 58 const classifier::VectorAbstract& value) = 0; 50 59 51 60 /// … … 65 74 /// 66 75 virtual double 67 score(const gslapi::vector& target, 68 const gslapi::vector& value, 69 const gslapi::vector& weight, 70 const std::vector<size_t>& = std::vector<size_t>()) = 0; 76 score(const classifier::Target& target, 77 const classifier::VectorAbstract& value, 78 const classifier::VectorAbstract& weight) = 0; 71 79 72 /// 73 /// @return the one-sided p-value( if absolute true is used 74 /// this is equivalent to the two-sided p-value.) 75 /// 76 /// virtual double p_value(void) const = 0; 77 78 80 inline bool class_one(int i) const { return i==positive_label_; } 79 81 80 82 protected: 83 inline bool weighted(void) const { return weighted_; } 84 81 85 bool absolute_; 82 gslapi::vector data_; 83 gslapi::vector target_; 84 std::vector<size_t> train_set_; 85 gslapi::vector weight_; 86 int positive_label_; 86 87 bool weighted_; 87 88 -
branches/peters_vector/lib/statistics/WilcoxonFoldChange.cc
r465 r469 2 2 3 3 #include <c++_tools/statistics/WilcoxonFoldChange.h> 4 #include <c++_tools/classifier/VectorAbstract.h> 4 5 #include <c++_tools/statistics/utility.h> 6 #include <c++_tools/classifier/Target.h> 5 7 6 8 #include <cmath> 9 #include <vector> 7 10 8 11 namespace theplu { … … 17 20 18 21 19 double WilcoxonFoldChange::score(const gslapi::vector& target, 20 const gslapi::vector& value, 21 const std::vector<size_t>& train_set) 22 double WilcoxonFoldChange::score(const classifier::Target& target, 23 const classifier::VectorAbstract& value) 22 24 { 23 if (!train_set_.size())24 for (size_t i=0; i<target_.size(); i++)25 train_set_.push_back(i);26 else27 train_set_=train_set;28 29 25 std::vector<double> distance; 30 26 //Peter, should reserve the vector to avoid reallocations 31 27 weighted_=false; 32 for (size_t i=0; i<t rain_set_.size(); i++) {28 for (size_t i=0; i<target.size(); i++) { 33 29 for (size_t j=0; i<j; j++) { 34 if (target( train_set_[i])==target(train_set_[j])) continue;35 distance.push_back(value( train_set_[i])-value(train_set_[j]));30 if (target(i)==target(j)) continue; 31 distance.push_back(value(i)-value(j)); 36 32 } 37 33 } … … 41 37 } 42 38 43 double WilcoxonFoldChange::score(const gslapi::vector& target, 44 const gslapi::vector& value, 45 const gslapi::vector& weight, 46 const std::vector<size_t>& train_set) 39 double WilcoxonFoldChange::score(const classifier::Target& target, 40 const classifier::VectorAbstract& value, 41 const classifier::VectorAbstract& weight) 47 42 { 48 43 return 0; -
branches/peters_vector/lib/statistics/WilcoxonFoldChange.h
r465 r469 29 29 /// in the calculation is equal to size of \a train_set) 30 30 /// 31 double 32 score(const gslapi::vector& target, 33 const gslapi::vector& value, 34 const std::vector<size_t>& train_set = std::vector<size_t>()); 31 double score(const classifier::Target& target, 32 const classifier::VectorAbstract& value); 35 33 36 /// @todo34 /// 37 35 /// @return difference of the weighted means of the two classes 38 36 /// 39 /// @param target is +1 or -140 37 /// @param value vector of the values 41 38 /// @param weight vector of accompanied weight to the values … … 45 42 /// @note not implemented 46 43 /// 47 double 48 score(const gslapi::vector& target, 49 const gslapi::vector& value, 50 const gslapi::vector& weight, 51 const std::vector<size_t>& train_set = std::vector<size_t>()); 44 double score(const classifier::Target& target, 45 const classifier::VectorAbstract& value, 46 const classifier::VectorAbstract& weight); 52 47 53 48 private: -
branches/peters_vector/lib/statistics/tScore.cc
r414 r469 7 7 #include <c++_tools/statistics/tScore.h> 8 8 #include <c++_tools/statistics/Averager.h> 9 #include <c++_tools/gslapi/vector.h>10 9 #include <c++_tools/statistics/AveragerWeighted.h> 10 #include <c++_tools/classifier/Target.h> 11 #include <c++_tools/classifier/VectorAbstract.h> 11 12 12 13 namespace theplu { … … 14 15 15 16 tScore::tScore(bool b) 16 : Score(b), t_(0) , train_set_(), weight_()17 : Score(b), t_(0) 17 18 { 18 19 } 19 20 20 double tScore::score(const gslapi::vector& target, 21 const gslapi::vector& data, 22 const std::vector<size_t>& train_set) 21 double tScore::score(const classifier::Target& target, 22 const classifier::VectorAbstract& value) 23 23 { 24 24 weighted_=false; 25 if (!train_set_.size())26 for (size_t i=0; i<target_.size(); i++)27 train_set_.push_back(i);28 else29 train_set_=train_set;30 target_ = target;31 data_ = data;32 weight_ = gslapi::vector(target.size(),1);33 25 statistics::Averager positive; 34 26 statistics::Averager negative; 35 for(size_t i=0; i<train_set_.size(); i++){ 36 if (target_[train_set_[i]]==1) 37 positive.add(data_[train_set_[i]]); 27 dof_=target.size()-2; 28 for(size_t i=0; i<target.size(); i++){ 29 if (class_one(target(i))) 30 positive.add(value(i)); 38 31 else 39 negative.add( data_[train_set_[i]]);32 negative.add(value(i)); 40 33 } 41 34 double diff = positive.mean() - negative.mean(); … … 49 42 } 50 43 51 double tScore::score(const gslapi::vector& target, 52 const gslapi::vector& data, 53 const gslapi::vector& weight, 54 const std::vector<size_t>& train_set) 44 double tScore::score(const classifier::Target& target, 45 const classifier::VectorAbstract& value, 46 const classifier::VectorAbstract& weight) 55 47 { 56 48 weighted_=true; 57 if (!train_set_.size()) 58 for (size_t i=0; i<target_.size(); i++) 59 train_set_.push_back(i); 60 else 61 train_set_=train_set; 62 target_ = target; 63 weight_ = weight; 49 64 50 statistics::AveragerWeighted positive; 65 51 statistics::AveragerWeighted negative; 66 for(size_t i=0; i<train_set_.size(); i++){ 67 if (target_[train_set_[i]]==1) 68 positive.add(data_(train_set_[i]),weight_(train_set_[i])); 52 dof_=target.size()-2; 53 for(size_t i=0; i<target.size(); i++){ 54 if (class_one(target(i))) 55 positive.add(value(i),weight(i)); 69 56 else 70 negative.add( data_(train_set_[i]),weight_(train_set_[i]));57 negative.add(value(i),weight(i)); 71 58 } 72 59 double diff = positive.mean() - negative.mean(); … … 82 69 double tScore::p_value(void) const 83 70 { 84 double dof = target_.size()-2; 85 double p = gsl_cdf_tdist_Q(t_, dof); 86 return (dof > 0 && !weighted_) ? p : 1; 71 double p = gsl_cdf_tdist_Q(t_, dof_); 72 return (dof_ > 0 && !weighted_) ? p : 1; 87 73 } 88 74 -
branches/peters_vector/lib/statistics/tScore.h
r414 r469 37 37 /// absolute value of t-score is returned 38 38 /// 39 double score(const gslapi::vector&, const gslapi::vector&, 40 const std::vector<size_t>& = std::vector<size_t>());39 double score(const classifier::Target& target, 40 const classifier::VectorAbstract& value); 41 41 42 42 /// … … 44 44 /// absolute value of t-score is returned 45 45 /// 46 double score(const gslapi::vector&, const gslapi::vector&, 47 const gslapi::vector&,48 const std::vector<size_t>& = std::vector<size_t>());46 double score(const classifier::Target& target, 47 const classifier::VectorAbstract& value, 48 const classifier::VectorAbstract& weight); 49 49 50 50 /// … … 61 61 private: 62 62 double t_; 63 gslapi::vector target_; 64 std::vector<size_t> train_set_; 65 gslapi::vector value_; 66 gslapi::vector weight_; 63 int dof_; 67 64 68 65 }; -
branches/peters_vector/lib/utility/stl_utility.h
r437 r469

  template <class T1,class T2>
  struct pair_value_compare
+ {
+   ///
+   /// @return true if x.second<y.second or (x.second==y.second and
+   /// x.first<y.first)
+   ///
+   inline bool operator()(const std::pair<T1,T2>& x,
+                          const std::pair<T1,T2>& y) {
+     return ((x.second<y.second) ||
+             (!(y.second<x.second) && (x.first<y.first)));
+   }
+ };
+
+ ///
+ ///
+ template <class T1,class T2>
+ struct pointer_compare
  {
    ///
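pair_value_compare is the comparator used when score lists are sorted in InputRanker and ROC; a minimal example of its intended use, illustrative only:

  #include <c++_tools/utility/stl_utility.h>
  #include <algorithm>
  #include <utility>
  #include <vector>

  using namespace theplu;

  // Sort (index, score) pairs by score, ties broken on the index.
  void sort_by_score(std::vector<std::pair<size_t,double> >& score)
  {
    std::sort(score.begin(), score.end(),
              utility::pair_value_compare<size_t,double>());
  }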