Changeset 1712
- Timestamp:
- Jan 13, 2009, 7:41:09 PM (14 years ago)
- Location:
- trunk/yat/normalizer
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/yat/normalizer/qQuantileNormalizer.cc
r1709 r1712 25 25 #include "yat/statistics/Averager.h" 26 26 #include "yat/utility/Matrix.h" 27 #include "yat/utility/VectorConstView.h" 27 #include "yat/utility/Vector.h" 28 #include "yat/utility/VectorBase.h" 28 29 29 30 #include <algorithm> … … 35 36 36 37 37 Partitioner::Partitioner(const utility::Vector ConstView& vec,38 Partitioner::Partitioner(const utility::VectorBase& vec, 38 39 unsigned int N) 39 40 : average_(utility::Vector(N)), index_(utility::Vector(N)) … … 77 78 78 79 79 qQuantileNormalizer::qQuantileNormalizer(const 80 utility::VectorConstView& target, 80 qQuantileNormalizer::qQuantileNormalizer(const utility::VectorBase& target, 81 81 unsigned int Q) 82 82 : target_(Partitioner(target,Q)) … … 102 102 diff-=target_.averages(); 103 103 const utility::Vector& idx=target_.index(); 104 regression::CSplineInterpolation cspline(idx,diff); 104 105 105 // add linear interpolation for first part 106 for (size_t row=0; row<idx(0); ++row) { 106 // linear interpolation for first part, i.e., use first diff for 107 // all points in the first part. 108 size_t start=0; 109 size_t end=idx(0); 110 for (size_t row=start; row<end; ++row) { 107 111 size_t srow=sorted_index[column][row]; 108 result(srow,column) = matrix(srow,column) ;112 result(srow,column) = matrix(srow,column) + diff(0); 109 113 } 110 114 111 115 // cspline interpolation for all data between the first and last 112 116 // parts 113 regression::CSplineInterpolation cspline(idx,diff); 114 for (size_t row=idx(0); row<=idx(target_.size()-1); ++row) { 117 start=idx(0); 118 end=idx(target_.size()-1); 119 for (size_t row=start; row<=end; ++row) { 115 120 size_t srow=sorted_index[column][row]; 116 result(srow,column) = ( matrix(srow,column) + cspline.evaluate(row) );121 result(srow,column) = matrix(srow,column) + cspline.evaluate(row) ; 117 122 } 118 123 119 // add linear interpolation for last part 120 for (size_t row=idx(target_.size()-1)+1; row<result.rows(); ++row) { 124 // linear interpolation for last part, i.e., use last diff for 125 // all points in the last part. 126 start=idx(target_.size()-1)+1; 127 end=result.rows(); 128 for (size_t row=start; row<end; ++row) { 121 129 size_t srow=sorted_index[column][row]; 122 result(srow,column) = matrix(srow,column) ;130 result(srow,column) = matrix(srow,column) + diff(diff.size()-1); 123 131 } 124 132 } -
trunk/yat/normalizer/qQuantileNormalizer.h
r1711 r1712 27 27 namespace utility { 28 28 class Matrix; 29 class Vector ConstView;29 class VectorBase; 30 30 } 31 31 namespace normalizer { 32 32 33 33 /** 34 \brief Documentation please. 34 \brief Partition a vector of data into equal sizes. 35 36 The class also calculates the average of each part and assigns 37 the average to the mid point of each part. The midpoint is a 38 double, i.e., it is not forced to be an integer index. 35 39 */ 36 40 class Partitioner … … 38 42 public: 39 43 /** 40 \brief Documentation please.44 \brief Create the partition and perform required calculations. 41 45 */ 42 Partitioner(const utility::Vector ConstView& vec, unsigned int N);46 Partitioner(const utility::VectorBase& vec, unsigned int N); 43 47 44 48 /** 45 \brief Documentation please. 49 \brief Return the averages for each part. 50 51 \return The average vector. 46 52 */ 47 53 const utility::Vector& averages(void) const; 48 54 49 55 /** 50 \brief Documentation please. 56 \brief Return the mid point for each partition. 57 58 \return The index vector. 51 59 */ 52 60 const utility::Vector& index(void) const; 53 61 54 62 /** 55 \ briefThe number of parts.63 \return The number of parts. 56 64 */ 57 65 size_t size(void) const; … … 66 74 \brief Perform Q-quantile normalization 67 75 68 After a Q-quantile normalization each column has the same 69 distribution of data (the Q-quantiles are the same). Also, within 70 each column the rank of an element is not changed. 76 After a Q-quantile normalization each column has approximately 77 the same distribution of data (the Q-quantiles are the 78 same). Also, within each column the rank of an element is not 79 changed. 71 80 72 81 There is currently no weighted version of qQuantileNormalizer 73 82 74 83 The normalization goes like this 75 76 0. Data is not assumed to be sorted. 77 78 1. Partition the target data in N+1 parts. The ends have half 79 size of the "normal" part size ( = \#targetdata/N ) 80 81 2. Calculate the arithmetic mean for each part 82 83 3. Do the same for the data to be tranformed (called source 84 here). 85 86 4. For each part, calculate the difference between the target and 87 the source. Now we have N differences d_i. 88 89 5. Create a cubic spline fit to this difference vector d. The 90 resulting curve is used to recalculate all column values. 91 92 I. For values in parts 1 through N-1 we use a cubic spline 93 fit. 94 95 II. For end parts 0 and N linear interpolation is used 96 97 Linear interpolation simply means a translation. 84 - Data is not assumed to be sorted. 85 - Partition the target data in N parts. 86 - Calculate the arithmetic mean for each part, the mean is 87 assigned to the mid point of each part. 88 - Do the same for the data to be tranformed (called source 89 here). 90 - For each part, calculate the difference between the target and 91 the source. Now we have N differences d_i with associated rank 92 (midpoint of each part). 93 - Create a cubic spline fit to this difference vector d. The 94 resulting curve is used to recalculate all column values. 95 - Use the cubic spline fit for values within the cubic spline 96 fit range [midpoint 1st part, midpoint last part]. 97 - For data outside the cubic spline fit use linear 98 extrapolation, i.e., a constant shift. d_first for points 99 below fit range, and d_last for points above fit range. 98 100 99 101 \since New in yat 0.5 … … 111 113 undefined. Keep \f$ N \f$ equal to or less than the smallest 112 114 number of data points in the target or each data set to be 113 normalized witha ginven target.115 normalized against a ginven target. 114 116 */ 115 qQuantileNormalizer(const utility::VectorConstView& target, 116 unsigned int Q); 117 qQuantileNormalizer(const utility::VectorBase& target, unsigned int Q); 117 118 118 119 /**
Note: See TracChangeset
for help on using the changeset viewer.