1 | #include "PCA.h" |
---|
2 | |
---|
3 | |
---|
4 | using namespace std; |
---|
5 | using namespace thep_cpp_tools; |
---|
6 | |
---|
7 | PCA::PCA() : process_( false ), explained_calc_( false ) |
---|
8 | { |
---|
9 | } |
---|
10 | |
---|
11 | PCA::PCA( const thep_gsl_api::matrix& A ) : A_( A ), |
---|
12 | process_( false ), |
---|
13 | explained_calc_( false ) |
---|
14 | { |
---|
15 | } |
---|
16 | |
---|
17 | void PCA::process() |
---|
18 | { |
---|
19 | process_ = true; |
---|
20 | // Row-center the data matrix |
---|
21 | thep_gsl_api::matrix A_center( A_.rows(), A_.cols() ); |
---|
22 | this->row_center( A_center ); |
---|
23 | |
---|
24 | |
---|
25 | // Single value decompose the data matrix |
---|
26 | auto_ptr<thep_cpp_tools::SVD> pSVD( new thep_cpp_tools::SVD( A_center ) ); |
---|
27 | pSVD->process(); |
---|
28 | thep_gsl_api::matrix U = pSVD->get_u(); |
---|
29 | thep_gsl_api::matrix V = pSVD->get_v(); |
---|
30 | |
---|
31 | // Read the eigenvectors and eigenvalues |
---|
32 | eigenvectors_ = U.transpose(); |
---|
33 | eigenvalues_ = pSVD->get_s_vec(); |
---|
34 | |
---|
35 | // T |
---|
36 | for( size_t i = 0; i < eigenvalues_.size(); ++i ) |
---|
37 | { |
---|
38 | eigenvalues_[ i ] = eigenvalues_[ i ]*eigenvalues_[ i ]; |
---|
39 | } |
---|
40 | |
---|
41 | eigenvalues_ /= A_center.rows(); |
---|
42 | |
---|
43 | // Sort the eigenvectors in order of eigenvalues |
---|
44 | // Simple (not efficient) algorithm that always |
---|
45 | // make sure that the i:th element is in its correct |
---|
46 | // position (N element --> Ordo( N*N )) |
---|
47 | for( size_t i = 0; i < eigenvalues_.size(); ++i ) |
---|
48 | { |
---|
49 | for( size_t j = i + 1; j < eigenvalues_.size(); ++j ) |
---|
50 | { |
---|
51 | if( eigenvalues_[ j ] > eigenvalues_[ i ] ) |
---|
52 | { |
---|
53 | swap( eigenvalues_[ i ], eigenvalues_[ j ] ); |
---|
54 | eigenvectors_.swap_rows( i, j ); |
---|
55 | } |
---|
56 | } |
---|
57 | } |
---|
58 | } |
---|
59 | |
---|
60 | |
---|
61 | // This function will row-center the matrix A_, |
---|
62 | // that is, A_ = A_ - M, where M is a matrix |
---|
63 | // with the meanvalues of each row |
---|
64 | void PCA::row_center( thep_gsl_api::matrix& A_center ) |
---|
65 | { |
---|
66 | meanvalues_ = thep_gsl_api::vector( A_.rows() ); |
---|
67 | thep_gsl_api::vector A_row_sum = A_.row_sum(); |
---|
68 | for( size_t i = 0; i < A_center.rows(); ++i ) |
---|
69 | { |
---|
70 | meanvalues_[ i ] = A_row_sum.get( i ) / static_cast<double>( A_.cols() ); |
---|
71 | for( size_t j = 0; j < A_center.cols(); ++j ) |
---|
72 | { |
---|
73 | A_center.set( i, j, A_.get( i , j ) - meanvalues_[ i ] ); |
---|
74 | } |
---|
75 | } |
---|
76 | } |
---|
77 | |
---|
78 | |
---|
79 | thep_gsl_api::matrix PCA::projection( const thep_gsl_api::matrix& |
---|
80 | samples ) const |
---|
81 | { |
---|
82 | assert( process_ ); |
---|
83 | const size_t Ncol = samples.cols(); |
---|
84 | const size_t Nrow = samples.rows(); |
---|
85 | thep_gsl_api::matrix projs( Ncol, Ncol ); |
---|
86 | |
---|
87 | for( size_t j = 0; j < Ncol; ++j ) |
---|
88 | { |
---|
89 | thep_gsl_api::matrix temp = samples.col( j ); |
---|
90 | thep_gsl_api::vector centered( Nrow ); |
---|
91 | |
---|
92 | for( size_t i = 0; i < Nrow; ++i ) |
---|
93 | { |
---|
94 | centered[ i ] = temp.get( i, 0 ) - meanvalues_[ i ]; |
---|
95 | } |
---|
96 | |
---|
97 | thep_gsl_api::vector proj( eigenvectors_ * centered ); |
---|
98 | for( size_t i = 0; i < samples.cols(); ++i ) |
---|
99 | { |
---|
100 | projs.set( i, j, proj[ i ] ); |
---|
101 | } |
---|
102 | } |
---|
103 | return projs; |
---|
104 | } |
---|
105 | |
---|
106 | |
---|
107 | void PCA::calculate_explained_intensity() |
---|
108 | { |
---|
109 | size_t DIM = eigenvalues_.size(); |
---|
110 | explained_intensity_ = thep_gsl_api::vector( DIM ); |
---|
111 | double sum = 0; |
---|
112 | for( size_t i = 0; i < DIM; ++i ) |
---|
113 | { |
---|
114 | sum += eigenvalues_[ i ]; |
---|
115 | } |
---|
116 | double exp_sum = 0; |
---|
117 | for( size_t i = 0; i < DIM; ++i ) |
---|
118 | { |
---|
119 | exp_sum += eigenvalues_[ i ]; |
---|
120 | explained_intensity_[ i ] = exp_sum / sum ; |
---|
121 | } |
---|
122 | } |
---|
123 | |
---|
124 | double PCA::get_explained_intensity( const size_t& k ) |
---|
125 | { |
---|
126 | if( !explained_calc_ ) |
---|
127 | { |
---|
128 | this->calculate_explained_intensity(); |
---|
129 | explained_calc_ = true; |
---|
130 | } |
---|
131 | return explained_intensity_[ k ]; |
---|
132 | } |
---|
133 | |
---|
134 | |
---|
135 | bool PCA::test() |
---|
136 | { |
---|
137 | cout << "1. Print original matrix A = \n";//Testing row-centering of matrix\n"; |
---|
138 | thep_gsl_api::matrix A( 3, 3 ); |
---|
139 | for( size_t i = 0; i < 3; ++i ) |
---|
140 | { |
---|
141 | for( size_t j = 0; j < 3; ++j ) |
---|
142 | { |
---|
143 | A.set( i, j, sin( static_cast<double>( i + j + 2 + (i+1)*(j+1) ) ) ); |
---|
144 | } |
---|
145 | } |
---|
146 | A_ = A; |
---|
147 | |
---|
148 | // Print only matrix that is row-centered as PCA ( process() ) |
---|
149 | // will do that before calculation |
---|
150 | this->row_center( A ); |
---|
151 | cout << A << endl; |
---|
152 | |
---|
153 | cout << "2. Testing PCA\n"; |
---|
154 | this->process(); |
---|
155 | |
---|
156 | for( size_t i = 0; i < eigenvalues_.size(); ++i ) |
---|
157 | { |
---|
158 | cout << "lambda(" << i << ") = " << eigenvalues_[i] << " " |
---|
159 | << "has eigenvector = " << endl; |
---|
160 | cout << eigenvectors_.row( i ) << endl; |
---|
161 | } |
---|
162 | |
---|
163 | // The eigenvalues and eigenvectors are |
---|
164 | // from matlab using eig(A'A)/3 |
---|
165 | double answer_lambda[ 3 ] = { 1.8908, 0.0327, 0 }; |
---|
166 | double answer_eigenvec[ 3 ][ 3 ] = |
---|
167 | { |
---|
168 | { 0.4763, -0.6738, 0.5649 }, |
---|
169 | { 0.8770, 0.3182, -0.3599 }, |
---|
170 | { 0.0627, 0.6669 , 0.7425 }, |
---|
171 | }; |
---|
172 | |
---|
173 | double ERROR_TOL = 0.0001; |
---|
174 | |
---|
175 | // error eigenvalues |
---|
176 | cout << "abs( lambda(i) - lambda_matlab(i) ) ) = \n( "; |
---|
177 | double max_err = 0; |
---|
178 | for( size_t i = 0; i < 3; ++i ) |
---|
179 | { |
---|
180 | double err = fabs( eigenvalues_[ i ] - answer_lambda[ i ] ); |
---|
181 | if( err > max_err ) max_err = err; |
---|
182 | cout << err << " "; |
---|
183 | } |
---|
184 | cout << ")\n"; |
---|
185 | |
---|
186 | cerr << "max_error = " << max_err << ", to be compared with ERROR_TOL = " |
---|
187 | << ERROR_TOL << endl; |
---|
188 | if( max_err < ERROR_TOL ) |
---|
189 | { |
---|
190 | cout << "ok, compared with precision in given matlab-references!\n"; |
---|
191 | } |
---|
192 | else |
---|
193 | { |
---|
194 | cerr << "Not ok, error to big!\n"; |
---|
195 | return false; |
---|
196 | } |
---|
197 | |
---|
198 | // error eigenvectors |
---|
199 | max_err = 0; |
---|
200 | for( size_t i = 0; i < 3; ++i ) |
---|
201 | { |
---|
202 | cout << "err_eigvec( " << i << " ) = \n"; |
---|
203 | for( size_t j = 0; j < 3; ++j ) |
---|
204 | { |
---|
205 | double err = fabs( fabs( eigenvectors_.get( i, j ) ) - fabs( answer_eigenvec[ i ][ j ] ) ); |
---|
206 | if( err > max_err ) max_err = err; |
---|
207 | cout << err << endl; |
---|
208 | } |
---|
209 | cout << ")\n\n"; |
---|
210 | } |
---|
211 | |
---|
212 | cerr << "max_error = " << max_err << ", to be compared with ERROR_TOL = " |
---|
213 | << ERROR_TOL << endl; |
---|
214 | if( max_err < ERROR_TOL ) |
---|
215 | { |
---|
216 | cout << "ok, compared with precision in given matlab-references!\n"; |
---|
217 | } |
---|
218 | else |
---|
219 | { |
---|
220 | cerr << "Not ok, error to big!\n"; |
---|
221 | return false; |
---|
222 | } |
---|
223 | |
---|
224 | cout << "\nThe following projection is obtained:\n"; |
---|
225 | cout << this->projection( A ) << endl; |
---|
226 | |
---|
227 | |
---|
228 | return true; |
---|
229 | } |
---|