source: trunk/src/PCA.h @ 16

Last change on this file since 16 was 16, checked in by daniel, 19 years ago

Added documentation for the new methods.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 3.8 KB
Line 
1#ifndef GENETICS_PCA_ANALYZER_H
2#define GENETICS_PCA_ANALYZER_H
3
4// C++ tools include
5/////////////////////
6#include "vector.h"
7#include "matrix.h"
8#include "SVD.h"
9
10// Standard C++ includes
11////////////////////////
12#include <vector>
13#include <iostream>
14#include <memory>
15#include <cstdlib>
16
17
18namespace thep_cpp_tools
19{
20  /**
21     Class performing PCA using SVD. This class assumes that
22     the columns corresponds to the dimenension of the problem.
23     That means if data has dimension NxM (M=columns) the number
24     of principal-axes will equal M-1. When projecting data into
25     this space, all Nx1 vectors will have dimension Mx1. Hence
26     the projection will have dimension MxM where each column is
27     a point in the new space. Also, it assumes that M>N. The opposite
28     problem is added in the functions: process_transposed_problem and
29     projection_transposed()...
30  */
31 
32  class PCA
33  {
34  public:
35    /**
36       This constructor is only to be used in test-class
37    */
38    PCA(); 
39
40    /**
41       Constructor taking the data-matrix as input. No row-centering
42       should have been performed and no products.
43     */
44    explicit PCA( const thep_gsl_api::matrix& );
45
46   
47    /**
48       Will perform PCA according to the following scheme: \n
49       1: Rowcenter A  \n
50       2: SVD(A)  --> USV' \n
51       3: Calculate eigenvalues according to \n
52          \f$ \lambda_{ii} = s_{ii}/N_{rows} \f$ \n
53       4: Sort eigenvectors (from matrix V) according to descending eigenvalues \n
54    */
55    void process();
56
57    /**
58       If M<N use this method instead. Using the same format as before
59       where rows in the matrix corresponds to the dimensional coordinate.
60       The only difference is in the SVD step where the matrix V is used
61       after running the transposed matrix. For projections, see
62       projection_transposed() method.
63     */
64    void process_transposed_problem();
65   
66    /**
67       Performes a simple test on performance. Not optimal!
68       Returns true if ok otherwise false.
69    */
70    bool test();
71
72
73    /**
74       Returns eigenvector \a i
75    */
76    thep_gsl_api::matrix get_eigenvector( const size_t& i ) const
77    {
78      return eigenvectors_.row( i );
79    }
80
81    /**
82       Returns eigenvalues to covariance matrix
83       \f$ C = \frac{1}{N^2}A^TA \f$
84    */
85    double get_eigenvalue( const size_t& i ) const
86    {
87      return eigenvalues_[ i ];
88    }
89
90    /**
91       Returns the explained intensity of component \a K \n
92       \f$I = \frac{ \sum^{K}_{i=1} \lambda_i }{ \sum^{N}_{j=1} \lambda_j }\f$ \n
93       where \f$N\f$ is the dimension
94    */
95    double PCA::get_explained_intensity( const size_t& k );
96
97
98
99    /**
100        This function will project data onto the new coordinate-system
101  where the axes are the calculated eigenvectors. This means that
102  PCA must have been run before this function can be used!
103  Output is presented as coordinates in the N-dimensional room
104  spanned by the eigenvectors.
105    */
106    thep_gsl_api::matrix projection( const thep_gsl_api::matrix& ) const;
107   
108    /**
109  Same as projection() but works when used process_transposed_problem()
110    */
111    thep_gsl_api::matrix projection_transposed( const thep_gsl_api::matrix& ) const;
112
113   
114   
115  private:
116    thep_gsl_api::matrix A_; 
117    thep_gsl_api::matrix  eigenvectors_;
118    thep_gsl_api::vector  eigenvalues_;
119    thep_gsl_api::vector  explained_intensity_;
120    thep_gsl_api::vector  meanvalues_;
121    bool process_, explained_calc_;
122   
123    /**
124       Private function that will row-center the matrix A,
125       that is, A = A - M, where M is a matrix
126       with the meanvalues of each row
127    */
128    void row_center( thep_gsl_api::matrix& A_center );
129
130    /**
131       Private function that will calculate the explained
132       intensity
133    */
134    void calculate_explained_intensity();
135
136   
137  }; // class PCA 
138 
139}
140
141#endif
142
Note: See TracBrowser for help on using the repository browser.