source: trunk/yat/statistics/utility.cc @ 3745

Last change on this file since 3745 was 3745, checked in by Peter, 5 years ago

add function calculating correlation matrix

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 2.7 KB
Line 
1// $Id: utility.cc 3745 2018-07-27 00:27:02Z peter $
2
3/*
4  Copyright (C) 2004 Jari Häkkinen, Peter Johansson
5  Copyright (C) 2005 Peter Johansson
6  Copyright (C) 2006, 2007, 2008 Jari Häkkinen, Peter Johansson
7  Copyright (C) 2009, 2011, 2012 Peter Johansson
8
9  This file is part of the yat library, http://dev.thep.lu.se/yat
10
11  The yat library is free software; you can redistribute it and/or
12  modify it under the terms of the GNU General Public License as
13  published by the Free Software Foundation; either version 3 of the
14  License, or (at your option) any later version.
15
16  The yat library is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  General Public License for more details.
20
21  You should have received a copy of the GNU General Public License
22  along with yat. If not, see <http://www.gnu.org/licenses/>.
23*/
24
25#include <config.h>
26
27#include "utility.h"
28
29#include "yat/normalizer/Centralizer.h"
30#include "yat/normalizer/ColumnNormalizer.h"
31
32#include <gsl/gsl_cdf.h>
33#include <gsl/gsl_randist.h>
34#include <gsl/gsl_statistics_double.h>
35
36#include <algorithm>
37#include <cassert>
38#include <cmath>
39#include <limits>
40
41namespace theplu {
42namespace yat {
43namespace statistics {
44
45  double cdf_hypergeometric_P(unsigned int k, unsigned int n1,
46                              unsigned int n2, unsigned int t)
47  {
48    return gsl_cdf_hypergeometric_P(k, n1, n2, t);
49  }
50
51
52  utility::Matrix correlation(utility::Matrix x)
53  {
54    using utility::Matrix;
55    size_t n = x.columns();
56    // centralise
57    normalizer::ColumnNormalizer<normalizer::Centralizer<> > normalizer;
58    normalizer(x, x);
59
60    Matrix cov(n, n);
61    for (size_t i=0; i<n; ++i) {
62      for (size_t j=i; j<n; ++j) {
63        cov(i, j) = x.column_const_view(i) * x.column_const_view(j);
64        cov(j, i) = cov(i, j);
65      }
66    }
67
68    utility::Vector stddev(n);
69    for (size_t i=0; i<n; ++i)
70      stddev(i) = std::sqrt(cov(i, i));
71
72    Matrix corr(cov);
73    for (size_t i=0; i<n; ++i) {
74      corr(i, i) = 1.0;
75      for (size_t j=0; j<i; ++j) {
76        corr(i, j) = cov(i, j) / (stddev(i) * stddev(j));
77        corr(j, i) = corr(i, j);
78      }
79    }
80    return corr;
81  }
82
83
84  double pearson_p_value(double r, unsigned int n)
85  {
86    assert(n>=2);
87    if (n<=2)
88      return std::numeric_limits<double>::quiet_NaN();
89    if (r>=1.0)
90      return 0.0;
91    if (r<=-1.0)
92      return 1.0;
93    return gsl_cdf_tdist_Q(r*std::sqrt((n-2)/(1-r*r)), n-2);
94  }
95
96
97  double kurtosis(const utility::VectorBase& v)
98  {
99    const gsl_vector* gvp=v.gsl_vector_p();
100    return gsl_stats_kurtosis(gvp->data,gvp->stride,gvp->size);
101  }
102
103
104  double skewness(const utility::VectorBase& v)
105  {
106    const gsl_vector* gvp=v.gsl_vector_p();
107    return gsl_stats_skew(gvp->data,gvp->stride,gvp->size);
108  }
109
110}}} // of namespace statistics, yat, and theplu
Note: See TracBrowser for help on using the repository browser.