#ifndef _theplu_yat_normalizer_qquantile_normalizer_
#define _theplu_yat_normalizer_qquantile_normalizer_

/*
Copyright (C) 2009 Jari Häkkinen

This file is part of the yat library, http://dev.thep.lu.se/yat

The yat library is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 3 of the
License, or (at your option) any later version.

The yat library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with yat. If not, see <http://www.gnu.org/licenses/>.
*/

#include "yat/utility/Vector.h"

namespace theplu {
namespace yat {
namespace utility {
class Matrix;
class VectorConstView;
}
namespace normalizer {

/**
   \brief Partition of a data vector, used by qQuantileNormalizer.

   A Partitioner is created from a data vector and a part count and
   gives read access to two vectors, averages() and index(), plus
   the number of parts, size(). qQuantileNormalizer stores one
   Partitioner for its target data.

   \note The partitioning is implemented outside this header.
   Presumably averages() holds the arithmetic mean of each part and
   index() a position associated with each part (compare the
   algorithm outline in the qQuantileNormalizer documentation);
   confirm against the implementation.
*/
class Partitioner
{
public:
  /**
     \brief Create a partition of \a vec.

     \param vec data to be partitioned
     \param N requested number of parts. NOTE(review): the
     qQuantileNormalizer documentation speaks of N+1 parts with
     half-sized end parts; whether size() equals \a N or \a N + 1
     must be confirmed against the implementation.
  */
  Partitioner(const utility::VectorConstView& vec, unsigned int N);

  /**
     \return vector of averages; presumably the arithmetic mean of
     each part (to be confirmed against the implementation).
  */
  const utility::Vector& averages() const;

  /**
     \return index vector; presumably one entry per part, matching
     averages() element by element (to be confirmed against the
     implementation).
  */
  const utility::Vector& index() const;

  /**
     \brief The number of parts.
  */
  size_t size() const;

private:
  // Presumably the vector handed out by averages().
  utility::Vector average_;
  // Presumably the vector handed out by index().
  utility::Vector index_;
};


/**
\brief Perform Q-quantile normalization

After a Q-quantile normalization each column has the same
distribution of data (the Q-quantiles are the same). Also, within
each column the rank of an element is not changed.

There is currently no weighted version of qQuantileNormalizer.

The normalization goes like this (N below presumably corresponds
to the constructor argument \a Q; to be confirmed against the
implementation):

0. Data is not assumed to be sorted.

1. Partition the target data in N+1 parts. The two end parts have
half the size of a "normal" part ( = \#targetdata/N ).

2. Calculate the arithmetic mean for each part.

3. Do the same for the data to be transformed (called source
here).

4. For each part, calculate the difference between the target and
the source. Now we have N differences d_i.

5. Create a cubic spline fit to this difference vector d. The
resulting curve is used to recalculate all column values.

I. For values in parts 1 through N-1 we use a cubic spline
fit.

II. For end parts 0 and N linear interpolation is used.

Linear interpolation simply means a translation.

\since New in yat 0.5
*/
class qQuantileNormalizer
{
public:
/**
\brief Create a normalizer for a given target distribution.

\param target data defining the distribution that normalized
columns should follow
\param Q the number of parts; must be within \f$ [2,N] \f$
where \f$ N \f$ is the total number of data points in the
target.

If \a Q is larger than the number of points in the data to be
normalized the behaviour of the code is undefined. Keep \a Q
equal to or less than the smallest number of data points in the
target or in each data set to be normalized with a given target.

NOTE(review): the previous wording used \f$ N \f$ in the two
sentences above; \a Q is the reading that makes the constraint
meaningful. Confirm against the implementation.
*/
qQuantileNormalizer(const utility::VectorConstView& target,
unsigned int Q);

/**
\brief Perform the Q-quantile normalization.

\param matrix data to be normalized
\param result destination of the normalized data

It is possible to normalize "in place"; it is permissible for
\a matrix and \a result to reference the same Matrix.

\note dimensions of \a matrix and \a result must match.
*/
void operator()(const utility::Matrix& matrix,
utility::Matrix& result) const;

private:
// Partition kept for the target data; presumably built from the
// constructor arguments (definition is outside this header).
Partitioner target_;
};

}}} // end of namespace normalizer, yat and theplu

#endif
