source: trunk/yat/classifier/ConsensusInputRanker.h @ 1706

Last change on this file since 1706 was 1487, checked in by Jari Häkkinen, 13 years ago

Addresses #436. GPL license copy reference should also be updated.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date ID
File size: 4.6 KB
Line 
1#ifndef _theplu_yat_classifier_consensusinputranker_
2#define _theplu_yat_classifier_consensusinputranker_
3
4// $Id$
5
6/*
7  Copyright (C) 2004, 2005 Peter Johansson
8  Copyright (C) 2006 Jari Häkkinen, Peter Johansson, Markus Ringnér
9  Copyright (C) 2007 Jari Häkkinen, Peter Johansson
10
11  This file is part of the yat library, http://dev.thep.lu.se/yat
12
13  The yat library is free software; you can redistribute it and/or
14  modify it under the terms of the GNU General Public License as
15  published by the Free Software Foundation; either version 3 of the
16  License, or (at your option) any later version.
17
18  The yat library is distributed in the hope that it will be useful,
19  but WITHOUT ANY WARRANTY; without even the implied warranty of
20  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21  General Public License for more details.
22
23  You should have received a copy of the GNU General Public License
24  along with yat. If not, see <http://www.gnu.org/licenses/>.
25*/
26
27#include "InputRanker.h"
28
29#include <vector>
30
31namespace theplu {
32namespace yat {
33namespace statistics {
  // Forward declarations: this header only uses these types through
  // const references, so their full definitions are not needed here.
34  class Score;
35  class VectorFunction;
36}
37namespace classifier { 
38
39  class IRRetrieve;
  // The classes declared here are forward declarations: in this header
  // they only appear as const-reference parameters (and, for IRRetrieve,
  // as a const-reference member) of ConsensusInputRanker.
40  class MatrixLookup;
41  class MatrixLookupWeighted;
42  class Sampler;
43
44  ///
45  /// @brief Robust algorithm to rank rows in a data matrix versus a
46  /// target vector.
47  ///
48  /// The idea is to create several (different) ranked lists. The
49  /// lists may differ because they are based upon different
50  /// sub-sets of the data, or because they have been generated
51  /// using different criteria. Having
52  /// \f$ N \f$ lists means each row in the data matrix has \f$ N \f$
53  /// ranks (each corresponding to one list). A
54  /// statistics::VectorFunction is used to boil down these ranks to
55  /// one consensus rank, and a ranked list is created by sorting the
56  /// data rows with respect to this consensus rank.
57  ///
58  /// For the time being there are two ways to build a
59  /// ConsensusInputRanker. 1) Sending a Sampler and a MatrixLookup to
60  /// the add function will create one ranked list for each of the
61  /// partitions defined in the Sampler. 2) You can generate
62  /// your ranked list outside, using your favourite method, and
63  /// adding it into the ConsensusInputRanker object. This allows
64  /// combining different scores and different sub-sets in a more
65  /// general way.
66  ///
67  class ConsensusInputRanker
68  {
69 
70  public:
71
72    ///
73    /// @brief Constructor
74    ///
75    /// Truly does nothing but creates a few empty member vectors.
    ///
    /// @note This is not a default constructor: an IRRetrieve and a
    /// statistics::VectorFunction must be supplied. The class keeps
    /// const-reference members of these two types; presumably they
    /// are bound to the arguments given here, in which case both
    /// arguments must outlive the created object (to be confirmed
    /// against the implementation).
76    ///
77    ConsensusInputRanker(const IRRetrieve&, const statistics::VectorFunction&);
78   
79    ///
    /// @brief Add a set of InputRankers
    ///
80    /// Iterates through @a sampler creating subsets of the given
81    /// MatrixLookup, and for each subset an InputRanker is created
82    /// using the score @a s. After creation the data rows are sorted
83    /// with respect to the consensus rank (i.e. update() is called).
84    ///
85    void add(const Sampler& sampler, const MatrixLookup&, 
86             const statistics::Score& s);
87   
88    ///
89    /// @brief Add a set of InputRankers
90    ///
91    /// Iterates through @a sampler creating subsets of @a data, and
92    /// for each subset an InputRanker is created using the @a
93    /// score. After creation the data rows are sorted with respect to
94    /// the consensus rank (i.e. update() is called).
95    ///
96    void add(const Sampler& sampler, const MatrixLookupWeighted& data, 
97             const statistics::Score& score);
98   
99    ///
100    /// @brief Add an InputRanker
101    ///
102    /// @note update() must be called for the added InputRanker to
103    /// influence consensus ids and ranks. If a sequence of
104    /// InputRankers is added, update() needs to be called only after
105    /// the last InputRanker is added.
106    ///
107    void add(const InputRanker& ir);
108   
109    ///
110    /// Row with lowest rank (highest score) is ranked as number zero
111    /// @return index of row ranked as number \a i
112    ///
113    size_t id(size_t i) const;
114   
115    /**
116       @return the \a i-th InputRanker
117    */
118    const InputRanker& input_ranker(size_t i) const;
119
120    ///
121    /// Row with lowest rank (highest score) is ranked as number zero
122    /// @return rank for row \a i
123    ///
124    size_t rank(size_t i) const;
125   
126    /**
127       \brief reserve memory for internal vector of InputRankers
128
129       Calling this function is recommended before adding InputRankers
130       with add(const InputRanker&), to avoid re-allocations.
131    */
132    void reserve(size_t n);
133
134
135    ///
136    /// @brief Update ids and ranks
137    ///
138    void update(void);
139
140
141  private:
142
    // NOTE(review): id_ and rank_ presumably back id() and rank()
    // respectively; confirm against the implementation file.
143    std::vector<size_t> id_;
144    std::vector<InputRanker> input_rankers_;
145    std::vector<size_t> rank_;
    // Reference members: the referenced objects are not owned by this
    // class. Because of these members the implicitly-declared copy
    // assignment operator is deleted.
146    const IRRetrieve& retriever_;
147    const statistics::VectorFunction& vec_func_;
148  };
149
150}}} // of namespace classifier, yat, and theplu
151
152#endif
Note: See TracBrowser for help on using the repository browser.