source: trunk/yat/classifier/ConsensusInputRanker.h

Last change on this file was 4207, checked in by Peter, 6 weeks ago

update copyright statements

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date ID
File size: 4.6 KB
Line 
1#ifndef _theplu_yat_classifier_consensusinputranker_
2#define _theplu_yat_classifier_consensusinputranker_
3
4// $Id$
5
6/*
7  Copyright (C) 2004, 2005 Peter Johansson
8  Copyright (C) 2006 Jari Häkkinen, Peter Johansson, Markus Ringnér
9  Copyright (C) 2007, 2008 Jari Häkkinen, Peter Johansson
10  Copyright (C) 2022 Peter Johansson
11
12  This file is part of the yat library, http://dev.thep.lu.se/yat
13
14  The yat library is free software; you can redistribute it and/or
15  modify it under the terms of the GNU General Public License as
16  published by the Free Software Foundation; either version 3 of the
17  License, or (at your option) any later version.
18
19  The yat library is distributed in the hope that it will be useful,
20  but WITHOUT ANY WARRANTY; without even the implied warranty of
21  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22  General Public License for more details.
23
24  You should have received a copy of the GNU General Public License
25  along with yat. If not, see <http://www.gnu.org/licenses/>.
26*/
27
28#include "InputRanker.h"
29
30#include <vector>
31
32namespace theplu {
33namespace yat {
34namespace statistics {
35  class Score; // forward declaration; only used as const Score& in add()
36  class VectorFunction; // forward declaration; used as const reference (ctor, vec_func_)
37}
38namespace classifier {
39
40  class IRRetrieve; // forward declaration; used as const reference (ctor, retriever_)
41  class MatrixLookup; // forward declaration; const reference argument of add()
42  class MatrixLookupWeighted; // forward declaration; const reference argument of add()
43  class Sampler; // forward declaration; const reference argument of add()
44
45  ///
46  /// @brief Robust algorithm to rank rows in a data matrix versus a
47  /// target vector.
48  ///
49  /// The idea is to create several (different) ranked lists. The lists
50  /// may differ because they are based upon different sub-sets of
51  /// the data, or because they have been generated using different
52  /// criteria. Having
53  /// \f$ N \f$ lists means each row in the data matrix has \f$ N \f$
54  /// ranks (each corresponding to one list). A
55  /// statistics::VectorFunction is used to boil down these ranks to
56  /// one consensus rank, and a ranked list is created by sorting the
57  /// data rows with respect to this consensus rank.
58  ///
59  /// For the time being there are two ways to build a
60  /// ConsensusInputRanker. 1) Sending a Sampler and a MatrixLookup to
61  /// the add function will create one ranked list for each of the
62  /// partitions defined in the Sampler. 2) You can generate
63  /// your ranked list outside, using your favourite method, and
64  /// adding it into the ConsensusInputRanker object. This allows
65  /// combining different scores and different sub-sets in a more
66  /// general way.
67  ///
68  class ConsensusInputRanker
69  {
70
71  public:
72
73    ///
74    /// @brief Constructor
75    ///
76    /// Does nothing beyond creating a few empty member vectors.
77    /// NOTE(review): arguments are presumably stored by reference - confirm.
78    ConsensusInputRanker(const IRRetrieve&, const statistics::VectorFunction&);
79
80    ///
81    /// Iterates through @a sampler creating subsets of the
82    /// MatrixLookup; for each subset an InputRanker is created using
83    /// score @a s. After creation the data rows are sorted with respect
84    /// to the consensus rank (i.e. update() is called).
85    ///
86    void add(const Sampler& sampler, const MatrixLookup&,
87             const statistics::Score& s);
88
89    ///
90    /// @brief Add a set of InputRankers
91    ///
92    /// Iterates through @a sampler creating subsets of @a data; for
93    /// each subset an InputRanker is created using the @a score.
94    /// After creation the data rows are sorted with respect to the
95    /// consensus rank (i.e. update() is called).
96    ///
97    void add(const Sampler& sampler, const MatrixLookupWeighted& data,
98             const statistics::Score& score);
99
100    ///
101    /// @brief Add an InputRanker
102    ///
103    /// @note update() must be called to make the added InputRanker
104    /// influence consensus ids and ranks. If a sequence of
105    /// InputRankers is added, update() needs to be called only after
106    /// the last InputRanker is added.
107    ///
108    void add(const InputRanker& ir);
109
110    ///
111    /// Row with lowest rank (highest score) is ranked as number zero.
112    /// @return index of the row ranked as number \a i
113    ///
114    size_t id(size_t i) const;
115
116    /**
117       @return InputRanker number \a i
118    */
119    const InputRanker& input_ranker(size_t i) const;
120
121    ///
122    /// Row with lowest rank (highest score) is ranked as number zero.
123    /// @return rank of row \a i
124    ///
125    size_t rank(size_t i) const;
126
127    /**
128       \brief Reserve memory for the internal vector of InputRankers
129
130       Calling this before adding InputRankers with add(const
131       InputRanker&) is recommended to avoid re-allocations.
132    */
133    void reserve(size_t n);
134
135
136    ///
137    /// Update consensus ids and ranks.
138    ///
139    void update(void);
140
141
142  private:
143
144    std::vector<size_t> id_;
145    std::vector<InputRanker> input_rankers_; // internal vector of InputRankers, see reserve()
146    std::vector<size_t> rank_;
147    const IRRetrieve& retriever_; // reference member: referenced object is not owned
148    const statistics::VectorFunction& vec_func_; // reference member: referenced object is not owned
149  };
150
151}}} // of namespace classifier, yat, and theplu
152
153#endif
Note: See TracBrowser for help on using the repository browser.