source: trunk/yat/classifier/ConsensusInputRanker.h @ 828

Last change on this file since 828 was 828, checked in by Peter, 15 years ago

Generalized ConsensusInputRanker, Fixes #151

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date ID
File size: 4.5 KB
Line 
1#ifndef _theplu_yat_classifier_consensusinputranker_
2#define _theplu_yat_classifier_consensusinputranker_
3
4// $Id$
5
6/*
7  Copyright (C) The authors contributing to this file.
8
9  This file is part of the yat library, http://lev.thep.lu.se/trac/yat
10
11  The yat library is free software; you can redistribute it and/or
12  modify it under the terms of the GNU General Public License as
13  published by the Free Software Foundation; either version 2 of the
14  License, or (at your option) any later version.
15
16  The yat library is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  General Public License for more details.
20
21  You should have received a copy of the GNU General Public License
22  along with this program; if not, write to the Free Software
23  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
24  02111-1307, USA.
25*/
26
27#include "InputRanker.h"
28
29#include <vector>
30
31namespace theplu {
32namespace yat {
33namespace statistics {
34  class Score;
35  class VectorFunction;
36}
37namespace classifier { 
38
39  class IRRetrieve;
40  class MatrixLookup;
41  class MatrixLookupWeighted;
42  class Sampler;
43
44  ///
45  /// @brief Robust algorithm to rank rows in a data matrix versus a
46  /// target vector.
47  ///
48  /// The idea is to create several (different) ranked lists. The lists
49  /// could differ because they are based upon different
50  /// sub-sets of the data, or they could differ
51  /// because they have been generated using different criteria. Having
52  /// \f$ N \f$ lists means each row in the data matrix has \f$ N \f$
53  /// ranks (each corresponding to one list). A
54  /// statistics::VectorFunction is used to boil down these ranks to
55  /// one consensus rank, and a ranked list is created by sorting the
56  /// data rows with respect to this consensus rank.
57  ///
58  /// For the time being there are two ways to build a
59  /// ConsensusInputRanker. 1) Sending a Sampler and a MatrixLookup to
60  /// the add function will create one ranked list for each of the
61  /// partitions defined in the Sampler. 2) You can generate
62  /// your ranked list outside, using your favourite method, and
63  /// adding it into the ConsensusInputRanker object. This allows
64  /// combining different scores and different sub-sets in a more
65  /// general way.
66  ///
67  class ConsensusInputRanker
68  {
69 
70  public:
71
72    ///
73    /// @brief Constructor (takes two arguments; this is not a default constructor)
74    ///
75    /// NOTE(review): the class holds const reference members of exactly these two types (retriever_ and vec_func_), so presumably the arguments are bound to them and must outlive this object -- confirm in the implementation.
76    ///
77    ConsensusInputRanker(const IRRetrieve&, const statistics::VectorFunction&);
78   
79    /// @brief Add a set of InputRankers
80    /// Iterates through @a sampler, creating subsets of the MatrixLookup
81    /// argument (unnamed in this declaration); for each subset an
82    /// InputRanker is created using the score @a s. After creation the data rows are sorted with respect to
83    /// the consensus rank (i.e. update() is called).
84    /// NOTE(review): earlier text said "median rank"; the class description says a general statistics::VectorFunction produces the consensus rank -- confirm.
85    void add(const Sampler& sampler, const MatrixLookup&, statistics::Score& s);
86   
87    ///
88    /// @brief Add a set of InputRankers
89    ///
90    /// Iterates through @a sampler, creating subsets of @a data; for
91    /// each subset an InputRanker is created using the @a
92    /// score. After creation the data rows are sorted with respect to
93    /// the consensus rank (i.e. update() is called).
94    /// NOTE(review): earlier text said "median rank"; the class description says a general statistics::VectorFunction produces the consensus rank -- confirm.
95    void add(const Sampler& sampler, const MatrixLookupWeighted& data, 
96             statistics::Score& score);
97   
98    ///
99    /// @brief Add an InputRanker
100    ///
101    /// @note update() must be called for the added InputRanker to
102    /// influence consensus ids and ranks. If a sequence of
103    /// InputRankers is added, update() needs to be called only once, after
104    /// the last InputRanker has been added.
105    ///
106    void add(const InputRanker& ir);
107   
108    ///
109    /// Row with lowest rank (highest score) is ranked as number zero
110    /// @return index of row ranked as number \a i
111    ///
112    size_t id(size_t i) const;
113   
114    /**
115       @return the \a i-th InputRanker
116    */
117    const InputRanker& input_ranker(size_t i) const;
118
119    ///
120    /// Row with lowest rank (highest score) is ranked as number zero
121    /// @return rank for row \a i
122    ///
123    size_t rank(size_t i) const;
124   
125    /**
126       \brief Reserve memory for the internal vector of InputRankers
127
128       Calling this function is recommended before adding InputRankers
129       with add(const InputRanker&), to avoid re-allocations.
130    */
131    void reserve(size_t n);
132
133
134    ///
135    /// @brief Update ids and ranks. Must be called after add(const InputRanker&) for the added InputRankers to influence consensus ids and ranks.
136    ///
137    void update(void);
138
139
140  private:
141
142    std::vector<size_t> id_; // presumably backs id(): row index per consensus rank -- confirm in the implementation
143    std::vector<InputRanker> input_rankers_; // internal vector of InputRankers; see reserve() and input_ranker()
144    std::vector<size_t> rank_; // presumably backs rank(): consensus rank per row -- confirm in the implementation
145    const IRRetrieve& retriever_; // reference member: the referred-to object is not owned by this class
146    const statistics::VectorFunction& vec_func_; // reference member: not owned; per the class description a VectorFunction reduces the ranks to one consensus rank
147  };
148
149}}} // of namespace classifier, yat, and theplu
150
151#endif
Note: See TracBrowser for help on using the repository browser.