source: branches/0.4-stable/yat/classifier/ConsensusInputRanker.h @ 1392

Last change on this file since 1392 was 1392, checked in by Peter, 15 years ago

trac has moved

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date ID
File size: 4.6 KB
Line 
1#ifndef _theplu_yat_classifier_consensusinputranker_
2#define _theplu_yat_classifier_consensusinputranker_
3
4// $Id$
5
6/*
7  Copyright (C) 2004, 2005 Peter Johansson
8  Copyright (C) 2006 Jari Häkkinen, Peter Johansson, Markus Ringnér
9  Copyright (C) 2007 Jari Häkkinen, Peter Johansson
10
11  This file is part of the yat library, http://dev.thep.lu.se/yat
12
13  The yat library is free software; you can redistribute it and/or
14  modify it under the terms of the GNU General Public License as
15  published by the Free Software Foundation; either version 2 of the
16  License, or (at your option) any later version.
17
18  The yat library is distributed in the hope that it will be useful,
19  but WITHOUT ANY WARRANTY; without even the implied warranty of
20  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21  General Public License for more details.
22
23  You should have received a copy of the GNU General Public License
24  along with this program; if not, write to the Free Software
25  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
26  02111-1307, USA.
27*/
28
29#include "InputRanker.h"
30
31#include <vector>
32
33namespace theplu {
34namespace yat {
35namespace statistics {
36  class Score;
37  class VectorFunction;
38}
39namespace classifier { 
40
41  class IRRetrieve;
42  class MatrixLookup;
43  class MatrixLookupWeighted;
44  class Sampler;
45
46  ///
47  /// @brief Robust algorithm to rank rows in a data matrix versus a
48  /// target vector.
49  ///
50  /// The idea is to create several (different) ranked lists. The list
51  /// could be different because they are based upon different
52  /// sub-sets of the data, or the different lists could be different
53  /// because they have been generated using different criteria. Having
54  /// \f$ N \f$ lists means each row in the data matrix has \f$ N \f$
55  /// ranks (each corresponding to one list). A
56  /// statistics::VectorFunction is used to boil down these ranks to
57  /// one consensus rank, and a ranked list is created by sorting the
58  /// data rows with respect to this consensus rank.
59  ///
60  /// For the time being there are two ways to build a
61  /// ConsensusInputRanker. 1) Sending a Sampler and a MatrixLookup to
62  /// the add function will create one ranked list for each of the
63  /// partitions defined in the Sampler. 2) You can generate
64  /// your ranked list outside, using your favourite method, and
65  /// adding it into the ConsensusInputRanker object. This allows
66  /// combining different scores and different sub-sets in a more
67  /// general way.
68  ///
69  class ConsensusInputRanker
70  {
71 
72  public:
73
74    ///
75    /// @brief Default constructor
76    ///
77    /// Truly does nothing but creates a few empty member vectors.
78    ///
79    ConsensusInputRanker(const IRRetrieve&, const statistics::VectorFunction&);
80   
81    ///
82    /// Iterating through @a sampler creating subsets of @a data, and
83    /// for each subset is an InputRanker is created using the @a
84    /// score. After creation the data rows are sorted with respect to
85    /// the median rank (i.e. update() is called).
86    ///
87    void add(const Sampler& sampler, const MatrixLookup&, 
88             const statistics::Score& s);
89   
90    ///
91    /// @brief Add a set of InputRankers
92    ///
93    /// Iterating through @a sampler creating subsets of @a data, and
94    /// for each subset is an InputRanker is created using the @a
95    /// score. After creation the data rows are sorted with respect to
96    /// the median rank (i.e. update() is called).
97    ///
98    void add(const Sampler& sampler, const MatrixLookupWeighted& data, 
99             const statistics::Score& score);
100   
101    ///
102    /// @brief Add an InputRanker
103    ///
104    /// @note update() must be called to make the added InputRanker to
105    /// influence consensus ids and ranks. If a sequence of
106    /// InputRankers are added, update() need to be called only after
107    /// the last InputRanker is added.
108    ///
109    void add(const InputRanker& ir);
110   
111    ///
112    /// Row with lowest rank (highest score) is ranked as number zero
113    /// @return index of row ranked as number \a i
114    ///
115    size_t id(size_t i) const;
116   
117    /**
118       @return ith InputRanker
119    */
120    const InputRanker& input_ranker(size_t i) const;
121
122    ///
123    /// Row with lowest rank (highest score) is ranked as number zero
124    /// @return rank for row \a i
125    ///
126    size_t rank(size_t i) const;
127   
128    /**
129       \brief \brief reserve memory for internal vector of InputRankers
130
131       This function is recommended before adding using add(const
132       InputRanker&) to avoid re-allocations.
133    */
134    void reserve(size_t n);
135
136
137    ///
138    /// update ids and ranks
139    ///
140    void update(void);
141
142
143  private:
144
145    std::vector<size_t> id_;
146    std::vector<InputRanker> input_rankers_;
147    std::vector<size_t> rank_;
148    const IRRetrieve& retriever_;
149    const statistics::VectorFunction& vec_func_;
150  };
151
152}}} // of namespace classifier, yat, and theplu
153
154#endif
Note: See TracBrowser for help on using the repository browser.