source: branches/0.4-stable/yat/classifier/ConsensusInputRanker.h @ 1743

Last change on this file since 1743 was 1743, checked in by Peter, 12 years ago

updating copyright statements

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date ID
File size: 4.7 KB
Line 
1#ifndef _theplu_yat_classifier_consensusinputranker_
2#define _theplu_yat_classifier_consensusinputranker_
3
4// $Id$
5
6/*
7  Copyright (C) 2004, 2005 Peter Johansson
8  Copyright (C) 2006 Jari Häkkinen, Peter Johansson, Markus Ringnér
9  Copyright (C) 2007 Jari Häkkinen, Peter Johansson
10  Copyright (C) 2008 Peter Johansson
11
12  This file is part of the yat library, http://dev.thep.lu.se/yat
13
14  The yat library is free software; you can redistribute it and/or
15  modify it under the terms of the GNU General Public License as
16  published by the Free Software Foundation; either version 2 of the
17  License, or (at your option) any later version.
18
19  The yat library is distributed in the hope that it will be useful,
20  but WITHOUT ANY WARRANTY; without even the implied warranty of
21  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22  General Public License for more details.
23
24  You should have received a copy of the GNU General Public License
25  along with this program; if not, write to the Free Software
26  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
27  02111-1307, USA.
28*/
29
30#include "InputRanker.h"
31
32#include <vector>
33
34namespace theplu {
35namespace yat {
36namespace statistics {
37  class Score; // forward declaration; used in this header only as a const reference parameter of add()
38  class VectorFunction; // forward declaration; used in this header only by const reference (constructor parameter, vec_func_ member)
39}
40namespace classifier { 
41
42  class IRRetrieve; // forward declaration; used below by const reference (constructor parameter, retriever_ member)
43  class MatrixLookup; // forward declaration; const reference parameter of add()
44  class MatrixLookupWeighted; // forward declaration; const reference parameter of add()
45  class Sampler; // forward declaration; const reference parameter of add()
46
47  ///
48  /// @brief Robust algorithm to rank rows in a data matrix versus a
49  /// target vector.
50  ///
51  /// The idea is to create several (different) ranked lists. The lists
52  /// could differ because they are based upon different sub-sets of
53  /// the data, or because they have been generated using different
54  /// criteria. Having
55  /// \f$ N \f$ lists means each row in the data matrix has \f$ N \f$
56  /// ranks (each corresponding to one list). A
57  /// statistics::VectorFunction is used to boil down these ranks to
58  /// one consensus rank, and a ranked list is created by sorting the
59  /// data rows with respect to this consensus rank.
60  ///
61  /// For the time being there are two ways to build a
62  /// ConsensusInputRanker. 1) Sending a Sampler and a MatrixLookup to
63  /// the add function will create one ranked list for each of the
64  /// partitions defined in the Sampler. 2) You can generate
65  /// your ranked list outside, using your favourite method, and
66  /// add it to the ConsensusInputRanker object. This allows
67  /// combining different scores and different sub-sets in a more
68  /// general way.
69  ///
70  class ConsensusInputRanker
71  {
72 
73  public:
74
75    ///
76    /// @brief Constructor; creates a few empty member vectors.
77    ///
78    /// NOTE(review): the arguments are presumably bound to the reference
79    /// members retriever_ and vec_func_ -- confirm in the implementation.
80    ConsensusInputRanker(const IRRetrieve&, const statistics::VectorFunction&);
81   
82    /// @brief Add a set of InputRankers created from a MatrixLookup
83    ///
84    /// Iterates through @a sampler creating subsets of the data (the
85    /// second argument), and for each subset an InputRanker is created
86    /// using the score @a s. After creation the data rows are sorted
87    /// with respect to the consensus rank (i.e. update() is called).
88    void add(const Sampler& sampler, const MatrixLookup&, 
89             const statistics::Score& s);
90   
91    ///
92    /// @brief Add a set of InputRankers created from a MatrixLookupWeighted
93    ///
94    /// Iterates through @a sampler creating subsets of @a data, and
95    /// for each subset an InputRanker is created using the @a
96    /// score. After creation the data rows are sorted with respect to
97    /// the consensus rank (i.e. update() is called).
98    ///
99    void add(const Sampler& sampler, const MatrixLookupWeighted& data, 
100             const statistics::Score& score);
101   
102    ///
103    /// @brief Add an InputRanker
104    ///
105    /// @note update() must be called to make the added InputRanker
106    /// influence consensus ids and ranks. If a sequence of
107    /// InputRankers is added, update() needs to be called only after
108    /// the last InputRanker is added.
109    ///
110    void add(const InputRanker& ir);
111   
112    ///
113    /// Row with lowest rank (highest score) is ranked as number zero
114    /// @return index of row ranked as number \a i
115    ///
116    size_t id(size_t i) const;
117   
118    /**
119       @return the InputRanker with index \a i
120    */
121    const InputRanker& input_ranker(size_t i) const;
122
123    ///
124    /// Row with lowest rank (highest score) is ranked as number zero
125    /// @return rank for row \a i
126    ///
127    size_t rank(size_t i) const;
128   
129    /**
130       \brief reserve memory for internal vector of InputRankers
131
132       Calling this function is recommended before adding InputRankers
133       with add(const InputRanker&), to avoid re-allocations.
134    */
135    void reserve(size_t n);
136
137
138    ///
139    /// @brief Update consensus ids and ranks
140    ///
141    void update(void);
142
143
144  private:
145
146    std::vector<size_t> id_; // NOTE(review): presumably the values returned by id() -- confirm
147    std::vector<InputRanker> input_rankers_; // the internal vector of InputRankers that reserve() refers to
148    std::vector<size_t> rank_; // NOTE(review): presumably the values returned by rank() -- confirm
149    const IRRetrieve& retriever_; // non-owning reference; the referenced object must stay alive while this object uses it
150    const statistics::VectorFunction& vec_func_; // non-owning reference; the referenced object must stay alive while this object uses it
151  };
152
153}}} // of namespace classifier, yat, and theplu
154
155#endif
Note: See TracBrowser for help on using the repository browser.