source: trunk/yat/classifier/ConsensusInputRanker.h @ 831

Last change on this file since 831 was 831, checked in by Peter, 14 years ago

Refs #185.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date ID
File size: 4.6 KB
Line 
1#ifndef _theplu_yat_classifier_consensusinputranker_
2#define _theplu_yat_classifier_consensusinputranker_
3
4// $Id$
5
6/*
7  Copyright (C) 2004, 2005 Peter Johansson
8  Copyright (C) 2006 Jari Häkkinen, Markus Ringnér, Peter Johansson
9  Copyright (C) 2007 Peter Johansson
10
11  This file is part of the yat library, http://lev.thep.lu.se/trac/yat
12
13  The yat library is free software; you can redistribute it and/or
14  modify it under the terms of the GNU General Public License as
15  published by the Free Software Foundation; either version 2 of the
16  License, or (at your option) any later version.
17
18  The yat library is distributed in the hope that it will be useful,
19  but WITHOUT ANY WARRANTY; without even the implied warranty of
20  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21  General Public License for more details.
22
23  You should have received a copy of the GNU General Public License
24  along with this program; if not, write to the Free Software
25  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
26  02111-1307, USA.
27*/
28
29#include "InputRanker.h"
30
31#include <vector>
32
33namespace theplu {
34namespace yat {
35namespace statistics {
36  class Score;           // forward declaration; passed by reference to ConsensusInputRanker::add()
37  class VectorFunction;  // forward declaration; taken and held by const reference in ConsensusInputRanker
38}
39namespace classifier { 
40
41  class IRRetrieve;            // forward declaration; taken and held by const reference in ConsensusInputRanker
42  class MatrixLookup;          // forward declaration; const reference argument of add()
43  class MatrixLookupWeighted;  // forward declaration; const reference argument of add()
44  class Sampler;               // forward declaration; const reference argument of add()
45
46  ///
47  /// @brief Robust algorithm to rank rows in a data matrix versus a
48  /// target vector.
49  ///
50  /// The idea is to create several (different) ranked lists. The lists
51  /// could differ because they are based upon different
52  /// sub-sets of the data, or because they have been generated
53  /// using different criteria. Having
54  /// \f$ N \f$ lists means each row in the data matrix has \f$ N \f$
55  /// ranks (each corresponding to one list). A
56  /// statistics::VectorFunction is used to boil down these ranks to
57  /// one consensus rank, and a ranked list is created by sorting the
58  /// data rows with respect to this consensus rank.
59  ///
60  /// For the time being there are two ways to build a
61  /// ConsensusInputRanker. 1) Sending a Sampler and a MatrixLookup to
62  /// the add function will create one ranked list for each of the
63  /// partitions defined in the Sampler. 2) You can generate
64  /// your ranked list outside, using your favourite method, and
65  /// add it to the ConsensusInputRanker object. This allows
66  /// combining different scores and different sub-sets in a more
67  /// general way.
68  ///
69  class ConsensusInputRanker
70  {
71 
72  public:
73
74    ///
75    /// @brief Constructor (takes two arguments; not a default constructor)
76    ///
77    /// Truly does nothing but creates a few empty member vectors. The arguments
78    /// are presumably bound to the reference members retriever_ and vec_func_, so they should outlive this object (TODO confirm).
79    ConsensusInputRanker(const IRRetrieve&, const statistics::VectorFunction&);
80   
81    ///
82    /// Iterating through @a sampler creating subsets of the MatrixLookup
83    /// data, and for each subset an InputRanker is created using the score
84    /// @a s. After creation the data rows are sorted with respect to
85    /// the consensus rank (i.e. update() is called).
86    ///
87    void add(const Sampler& sampler, const MatrixLookup&, statistics::Score& s);
88   
89    ///
90    /// @brief Add a set of InputRankers
91    ///
92    /// Iterating through @a sampler creating subsets of @a data, and
93    /// for each subset an InputRanker is created using the @a
94    /// score. After creation the data rows are sorted with respect to
95    /// the consensus rank (i.e. update() is called).
96    ///
97    void add(const Sampler& sampler, const MatrixLookupWeighted& data, 
98             statistics::Score& score);
99   
100    ///
101    /// @brief Add an InputRanker
102    ///
103    /// @note update() must be called for the added InputRanker to
104    /// influence consensus ids and ranks. If a sequence of
105    /// InputRankers is added, update() needs to be called only after
106    /// the last InputRanker is added.
107    ///
108    void add(const InputRanker& ir);
109   
110    ///
111    /// Row with lowest rank (highest score) is ranked as number zero
112    /// @return index of row ranked as number \a i
113    ///
114    size_t id(size_t i) const;
115   
116    /**
117       @return ith InputRanker
118    */
119    const InputRanker& input_ranker(size_t i) const;
120
121    ///
122    /// Row with lowest rank (highest score) is ranked as number zero
123    /// @return rank for row \a i
124    ///
125    size_t rank(size_t i) const;
126   
127    /**
128       \brief Reserve memory for internal vector of InputRankers
129
130       Calling this function is recommended before adding InputRankers
131       with add(const InputRanker&), to avoid re-allocations.
132    */
133    void reserve(size_t n);
134
135
136    ///
137    /// Update ids and ranks
138    ///
139    void update(void);
140
141
142  private:
143
144    std::vector<size_t> id_; // presumably id_[i] is the row index ranked as number i (see id()) - TODO confirm
145    std::vector<InputRanker> input_rankers_; // presumably the InputRankers added via add() (see input_ranker(), reserve()) - TODO confirm
146    std::vector<size_t> rank_; // presumably rank_[i] is the consensus rank of row i (see rank()) - TODO confirm
147    const IRRetrieve& retriever_; // reference member: not owned by this class
148    const statistics::VectorFunction& vec_func_; // reference member: not owned by this class
149  };
150
151}}} // of namespace classifier, yat, and theplu
152
153#endif
Note: See TracBrowser for help on using the repository browser.