source: trunk/yat/omic/BamPairIterator.h @ 3173

Last change on this file since 3173 was 3173, checked in by Peter, 8 years ago

a first version of BamPairIterator?. refs #784

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 5.6 KB
Line 
1#ifndef theplu_yat_omic_bam_pair_iterator
2#define theplu_yat_omic_bam_pair_iterator
3
4// $Id: BamPairIterator.h 3173 2014-03-08 13:48:13Z peter $
5
6/*
7  Copyright (C) 2014 Peter Johansson
8
9  This file is part of the yat library, http://dev.thep.lu.se/yat
10
11  The yat library is free software; you can redistribute it and/or
12  modify it under the terms of the GNU General Public License as
13  published by the Free Software Foundation; either version 3 of the
14  License, or (at your option) any later version.
15
16  The yat library is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  General Public License for more details.
20
21  You should have received a copy of the GNU General Public License
22  along with this program. If not, see <http://www.gnu.org/licenses/>.
23*/
24
25#include <iostream>
26
27#include "BamRead.h"
28
29#include <boost/concept/assert.hpp>
30#include <boost/iterator/iterator_facade.hpp>
31
32#include <iterator>
33#include <map>
34#include <utility>
35
36namespace theplu {
37namespace yat {
38namespace omic {
39
40  /**
41     \since New in yat 0.12
42
43     Type Requirements:
44     - \c Iterator must be an \input_iterator
45     - \c Iterator 's \c reference type must be convertible to BamRead
46
47     This iterator works on a sorted range [begin, end) and provies a
48     covenient way to access the pairs rather than the reads
49     individually. When BamPairIterator is incremented, rather than
50     just step to next element in [begin, end), BamPairIterator keeps
51     stepping until it finds a read whose mate it has already seen, and
52     operator* return the pair of BamReads.
53   */
54  template<typename Base>
55  class BamPairIterator
56    : public boost::iterator_facade<
57    BamPairIterator<Base>, const std::pair<BamRead, BamRead>,
58    std::input_iterator_tag
59    >
60  {
61  public:
62    /**
63       \brief default constructor
64     */
65    BamPairIterator(void);
66
67    /**
68       Creates an iterator that will work on [begin, end).
69
70       \see bam_pair_iterator
71
72       \note Input range \c [\a first, \a last \c ) must be sorted or
73       behaviour is undefined.
74     */
75    explicit BamPairIterator(Base begin, Base end);
76  private:
77    Base iter_;
78    Base end_;
79    std::pair<BamRead, BamRead> x_;
80    std::map<std::string, BamRead> siam_reads_;
81    typedef std::pair<int32_t, int32_t> Position;
82    std::multimap<Position, BamRead> reads_;
83    friend class boost::iterator_core_access;
84
85    const std::pair<BamRead, BamRead>& dereference(void) const;
86    bool equal(const BamPairIterator& other) const;
87    void increment(void);
88    void find_next(void);
89  };
90
91
92  /**
93     Convenient function to create BamPairIterator
94
95     \relates BamPairIterator
96   */
97  template<typename Base>
98  BamPairIterator<Base> bam_pair_iterator(Base base, Base end)
99  { return BamPairIterator<Base>(base, end); }
100
101
102  /**
103     Convenient function to create BamPairIterator that works on empty
104     range. Equivalent to bam_pair_iterator(end, end).
105
106     \relates BamPairIterator
107  */
108  template<typename Base>
109  BamPairIterator<Base> bam_pair_iterator(Base end)
110  { return BamPairIterator<Base>(end, end); }
111
112
113  //             template implementations
114  ///////////////////////////////////////////////////////////////
115
116  template<typename Base>
117  BamPairIterator<Base>::BamPairIterator(Base base, Base end)
118    : iter_(base), end_(end)
119  {
120    BOOST_CONCEPT_ASSERT((boost::InputIterator<Base>));
121    using boost::Convertible;
122    typedef typename std::iterator_traits<Base>::reference reference_type;
123    BOOST_CONCEPT_ASSERT((Convertible<reference_type, BamRead>));
124    find_next();
125  }
126
127
128  template<typename Base>
129  BamPairIterator<Base>::BamPairIterator(void)
130  {
131    BOOST_CONCEPT_ASSERT((boost::InputIterator<Base>));
132    using boost::Convertible;
133    typedef typename std::iterator_traits<Base>::reference reference_type;
134    BOOST_CONCEPT_ASSERT((Convertible<reference_type, BamRead>));
135  }
136
137
138  template<typename Base>
139  const std::pair<BamRead, BamRead>&
140  BamPairIterator<Base>::dereference(void) const
141  {
142    return x_;
143  }
144
145
146  template<typename Base>
147  bool BamPairIterator<Base>::equal(const BamPairIterator& other) const
148  {
149    return iter_ == other.iter_;
150  }
151
152
153  template<typename Base>
154  void BamPairIterator<Base>::increment(void)
155  {
156    ++iter_;
157    find_next();
158  }
159
160
161  template<typename Base>
162  void BamPairIterator<Base>::find_next(void)
163  {
164    BamLessPos less;
165    for (; iter_!=end_; ++iter_) {
166      Position position(iter_->tid(), iter_->pos());
167      Position mate_position(iter_->mtid(), iter_->mpos());
168      // clear siam reads if iter is more advanced than siam reads
169      if (siam_reads_.size() && less(siam_reads_.begin()->second, *iter_))
170        siam_reads_.clear();
171
172      // have not seen mate yet
173      if (mate_position > position)
174        reads_.insert(std::make_pair(mate_position, *iter_));
175      else if (position > mate_position) {
176        std::multimap<Position, BamRead>::iterator
177          lower = reads_.lower_bound(position);
178        // erase all reads with mate position less than current
179        // position (assuming input range is sorted)
180        reads_.erase(reads_.begin(), lower);
181
182        // find mate and assign pair
183        for (; lower!=reads_.end() && lower->first == position; ++lower)
184          if (same_query_name(lower->second, *iter_)) {
185            x_.first = lower->second;
186            x_.second = *iter_;
187            return;
188          }
189      }
190      else { // siam read, i.e., iter_ and mate have same position
191        // check if we have already seen mate and stored it in map
192        std::map<std::string, BamRead>::iterator
193          mate = siam_reads_.lower_bound(iter_->name());
194        if (mate!=siam_reads_.end() && same_query_name(mate->second, *iter_)) {
195          x_.first = mate->second;
196          x_.second = *iter_;
197          return;
198        }
199        else
200          // insert with hint
201          siam_reads_.insert(mate, std::make_pair(iter_->name(), *iter_));
202      }
203    }
204  }
205
206}}}
207#endif
Note: See TracBrowser for help on using the repository browser.