source: trunk/yat/omic/BamPairIterator2.h @ 4057

Last change on this file since 4057 was 4057, checked in by Peter, 7 months ago

closes #798; new classes BamPairBuffer? and a more general SortedBuffer?.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 7.4 KB
Line 
1#ifndef theplu_yat_omic_bam_pair_iterator2
2#define theplu_yat_omic_bam_pair_iterator2
3
4// $Id: BamPairIterator2.h 4057 2021-03-31 05:46:00Z peter $
5
6/*
7  Copyright (C) 2020, 2021 Peter Johansson
8
9  This file is part of the yat library, http://dev.thep.lu.se/yat
10
11  The yat library is free software; you can redistribute it and/or
12  modify it under the terms of the GNU General Public License as
13  published by the Free Software Foundation; either version 3 of the
14  License, or (at your option) any later version.
15
16  The yat library is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  General Public License for more details.
20
21  You should have received a copy of the GNU General Public License
22  along with yat. If not, see <http://www.gnu.org/licenses/>.
23*/
24
25#include "BamPair.h"
26#include "BamRead.h"
27
28#include "yat/utility/config_public.h"
29#include "yat/utility/yat_assert.h"
30
31#include <boost/concept/assert.hpp>
32#include <boost/iterator/iterator_concepts.hpp>
33#include <boost/iterator/iterator_facade.hpp>
34
35#include <iterator>
36#include <map>
37#include <memory>
38#include <utility>
39
40namespace theplu {
41namespace yat {
42namespace omic {
43
44  /**
45     Type Requirments:
46     - \c Base is a \readable_iterator
47     - \c Base is a \single_pass_iterator
48     - \c value_type is BamRead
49
50     This iterator works on a sorted range [begin, end) and provides a
51     covenient way to access the pairs rather than the reads
52     individually. The pairs are iterated such that they appear sorted
53     with respect to \c first().
54
55     If read pairs with large isize (tlen) are included the iterator
56     will consumer a lot of memory because iterator does store
57     virtually all reads in the underlying range between the current
58     \c first() and its mate, \c second().
59
60     Note that BamPairIterator2 is a single-pass iterator, i.e., once it
61     is incremented the behaviour of its copies is undefined.
62
63     \see BamPairBuffer
64
65     \since New in yat 0.18
66   */
67  template<typename Base>
68  class BamPairIterator2
69    : public boost::iterator_facade<
70    BamPairIterator2<Base>, const BamPair, std::input_iterator_tag,
71    const BamPairProxy
72    >
73  {
74  public:
75    /**
76       \brief default constructor
77     */
78    BamPairIterator2(void);
79
80    /**
81       Creates an iterator that will work on [begin, end).
82
83       \note Input range \c [\a begin, \a end \c ) must be sorted or
84       behaviour is undefined.
85     */
86    explicit BamPairIterator2(Base begin, Base end);
87
88#ifndef YAT_HAVE_BOOST_ITERATOR_FACADE_PROXY_PTR
89    /**
90       This is workaround implementation of operator-> when
91       implementation in base class (boost) does not compile. This
92       implementation may be slow, so when using old boost it often
93       prefereble to use operator*.
94     */
95    typename BamPairIterator2::pointer operator->(void) const
96    {
97      // older versions of boost mandates pointer to be
98      // value_type*. To accomplish that we implement this slow thing
99      // which keeps a copy of a value_type as member.
100      // See https://svn.boost.org/trac/boost/ticket/5697 for discussion.
101
102      // Possibly stupid to assign each time, but why bother optimize
103      // for old bugs in boost. Users who are eager for speed, should
104      // either upgrade their boost or use (*iterator).function()
105      // rather than iterator->function().
106      YAT_ASSERT(first_);
107      YAT_ASSERT(second_);
108      dummie_.first() = first_->first;
109      dummie_.second() = second_->second;
110      return &dummie_;
111    }
112  private:
113    mutable BamPair dummie_;
114#endif
115
116  private:
117    Base iter_;
118    Base end_;
119    typedef std::pair<int32_t, int32_t> Position;
120    // multimap with reads' position as key
121    typedef std::multimap<Position, BamRead> MultiMap;
122    std::shared_ptr<MultiMap> reads_;
123    MultiMap::iterator first_;
124    MultiMap::iterator second_;
125    friend class boost::iterator_core_access;
126
127    void assign_pair(void);
128    const BamPairProxy dereference(void) const;
129    bool equal(const BamPairIterator2& other) const;
130    //
131    std::multimap<Position, BamRead>::iterator
132    find_mate(const yat::omic::BamRead&);
133    void increment(void);
134    void pop(void);
135    bool push(void);
136  };
137
138
139  /**
140     Convenient function to create BamPairIterator
141
142     \relates BamPairIterator
143   */
144  template<typename Base>
145  BamPairIterator2<Base> bam_pair_iterator2(Base base, Base end)
146  { return BamPairIterator2<Base>(base, end); }
147
148
149  /**
150     Convenient function to create BamPairIterator that works on empty
151     range. Equivalent to bam_pair_iterator(end, end).
152
153     \relates BamPairIterator
154  */
155  template<typename Base>
156  BamPairIterator2<Base> bam_pair_iterator2(Base end)
157  { return BamPairIterator2<Base>(end, end); }
158
159
160  //             template implementations
161  ///////////////////////////////////////////////////////////////
162
163  template<typename Base>
164  BamPairIterator2<Base>::BamPairIterator2(Base base, Base end)
165    : iter_(base), end_(end),
166      reads_(new std::multimap<Position, BamRead>),
167      first_(reads_->end()), second_(reads_->end())
168  {
169    BOOST_CONCEPT_ASSERT((boost_concepts::ReadableIterator<Base>));
170    BOOST_CONCEPT_ASSERT((boost_concepts::SinglePassIterator<Base>));
171    using boost::Convertible;
172    typedef typename std::iterator_traits<Base>::value_type value_type;
173    BOOST_CONCEPT_ASSERT((Convertible<value_type, BamRead>));
174    assign_pair();
175  }
176
177
178  template<typename Base>
179  BamPairIterator2<Base>::BamPairIterator2(void)
180  {
181    BOOST_CONCEPT_ASSERT((boost::InputIterator<Base>));
182    using boost::Convertible;
183    typedef typename std::iterator_traits<Base>::value_type value_type;
184    BOOST_CONCEPT_ASSERT((Convertible<value_type, BamRead>));
185  }
186
187
188  template<typename Base>
189  void BamPairIterator2<Base>::assign_pair(void)
190  {
191    while (true) {
192      if (reads_->empty())
193        if (!push()) // if no reads, we are done
194          return;
195      YAT_ASSERT(reads_->size());
196      first_ = reads_->begin();
197      second_ = find_mate(first_->second);
198      if (second_ == reads_->end())
199        pop();
200      else
201        break;
202    }
203  }
204
205
206  template<typename Base>
207  const BamPairProxy
208  BamPairIterator2<Base>::dereference(void) const
209  {
210    return BamPairProxy(&first_->second, &second_->second);
211  }
212
213
214  template<typename Base>
215  bool BamPairIterator2<Base>::equal(const BamPairIterator2& other) const
216  {
217    return iter_ == other.iter_ && reads_->size() == other.reads_->size();
218  }
219
220
221  template<typename Base>
222  void BamPairIterator2<Base>::increment(void)
223  {
224    pop();
225    assign_pair();
226  }
227
228
229  template<typename Base>
230  BamPairIterator2<Base>::MultiMap::iterator
231  BamPairIterator2<Base>::find_mate(const yat::omic::BamRead& bam)
232  {
233    Position mate_pos(bam.mtid(), bam.mpos());
234    YAT_ASSERT(reads_->size());
235    // push in reads, ensure all reads with pos==mate_pos has been
236    // pushed in by pushing until pos of last one is one-past mate_pos
237    while (reads_->rbegin()->first <= mate_pos && iter_!=end_)
238      push();
239
240    typedef MultiMap::iterator iterator;
241    std::pair<iterator, iterator> range = reads_->equal_range(mate_pos);
242    for (; range.first != range.second; ++range.first) {
243      if (!same_query_name(bam, range.first->second))
244        continue;
245      // for siamese pair avoid pairing up with self
246      if (bam.pos()==bam.mpos() && bam.tid()==bam.mtid() &&
247          bam.flag() == range.first->second.flag())
248        continue;
249      return range.first;
250    }
251    return reads_->end();
252  }
253
254
255  template<typename Base>
256  void BamPairIterator2<Base>::pop(void)
257  {
258    YAT_ASSERT(reads_->size());
259    reads_->erase(first_);
260  }
261
262
263  template<typename Base>
264  bool BamPairIterator2<Base>::push(void)
265  {
266    if (iter_ == end_)
267      return false;
268    Position position(iter_->tid(), iter_->pos());
269    reads_->insert(reads_->end(), std::make_pair(position, *iter_));
270    ++iter_;
271    return true;
272  }
273
274}}}
275#endif
Note: See TracBrowser for help on using the repository browser.