source: trunk/yat/omic/BamPairIterator2.h @ 4019

Last change on this file since 4019 was 4019, checked in by Peter, 12 months ago

merge 0.18 into trunk

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 7.4 KB
Line 
1#ifndef theplu_yat_omic_bam_pair_iterator2
2#define theplu_yat_omic_bam_pair_iterator2
3
4// $Id: BamPairIterator2.h 4019 2020-11-06 01:55:37Z peter $
5
6/*
7  Copyright (C) 2020 Peter Johansson
8
9  This file is part of the yat library, http://dev.thep.lu.se/yat
10
11  The yat library is free software; you can redistribute it and/or
12  modify it under the terms of the GNU General Public License as
13  published by the Free Software Foundation; either version 3 of the
14  License, or (at your option) any later version.
15
16  The yat library is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  General Public License for more details.
20
21  You should have received a copy of the GNU General Public License
22  along with yat. If not, see <http://www.gnu.org/licenses/>.
23*/
24
25#include "BamPair.h"
26#include "BamRead.h"
27
28#include "yat/utility/config_public.h"
29#include "yat/utility/yat_assert.h"
30
31#include <boost/concept/assert.hpp>
32#include <boost/iterator/iterator_concepts.hpp>
33#include <boost/iterator/iterator_facade.hpp>
34
35#include <iterator>
36#include <map>
37#include <memory>
38#include <utility>
39
40namespace theplu {
41namespace yat {
42namespace omic {
43
44  /**
45     Type Requirments:
46     - \c Base is a \readable_iterator
47     - \c Base is a \single_pass_iterator
48     - \c value_type is BamRead
49
50     This iterator works on a sorted range [begin, end) and provides a
51     covenient way to access the pairs rather than the reads
52     individually. The pairs are iterated such that they appear sorted
53     with respect to \c first().
54
55     If read pairs with large isize (tlen) are included the iterator
56     will consumer a lot of memory because iterator does store
57     virtually all reads in the underlying range between the current
58     \c first() and its mate, \c second().
59
60     Note that BamPairIterator2 is a single-pass iterator, i.e., once it
61     is incremented the behaviour of its copies is undefined.
62
63     \since New in yat 0.18
64   */
65  template<typename Base>
66  class BamPairIterator2
67    : public boost::iterator_facade<
68    BamPairIterator2<Base>, const BamPair, std::input_iterator_tag,
69    const BamPairProxy
70    >
71  {
72  public:
73    /**
74       \brief default constructor
75     */
76    BamPairIterator2(void);
77
78    /**
79       Creates an iterator that will work on [begin, end).
80
81       \note Input range \c [\a begin, \a end \c ) must be sorted or
82       behaviour is undefined.
83     */
84    explicit BamPairIterator2(Base begin, Base end);
85
86#ifndef YAT_HAVE_BOOST_ITERATOR_FACADE_PROXY_PTR
87    /**
88       This is workaround implementation of operator-> when
89       implementation in base class (boost) does not compile. This
90       implementation may be slow, so when using old boost it often
91       prefereble to use operator*.
92     */
93    typename BamPairIterator2::pointer operator->(void) const
94    {
95      // older versions of boost mandates pointer to be
96      // value_type*. To accomplish that we implement this slow thing
97      // which keeps a copy of a value_type as member.
98      // See https://svn.boost.org/trac/boost/ticket/5697 for discussion.
99
100      // Possibly stupid to assign each time, but why bother optimize
101      // for old bugs in boost. Users who are eager for speed, should
102      // either upgrade their boost or use (*iterator).function()
103      // rather than iterator->function().
104      YAT_ASSERT(first_);
105      YAT_ASSERT(second_);
106      dummie_.first() = first_->first;
107      dummie_.second() = second_->second;
108      return &dummie_;
109    }
110  private:
111    mutable BamPair dummie_;
112#endif
113
114  private:
115    Base iter_;
116    Base end_;
117    typedef std::pair<int32_t, int32_t> Position;
118    // multimap with reads' position as key
119    typedef std::multimap<Position, BamRead> MultiMap;
120    std::shared_ptr<MultiMap> reads_;
121    MultiMap::iterator first_;
122    MultiMap::iterator second_;
123    friend class boost::iterator_core_access;
124
125    void assign_pair(void);
126    const BamPairProxy dereference(void) const;
127    bool equal(const BamPairIterator2& other) const;
128    //
129    std::multimap<Position, BamRead>::iterator
130    find_mate(const yat::omic::BamRead&);
131    void increment(void);
132    void pop(void);
133    bool push(void);
134  };
135
136
137  /**
138     Convenient function to create BamPairIterator
139
140     \relates BamPairIterator
141   */
142  template<typename Base>
143  BamPairIterator2<Base> bam_pair_iterator2(Base base, Base end)
144  { return BamPairIterator2<Base>(base, end); }
145
146
147  /**
148     Convenient function to create BamPairIterator that works on empty
149     range. Equivalent to bam_pair_iterator(end, end).
150
151     \relates BamPairIterator
152  */
153  template<typename Base>
154  BamPairIterator2<Base> bam_pair_iterator2(Base end)
155  { return BamPairIterator2<Base>(end, end); }
156
157
158  //             template implementations
159  ///////////////////////////////////////////////////////////////
160
161  template<typename Base>
162  BamPairIterator2<Base>::BamPairIterator2(Base base, Base end)
163    : iter_(base), end_(end),
164      reads_(new std::multimap<Position, BamRead>),
165      first_(reads_->end()), second_(reads_->end())
166  {
167    BOOST_CONCEPT_ASSERT((boost_concepts::ReadableIterator<Base>));
168    BOOST_CONCEPT_ASSERT((boost_concepts::SinglePassIterator<Base>));
169    using boost::Convertible;
170    typedef typename std::iterator_traits<Base>::value_type value_type;
171    BOOST_CONCEPT_ASSERT((Convertible<value_type, BamRead>));
172    assign_pair();
173  }
174
175
176  template<typename Base>
177  BamPairIterator2<Base>::BamPairIterator2(void)
178  {
179    BOOST_CONCEPT_ASSERT((boost::InputIterator<Base>));
180    using boost::Convertible;
181    typedef typename std::iterator_traits<Base>::value_type value_type;
182    BOOST_CONCEPT_ASSERT((Convertible<value_type, BamRead>));
183  }
184
185
186  template<typename Base>
187  void BamPairIterator2<Base>::assign_pair(void)
188  {
189    while (true) {
190      if (reads_->empty())
191        if (!push()) // if no reads, we are done
192          return;
193      YAT_ASSERT(reads_->size());
194      first_ = reads_->begin();
195      second_ = find_mate(first_->second);
196      if (second_ == reads_->end())
197        pop();
198      else
199        break;
200    }
201  }
202
203
204  template<typename Base>
205  const BamPairProxy
206  BamPairIterator2<Base>::dereference(void) const
207  {
208    return BamPairProxy(&first_->second, &second_->second);
209  }
210
211
212  template<typename Base>
213  bool BamPairIterator2<Base>::equal(const BamPairIterator2& other) const
214  {
215    return iter_ == other.iter_ && reads_->size() == other.reads_->size();
216  }
217
218
219  template<typename Base>
220  void BamPairIterator2<Base>::increment(void)
221  {
222    pop();
223    assign_pair();
224  }
225
226
227  template<typename Base>
228  BamPairIterator2<Base>::MultiMap::iterator
229  BamPairIterator2<Base>::find_mate(const yat::omic::BamRead& bam)
230  {
231    Position mate_pos(bam.mtid(), bam.mpos());
232    YAT_ASSERT(reads_->size());
233    // push in reads, ensure all reads with pos==mate_pos has been
234    // pushed in by pushing until pos of last one is one-past mate_pos
235    while (reads_->rbegin()->first <= mate_pos && iter_!=end_)
236      push();
237
238    typedef MultiMap::iterator iterator;
239    std::pair<iterator, iterator> range = reads_->equal_range(mate_pos);
240    for (; range.first != range.second; ++range.first) {
241      if (!same_query_name(bam, range.first->second))
242        continue;
243      // for siamese pair avoid pairing up with self
244      if (bam.pos()==bam.mpos() && bam.tid()==bam.mtid() &&
245          bam.flag() == range.first->second.flag())
246        continue;
247      return range.first;
248    }
249    return reads_->end();
250  }
251
252
253  template<typename Base>
254  void BamPairIterator2<Base>::pop(void)
255  {
256    YAT_ASSERT(reads_->size());
257    reads_->erase(first_);
258  }
259
260
261  template<typename Base>
262  bool BamPairIterator2<Base>::push(void)
263  {
264    if (iter_ == end_)
265      return false;
266    Position position(iter_->tid(), iter_->pos());
267    reads_->insert(reads_->end(), std::make_pair(position, *iter_));
268    ++iter_;
269    return true;
270  }
271
272}}}
273#endif
Note: See TracBrowser for help on using the repository browser.