source: trunk/yat/omic/BamPairIterator.h @ 3197

Last change on this file since 3197 was 3197, checked in by Peter, 9 years ago

refs #790. workaround for old boost

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 7.0 KB
Line 
1#ifndef theplu_yat_omic_bam_pair_iterator
2#define theplu_yat_omic_bam_pair_iterator
3
4// $Id: BamPairIterator.h 3197 2014-04-29 01:40:53Z peter $
5
6/*
7  Copyright (C) 2014 Peter Johansson
8
9  This file is part of the yat library, http://dev.thep.lu.se/yat
10
11  The yat library is free software; you can redistribute it and/or
12  modify it under the terms of the GNU General Public License as
13  published by the Free Software Foundation; either version 3 of the
14  License, or (at your option) any later version.
15
16  The yat library is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  General Public License for more details.
20
21  You should have received a copy of the GNU General Public License
22  along with this program. If not, see <http://www.gnu.org/licenses/>.
23*/
24
25#include <iostream>
26
27#include "BamPair.h"
28#include "BamRead.h"
29
30#include <yat/utility/yat_assert.h>
31
32#include <boost/concept/assert.hpp>
33#include <boost/iterator/iterator_facade.hpp>
34#include <boost/shared_ptr.hpp>
35
36#include <iterator>
37#include <map>
38#include <utility>
39
40namespace theplu {
41namespace yat {
42namespace omic {
43
44  /**
45     \since New in yat 0.12
46
47     Type Requirements:
48     - \c Iterator must be an \input_iterator
49     - \c Iterator 's \c reference type must be convertible to BamRead
50
51     This iterator works on a sorted range [begin, end) and provies a
52     covenient way to access the pairs rather than the reads
53     individually. When BamPairIterator is incremented, rather than
54     just step to next element in [begin, end), BamPairIterator keeps
55     stepping until it finds a read whose mate it has already seen, and
56     operator* return the pair of BamReads.
57
58     Note that BamPairIterator is a single-pass iterator, i.e., once it
59     is incremented the behaviour of its copies is undefined.
60
61     \note \c operator-> is not working with Boost 1.48 (or
62     older). See <a
63     href="https://svn.boost.org/trac/boost/ticket/5697">Boost Bug
64     #5697</a>
65   */
66  template<typename Base>
67  class BamPairIterator
68    : public boost::iterator_facade<
69    BamPairIterator<Base>, const BamPair, std::input_iterator_tag,
70    const BamPairProxy
71    >
72  {
73  public:
74    /**
75       \brief default constructor
76     */
77    BamPairIterator(void);
78
79    /**
80       Creates an iterator that will work on [begin, end).
81
82       \see bam_pair_iterator
83
84       \note Input range \c [\a first, \a last \c ) must be sorted or
85       behaviour is undefined.
86     */
87    explicit BamPairIterator(Base begin, Base end);
88
89#if BOOST_VERSION < 104900
90    /**
91       This is workaround implementation of operator-> when
92       implementation in base class (boost) does not compile. This
93       implementation may be slow, so when using old boost it often
94       prefereble to use operator*.
95     */
96    typename BamPairIterator::pointer operator->(void) const
97    {
98      // older versions of boost mandates pointer to be
99      // value_type*. To accomplish that we implement this slow thing
100      // which keeps a copy of a value_type as member.
101      // See https://svn.boost.org/trac/boost/ticket/5697 for discussion.
102
103      // Possibly stupid to assign each time, but why bother optimize
104      // for old bugs in boost. Users who are eager for speed, should
105      // either upgrade their boost or use (*iterator).function()
106      // rather than iterator->function().
107      YAT_ASSERT(mate_);
108      YAT_ASSERT(iter_ != end_);
109      dummie_.first() = *mate_;
110      dummie_.second() = *iter_;
111      return &dummie_;
112    }
113  private:
114    mutable BamPair dummie_;
115#endif
116
117  private:
118    Base iter_;
119    Base end_;
120    const BamRead* mate_;
121    boost::shared_ptr<std::map<std::string, BamRead> > siam_reads_;
122    typedef std::pair<int32_t, int32_t> Position;
123    boost::shared_ptr<std::multimap<Position, BamRead> > reads_;
124    friend class boost::iterator_core_access;
125
126    const BamPairProxy dereference(void) const;
127    bool equal(const BamPairIterator& other) const;
128    void increment(void);
129    void find_next(void);
130  };
131
132
133  /**
134     Convenient function to create BamPairIterator
135
136     \relates BamPairIterator
137   */
138  template<typename Base>
139  BamPairIterator<Base> bam_pair_iterator(Base base, Base end)
140  { return BamPairIterator<Base>(base, end); }
141
142
143  /**
144     Convenient function to create BamPairIterator that works on empty
145     range. Equivalent to bam_pair_iterator(end, end).
146
147     \relates BamPairIterator
148  */
149  template<typename Base>
150  BamPairIterator<Base> bam_pair_iterator(Base end)
151  { return BamPairIterator<Base>(end, end); }
152
153
154  //             template implementations
155  ///////////////////////////////////////////////////////////////
156
157  template<typename Base>
158  BamPairIterator<Base>::BamPairIterator(Base base, Base end)
159    : iter_(base), end_(end),
160      siam_reads_(new std::map<std::string, BamRead>),
161      reads_(new std::multimap<Position, BamRead>)
162  {
163    BOOST_CONCEPT_ASSERT((boost::InputIterator<Base>));
164    using boost::Convertible;
165    typedef typename std::iterator_traits<Base>::reference reference_type;
166    BOOST_CONCEPT_ASSERT((Convertible<reference_type, BamRead>));
167    find_next();
168  }
169
170
171  template<typename Base>
172  BamPairIterator<Base>::BamPairIterator(void)
173  {
174    BOOST_CONCEPT_ASSERT((boost::InputIterator<Base>));
175    using boost::Convertible;
176    typedef typename std::iterator_traits<Base>::reference reference_type;
177    BOOST_CONCEPT_ASSERT((Convertible<reference_type, BamRead>));
178  }
179
180
181  template<typename Base>
182  const BamPairProxy
183  BamPairIterator<Base>::dereference(void) const
184  {
185    return BamPairProxy(mate_, &*iter_);
186  }
187
188
189  template<typename Base>
190  bool BamPairIterator<Base>::equal(const BamPairIterator& other) const
191  {
192    return iter_ == other.iter_;
193  }
194
195
196  template<typename Base>
197  void BamPairIterator<Base>::increment(void)
198  {
199    ++iter_;
200    find_next();
201  }
202
203
204  template<typename Base>
205  void BamPairIterator<Base>::find_next(void)
206  {
207    BamLessPos less;
208    for (; iter_!=end_; ++iter_) {
209      Position position(iter_->tid(), iter_->pos());
210      Position mate_position(iter_->mtid(), iter_->mpos());
211      // clear siam reads if iter is more advanced than siam reads
212      if (siam_reads_->size() && less(siam_reads_->begin()->second, *iter_))
213        siam_reads_->clear();
214
215      // have not seen mate yet
216      if (mate_position > position)
217        reads_->insert(std::make_pair(mate_position, *iter_));
218      else if (position > mate_position) {
219        std::multimap<Position, BamRead>::iterator
220          lower = reads_->lower_bound(position);
221        // erase all reads with mate position less than current
222        // position (assuming input range is sorted)
223        reads_->erase(reads_->begin(), lower);
224
225        // find mate and assign pair
226        for (; lower!=reads_->end() && lower->first == position; ++lower)
227          if (same_query_name(lower->second, *iter_)) {
228            mate_ = &lower->second;
229            return;
230          }
231      }
232      else { // siam read, i.e., iter_ and mate have same position
233        // check if we have already seen mate and stored it in map
234        std::map<std::string, BamRead>::iterator
235          mate = siam_reads_->lower_bound(iter_->name());
236        if (mate!=siam_reads_->end() && same_query_name(mate->second, *iter_)) {
237          mate_ = &mate->second;
238          return;
239        }
240        else
241          // insert with hint
242          siam_reads_->insert(mate, std::make_pair(iter_->name(), *iter_));
243      }
244    }
245  }
246
247}}}
248#endif
Note: See TracBrowser for help on using the repository browser.