source: trunk/yat/omic/BamPairIterator.h @ 3193

Last change on this file since 3193 was 3193, checked in by Peter, 9 years ago

document limitation due to bug in boost 1.48 (or older)

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 6.0 KB
Line 
1#ifndef theplu_yat_omic_bam_pair_iterator
2#define theplu_yat_omic_bam_pair_iterator
3
4// $Id: BamPairIterator.h 3193 2014-04-22 05:45:36Z peter $
5
6/*
7  Copyright (C) 2014 Peter Johansson
8
9  This file is part of the yat library, http://dev.thep.lu.se/yat
10
11  The yat library is free software; you can redistribute it and/or
12  modify it under the terms of the GNU General Public License as
13  published by the Free Software Foundation; either version 3 of the
14  License, or (at your option) any later version.
15
16  The yat library is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  General Public License for more details.
20
21  You should have received a copy of the GNU General Public License
22  along with this program. If not, see <http://www.gnu.org/licenses/>.
23*/
24
25#include <iostream>
26
27#include "BamPair.h"
28#include "BamRead.h"
29
30#include <boost/concept/assert.hpp>
31#include <boost/iterator/iterator_facade.hpp>
32#include <boost/shared_ptr.hpp>
33
34#include <iterator>
35#include <map>
36#include <utility>
37
38namespace theplu {
39namespace yat {
40namespace omic {
41
42  /**
43     \since New in yat 0.12
44
45     Type Requirements:
46     - \c Iterator must be an \input_iterator
47     - \c Iterator 's \c reference type must be convertible to BamRead
48
49     This iterator works on a sorted range [begin, end) and provies a
50     covenient way to access the pairs rather than the reads
51     individually. When BamPairIterator is incremented, rather than
52     just step to next element in [begin, end), BamPairIterator keeps
53     stepping until it finds a read whose mate it has already seen, and
54     operator* return the pair of BamReads.
55
56     Note that BamPairIterator is a single-pass iterator, i.e., once it
57     is incremented the behaviour of its copies is undefined.
58
59     \note \c operator-> is not working with Boost 1.48 (or
60     older). See <a
61     href="https://svn.boost.org/trac/boost/ticket/5697">Boost Bug
62     #5697</a>
63   */
64  template<typename Base>
65  class BamPairIterator
66    : public boost::iterator_facade<
67    BamPairIterator<Base>, const BamPair, std::input_iterator_tag,
68    const BamPairProxy
69    >
70  {
71  public:
72    /**
73       \brief default constructor
74     */
75    BamPairIterator(void);
76
77    /**
78       Creates an iterator that will work on [begin, end).
79
80       \see bam_pair_iterator
81
82       \note Input range \c [\a first, \a last \c ) must be sorted or
83       behaviour is undefined.
84     */
85    explicit BamPairIterator(Base begin, Base end);
86  private:
87    Base iter_;
88    Base end_;
89    const BamRead* mate_;
90    boost::shared_ptr<std::map<std::string, BamRead> > siam_reads_;
91    typedef std::pair<int32_t, int32_t> Position;
92    boost::shared_ptr<std::multimap<Position, BamRead> > reads_;
93    friend class boost::iterator_core_access;
94
95    const BamPairProxy dereference(void) const;
96    bool equal(const BamPairIterator& other) const;
97    void increment(void);
98    void find_next(void);
99  };
100
101
102  /**
103     Convenient function to create BamPairIterator
104
105     \relates BamPairIterator
106   */
107  template<typename Base>
108  BamPairIterator<Base> bam_pair_iterator(Base base, Base end)
109  { return BamPairIterator<Base>(base, end); }
110
111
112  /**
113     Convenient function to create BamPairIterator that works on empty
114     range. Equivalent to bam_pair_iterator(end, end).
115
116     \relates BamPairIterator
117  */
118  template<typename Base>
119  BamPairIterator<Base> bam_pair_iterator(Base end)
120  { return BamPairIterator<Base>(end, end); }
121
122
123  //             template implementations
124  ///////////////////////////////////////////////////////////////
125
126  template<typename Base>
127  BamPairIterator<Base>::BamPairIterator(Base base, Base end)
128    : iter_(base), end_(end),
129      siam_reads_(new std::map<std::string, BamRead>),
130      reads_(new std::multimap<Position, BamRead>)
131  {
132    BOOST_CONCEPT_ASSERT((boost::InputIterator<Base>));
133    using boost::Convertible;
134    typedef typename std::iterator_traits<Base>::reference reference_type;
135    BOOST_CONCEPT_ASSERT((Convertible<reference_type, BamRead>));
136    find_next();
137  }
138
139
140  template<typename Base>
141  BamPairIterator<Base>::BamPairIterator(void)
142  {
143    BOOST_CONCEPT_ASSERT((boost::InputIterator<Base>));
144    using boost::Convertible;
145    typedef typename std::iterator_traits<Base>::reference reference_type;
146    BOOST_CONCEPT_ASSERT((Convertible<reference_type, BamRead>));
147  }
148
149
150  template<typename Base>
151  const BamPairProxy
152  BamPairIterator<Base>::dereference(void) const
153  {
154    return BamPairProxy(mate_, &*iter_);
155  }
156
157
158  template<typename Base>
159  bool BamPairIterator<Base>::equal(const BamPairIterator& other) const
160  {
161    return iter_ == other.iter_;
162  }
163
164
165  template<typename Base>
166  void BamPairIterator<Base>::increment(void)
167  {
168    ++iter_;
169    find_next();
170  }
171
172
173  template<typename Base>
174  void BamPairIterator<Base>::find_next(void)
175  {
176    BamLessPos less;
177    for (; iter_!=end_; ++iter_) {
178      Position position(iter_->tid(), iter_->pos());
179      Position mate_position(iter_->mtid(), iter_->mpos());
180      // clear siam reads if iter is more advanced than siam reads
181      if (siam_reads_->size() && less(siam_reads_->begin()->second, *iter_))
182        siam_reads_->clear();
183
184      // have not seen mate yet
185      if (mate_position > position)
186        reads_->insert(std::make_pair(mate_position, *iter_));
187      else if (position > mate_position) {
188        std::multimap<Position, BamRead>::iterator
189          lower = reads_->lower_bound(position);
190        // erase all reads with mate position less than current
191        // position (assuming input range is sorted)
192        reads_->erase(reads_->begin(), lower);
193
194        // find mate and assign pair
195        for (; lower!=reads_->end() && lower->first == position; ++lower)
196          if (same_query_name(lower->second, *iter_)) {
197            mate_ = &lower->second;
198            return;
199          }
200      }
201      else { // siam read, i.e., iter_ and mate have same position
202        // check if we have already seen mate and stored it in map
203        std::map<std::string, BamRead>::iterator
204          mate = siam_reads_->lower_bound(iter_->name());
205        if (mate!=siam_reads_->end() && same_query_name(mate->second, *iter_)) {
206          mate_ = &mate->second;
207          return;
208        }
209        else
210          // insert with hint
211          siam_reads_->insert(mate, std::make_pair(iter_->name(), *iter_));
212      }
213    }
214  }
215
216}}}
217#endif
Note: See TracBrowser for help on using the repository browser.