source: trunk/yat/omic/BamPairIterator.h @ 3199

Last change on this file since 3199 was 3199, checked in by Peter, 9 years ago

fixes #790

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 6.9 KB
Line 
1#ifndef theplu_yat_omic_bam_pair_iterator
2#define theplu_yat_omic_bam_pair_iterator
3
4// $Id: BamPairIterator.h 3199 2014-04-30 10:27:08Z peter $
5
6/*
7  Copyright (C) 2014 Peter Johansson
8
9  This file is part of the yat library, http://dev.thep.lu.se/yat
10
11  The yat library is free software; you can redistribute it and/or
12  modify it under the terms of the GNU General Public License as
13  published by the Free Software Foundation; either version 3 of the
14  License, or (at your option) any later version.
15
16  The yat library is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  General Public License for more details.
20
21  You should have received a copy of the GNU General Public License
22  along with this program. If not, see <http://www.gnu.org/licenses/>.
23*/
24
25#include "BamPair.h"
26#include "BamRead.h"
27
28#include <yat/utility/config_public.h>
29#include <yat/utility/yat_assert.h>
30
31#include <boost/concept/assert.hpp>
32#include <boost/iterator/iterator_facade.hpp>
33#include <boost/shared_ptr.hpp>
34
35#include <iterator>
36#include <map>
37#include <utility>
38
39namespace theplu {
40namespace yat {
41namespace omic {
42
43  /**
44     \since New in yat 0.12
45
46     Type Requirements:
47     - \c Iterator must be an \input_iterator
48     - \c Iterator 's \c reference type must be convertible to BamRead
49
50     This iterator works on a sorted range [begin, end) and provies a
51     covenient way to access the pairs rather than the reads
52     individually. When BamPairIterator is incremented, rather than
53     just step to next element in [begin, end), BamPairIterator keeps
54     stepping until it finds a read whose mate it has already seen, and
55     operator* return the pair of BamReads.
56
57     Note that BamPairIterator is a single-pass iterator, i.e., once it
58     is incremented the behaviour of its copies is undefined.
59   */
60  template<typename Base>
61  class BamPairIterator
62    : public boost::iterator_facade<
63    BamPairIterator<Base>, const BamPair, std::input_iterator_tag,
64    const BamPairProxy
65    >
66  {
67  public:
68    /**
69       \brief default constructor
70     */
71    BamPairIterator(void);
72
73    /**
74       Creates an iterator that will work on [begin, end).
75
76       \see bam_pair_iterator
77
78       \note Input range \c [\a first, \a last \c ) must be sorted or
79       behaviour is undefined.
80     */
81    explicit BamPairIterator(Base begin, Base end);
82
83#ifndef YAT_HAVE_BOOST_ITERATOR_FACADE_PROXY_PTR
84    /**
85       This is workaround implementation of operator-> when
86       implementation in base class (boost) does not compile. This
87       implementation may be slow, so when using old boost it often
88       prefereble to use operator*.
89     */
90    typename BamPairIterator::pointer operator->(void) const
91    {
92      // older versions of boost mandates pointer to be
93      // value_type*. To accomplish that we implement this slow thing
94      // which keeps a copy of a value_type as member.
95      // See https://svn.boost.org/trac/boost/ticket/5697 for discussion.
96
97      // Possibly stupid to assign each time, but why bother optimize
98      // for old bugs in boost. Users who are eager for speed, should
99      // either upgrade their boost or use (*iterator).function()
100      // rather than iterator->function().
101      YAT_ASSERT(mate_);
102      YAT_ASSERT(iter_ != end_);
103      dummie_.first() = *mate_;
104      dummie_.second() = *iter_;
105      return &dummie_;
106    }
107  private:
108    mutable BamPair dummie_;
109#endif
110
111  private:
112    Base iter_;
113    Base end_;
114    const BamRead* mate_;
115    boost::shared_ptr<std::map<std::string, BamRead> > siam_reads_;
116    typedef std::pair<int32_t, int32_t> Position;
117    boost::shared_ptr<std::multimap<Position, BamRead> > reads_;
118    friend class boost::iterator_core_access;
119
120    const BamPairProxy dereference(void) const;
121    bool equal(const BamPairIterator& other) const;
122    void increment(void);
123    void find_next(void);
124  };
125
126
127  /**
128     Convenient function to create BamPairIterator
129
130     \relates BamPairIterator
131   */
132  template<typename Base>
133  BamPairIterator<Base> bam_pair_iterator(Base base, Base end)
134  { return BamPairIterator<Base>(base, end); }
135
136
137  /**
138     Convenient function to create BamPairIterator that works on empty
139     range. Equivalent to bam_pair_iterator(end, end).
140
141     \relates BamPairIterator
142  */
143  template<typename Base>
144  BamPairIterator<Base> bam_pair_iterator(Base end)
145  { return BamPairIterator<Base>(end, end); }
146
147
148  //             template implementations
149  ///////////////////////////////////////////////////////////////
150
151  template<typename Base>
152  BamPairIterator<Base>::BamPairIterator(Base base, Base end)
153    : iter_(base), end_(end),
154      siam_reads_(new std::map<std::string, BamRead>),
155      reads_(new std::multimap<Position, BamRead>)
156  {
157    BOOST_CONCEPT_ASSERT((boost::InputIterator<Base>));
158    using boost::Convertible;
159    typedef typename std::iterator_traits<Base>::reference reference_type;
160    BOOST_CONCEPT_ASSERT((Convertible<reference_type, BamRead>));
161    find_next();
162  }
163
164
165  template<typename Base>
166  BamPairIterator<Base>::BamPairIterator(void)
167  {
168    BOOST_CONCEPT_ASSERT((boost::InputIterator<Base>));
169    using boost::Convertible;
170    typedef typename std::iterator_traits<Base>::reference reference_type;
171    BOOST_CONCEPT_ASSERT((Convertible<reference_type, BamRead>));
172  }
173
174
175  template<typename Base>
176  const BamPairProxy
177  BamPairIterator<Base>::dereference(void) const
178  {
179    return BamPairProxy(mate_, &*iter_);
180  }
181
182
183  template<typename Base>
184  bool BamPairIterator<Base>::equal(const BamPairIterator& other) const
185  {
186    return iter_ == other.iter_;
187  }
188
189
190  template<typename Base>
191  void BamPairIterator<Base>::increment(void)
192  {
193    ++iter_;
194    find_next();
195  }
196
197
198  template<typename Base>
199  void BamPairIterator<Base>::find_next(void)
200  {
201    BamLessPos less;
202    for (; iter_!=end_; ++iter_) {
203      Position position(iter_->tid(), iter_->pos());
204      Position mate_position(iter_->mtid(), iter_->mpos());
205      // clear siam reads if iter is more advanced than siam reads
206      if (siam_reads_->size() && less(siam_reads_->begin()->second, *iter_))
207        siam_reads_->clear();
208
209      // have not seen mate yet
210      if (mate_position > position)
211        reads_->insert(std::make_pair(mate_position, *iter_));
212      else if (position > mate_position) {
213        std::multimap<Position, BamRead>::iterator
214          lower = reads_->lower_bound(position);
215        // erase all reads with mate position less than current
216        // position (assuming input range is sorted)
217        reads_->erase(reads_->begin(), lower);
218
219        // find mate and assign pair
220        for (; lower!=reads_->end() && lower->first == position; ++lower)
221          if (same_query_name(lower->second, *iter_)) {
222            mate_ = &lower->second;
223            return;
224          }
225      }
226      else { // siam read, i.e., iter_ and mate have same position
227        // check if we have already seen mate and stored it in map
228        std::map<std::string, BamRead>::iterator
229          mate = siam_reads_->lower_bound(iter_->name());
230        if (mate!=siam_reads_->end() && same_query_name(mate->second, *iter_)) {
231          mate_ = &mate->second;
232          return;
233        }
234        else
235          // insert with hint
236          siam_reads_->insert(mate, std::make_pair(iter_->name(), *iter_));
237      }
238    }
239  }
240
241}}}
242#endif
Note: See TracBrowser for help on using the repository browser.