source: trunk/yat/omic/BamRead.h @ 2911

Last change on this file since 2911 was 2911, checked in by Peter, 9 years ago

accidently included in r2910

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 6.2 KB
Line 
1#ifndef theplu_yat_omic_bam_read
2#define theplu_yat_omic_bam_read
3
4// $Id: BamRead.h 2911 2012-12-17 02:32:48Z peter $
5//
6// Copyright (C) 2012 Peter Johansson
7//
8// This program is free software; you can redistribute it and/or modify
9// it under the terms of the GNU General Public License as published by
10// the Free Software Foundation; either version 3 of the License, or
11// (at your option) any later version.
12//
13// This program is distributed in the hope that it will be useful, but
14// WITHOUT ANY WARRANTY; without even the implied warranty of
15// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16// General Public License for more details.
17//
18// You should have received a copy of the GNU General Public License
19// along with this program. If not, see <http://www.gnu.org/licenses/>.
20
21#include <sam.h>
22#include <bam.h>
23
24#include <functional>
25#include <string>
26#include <vector>
27
28// backport #defines from samtools 0.1.19
29#ifndef BAM_CIGAR_STR
30#define BAM_CIGAR_STR "MIDNSHP=XB"
31#define BAM_CIGAR_TYPE 0x3C1A7
32
33#define bam_cigar_op(c) ((c)&BAM_CIGAR_MASK)
34#define bam_cigar_oplen(c) ((c)>>BAM_CIGAR_SHIFT)
35#define bam_cigar_opchr(c) (BAM_CIGAR_STR[bam_cigar_op(c)])
36#define bam_cigar_gen(l, o) ((l)<<BAM_CIGAR_SHIFT|(o))
37#define bam_cigar_type(o) (BAM_CIGAR_TYPE>>((o)<<1)&3)
38#endif
39
40/*
41  CIGAR
420 M 1 1 match
431 I 0 1 insert
442 D 1 0 deletion
453 N 1 0 spliced (rna)
464 S 0 1 soft clipped
475 H 0 0 hard clipped
486 P 0 0 padded
497 = 1 1 equal
508 X 1 1 mismatch
519 B 0 0
52
533rd column: consumes reference sequence
544th column: consumes query sequence
55 */
56
57namespace theplu {
58namespace yat {
59namespace omic {
60
61  /**
62     \brief Class holding a bam query
63
64     This is a wrapper around bam1_t and most of its information is
65     held in the core struct. A BamRead is typically created from a
66     InBamFile.
67
68     \see samtools
69
70     \since New in yat 0.10
71   */
72  class BamRead
73  {
74  public:
75    /**
76       \brief default constructor
77
78       Constructed object contains no data and most operations are not
79       defined
80     */
81    BamRead(void);
82
83    /**
84       \brief Copy constructor
85     */
86    BamRead(const BamRead& other);
87
88    /**
89       \brief Destructor
90     */
91    virtual ~BamRead(void);
92
93    /**
94       \brief assignment operator
95     */
96    const BamRead& operator=(const BamRead& rhs);
97
98    /**
99       \return pointer to auxiliary data
100     */
101    const uint8_t* aux(void) const;
102
103    /**
104       \brief access core data struct
105
106       \see samtools C api documentaion
107     */
108    const bam1_core_t& core(void) const;
109
110    /**
111       \brief access core data struct
112
113       \see samtools C api documentaion
114     */
115    bam1_core_t& core(void);
116
117    /**
118       \brief access CIGAR array
119
120       In each element the lower 4 bits gives a CIGAR operation and
121       the upper 28 bits keep the length.
122     */
123    const uint32_t* cigar(void) const;
124
125    /**
126       \return \a i th element of CIGAR array
127     */
128    uint32_t cigar(size_t i) const;
129
130    /**
131       \return \a i th CIGAR operation
132     */
133    uint32_t cigar_op(size_t i) const;
134
135    /**
136       \return length of \a i th CIGAR element
137     */
138    uint32_t cigar_oplen(size_t i) const;
139
140    /**
141       Translate CIGAR array to a string such as '72M3S'
142     */
143    std::string cigar_str(void) const;
144
145    /**
146       \brief set CIGAR
147
148       \param c new cigar
149    */
150    void cigar(const std::vector<uint32_t>& c);
151
152    /**
153       \return Quality of base \a i
154     */
155    uint8_t qual(size_t i) const;
156
157    /**
158       \return query name
159
160       Length of array is described by core().l_qname
161     */
162    const char* name(void) const;
163
164    /**
165       \brief chromosome ID
166     */
167    int32_t tid(void) const;
168
169    /**
170       \brief 0-based laftmost coordinate
171     */
172    int32_t pos(void) const;
173
174    /**
175       \brief rightmost coordinate
176
177       Coordinate is 0-based, i.e., the end is one passed the last
178       matching position.
179
180       \see bam_calend
181     */
182    int32_t end(void) const;
183
184    /**
185       \brief Chromosome ID for mate
186     */
187    int32_t mtid(void) const;
188
189    /**
190       \brief leftmost position for mate
191     */
192    int32_t mpos(void) const;
193
194    /**
195       Each character in returned sequence is either A, C, G, T, or N.
196
197       \return sequence
198     */
199    std::string sequence(void) const;
200
201    /**
202       4-bit integer describing base \a index
203
204       \see bam_nt16_rev_table
205     */
206    uint8_t sequence(size_t index) const;
207
208    /**
209       \see core().l_qseq
210     */
211    uint32_t sequence_length(void) const;
212
213    /**
214       \brief bitwise flag
215
216       \see Preprocessor defines BAM_F*
217     */
218    uint16_t flag(void) const;
219
220    /**
221       Exchanging this read with \a other.
222     */
223    void swap(BamRead& other);
224
225  private:
226    bam1_t* bam_;
227
228    friend class InBamFile;
229    friend class OutBamFile;
230    friend class BamReadIterator;
231  };
232
233  /**
234     Swap specialization for BamRead that is faster than generic
235     std::swap as it just swap a pointers.
236
237     \since New in yat 0.10
238   */
239  void swap(BamRead& lhs, BamRead& rhs);
240
241  /**
242     \return \c true if read is soft clipped, either left_soft_clipped
243     or right_soft_clipped.
244
245     \since New in yat 0.10
246   */
247  bool soft_clipped(const BamRead& bam);
248
249  /**
250     If read is soft clipped on left side, return how many bases are
251     clipped, otherwise return 0.
252
253     \since New in yat 0.10
254   */
255  uint32_t left_soft_clipped(const BamRead& bam);
256
257  /**
258     If read is soft clipped on right side, return how many bases are
259     clipped, otherwise return 0.
260
261     \since New in yat 0.10
262   */
263  uint32_t right_soft_clipped(const BamRead& bam);
264
265  /**
266     return \c true if query names are equal
267
268     \see BamRead::name()
269
270     \since New in yat 0.10
271   */
272  bool same_query_name(const BamRead& lhs, const BamRead& rhs);
273
274  /**
275     Functor to compare two reads with respect to their leftmost
276     coordinate.
277
278     \see BamRead
279
280     \since New in yat 0.10
281   */
282  struct BamLessPos
283    : public std::binary_function<const BamRead&, const BamRead&, bool>
284  {
285    /**
286       \return true if lhs tid is less than rhs tid; or tid
287       are equal and lhs pos is smaller than rhs pos.
288
289       \see BamRead::tid() and BamRead::pos()
290     */
291    bool operator()(const BamRead& lhs, const BamRead& rhs) const;
292  };
293
294
295  /**
296     Functor to compare two reads with respect to their rightmost
297     coordinate.
298
299     \see BamRead
300
301     \since New in yat 0.10
302   */
303  struct BamLessEnd
304    : public std::binary_function<const BamRead&, const BamRead&, bool>
305  {
306    /**
307       \return true if lhs tid is less than rhs tid; or tid
308       are equal and lhs end is smaller than rhs end.
309
310       \see BamRead::tid() and BamRead::end()
311     */
312    bool operator()(const BamRead& lhs, const BamRead& rhs) const;
313  };
314
315}}}
316#endif
Note: See TracBrowser for help on using the repository browser.