source: trunk/yat/omic/BamRead.h @ 2986

Last change on this file since 2986 was 2986, checked in by Peter, 10 years ago

merge 0.10-stable branch into trunk

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 7.0 KB
Line 
1#ifndef theplu_yat_omic_bam_read
2#define theplu_yat_omic_bam_read
3
4// $Id: BamRead.h 2986 2013-02-18 08:07:44Z peter $
5//
6// Copyright (C) 2012, 2013 Peter Johansson
7//
8// This program is free software; you can redistribute it and/or modify
9// it under the terms of the GNU General Public License as published by
10// the Free Software Foundation; either version 3 of the License, or
11// (at your option) any later version.
12//
13// This program is distributed in the hope that it will be useful, but
14// WITHOUT ANY WARRANTY; without even the implied warranty of
15// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16// General Public License for more details.
17//
18// You should have received a copy of the GNU General Public License
19// along with this program. If not, see <http://www.gnu.org/licenses/>.
20
21#include "config_bam.h"
22
23#include YAT_BAM_HEADER
24#include YAT_SAM_HEADER
25
26#include <functional>
27#include <string>
28#include <vector>
29
30// backport #defines from samtools 0.1.19
31#ifndef BAM_CIGAR_STR
32#define BAM_CIGAR_STR "MIDNSHP=XB"
33// lookup table used in bam_cigar_type
34#define BAM_CIGAR_TYPE 0x3C1A7
35
36/// index of operation in range [0, 9], see below.
37#define bam_cigar_op(c) ((c)&BAM_CIGAR_MASK)
38// length of the operation
39#define bam_cigar_oplen(c) ((c)>>BAM_CIGAR_SHIFT)
40// char describing the operation, see column 2 in table below
41#define bam_cigar_opchr(c) (BAM_CIGAR_STR[bam_cigar_op(c)])
42// Create a cigar element with length l and operation o
43#define bam_cigar_gen(l, o) ((l)<<BAM_CIGAR_SHIFT|(o))
44// Returns a two-bits number describing the type of operation. The
45// first bit, bam_cigar_type & 1, (4th column in table below) is 1 iff
46// operation consumes query sequence. The second bit, bam_cigar_type &
47// 2, (3rd column below) is 1 iff operation consumes reference
48// sequence.
49#define bam_cigar_type(o) (BAM_CIGAR_TYPE>>((o)<<1)&3)
50#endif
51
52/*
53  CIGAR
540 M 1 1 match
551 I 0 1 insert
562 D 1 0 deletion
573 N 1 0 spliced (rna)
584 S 0 1 soft clipped
595 H 0 0 hard clipped
606 P 0 0 padded
617 = 1 1 equal
628 X 1 1 mismatch
639 B 0 0
64
653rd column: consumes reference sequence
664th column: consumes query sequence
67 */
68
69namespace theplu {
70namespace yat {
71namespace omic {
72
73  /**
74     \brief Class holding a bam query
75
76     This is a wrapper around bam1_t and most of its information is
77     held in the core struct. A BamRead is typically created from a
78     InBamFile.
79
80     \see samtools
81
82     \since New in yat 0.10
83   */
84  class BamRead
85  {
86  public:
87    /**
88       \brief default constructor
89
90       Constructed object contains no data and most operations are not
91       defined
92     */
93    BamRead(void);
94
95    /**
96       \brief Copy constructor
97     */
98    BamRead(const BamRead& other);
99
100    /**
101       \brief Destructor
102     */
103    virtual ~BamRead(void);
104
105    /**
106       \brief assignment operator
107     */
108    const BamRead& operator=(const BamRead& rhs);
109
110    /**
111       \return pointer to auxiliary data
112     */
113    const uint8_t* aux(void) const;
114
115    /**
116       \brief access core data struct
117
118       \see samtools C api documentaion
119     */
120    const bam1_core_t& core(void) const;
121
122    /**
123       \brief access core data struct
124
125       \see samtools C api documentaion
126     */
127    bam1_core_t& core(void);
128
129    /**
130       \brief access CIGAR array
131
132       In each element the lower 4 bits gives a CIGAR operation and
133       the upper 28 bits keep the length.
134     */
135    const uint32_t* cigar(void) const;
136
137    /**
138       \return \a i th element of CIGAR array
139     */
140    uint32_t cigar(size_t i) const;
141
142    /**
143       \return \a i th CIGAR operation
144     */
145    uint32_t cigar_op(size_t i) const;
146
147    /**
148       \return length of \a i th CIGAR element
149     */
150    uint32_t cigar_oplen(size_t i) const;
151
152    /**
153       Translate CIGAR array to a string such as '72M3S'
154     */
155    std::string cigar_str(void) const;
156
157    /**
158       \brief set CIGAR
159
160       \param c new cigar
161    */
162    void cigar(const std::vector<uint32_t>& c);
163
164    /**
165       \return Quality of base \a i
166     */
167    uint8_t qual(size_t i) const;
168
169    /**
170       \return query name
171
172       Length of array is described by core().l_qname
173     */
174    const char* name(void) const;
175
176    /**
177       \brief chromosome ID
178     */
179    int32_t tid(void) const;
180
181    /**
182       \brief 0-based laftmost coordinate
183     */
184    int32_t pos(void) const;
185
186    /**
187       \brief rightmost coordinate
188
189       Coordinate is 0-based, i.e., the end is one passed the last
190       matching position.
191
192       \see bam_calend
193     */
194    int32_t end(void) const;
195
196    /**
197       \brief Chromosome ID for mate
198     */
199    int32_t mtid(void) const;
200
201    /**
202       \brief leftmost position for mate
203     */
204    int32_t mpos(void) const;
205
206    /**
207       Each character in returned sequence is either A, C, G, T, or N.
208
209       \return sequence
210     */
211    std::string sequence(void) const;
212
213    /**
214       4-bit integer describing base \a index
215
216       \see bam_nt16_rev_table
217     */
218    uint8_t sequence(size_t index) const;
219
220    /**
221       \brief modify a base in sequence
222
223       Set i-th base in sequence to \a x, where seq is a 4-bit integer.
224
225       \see bam_nt16_table
226     */
227    void sequence(size_t i, uint8_t x);
228
229    /**
230       \see core().l_qseq
231     */
232    uint32_t sequence_length(void) const;
233
234    /**
235       \brief bitwise flag
236
237       \see Preprocessor defines BAM_F*
238     */
239    uint16_t flag(void) const;
240
241    /**
242       Exchanging this read with \a other.
243     */
244    void swap(BamRead& other);
245
246  private:
247    bam1_t* bam_;
248
249    friend class InBamFile;
250    friend class OutBamFile;
251    friend class BamReadIterator;
252  };
253
254  /**
255     Swap specialization for BamRead that is faster than generic
256     std::swap as it just swap a pointers.
257
258     \since New in yat 0.10
259
260     \relates BamRead
261   */
262  void swap(BamRead& lhs, BamRead& rhs);
263
264  /**
265     \return \c true if read is soft clipped, either left_soft_clipped
266     or right_soft_clipped.
267
268     \since New in yat 0.10
269
270     \relates BamRead
271   */
272  bool soft_clipped(const BamRead& bam);
273
274  /**
275     If read is soft clipped on left side, return how many bases are
276     clipped, otherwise return 0.
277
278     \since New in yat 0.10
279
280     \relates BamRead
281   */
282  uint32_t left_soft_clipped(const BamRead& bam);
283
284  /**
285     If read is soft clipped on right side, return how many bases are
286     clipped, otherwise return 0.
287
288     \since New in yat 0.10
289
290     \relates BamRead
291   */
292  uint32_t right_soft_clipped(const BamRead& bam);
293
294  /**
295     return \c true if query names are equal
296
297     \see BamRead::name()
298
299     \since New in yat 0.10
300
301     \relates BamRead
302   */
303  bool same_query_name(const BamRead& lhs, const BamRead& rhs);
304
305  /**
306     Functor to compare two reads with respect to their leftmost
307     coordinate.
308
309     \see BamRead
310
311     \since New in yat 0.10
312   */
313  struct BamLessPos
314    : public std::binary_function<const BamRead&, const BamRead&, bool>
315  {
316    /**
317       \return true if lhs tid is less than rhs tid; or tid
318       are equal and lhs pos is smaller than rhs pos.
319
320       \see BamRead::tid() and BamRead::pos()
321     */
322    bool operator()(const BamRead& lhs, const BamRead& rhs) const;
323  };
324
325
326  /**
327     Functor to compare two reads with respect to their rightmost
328     coordinate.
329
330     \see BamRead
331
332     \since New in yat 0.10
333   */
334  struct BamLessEnd
335    : public std::binary_function<const BamRead&, const BamRead&, bool>
336  {
337    /**
338       \return true if lhs tid is less than rhs tid; or tid
339       are equal and lhs end is smaller than rhs end.
340
341       \see BamRead::tid() and BamRead::end()
342     */
343    bool operator()(const BamRead& lhs, const BamRead& rhs) const;
344  };
345
346}}}
347#endif
Note: See TracBrowser for help on using the repository browser.