source: trunk/yat/omic/BamRead.h @ 3026

Last change on this file since 3026 was 3026, checked in by Peter, 10 years ago

add function to modify a base quality (BamRead?). refs #746

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 7.3 KB
Line 
1#ifndef theplu_yat_omic_bam_read
2#define theplu_yat_omic_bam_read
3
4// $Id: BamRead.h 3026 2013-04-06 07:31:02Z peter $
5
6/*
7  Copyright (C) 2012, 2013 Peter Johansson
8
9  This file is part of the yat library, http://dev.thep.lu.se/yat
10
11  The yat library is free software; you can redistribute it and/or
12  modify it under the terms of the GNU General Public License as
13  published by the Free Software Foundation; either version 3 of the
14  License, or (at your option) any later version.
15
16  The yat library is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  General Public License for more details.
20
21  You should have received a copy of the GNU General Public License
22  along with this program. If not, see <http://www.gnu.org/licenses/>.
23*/
24
25#include "config_bam.h"
26
27#include YAT_BAM_HEADER
28#include YAT_SAM_HEADER
29
30#include <functional>
31#include <string>
32#include <vector>
33
34// backport #defines from samtools 0.1.19
35#ifndef BAM_CIGAR_STR
36#define BAM_CIGAR_STR "MIDNSHP=XB"
37// lookup table used in bam_cigar_type
38#define BAM_CIGAR_TYPE 0x3C1A7
39
40/// index of operation in range [0, 9], see below.
41#define bam_cigar_op(c) ((c)&BAM_CIGAR_MASK)
42// length of the operation
43#define bam_cigar_oplen(c) ((c)>>BAM_CIGAR_SHIFT)
44// char describing the operation, see column 2 in table below
45#define bam_cigar_opchr(c) (BAM_CIGAR_STR[bam_cigar_op(c)])
46// Create a cigar element with length l and operation o
47#define bam_cigar_gen(l, o) ((l)<<BAM_CIGAR_SHIFT|(o))
48// Returns a two-bits number describing the type of operation. The
49// first bit, bam_cigar_type & 1, (4th column in table below) is 1 iff
50// operation consumes query sequence. The second bit, bam_cigar_type &
51// 2, (3rd column below) is 1 iff operation consumes reference
52// sequence.
53#define bam_cigar_type(o) (BAM_CIGAR_TYPE>>((o)<<1)&3)
54#endif
55
56/*
57  CIGAR
580 M 1 1 match
591 I 0 1 insert
602 D 1 0 deletion
613 N 1 0 spliced (rna)
624 S 0 1 soft clipped
635 H 0 0 hard clipped
646 P 0 0 padded
657 = 1 1 equal
668 X 1 1 mismatch
679 B 0 0
68
693rd column: consumes reference sequence
704th column: consumes query sequence
71 */
72
73namespace theplu {
74namespace yat {
75namespace omic {
76
77  /**
78     \brief Class holding a bam query
79
80     This is a wrapper around bam1_t and most of its information is
81     held in the core struct. A BamRead is typically created from a
82     InBamFile.
83
84     \see samtools
85
86     \since New in yat 0.10
87   */
88  class BamRead
89  {
90  public:
91    /**
92       \brief default constructor
93
94       Constructed object contains no data and most operations are not
95       defined
96     */
97    BamRead(void);
98
99    /**
100       \brief Copy constructor
101     */
102    BamRead(const BamRead& other);
103
104    /**
105       \brief Destructor
106     */
107    virtual ~BamRead(void);
108
109    /**
110       \brief assignment operator
111     */
112    const BamRead& operator=(const BamRead& rhs);
113
114    /**
115       \return pointer to auxiliary data
116     */
117    const uint8_t* aux(void) const;
118
119    /**
120       \brief access core data struct
121
122       \see samtools C api documentaion
123     */
124    const bam1_core_t& core(void) const;
125
126    /**
127       \brief access core data struct
128
129       \see samtools C api documentaion
130     */
131    bam1_core_t& core(void);
132
133    /**
134       \brief access CIGAR array
135
136       In each element the lower 4 bits gives a CIGAR operation and
137       the upper 28 bits keep the length.
138     */
139    const uint32_t* cigar(void) const;
140
141    /**
142       \return \a i th element of CIGAR array
143     */
144    uint32_t cigar(size_t i) const;
145
146    /**
147       \return \a i th CIGAR operation
148     */
149    uint32_t cigar_op(size_t i) const;
150
151    /**
152       \return length of \a i th CIGAR element
153     */
154    uint32_t cigar_oplen(size_t i) const;
155
156    /**
157       Translate CIGAR array to a string such as '72M3S'
158     */
159    std::string cigar_str(void) const;
160
161    /**
162       \brief set CIGAR
163
164       \param c new cigar
165    */
166    void cigar(const std::vector<uint32_t>& c);
167
168    /**
169       \return Quality of base \a i
170     */
171    uint8_t qual(size_t i) const;
172
173    /**
174       \brief set quality of a base
175
176       \param i base to modify
177       \param q new quality
178
179       \since New in yat 0.11
180     */
181    void qual(size_t i, uint8_t q);
182
183    /**
184       \return query name
185
186       Length of array is described by core().l_qname
187     */
188    const char* name(void) const;
189
190    /**
191       \brief chromosome ID
192     */
193    int32_t tid(void) const;
194
195    /**
196       \brief 0-based laftmost coordinate
197     */
198    int32_t pos(void) const;
199
200    /**
201       \brief rightmost coordinate
202
203       Coordinate is 0-based, i.e., the end is one passed the last
204       matching position.
205
206       \see bam_calend
207     */
208    int32_t end(void) const;
209
210    /**
211       \brief Chromosome ID for mate
212     */
213    int32_t mtid(void) const;
214
215    /**
216       \brief leftmost position for mate
217     */
218    int32_t mpos(void) const;
219
220    /**
221       Each character in returned sequence is either A, C, G, T, or N.
222
223       \return sequence
224     */
225    std::string sequence(void) const;
226
227    /**
228       4-bit integer describing base \a index
229
230       \see bam_nt16_rev_table
231     */
232    uint8_t sequence(size_t index) const;
233
234    /**
235       \brief modify a base in sequence
236
237       Set i-th base in sequence to \a x, where seq is a 4-bit integer.
238
239       \see bam_nt16_table
240
241       \since New in yat 0.11
242     */
243    void sequence(size_t i, uint8_t x);
244
245    /**
246       \see core().l_qseq
247     */
248    uint32_t sequence_length(void) const;
249
250    /**
251       \brief bitwise flag
252
253       \see Preprocessor defines BAM_F*
254     */
255    uint16_t flag(void) const;
256
257    /**
258       Exchanging this read with \a other.
259     */
260    void swap(BamRead& other);
261
262  private:
263    bam1_t* bam_;
264
265    friend class InBamFile;
266    friend class OutBamFile;
267    friend class BamReadIterator;
268  };
269
270  /**
271     Swap specialization for BamRead that is faster than generic
272     std::swap as it just swap a pointers.
273
274     \since New in yat 0.10
275
276     \relates BamRead
277   */
278  void swap(BamRead& lhs, BamRead& rhs);
279
280  /**
281     \return \c true if read is soft clipped, either left_soft_clipped
282     or right_soft_clipped.
283
284     \since New in yat 0.10
285
286     \relates BamRead
287   */
288  bool soft_clipped(const BamRead& bam);
289
290  /**
291     If read is soft clipped on left side, return how many bases are
292     clipped, otherwise return 0.
293
294     \since New in yat 0.10
295
296     \relates BamRead
297   */
298  uint32_t left_soft_clipped(const BamRead& bam);
299
300  /**
301     If read is soft clipped on right side, return how many bases are
302     clipped, otherwise return 0.
303
304     \since New in yat 0.10
305
306     \relates BamRead
307   */
308  uint32_t right_soft_clipped(const BamRead& bam);
309
310  /**
311     return \c true if query names are equal
312
313     \see BamRead::name()
314
315     \since New in yat 0.10
316
317     \relates BamRead
318   */
319  bool same_query_name(const BamRead& lhs, const BamRead& rhs);
320
321  /**
322     Functor to compare two reads with respect to their leftmost
323     coordinate.
324
325     \see BamRead
326
327     \since New in yat 0.10
328   */
329  struct BamLessPos
330    : public std::binary_function<const BamRead&, const BamRead&, bool>
331  {
332    /**
333       \return true if lhs tid is less than rhs tid; or tid
334       are equal and lhs pos is smaller than rhs pos.
335
336       \see BamRead::tid() and BamRead::pos()
337     */
338    bool operator()(const BamRead& lhs, const BamRead& rhs) const;
339  };
340
341
342  /**
343     Functor to compare two reads with respect to their rightmost
344     coordinate.
345
346     \see BamRead
347
348     \since New in yat 0.10
349   */
350  struct BamLessEnd
351    : public std::binary_function<const BamRead&, const BamRead&, bool>
352  {
353    /**
354       \return true if lhs tid is less than rhs tid; or tid
355       are equal and lhs end is smaller than rhs end.
356
357       \see BamRead::tid() and BamRead::end()
358     */
359    bool operator()(const BamRead& lhs, const BamRead& rhs) const;
360  };
361
362}}}
363#endif
Note: See TracBrowser for help on using the repository browser.