source: trunk/yat/omic/BamRead.h @ 3028

Last change on this file since 3028 was 3028, checked in by Peter, 10 years ago

functions to access, append, and remove aux field. refs #746

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 7.7 KB
Line 
1#ifndef theplu_yat_omic_bam_read
2#define theplu_yat_omic_bam_read
3
4// $Id: BamRead.h 3028 2013-04-21 00:37:31Z peter $
5
6/*
7  Copyright (C) 2012, 2013 Peter Johansson
8
9  This file is part of the yat library, http://dev.thep.lu.se/yat
10
11  The yat library is free software; you can redistribute it and/or
12  modify it under the terms of the GNU General Public License as
13  published by the Free Software Foundation; either version 3 of the
14  License, or (at your option) any later version.
15
16  The yat library is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  General Public License for more details.
20
21  You should have received a copy of the GNU General Public License
22  along with this program. If not, see <http://www.gnu.org/licenses/>.
23*/
24
25#include "config_bam.h"
26
27#include YAT_BAM_HEADER
28#include YAT_SAM_HEADER
29
30#include <functional>
31#include <string>
32#include <vector>
33
34// backport #defines from samtools 0.1.19
35#ifndef BAM_CIGAR_STR
36#define BAM_CIGAR_STR "MIDNSHP=XB"
37// lookup table used in bam_cigar_type
38#define BAM_CIGAR_TYPE 0x3C1A7
39
40/// index of operation in range [0, 9], see below.
41#define bam_cigar_op(c) ((c)&BAM_CIGAR_MASK)
42// length of the operation
43#define bam_cigar_oplen(c) ((c)>>BAM_CIGAR_SHIFT)
44// char describing the operation, see column 2 in table below
45#define bam_cigar_opchr(c) (BAM_CIGAR_STR[bam_cigar_op(c)])
46// Create a cigar element with length l and operation o
47#define bam_cigar_gen(l, o) ((l)<<BAM_CIGAR_SHIFT|(o))
48// Returns a two-bits number describing the type of operation. The
49// first bit, bam_cigar_type & 1, (4th column in table below) is 1 iff
50// operation consumes query sequence. The second bit, bam_cigar_type &
51// 2, (3rd column below) is 1 iff operation consumes reference
52// sequence.
53#define bam_cigar_type(o) (BAM_CIGAR_TYPE>>((o)<<1)&3)
54#endif
55
56/*
57  CIGAR
580 M 1 1 match
591 I 0 1 insert
602 D 1 0 deletion
613 N 1 0 spliced (rna)
624 S 0 1 soft clipped
635 H 0 0 hard clipped
646 P 0 0 padded
657 = 1 1 equal
668 X 1 1 mismatch
679 B 0 0
68
693rd column: consumes reference sequence
704th column: consumes query sequence
71 */
72
73namespace theplu {
74namespace yat {
75namespace omic {
76
77  /**
78     \brief Class holding a bam query
79
80     This is a wrapper around bam1_t and most of its information is
81     held in the core struct. A BamRead is typically created from a
82     InBamFile.
83
84     \see samtools
85
86     \since New in yat 0.10
87   */
88  class BamRead
89  {
90  public:
91    /**
92       \brief default constructor
93
94       Constructed object contains no data and most operations are not
95       defined
96     */
97    BamRead(void);
98
99    /**
100       \brief Copy constructor
101     */
102    BamRead(const BamRead& other);
103
104    /**
105       \brief Destructor
106     */
107    virtual ~BamRead(void);
108
109    /**
110       \brief assignment operator
111     */
112    const BamRead& operator=(const BamRead& rhs);
113
114    /**
115       \return pointer to auxiliary data
116     */
117    const uint8_t* aux(void) const;
118
119    /**
120       \since New in yat 0.11
121     */
122    const uint8_t* aux(const char tag[2]) const;
123
124    /**
125       \since New in yat 0.11
126     */
127    void aux(const char tag[2], char type, int len, uint8_t* data);
128
129    /**
130       \since New in yat 0.11
131     */
132    void aux_append(const char tag[2], char type, int len, uint8_t* data);
133
134    /**
135       \since New in yat 0.11
136     */
137    void aux_del(const char tag[2]);
138
139    /**
140       \since New in yat 0.11
141     */
142    int aux_size(void) const;
143
144    /**
145       \brief access core data struct
146
147       \see samtools C api documentaion
148     */
149    const bam1_core_t& core(void) const;
150
151    /**
152       \brief access core data struct
153
154       \see samtools C api documentaion
155     */
156    bam1_core_t& core(void);
157
158    /**
159       \brief access CIGAR array
160
161       In each element the lower 4 bits gives a CIGAR operation and
162       the upper 28 bits keep the length.
163     */
164    const uint32_t* cigar(void) const;
165
166    /**
167       \return \a i th element of CIGAR array
168     */
169    uint32_t cigar(size_t i) const;
170
171    /**
172       \return \a i th CIGAR operation
173     */
174    uint32_t cigar_op(size_t i) const;
175
176    /**
177       \return length of \a i th CIGAR element
178     */
179    uint32_t cigar_oplen(size_t i) const;
180
181    /**
182       Translate CIGAR array to a string such as '72M3S'
183     */
184    std::string cigar_str(void) const;
185
186    /**
187       \brief set CIGAR
188
189       \param c new cigar
190    */
191    void cigar(const std::vector<uint32_t>& c);
192
193    /**
194       \return Quality of base \a i
195     */
196    uint8_t qual(size_t i) const;
197
198    /**
199       \brief set quality of a base
200
201       \param i base to modify
202       \param q new quality
203
204       \since New in yat 0.11
205     */
206    void qual(size_t i, uint8_t q);
207
208    /**
209       \return query name
210
211       Length of array is described by core().l_qname
212     */
213    const char* name(void) const;
214
215    /**
216       \brief chromosome ID
217     */
218    int32_t tid(void) const;
219
220    /**
221       \brief 0-based laftmost coordinate
222     */
223    int32_t pos(void) const;
224
225    /**
226       \brief rightmost coordinate
227
228       Coordinate is 0-based, i.e., the end is one passed the last
229       matching position.
230
231       \see bam_calend
232     */
233    int32_t end(void) const;
234
235    /**
236       \brief Chromosome ID for mate
237     */
238    int32_t mtid(void) const;
239
240    /**
241       \brief leftmost position for mate
242     */
243    int32_t mpos(void) const;
244
245    /**
246       Each character in returned sequence is either A, C, G, T, or N.
247
248       \return sequence
249     */
250    std::string sequence(void) const;
251
252    /**
253       4-bit integer describing base \a index
254
255       \see bam_nt16_rev_table
256     */
257    uint8_t sequence(size_t index) const;
258
259    /**
260       \brief modify a base in sequence
261
262       Set i-th base in sequence to \a x, where seq is a 4-bit integer.
263
264       \see bam_nt16_table
265
266       \since New in yat 0.11
267     */
268    void sequence(size_t i, uint8_t x);
269
270    /**
271       \see core().l_qseq
272     */
273    uint32_t sequence_length(void) const;
274
275    /**
276       \brief bitwise flag
277
278       \see Preprocessor defines BAM_F*
279     */
280    uint16_t flag(void) const;
281
282    /**
283       Exchanging this read with \a other.
284     */
285    void swap(BamRead& other);
286
287  private:
288    bam1_t* bam_;
289
290    friend class InBamFile;
291    friend class OutBamFile;
292    friend class BamReadIterator;
293  };
294
295  /**
296     Swap specialization for BamRead that is faster than generic
297     std::swap as it just swap a pointers.
298
299     \since New in yat 0.10
300
301     \relates BamRead
302   */
303  void swap(BamRead& lhs, BamRead& rhs);
304
305  /**
306     \return \c true if read is soft clipped, either left_soft_clipped
307     or right_soft_clipped.
308
309     \since New in yat 0.10
310
311     \relates BamRead
312   */
313  bool soft_clipped(const BamRead& bam);
314
315  /**
316     If read is soft clipped on left side, return how many bases are
317     clipped, otherwise return 0.
318
319     \since New in yat 0.10
320
321     \relates BamRead
322   */
323  uint32_t left_soft_clipped(const BamRead& bam);
324
325  /**
326     If read is soft clipped on right side, return how many bases are
327     clipped, otherwise return 0.
328
329     \since New in yat 0.10
330
331     \relates BamRead
332   */
333  uint32_t right_soft_clipped(const BamRead& bam);
334
335  /**
336     return \c true if query names are equal
337
338     \see BamRead::name()
339
340     \since New in yat 0.10
341
342     \relates BamRead
343   */
344  bool same_query_name(const BamRead& lhs, const BamRead& rhs);
345
346  /**
347     Functor to compare two reads with respect to their leftmost
348     coordinate.
349
350     \see BamRead
351
352     \since New in yat 0.10
353   */
354  struct BamLessPos
355    : public std::binary_function<const BamRead&, const BamRead&, bool>
356  {
357    /**
358       \return true if lhs tid is less than rhs tid; or tid
359       are equal and lhs pos is smaller than rhs pos.
360
361       \see BamRead::tid() and BamRead::pos()
362     */
363    bool operator()(const BamRead& lhs, const BamRead& rhs) const;
364  };
365
366
367  /**
368     Functor to compare two reads with respect to their rightmost
369     coordinate.
370
371     \see BamRead
372
373     \since New in yat 0.10
374   */
375  struct BamLessEnd
376    : public std::binary_function<const BamRead&, const BamRead&, bool>
377  {
378    /**
379       \return true if lhs tid is less than rhs tid; or tid
380       are equal and lhs end is smaller than rhs end.
381
382       \see BamRead::tid() and BamRead::end()
383     */
384    bool operator()(const BamRead& lhs, const BamRead& rhs) const;
385  };
386
387}}}
388#endif
Note: See TracBrowser for help on using the repository browser.