source: trunk/yat/omic/BamRead.h @ 3029

Last change on this file since 3029 was 3029, checked in by Peter, 10 years ago

refs #746. Add docs and throw in aux_del if tag is absent (rather than undefined behaviour)

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 8.2 KB
Line 
1#ifndef theplu_yat_omic_bam_read
2#define theplu_yat_omic_bam_read
3
4// $Id: BamRead.h 3029 2013-04-21 01:04:41Z peter $
5
6/*
7  Copyright (C) 2012, 2013 Peter Johansson
8
9  This file is part of the yat library, http://dev.thep.lu.se/yat
10
11  The yat library is free software; you can redistribute it and/or
12  modify it under the terms of the GNU General Public License as
13  published by the Free Software Foundation; either version 3 of the
14  License, or (at your option) any later version.
15
16  The yat library is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  General Public License for more details.
20
21  You should have received a copy of the GNU General Public License
22  along with this program. If not, see <http://www.gnu.org/licenses/>.
23*/
24
25#include "config_bam.h"
26
27#include YAT_BAM_HEADER
28#include YAT_SAM_HEADER
29
30#include <functional>
31#include <string>
32#include <vector>
33
34// backport #defines from samtools 0.1.19
35#ifndef BAM_CIGAR_STR
36#define BAM_CIGAR_STR "MIDNSHP=XB"
37// lookup table used in bam_cigar_type
38#define BAM_CIGAR_TYPE 0x3C1A7
39
40/// index of operation in range [0, 9], see below.
41#define bam_cigar_op(c) ((c)&BAM_CIGAR_MASK)
42// length of the operation
43#define bam_cigar_oplen(c) ((c)>>BAM_CIGAR_SHIFT)
44// char describing the operation, see column 2 in table below
45#define bam_cigar_opchr(c) (BAM_CIGAR_STR[bam_cigar_op(c)])
46// Create a cigar element with length l and operation o
47#define bam_cigar_gen(l, o) ((l)<<BAM_CIGAR_SHIFT|(o))
48// Returns a two-bits number describing the type of operation. The
49// first bit, bam_cigar_type & 1, (4th column in table below) is 1 iff
50// operation consumes query sequence. The second bit, bam_cigar_type &
51// 2, (3rd column below) is 1 iff operation consumes reference
52// sequence.
53#define bam_cigar_type(o) (BAM_CIGAR_TYPE>>((o)<<1)&3)
54#endif
55
56/*
57  CIGAR
580 M 1 1 match
591 I 0 1 insert
602 D 1 0 deletion
613 N 1 0 spliced (rna)
624 S 0 1 soft clipped
635 H 0 0 hard clipped
646 P 0 0 padded
657 = 1 1 equal
668 X 1 1 mismatch
679 B 0 0
68
693rd column: consumes reference sequence
704th column: consumes query sequence
71 */
72
73namespace theplu {
74namespace yat {
75namespace omic {
76
77  /**
78     \brief Class holding a bam query
79
80     This is a wrapper around bam1_t and most of its information is
81     held in the core struct. A BamRead is typically created from a
82     InBamFile.
83
84     \see samtools
85
86     \since New in yat 0.10
87   */
88  class BamRead
89  {
90  public:
91    /**
92       \brief default constructor
93
94       Constructed object contains no data and most operations are not
95       defined
96     */
97    BamRead(void);
98
99    /**
100       \brief Copy constructor
101     */
102    BamRead(const BamRead& other);
103
104    /**
105       \brief Destructor
106     */
107    virtual ~BamRead(void);
108
109    /**
110       \brief assignment operator
111     */
112    const BamRead& operator=(const BamRead& rhs);
113
114    /**
115       \return pointer to auxiliary data
116
117       \see aux_size(void)
118     */
119    const uint8_t* aux(void) const;
120
121    /**
122       Use bam_aux2? functions (in samtools C api) to convert returned
123       pointer to corresponding type.
124
125       \return pointer to field associated with \a tag, NULL if \a tag
126       doesn't exist.
127
128       \see bam_aux_get
129
130       \since New in yat 0.11
131     */
132    const uint8_t* aux(const char tag[2]) const;
133
134    /**
135       \brief append a new tag to aux field
136
137       \param tag two-charcter tag to append
138       \param type describes which type and can be 'iIsScCdfAZH'
139       \param len length of data
140       \param data pointer to data
141
142       \since New in yat 0.11
143
144       \see SAM specification
145     */
146    void aux_append(const char tag[2], char type, int len, uint8_t* data);
147
148    /**
149       \brief remove a tag in aux field
150
151       \throw utility::runtime_error if \a tag is not present in read
152
153       \since New in yat 0.11
154     */
155    void aux_del(const char tag[2]);
156
157    /**
158       \return length of aux field
159
160       \since New in yat 0.11
161     */
162    int aux_size(void) const;
163
164    /**
165       \brief access core data struct
166
167       \see samtools C api documentaion
168     */
169    const bam1_core_t& core(void) const;
170
171    /**
172       \brief access core data struct
173
174       \see samtools C api documentaion
175     */
176    bam1_core_t& core(void);
177
178    /**
179       \brief access CIGAR array
180
181       In each element the lower 4 bits gives a CIGAR operation and
182       the upper 28 bits keep the length.
183     */
184    const uint32_t* cigar(void) const;
185
186    /**
187       \return \a i th element of CIGAR array
188     */
189    uint32_t cigar(size_t i) const;
190
191    /**
192       \return \a i th CIGAR operation
193     */
194    uint32_t cigar_op(size_t i) const;
195
196    /**
197       \return length of \a i th CIGAR element
198     */
199    uint32_t cigar_oplen(size_t i) const;
200
201    /**
202       Translate CIGAR array to a string such as '72M3S'
203     */
204    std::string cigar_str(void) const;
205
206    /**
207       \brief set CIGAR
208
209       \param c new cigar
210    */
211    void cigar(const std::vector<uint32_t>& c);
212
213    /**
214       \return Quality of base \a i
215     */
216    uint8_t qual(size_t i) const;
217
218    /**
219       \brief set quality of a base
220
221       \param i base to modify
222       \param q new quality
223
224       \since New in yat 0.11
225     */
226    void qual(size_t i, uint8_t q);
227
228    /**
229       \return query name
230
231       Length of array is described by core().l_qname
232     */
233    const char* name(void) const;
234
235    /**
236       \brief chromosome ID
237     */
238    int32_t tid(void) const;
239
240    /**
241       \brief 0-based laftmost coordinate
242     */
243    int32_t pos(void) const;
244
245    /**
246       \brief rightmost coordinate
247
248       Coordinate is 0-based, i.e., the end is one passed the last
249       matching position.
250
251       \see bam_calend
252     */
253    int32_t end(void) const;
254
255    /**
256       \brief Chromosome ID for mate
257     */
258    int32_t mtid(void) const;
259
260    /**
261       \brief leftmost position for mate
262     */
263    int32_t mpos(void) const;
264
265    /**
266       Each character in returned sequence is either A, C, G, T, or N.
267
268       \return sequence
269     */
270    std::string sequence(void) const;
271
272    /**
273       4-bit integer describing base \a index
274
275       \see bam_nt16_rev_table
276     */
277    uint8_t sequence(size_t index) const;
278
279    /**
280       \brief modify a base in sequence
281
282       Set i-th base in sequence to \a x, where seq is a 4-bit integer.
283
284       \see bam_nt16_table
285
286       \since New in yat 0.11
287     */
288    void sequence(size_t i, uint8_t x);
289
290    /**
291       \see core().l_qseq
292     */
293    uint32_t sequence_length(void) const;
294
295    /**
296       \brief bitwise flag
297
298       \see Preprocessor defines BAM_F*
299     */
300    uint16_t flag(void) const;
301
302    /**
303       Exchanging this read with \a other.
304     */
305    void swap(BamRead& other);
306
307  private:
308    bam1_t* bam_;
309
310    friend class InBamFile;
311    friend class OutBamFile;
312    friend class BamReadIterator;
313  };
314
315  /**
316     Swap specialization for BamRead that is faster than generic
317     std::swap as it just swap a pointers.
318
319     \since New in yat 0.10
320
321     \relates BamRead
322   */
323  void swap(BamRead& lhs, BamRead& rhs);
324
325  /**
326     \return \c true if read is soft clipped, either left_soft_clipped
327     or right_soft_clipped.
328
329     \since New in yat 0.10
330
331     \relates BamRead
332   */
333  bool soft_clipped(const BamRead& bam);
334
335  /**
336     If read is soft clipped on left side, return how many bases are
337     clipped, otherwise return 0.
338
339     \since New in yat 0.10
340
341     \relates BamRead
342   */
343  uint32_t left_soft_clipped(const BamRead& bam);
344
345  /**
346     If read is soft clipped on right side, return how many bases are
347     clipped, otherwise return 0.
348
349     \since New in yat 0.10
350
351     \relates BamRead
352   */
353  uint32_t right_soft_clipped(const BamRead& bam);
354
355  /**
356     return \c true if query names are equal
357
358     \see BamRead::name()
359
360     \since New in yat 0.10
361
362     \relates BamRead
363   */
364  bool same_query_name(const BamRead& lhs, const BamRead& rhs);
365
366  /**
367     Functor to compare two reads with respect to their leftmost
368     coordinate.
369
370     \see BamRead
371
372     \since New in yat 0.10
373   */
374  struct BamLessPos
375    : public std::binary_function<const BamRead&, const BamRead&, bool>
376  {
377    /**
378       \return true if lhs tid is less than rhs tid; or tid
379       are equal and lhs pos is smaller than rhs pos.
380
381       \see BamRead::tid() and BamRead::pos()
382     */
383    bool operator()(const BamRead& lhs, const BamRead& rhs) const;
384  };
385
386
387  /**
388     Functor to compare two reads with respect to their rightmost
389     coordinate.
390
391     \see BamRead
392
393     \since New in yat 0.10
394   */
395  struct BamLessEnd
396    : public std::binary_function<const BamRead&, const BamRead&, bool>
397  {
398    /**
399       \return true if lhs tid is less than rhs tid; or tid
400       are equal and lhs end is smaller than rhs end.
401
402       \see BamRead::tid() and BamRead::end()
403     */
404    bool operator()(const BamRead& lhs, const BamRead& rhs) const;
405  };
406
407}}}
408#endif
Note: See TracBrowser for help on using the repository browser.