source: trunk/yat/omic/BamRead.h @ 3168

Last change on this file since 3168 was 3168, checked in by Peter, 8 years ago

improve docs

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 8.6 KB
Line 
1#ifndef theplu_yat_omic_bam_read
2#define theplu_yat_omic_bam_read
3
4// $Id: BamRead.h 3168 2014-01-21 07:57:10Z peter $
5
6/*
7  Copyright (C) 2012, 2013 Peter Johansson
8
9  This file is part of the yat library, http://dev.thep.lu.se/yat
10
11  The yat library is free software; you can redistribute it and/or
12  modify it under the terms of the GNU General Public License as
13  published by the Free Software Foundation; either version 3 of the
14  License, or (at your option) any later version.
15
16  The yat library is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  General Public License for more details.
20
21  You should have received a copy of the GNU General Public License
22  along with this program. If not, see <http://www.gnu.org/licenses/>.
23*/
24
25#include "config_bam.h"
26
27#include YAT_BAM_HEADER
28#include YAT_SAM_HEADER
29
30#include <functional>
31#include <string>
32#include <vector>
33
34// backport #defines from samtools 0.1.19
35#ifndef BAM_CIGAR_STR
36#define BAM_CIGAR_STR "MIDNSHP=XB"
37// lookup table used in bam_cigar_type
38#define BAM_CIGAR_TYPE 0x3C1A7
39
40/// index of operation in range [0, 9], see below.
41#define bam_cigar_op(c) ((c)&BAM_CIGAR_MASK)
42// length of the operation
43#define bam_cigar_oplen(c) ((c)>>BAM_CIGAR_SHIFT)
44// char describing the operation, see column 2 in table below
45#define bam_cigar_opchr(c) (BAM_CIGAR_STR[bam_cigar_op(c)])
46// Create a cigar element with length l and operation o
47#define bam_cigar_gen(l, o) ((l)<<BAM_CIGAR_SHIFT|(o))
48// Returns a two-bits number describing the type of operation. The
49// first bit, bam_cigar_type & 1, (4th column in table below) is 1 iff
50// operation consumes query sequence. The second bit, bam_cigar_type &
51// 2, (3rd column below) is 1 iff operation consumes reference
52// sequence.
53#define bam_cigar_type(o) (BAM_CIGAR_TYPE>>((o)<<1)&3)
54#endif
55
56/*
57  CIGAR
580 M 1 1 match
591 I 0 1 insert
602 D 1 0 deletion
613 N 1 0 spliced (rna)
624 S 0 1 soft clipped
635 H 0 0 hard clipped
646 P 0 0 padded
657 = 1 1 equal
668 X 1 1 mismatch
679 B 0 0
68
693rd column: consumes reference sequence
704th column: consumes query sequence
71 */
72
73namespace theplu {
74namespace yat {
75namespace omic {
76
77  /**
78     \brief Class holding a bam query
79
80     This is a wrapper around bam1_t and most of its information is
81     held in the core struct. A BamRead is typically created from a
82     InBamFile.
83
84     \see samtools
85
86     \since New in yat 0.10
87   */
88  class BamRead
89  {
90  public:
91    /**
92       \brief default constructor
93
94       Constructed object contains no data and most operations are not
95       defined
96     */
97    BamRead(void);
98
99    /**
100       \brief Copy constructor
101     */
102    BamRead(const BamRead& other);
103
104    /**
105       \brief Destructor
106     */
107    virtual ~BamRead(void);
108
109    /**
110       \brief assignment operator
111     */
112    const BamRead& operator=(const BamRead& rhs);
113
114    /**
115       \return pointer to auxiliary data
116
117       \see aux_size(void)
118     */
119    const uint8_t* aux(void) const;
120
121    /**
122       Use bam_aux2? functions (in samtools C api) to convert returned
123       pointer to corresponding type.
124
125       \return pointer to field associated with \a tag, NULL if \a tag
126       doesn't exist.
127
128       \see bam_aux_get
129
130       \since New in yat 0.11
131     */
132    const uint8_t* aux(const char tag[2]) const;
133
134    /**
135       \brief append a new tag to aux field
136
137       \param tag two-charcter tag to append
138       \param type describes which type and can be 'iIsScCdfAZH'
139       \param len length of data
140       \param data pointer to data
141
142       \since New in yat 0.11
143
144       \see SAM specification
145     */
146    void aux_append(const char tag[2], char type, int len, uint8_t* data);
147
148    /**
149       \brief remove a tag in aux field
150
151       \throw utility::runtime_error if \a tag is not present in read
152
153       \since New in yat 0.11
154     */
155    void aux_del(const char tag[2]);
156
157    /**
158       \return length of aux field
159
160       \since New in yat 0.11
161     */
162    int aux_size(void) const;
163
164    /**
165       \brief access core data struct
166
167       \see samtools C api documentaion
168     */
169    const bam1_core_t& core(void) const;
170
171    /**
172       \brief access core data struct
173
174       \see samtools C api documentaion
175     */
176    bam1_core_t& core(void);
177
178    /**
179       \brief access CIGAR array
180
181       In each element the lower 4 bits gives a CIGAR operation and
182       the upper 28 bits keep the length.
183     */
184    const uint32_t* cigar(void) const;
185
186    /**
187       \return \a i th element of CIGAR array
188     */
189    uint32_t cigar(size_t i) const;
190
191    /**
192       \return \a i th CIGAR operation
193     */
194    uint32_t cigar_op(size_t i) const;
195
196    /**
197       \return length of \a i th CIGAR element
198     */
199    uint32_t cigar_oplen(size_t i) const;
200
201    /**
202       Translate CIGAR array to a string such as '72M3S'
203     */
204    std::string cigar_str(void) const;
205
206    /**
207       \brief set CIGAR
208
209       \param c new cigar
210    */
211    void cigar(const std::vector<uint32_t>& c);
212
213    /**
214       \return Quality of base \a i
215     */
216    uint8_t qual(size_t i) const;
217
218    /**
219       \brief set quality of a base
220
221       \param i base to modify
222       \param q new quality
223
224       \since New in yat 0.11
225     */
226    void qual(size_t i, uint8_t q);
227
228    /**
229       \return query name
230
231       Length of array is described by core().l_qname
232     */
233    const char* name(void) const;
234
235    /**
236       \brief modify name
237
238       \since New in yat 0.11
239    */
240    void name(const std::string& n);
241
242    /**
243       \brief chromosome ID
244     */
245    int32_t tid(void) const;
246
247    /**
248       \brief 0-based laftmost coordinate
249     */
250    int32_t pos(void) const;
251
252    /**
253       \brief rightmost coordinate
254
255       Coordinate is 0-based, i.e., the end is one passed the last
256       matching position.
257
258       \see bam_calend
259     */
260    int32_t end(void) const;
261
262    /**
263       \brief Chromosome ID for mate
264     */
265    int32_t mtid(void) const;
266
267    /**
268       \brief leftmost position for mate
269     */
270    int32_t mpos(void) const;
271
272    /**
273       Each character in returned sequence is either A, C, G, T, or N.
274
275       \return sequence
276     */
277    std::string sequence(void) const;
278
279    /**
280       4-bit integer describing base \a index
281
282       \see bam_nt16_rev_table
283     */
284    uint8_t sequence(size_t index) const;
285
286    /**
287       \brief modify a base in sequence
288
289       Set i-th base in sequence to \a x, where seq is a 4-bit integer.
290
291       \see bam_nt16_table
292
293       \since New in yat 0.11
294     */
295    void sequence(size_t i, uint8_t x);
296
297    /**
298       \brief set sequence and quality
299
300       \since New in yat 0.11
301     */
302    void sequence(const std::string& seq, const std::vector<uint8_t>& qual);
303
304    /**
305       \see core().l_qseq
306     */
307    uint32_t sequence_length(void) const;
308
309    /**
310       \brief bitwise flag
311
312       \see Preprocessor defines BAM_F*
313     */
314    uint16_t flag(void) const;
315
316    /**
317       Exchanging this read with \a other.
318
319       \see swap(BamRead&, BamRead&)
320     */
321    void swap(BamRead& other);
322
323  private:
324    // ensure capacity of data pointer is (at least) n
325    void reserve(int n);
326
327    bam1_t* bam_;
328
329    friend class InBamFile;
330    friend class OutBamFile;
331    friend class BamReadIterator;
332  };
333
334  /**
335     Swap specialization for BamRead that is faster than generic
336     std::swap as it just swap a pair of pointers.
337
338     \since New in yat 0.10
339
340     \relates BamRead
341   */
342  void swap(BamRead& lhs, BamRead& rhs);
343
344  /**
345     \return \c true if read is soft clipped, either left_soft_clipped
346     or right_soft_clipped.
347
348     \since New in yat 0.10
349
350     \relates BamRead
351   */
352  bool soft_clipped(const BamRead& bam);
353
354  /**
355     If read is soft clipped on left side, return how many bases are
356     clipped, otherwise return 0.
357
358     \since New in yat 0.10
359
360     \relates BamRead
361   */
362  uint32_t left_soft_clipped(const BamRead& bam);
363
364  /**
365     If read is soft clipped on right side, return how many bases are
366     clipped, otherwise return 0.
367
368     \since New in yat 0.10
369
370     \relates BamRead
371   */
372  uint32_t right_soft_clipped(const BamRead& bam);
373
374  /**
375     return \c true if query names are equal
376
377     \see BamRead::name()
378
379     \since New in yat 0.10
380
381     \relates BamRead
382   */
383  bool same_query_name(const BamRead& lhs, const BamRead& rhs);
384
385  /**
386     Functor to compare two reads with respect to their leftmost
387     coordinate.
388
389     \see BamRead
390
391     \since New in yat 0.10
392   */
393  struct BamLessPos
394    : public std::binary_function<const BamRead&, const BamRead&, bool>
395  {
396    /**
397       \return true if lhs tid is less than rhs tid; or tid
398       are equal and lhs pos is smaller than rhs pos.
399
400       \see BamRead::tid() and BamRead::pos()
401     */
402    bool operator()(const BamRead& lhs, const BamRead& rhs) const;
403  };
404
405
406  /**
407     Functor to compare two reads with respect to their rightmost
408     coordinate.
409
410     \see BamRead
411
412     \since New in yat 0.10
413   */
414  struct BamLessEnd
415    : public std::binary_function<const BamRead&, const BamRead&, bool>
416  {
417    /**
418       \return true if lhs tid is less than rhs tid; or tid
419       are equal and lhs end is smaller than rhs end.
420
421       \see BamRead::tid() and BamRead::end()
422     */
423    bool operator()(const BamRead& lhs, const BamRead& rhs) const;
424  };
425
426}}}
427#endif
Note: See TracBrowser for help on using the repository browser.