source: trunk/yat/omic/BamRead.h @ 3055

Last change on this file since 3055 was 3055, checked in by Peter, 8 years ago

closes #746. New function BamRead::name(1)

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 8.5 KB
Line 
1#ifndef theplu_yat_omic_bam_read
2#define theplu_yat_omic_bam_read
3
4// $Id: BamRead.h 3055 2013-06-16 00:21:30Z peter $
5
6/*
7  Copyright (C) 2012, 2013 Peter Johansson
8
9  This file is part of the yat library, http://dev.thep.lu.se/yat
10
11  The yat library is free software; you can redistribute it and/or
12  modify it under the terms of the GNU General Public License as
13  published by the Free Software Foundation; either version 3 of the
14  License, or (at your option) any later version.
15
16  The yat library is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  General Public License for more details.
20
21  You should have received a copy of the GNU General Public License
22  along with this program. If not, see <http://www.gnu.org/licenses/>.
23*/
24
25#include "config_bam.h"
26
27#include YAT_BAM_HEADER
28#include YAT_SAM_HEADER
29
30#include <functional>
31#include <string>
32#include <vector>
33
34// backport #defines from samtools 0.1.19
35#ifndef BAM_CIGAR_STR
36#define BAM_CIGAR_STR "MIDNSHP=XB"
37// lookup table used in bam_cigar_type
38#define BAM_CIGAR_TYPE 0x3C1A7
39
40/// index of operation in range [0, 9], see below.
41#define bam_cigar_op(c) ((c)&BAM_CIGAR_MASK)
42// length of the operation
43#define bam_cigar_oplen(c) ((c)>>BAM_CIGAR_SHIFT)
44// char describing the operation, see column 2 in table below
45#define bam_cigar_opchr(c) (BAM_CIGAR_STR[bam_cigar_op(c)])
46// Create a cigar element with length l and operation o
47#define bam_cigar_gen(l, o) ((l)<<BAM_CIGAR_SHIFT|(o))
48// Returns a two-bits number describing the type of operation. The
49// first bit, bam_cigar_type & 1, (4th column in table below) is 1 iff
50// operation consumes query sequence. The second bit, bam_cigar_type &
51// 2, (3rd column below) is 1 iff operation consumes reference
52// sequence.
53#define bam_cigar_type(o) (BAM_CIGAR_TYPE>>((o)<<1)&3)
54#endif
55
56/*
57  CIGAR
580 M 1 1 match
591 I 0 1 insert
602 D 1 0 deletion
613 N 1 0 spliced (rna)
624 S 0 1 soft clipped
635 H 0 0 hard clipped
646 P 0 0 padded
657 = 1 1 equal
668 X 1 1 mismatch
679 B 0 0
68
693rd column: consumes reference sequence
704th column: consumes query sequence
71 */
72
73namespace theplu {
74namespace yat {
75namespace omic {
76
77  /**
78     \brief Class holding a bam query
79
80     This is a wrapper around bam1_t and most of its information is
81     held in the core struct. A BamRead is typically created from a
82     InBamFile.
83
84     \see samtools
85
86     \since New in yat 0.10
87   */
88  class BamRead
89  {
90  public:
91    /**
92       \brief default constructor
93
94       Constructed object contains no data and most operations are not
95       defined
96     */
97    BamRead(void);
98
99    /**
100       \brief Copy constructor
101     */
102    BamRead(const BamRead& other);
103
104    /**
105       \brief Destructor
106     */
107    virtual ~BamRead(void);
108
109    /**
110       \brief assignment operator
111     */
112    const BamRead& operator=(const BamRead& rhs);
113
114    /**
115       \return pointer to auxiliary data
116
117       \see aux_size(void)
118     */
119    const uint8_t* aux(void) const;
120
121    /**
122       Use bam_aux2? functions (in samtools C api) to convert returned
123       pointer to corresponding type.
124
125       \return pointer to field associated with \a tag, NULL if \a tag
126       doesn't exist.
127
128       \see bam_aux_get
129
130       \since New in yat 0.11
131     */
132    const uint8_t* aux(const char tag[2]) const;
133
134    /**
135       \brief append a new tag to aux field
136
137       \param tag two-charcter tag to append
138       \param type describes which type and can be 'iIsScCdfAZH'
139       \param len length of data
140       \param data pointer to data
141
142       \since New in yat 0.11
143
144       \see SAM specification
145     */
146    void aux_append(const char tag[2], char type, int len, uint8_t* data);
147
148    /**
149       \brief remove a tag in aux field
150
151       \throw utility::runtime_error if \a tag is not present in read
152
153       \since New in yat 0.11
154     */
155    void aux_del(const char tag[2]);
156
157    /**
158       \return length of aux field
159
160       \since New in yat 0.11
161     */
162    int aux_size(void) const;
163
164    /**
165       \brief access core data struct
166
167       \see samtools C api documentaion
168     */
169    const bam1_core_t& core(void) const;
170
171    /**
172       \brief access core data struct
173
174       \see samtools C api documentaion
175     */
176    bam1_core_t& core(void);
177
178    /**
179       \brief access CIGAR array
180
181       In each element the lower 4 bits gives a CIGAR operation and
182       the upper 28 bits keep the length.
183     */
184    const uint32_t* cigar(void) const;
185
186    /**
187       \return \a i th element of CIGAR array
188     */
189    uint32_t cigar(size_t i) const;
190
191    /**
192       \return \a i th CIGAR operation
193     */
194    uint32_t cigar_op(size_t i) const;
195
196    /**
197       \return length of \a i th CIGAR element
198     */
199    uint32_t cigar_oplen(size_t i) const;
200
201    /**
202       Translate CIGAR array to a string such as '72M3S'
203     */
204    std::string cigar_str(void) const;
205
206    /**
207       \brief set CIGAR
208
209       \param c new cigar
210    */
211    void cigar(const std::vector<uint32_t>& c);
212
213    /**
214       \return Quality of base \a i
215     */
216    uint8_t qual(size_t i) const;
217
218    /**
219       \brief set quality of a base
220
221       \param i base to modify
222       \param q new quality
223
224       \since New in yat 0.11
225     */
226    void qual(size_t i, uint8_t q);
227
228    /**
229       \return query name
230
231       Length of array is described by core().l_qname
232     */
233    const char* name(void) const;
234
235    /**
236       \brief modify name
237
238       \since New in yat 0.11
239    */
240    void name(const std::string& n);
241
242    /**
243       \brief chromosome ID
244     */
245    int32_t tid(void) const;
246
247    /**
248       \brief 0-based laftmost coordinate
249     */
250    int32_t pos(void) const;
251
252    /**
253       \brief rightmost coordinate
254
255       Coordinate is 0-based, i.e., the end is one passed the last
256       matching position.
257
258       \see bam_calend
259     */
260    int32_t end(void) const;
261
262    /**
263       \brief Chromosome ID for mate
264     */
265    int32_t mtid(void) const;
266
267    /**
268       \brief leftmost position for mate
269     */
270    int32_t mpos(void) const;
271
272    /**
273       Each character in returned sequence is either A, C, G, T, or N.
274
275       \return sequence
276     */
277    std::string sequence(void) const;
278
279    /**
280       4-bit integer describing base \a index
281
282       \see bam_nt16_rev_table
283     */
284    uint8_t sequence(size_t index) const;
285
286    /**
287       \brief modify a base in sequence
288
289       Set i-th base in sequence to \a x, where seq is a 4-bit integer.
290
291       \see bam_nt16_table
292
293       \since New in yat 0.11
294     */
295    void sequence(size_t i, uint8_t x);
296
297    /**
298       \brief set sequence and quality
299
300       \since New in yat 0.11
301     */
302    void sequence(const std::string& seq, const std::vector<uint8_t>& qual);
303
304    /**
305       \see core().l_qseq
306     */
307    uint32_t sequence_length(void) const;
308
309    /**
310       \brief bitwise flag
311
312       \see Preprocessor defines BAM_F*
313     */
314    uint16_t flag(void) const;
315
316    /**
317       Exchanging this read with \a other.
318     */
319    void swap(BamRead& other);
320
321  private:
322    // ensure capacity of data pointer is (at least) n
323    void reserve(int n);
324
325    bam1_t* bam_;
326
327    friend class InBamFile;
328    friend class OutBamFile;
329    friend class BamReadIterator;
330  };
331
332  /**
333     Swap specialization for BamRead that is faster than generic
334     std::swap as it just swap a pointers.
335
336     \since New in yat 0.10
337
338     \relates BamRead
339   */
340  void swap(BamRead& lhs, BamRead& rhs);
341
342  /**
343     \return \c true if read is soft clipped, either left_soft_clipped
344     or right_soft_clipped.
345
346     \since New in yat 0.10
347
348     \relates BamRead
349   */
350  bool soft_clipped(const BamRead& bam);
351
352  /**
353     If read is soft clipped on left side, return how many bases are
354     clipped, otherwise return 0.
355
356     \since New in yat 0.10
357
358     \relates BamRead
359   */
360  uint32_t left_soft_clipped(const BamRead& bam);
361
362  /**
363     If read is soft clipped on right side, return how many bases are
364     clipped, otherwise return 0.
365
366     \since New in yat 0.10
367
368     \relates BamRead
369   */
370  uint32_t right_soft_clipped(const BamRead& bam);
371
372  /**
373     return \c true if query names are equal
374
375     \see BamRead::name()
376
377     \since New in yat 0.10
378
379     \relates BamRead
380   */
381  bool same_query_name(const BamRead& lhs, const BamRead& rhs);
382
383  /**
384     Functor to compare two reads with respect to their leftmost
385     coordinate.
386
387     \see BamRead
388
389     \since New in yat 0.10
390   */
391  struct BamLessPos
392    : public std::binary_function<const BamRead&, const BamRead&, bool>
393  {
394    /**
395       \return true if lhs tid is less than rhs tid; or tid
396       are equal and lhs pos is smaller than rhs pos.
397
398       \see BamRead::tid() and BamRead::pos()
399     */
400    bool operator()(const BamRead& lhs, const BamRead& rhs) const;
401  };
402
403
404  /**
405     Functor to compare two reads with respect to their rightmost
406     coordinate.
407
408     \see BamRead
409
410     \since New in yat 0.10
411   */
412  struct BamLessEnd
413    : public std::binary_function<const BamRead&, const BamRead&, bool>
414  {
415    /**
416       \return true if lhs tid is less than rhs tid; or tid
417       are equal and lhs end is smaller than rhs end.
418
419       \see BamRead::tid() and BamRead::end()
420     */
421    bool operator()(const BamRead& lhs, const BamRead& rhs) const;
422  };
423
424}}}
425#endif
Note: See TracBrowser for help on using the repository browser.