source: trunk/yat/omic/BamRead.h @ 2993

Last change on this file since 2993 was 2993, checked in by Peter, 9 years ago

conform GPL header to same style as in other yat files

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 7.1 KB
Line 
1#ifndef theplu_yat_omic_bam_read
2#define theplu_yat_omic_bam_read
3
4// $Id: BamRead.h 2993 2013-03-03 07:38:22Z peter $
5
6/*
7  Copyright (C) 2012, 2013 Peter Johansson
8
9  This file is part of the yat library, http://dev.thep.lu.se/yat
10
11  The yat library is free software; you can redistribute it and/or
12  modify it under the terms of the GNU General Public License as
13  published by the Free Software Foundation; either version 3 of the
14  License, or (at your option) any later version.
15
16  The yat library is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  General Public License for more details.
20
21  You should have received a copy of the GNU General Public License
22  along with this program. If not, see <http://www.gnu.org/licenses/>.
23*/
24
25#include "config_bam.h"
26
27#include YAT_BAM_HEADER
28#include YAT_SAM_HEADER
29
30#include <functional>
31#include <string>
32#include <vector>
33
34// backport #defines from samtools 0.1.19
35#ifndef BAM_CIGAR_STR
36#define BAM_CIGAR_STR "MIDNSHP=XB"
37// lookup table used in bam_cigar_type
38#define BAM_CIGAR_TYPE 0x3C1A7
39
40/// index of operation in range [0, 9], see below.
41#define bam_cigar_op(c) ((c)&BAM_CIGAR_MASK)
42// length of the operation
43#define bam_cigar_oplen(c) ((c)>>BAM_CIGAR_SHIFT)
44// char describing the operation, see column 2 in table below
45#define bam_cigar_opchr(c) (BAM_CIGAR_STR[bam_cigar_op(c)])
46// Create a cigar element with length l and operation o
47#define bam_cigar_gen(l, o) ((l)<<BAM_CIGAR_SHIFT|(o))
48// Returns a two-bits number describing the type of operation. The
49// first bit, bam_cigar_type & 1, (4th column in table below) is 1 iff
50// operation consumes query sequence. The second bit, bam_cigar_type &
51// 2, (3rd column below) is 1 iff operation consumes reference
52// sequence.
53#define bam_cigar_type(o) (BAM_CIGAR_TYPE>>((o)<<1)&3)
54#endif
55
56/*
57  CIGAR
580 M 1 1 match
591 I 0 1 insert
602 D 1 0 deletion
613 N 1 0 spliced (rna)
624 S 0 1 soft clipped
635 H 0 0 hard clipped
646 P 0 0 padded
657 = 1 1 equal
668 X 1 1 mismatch
679 B 0 0
68
693rd column: consumes reference sequence
704th column: consumes query sequence
71 */
72
73namespace theplu {
74namespace yat {
75namespace omic {
76
77  /**
78     \brief Class holding a bam query
79
80     This is a wrapper around bam1_t and most of its information is
81     held in the core struct. A BamRead is typically created from a
82     InBamFile.
83
84     \see samtools
85
86     \since New in yat 0.10
87   */
88  class BamRead
89  {
90  public:
91    /**
92       \brief default constructor
93
94       Constructed object contains no data and most operations are not
95       defined
96     */
97    BamRead(void);
98
99    /**
100       \brief Copy constructor
101     */
102    BamRead(const BamRead& other);
103
104    /**
105       \brief Destructor
106     */
107    virtual ~BamRead(void);
108
109    /**
110       \brief assignment operator
111     */
112    const BamRead& operator=(const BamRead& rhs);
113
114    /**
115       \return pointer to auxiliary data
116     */
117    const uint8_t* aux(void) const;
118
119    /**
120       \brief access core data struct
121
122       \see samtools C api documentaion
123     */
124    const bam1_core_t& core(void) const;
125
126    /**
127       \brief access core data struct
128
129       \see samtools C api documentaion
130     */
131    bam1_core_t& core(void);
132
133    /**
134       \brief access CIGAR array
135
136       In each element the lower 4 bits gives a CIGAR operation and
137       the upper 28 bits keep the length.
138     */
139    const uint32_t* cigar(void) const;
140
141    /**
142       \return \a i th element of CIGAR array
143     */
144    uint32_t cigar(size_t i) const;
145
146    /**
147       \return \a i th CIGAR operation
148     */
149    uint32_t cigar_op(size_t i) const;
150
151    /**
152       \return length of \a i th CIGAR element
153     */
154    uint32_t cigar_oplen(size_t i) const;
155
156    /**
157       Translate CIGAR array to a string such as '72M3S'
158     */
159    std::string cigar_str(void) const;
160
161    /**
162       \brief set CIGAR
163
164       \param c new cigar
165    */
166    void cigar(const std::vector<uint32_t>& c);
167
168    /**
169       \return Quality of base \a i
170     */
171    uint8_t qual(size_t i) const;
172
173    /**
174       \return query name
175
176       Length of array is described by core().l_qname
177     */
178    const char* name(void) const;
179
180    /**
181       \brief chromosome ID
182     */
183    int32_t tid(void) const;
184
185    /**
186       \brief 0-based laftmost coordinate
187     */
188    int32_t pos(void) const;
189
190    /**
191       \brief rightmost coordinate
192
193       Coordinate is 0-based, i.e., the end is one passed the last
194       matching position.
195
196       \see bam_calend
197     */
198    int32_t end(void) const;
199
200    /**
201       \brief Chromosome ID for mate
202     */
203    int32_t mtid(void) const;
204
205    /**
206       \brief leftmost position for mate
207     */
208    int32_t mpos(void) const;
209
210    /**
211       Each character in returned sequence is either A, C, G, T, or N.
212
213       \return sequence
214     */
215    std::string sequence(void) const;
216
217    /**
218       4-bit integer describing base \a index
219
220       \see bam_nt16_rev_table
221     */
222    uint8_t sequence(size_t index) const;
223
224    /**
225       \brief modify a base in sequence
226
227       Set i-th base in sequence to \a x, where seq is a 4-bit integer.
228
229       \see bam_nt16_table
230     */
231    void sequence(size_t i, uint8_t x);
232
233    /**
234       \see core().l_qseq
235     */
236    uint32_t sequence_length(void) const;
237
238    /**
239       \brief bitwise flag
240
241       \see Preprocessor defines BAM_F*
242     */
243    uint16_t flag(void) const;
244
245    /**
246       Exchanging this read with \a other.
247     */
248    void swap(BamRead& other);
249
250  private:
251    bam1_t* bam_;
252
253    friend class InBamFile;
254    friend class OutBamFile;
255    friend class BamReadIterator;
256  };
257
258  /**
259     Swap specialization for BamRead that is faster than generic
260     std::swap as it just swap a pointers.
261
262     \since New in yat 0.10
263
264     \relates BamRead
265   */
266  void swap(BamRead& lhs, BamRead& rhs);
267
268  /**
269     \return \c true if read is soft clipped, either left_soft_clipped
270     or right_soft_clipped.
271
272     \since New in yat 0.10
273
274     \relates BamRead
275   */
276  bool soft_clipped(const BamRead& bam);
277
278  /**
279     If read is soft clipped on left side, return how many bases are
280     clipped, otherwise return 0.
281
282     \since New in yat 0.10
283
284     \relates BamRead
285   */
286  uint32_t left_soft_clipped(const BamRead& bam);
287
288  /**
289     If read is soft clipped on right side, return how many bases are
290     clipped, otherwise return 0.
291
292     \since New in yat 0.10
293
294     \relates BamRead
295   */
296  uint32_t right_soft_clipped(const BamRead& bam);
297
298  /**
299     return \c true if query names are equal
300
301     \see BamRead::name()
302
303     \since New in yat 0.10
304
305     \relates BamRead
306   */
307  bool same_query_name(const BamRead& lhs, const BamRead& rhs);
308
309  /**
310     Functor to compare two reads with respect to their leftmost
311     coordinate.
312
313     \see BamRead
314
315     \since New in yat 0.10
316   */
317  struct BamLessPos
318    : public std::binary_function<const BamRead&, const BamRead&, bool>
319  {
320    /**
321       \return true if lhs tid is less than rhs tid; or tid
322       are equal and lhs pos is smaller than rhs pos.
323
324       \see BamRead::tid() and BamRead::pos()
325     */
326    bool operator()(const BamRead& lhs, const BamRead& rhs) const;
327  };
328
329
330  /**
331     Functor to compare two reads with respect to their rightmost
332     coordinate.
333
334     \see BamRead
335
336     \since New in yat 0.10
337   */
338  struct BamLessEnd
339    : public std::binary_function<const BamRead&, const BamRead&, bool>
340  {
341    /**
342       \return true if lhs tid is less than rhs tid; or tid
343       are equal and lhs end is smaller than rhs end.
344
345       \see BamRead::tid() and BamRead::end()
346     */
347    bool operator()(const BamRead& lhs, const BamRead& rhs) const;
348  };
349
350}}}
351#endif
Note: See TracBrowser for help on using the repository browser.