source: trunk/yat/omic/BamRead.h @ 2985

Last change on this file since 2985 was 2985, checked in by Peter, 10 years ago

refs #746. added function to modify one element in sequence

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 6.9 KB
Line 
1#ifndef theplu_yat_omic_bam_read
2#define theplu_yat_omic_bam_read
3
4// $Id: BamRead.h 2985 2013-02-18 07:36:14Z peter $
5//
6// Copyright (C) 2012, 2013 Peter Johansson
7//
8// This program is free software; you can redistribute it and/or modify
9// it under the terms of the GNU General Public License as published by
10// the Free Software Foundation; either version 3 of the License, or
11// (at your option) any later version.
12//
13// This program is distributed in the hope that it will be useful, but
14// WITHOUT ANY WARRANTY; without even the implied warranty of
15// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16// General Public License for more details.
17//
18// You should have received a copy of the GNU General Public License
19// along with this program. If not, see <http://www.gnu.org/licenses/>.
20
21#include "config_bam.h"
22
23#include YAT_BAM_HEADER
24#include YAT_SAM_HEADER
25
26#include <functional>
27#include <string>
28#include <vector>
29
30// backport #defines from samtools 0.1.19
31#ifndef BAM_CIGAR_STR
32#define BAM_CIGAR_STR "MIDNSHP=XB"
33// lookup table used in bam_cigar_type
34#define BAM_CIGAR_TYPE 0x3C1A7
35
36/// index of operation in range [0, 9], see below.
37#define bam_cigar_op(c) ((c)&BAM_CIGAR_MASK)
38// length of the operation
39#define bam_cigar_oplen(c) ((c)>>BAM_CIGAR_SHIFT)
40// char describing the operation, see column 2 in table below
41#define bam_cigar_opchr(c) (BAM_CIGAR_STR[bam_cigar_op(c)])
42// Create a cigar element with length l and operation o
43#define bam_cigar_gen(l, o) ((l)<<BAM_CIGAR_SHIFT|(o))
44// Returns a two-bits number describing the type of operation. The
45// first bit, bam_cigar_type & 1, (4th column in table below) is 1 iff
46// operation consumes query sequence. The second bit, bam_cigar_type &
47// 2, (3rd column below) is 1 iff operation consumes reference
48// sequence.
49#define bam_cigar_type(o) (BAM_CIGAR_TYPE>>((o)<<1)&3)
50#endif
51
52/*
53  CIGAR
540 M 1 1 match
551 I 0 1 insert
562 D 1 0 deletion
573 N 1 0 spliced (rna)
584 S 0 1 soft clipped
595 H 0 0 hard clipped
606 P 0 0 padded
617 = 1 1 equal
628 X 1 1 mismatch
639 B 0 0
64
653rd column: consumes reference sequence
664th column: consumes query sequence
67 */
68
69namespace theplu {
70namespace yat {
71namespace omic {
72
73  /**
74     \brief Class holding a bam query
75
76     This is a wrapper around bam1_t and most of its information is
77     held in the core struct. A BamRead is typically created from a
78     InBamFile.
79
80     \see samtools
81
82     \since New in yat 0.10
83   */
84  class BamRead
85  {
86  public:
87    /**
88       \brief default constructor
89
90       Constructed object contains no data and most operations are not
91       defined
92     */
93    BamRead(void);
94
95    /**
96       \brief Copy constructor
97     */
98    BamRead(const BamRead& other);
99
100    /**
101       \brief Destructor
102     */
103    virtual ~BamRead(void);
104
105    /**
106       \brief assignment operator
107     */
108    const BamRead& operator=(const BamRead& rhs);
109
110    /**
111       \return pointer to auxiliary data
112     */
113    const uint8_t* aux(void) const;
114
115    /**
116       \brief access core data struct
117
118       \see samtools C api documentaion
119     */
120    const bam1_core_t& core(void) const;
121
122    /**
123       \brief access core data struct
124
125       \see samtools C api documentaion
126     */
127    bam1_core_t& core(void);
128
129    /**
130       \brief access CIGAR array
131
132       In each element the lower 4 bits gives a CIGAR operation and
133       the upper 28 bits keep the length.
134     */
135    const uint32_t* cigar(void) const;
136
137    /**
138       \return \a i th element of CIGAR array
139     */
140    uint32_t cigar(size_t i) const;
141
142    /**
143       \return \a i th CIGAR operation
144     */
145    uint32_t cigar_op(size_t i) const;
146
147    /**
148       \return length of \a i th CIGAR element
149     */
150    uint32_t cigar_oplen(size_t i) const;
151
152    /**
153       Translate CIGAR array to a string such as '72M3S'
154     */
155    std::string cigar_str(void) const;
156
157    /**
158       \brief set CIGAR
159
160       \param c new cigar
161    */
162    void cigar(const std::vector<uint32_t>& c);
163
164    /**
165       \return Quality of base \a i
166     */
167    uint8_t qual(size_t i) const;
168
169    /**
170       \return query name
171
172       Length of array is described by core().l_qname
173     */
174    const char* name(void) const;
175
176    /**
177       \brief chromosome ID
178     */
179    int32_t tid(void) const;
180
181    /**
182       \brief 0-based laftmost coordinate
183     */
184    int32_t pos(void) const;
185
186    /**
187       \brief rightmost coordinate
188
189       Coordinate is 0-based, i.e., the end is one passed the last
190       matching position.
191
192       \see bam_calend
193     */
194    int32_t end(void) const;
195
196    /**
197       \brief Chromosome ID for mate
198     */
199    int32_t mtid(void) const;
200
201    /**
202       \brief leftmost position for mate
203     */
204    int32_t mpos(void) const;
205
206    /**
207       Each character in returned sequence is either A, C, G, T, or N.
208
209       \return sequence
210     */
211    std::string sequence(void) const;
212
213    /**
214       4-bit integer describing base \a index
215
216       \see bam_nt16_rev_table
217     */
218    uint8_t sequence(size_t index) const;
219
220    /**
221       \brief modify a base in sequence
222
223       Set i-th base in sequence to \a x, where seq is a 4-bit integer.
224
225       \see bam_nt16_table
226     */
227    void sequence(size_t i, uint8_t x);
228
229    /**
230       \see core().l_qseq
231     */
232    uint32_t sequence_length(void) const;
233
234    /**
235       \brief bitwise flag
236
237       \see Preprocessor defines BAM_F*
238     */
239    uint16_t flag(void) const;
240
241    /**
242       Exchanging this read with \a other.
243     */
244    void swap(BamRead& other);
245
246  private:
247    bam1_t* bam_;
248
249    friend class InBamFile;
250    friend class OutBamFile;
251    friend class BamReadIterator;
252  };
253
254  /**
255     Swap specialization for BamRead that is faster than generic
256     std::swap as it just swap a pointers.
257
258     \since New in yat 0.10
259   */
260  void swap(BamRead& lhs, BamRead& rhs);
261
262  /**
263     \return \c true if read is soft clipped, either left_soft_clipped
264     or right_soft_clipped.
265
266     \since New in yat 0.10
267   */
268  bool soft_clipped(const BamRead& bam);
269
270  /**
271     If read is soft clipped on left side, return how many bases are
272     clipped, otherwise return 0.
273
274     \since New in yat 0.10
275   */
276  uint32_t left_soft_clipped(const BamRead& bam);
277
278  /**
279     If read is soft clipped on right side, return how many bases are
280     clipped, otherwise return 0.
281
282     \since New in yat 0.10
283   */
284  uint32_t right_soft_clipped(const BamRead& bam);
285
286  /**
287     return \c true if query names are equal
288
289     \see BamRead::name()
290
291     \since New in yat 0.10
292   */
293  bool same_query_name(const BamRead& lhs, const BamRead& rhs);
294
295  /**
296     Functor to compare two reads with respect to their leftmost
297     coordinate.
298
299     \see BamRead
300
301     \since New in yat 0.10
302   */
303  struct BamLessPos
304    : public std::binary_function<const BamRead&, const BamRead&, bool>
305  {
306    /**
307       \return true if lhs tid is less than rhs tid; or tid
308       are equal and lhs pos is smaller than rhs pos.
309
310       \see BamRead::tid() and BamRead::pos()
311     */
312    bool operator()(const BamRead& lhs, const BamRead& rhs) const;
313  };
314
315
316  /**
317     Functor to compare two reads with respect to their rightmost
318     coordinate.
319
320     \see BamRead
321
322     \since New in yat 0.10
323   */
324  struct BamLessEnd
325    : public std::binary_function<const BamRead&, const BamRead&, bool>
326  {
327    /**
328       \return true if lhs tid is less than rhs tid; or tid
329       are equal and lhs end is smaller than rhs end.
330
331       \see BamRead::tid() and BamRead::end()
332     */
333    bool operator()(const BamRead& lhs, const BamRead& rhs) const;
334  };
335
336}}}
337#endif
Note: See TracBrowser for help on using the repository browser.