source: trunk/yat/omic/BamRead.h @ 2883

Last change on this file since 2883 was 2883, checked in by Peter, 10 years ago

New classes to wrap around functionality provided by samtools. refs #729

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 5.7 KB
Line 
1#ifndef theplu_yat_omic_bam_read
2#define theplu_yat_omic_bam_read
3
4// $Id: BamRead.h 2883 2012-12-03 12:48:51Z peter $
5//
6// Copyright (C) 2012 Peter Johansson
7//
8// This program is free software; you can redistribute it and/or modify
9// it under the terms of the GNU General Public License as published by
10// the Free Software Foundation; either version 3 of the License, or
11// (at your option) any later version.
12//
13// This program is distributed in the hope that it will be useful, but
14// WITHOUT ANY WARRANTY; without even the implied warranty of
15// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16// General Public License for more details.
17//
18// You should have received a copy of the GNU General Public License
19// along with this program. If not, see <http://www.gnu.org/licenses/>.
20
21#include <sam.h>
22#include <bam.h>
23
24#include <functional>
25#include <string>
26#include <vector>
27
28// backport #defines from samtools 0.1.19
29#ifndef BAM_CIGAR_STR
30#define BAM_CIGAR_STR "MIDNSHP=XB"
31#define BAM_CIGAR_TYPE 0x3C1A7
32
33#define bam_cigar_op(c) ((c)&BAM_CIGAR_MASK)
34#define bam_cigar_oplen(c) ((c)>>BAM_CIGAR_SHIFT)
35#define bam_cigar_opchr(c) (BAM_CIGAR_STR[bam_cigar_op(c)])
36#define bam_cigar_gen(l, o) ((l)<<BAM_CIGAR_SHIFT|(o))
37#define bam_cigar_type(o) (BAM_CIGAR_TYPE>>((o)<<1)&3)
38#endif
39
40/*
41  CIGAR
420 M 1 1 match
431 I 0 1 insert
442 D 1 0 deletion
453 N 1 0 spliced (rna)
464 S 0 1 soft clipped
475 H 0 0 hard clipped
486 P 0 0 padded
497 = 1 1 equal
508 X 1 1 mismatch
519 B 0 0
52
533rd column: consumes reference sequence
544th column: consumes query sequence
55 */
56
57namespace theplu {
58namespace yat {
59namespace omic {
60
61  /**
62     \brief Class holding a bam query
63
64     This is a wrapper around bam1_t and most of its information is
65     held in the core struct. A BamRead is typically created from a
66     InBamFile.
67
68     \see samtools
69   */
70  class BamRead
71  {
72  public:
73    /**
74       \brief default constructor
75
76       Constructed object contains no data and most operations are not
77       defined
78     */
79    BamRead(void);
80
81    /**
82       \brief Copy constructor
83     */
84    BamRead(const BamRead& other);
85
86    /**
87       \brief Destructor
88     */
89    virtual ~BamRead(void);
90
91    /**
92       \brief assignment operator
93     */
94    const BamRead& operator=(const BamRead& rhs);
95
96    /**
97       \return pointer to auxiliary data
98     */
99    const uint8_t* aux(void) const;
100
101    /**
102       \brief access core data struct
103
104       \see samtools C api documentaion
105     */
106    const bam1_core_t& core(void) const;
107
108    /**
109       \brief access core data struct
110
111       \see samtools C api documentaion
112     */
113    bam1_core_t& core(void);
114
115    /**
116       \brief access CIGAR array
117
118       In each element the lower 4 bits gives a CIGAR operation and
119       the upper 28 bits keep the length.
120     */
121    const uint32_t* cigar(void) const;
122
123    /**
124       \return \a i th element of CIGAR array
125     */
126    uint32_t cigar(size_t i) const;
127
128    /**
129       \return \a i th CIGAR operation
130     */
131    uint32_t cigar_op(size_t i) const;
132
133    /**
134       \return length of \a i th CIGAR element
135     */
136    uint32_t cigar_oplen(size_t i) const;
137
138    /**
139       Translate CIGAR array to a string such as '72M3S'
140     */
141    std::string cigar_str(void) const;
142
143    /**
144       \brief set CIGAR
145
146       \param c new cigar
147    */
148    void cigar(const std::vector<uint32_t>& c);
149
150    /**
151       \return Quality of base \a i
152     */
153    uint8_t qual(size_t i) const;
154
155    /**
156       \return query name
157
158       Length of array is described by core().l_qname
159     */
160    const char* name(void) const;
161
162    /**
163       \brief chromosome ID
164     */
165    int32_t tid(void) const;
166
167    /**
168       \brief 0-based laftmost coordinate
169     */
170    int32_t pos(void) const;
171
172    /**
173       \brief rightmost coordinate
174
175       Coordinate is 0-based, i.e., the end is one passed the last
176       matching position.
177
178       \see bam_calend
179     */
180    int32_t end(void) const;
181
182    /**
183       \brief Chromosome ID for mate
184     */
185    int32_t mtid(void) const;
186
187    /**
188       \brief leftmost position for mate
189     */
190    int32_t mpos(void) const;
191
192    /**
193       Each character in returned sequence is either A, C, G, T, or N.
194
195       \return sequence
196     */
197    std::string sequence(void) const;
198
199    /**
200       4-bit integer describing base \a index
201
202       \see bam_nt16_rev_table
203     */
204    uint8_t sequence(size_t index) const;
205
206    /**
207       \see core().l_qseq
208     */
209    uint32_t sequence_length(void) const;
210
211    /**
212       \brief bitwise flag
213
214       \see Preprocessor defines BAM_F*
215     */
216    uint16_t flag(void) const;
217  private:
218    bam1_t* bam_;
219
220    friend class InBamFile;
221    friend class OutBamFile;
222    friend class BamReadIterator;
223  };
224
225  /**
226     \return \c true if read is soft clipped, either left_soft_clipped
227     or right_soft_clipped.
228   */
229  bool soft_clipped(const BamRead& bam);
230
231  /**
232     If read is soft clipped on left side, return how many bases are
233     clipped, otherwise return 0.
234   */
235  uint32_t left_soft_clipped(const BamRead& bam);
236
237  /**
238     If read is soft clipped on right side, return how many bases are
239     clipped, otherwise return 0.
240   */
241  uint32_t right_soft_clipped(const BamRead& bam);
242
243  /**
244     return \c true if query names are equal
245
246     \see BamRead::name()
247   */
248  bool same_query_name(const BamRead& lhs, const BamRead& rhs);
249
250  /**
251     Functor to compare two reads with respect to their leftmost
252     coordinate.
253
254     \see BamRead
255   */
256  struct BamLessPos
257    : public std::binary_function<const BamRead&, const BamRead&, bool>
258  {
259    /**
260       \return true if lhs tid is less than rhs tid; or if chromomes
261       are equal and lhs pos is smaller than rhs pos.
262
263       \see Bam::tid() Bam::pos()
264     */
265    bool operator()(const BamRead& lhs, const BamRead& rhs) const;
266  };
267
268
269  /**
270     Functor to compare two reads with respect to their rightmost
271     coordinate.
272
273     \see BamRead
274   */
275  struct BamLessEnd
276    : public std::binary_function<const BamRead&, const BamRead&, bool>
277  {
278    /**
279       \return true if lhs tid is less than rhs tid; or if chromomes
280       are equal and lhs end is smaller than rhs end.
281
282       \see Bam::tid() Bam::pos()
283     */
284    bool operator()(const BamRead& lhs, const BamRead& rhs) const;
285  };
286
287}}}
288#endif
Note: See TracBrowser for help on using the repository browser.