source: trunk/yat/utility/Aligner.h @ 3207

Last change on this file since 3207 was 3207, checked in by Peter, 9 years ago

make constructor utility::Aligner(double, double) explicit

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 6.5 KB
Line 
1#ifndef _theplu_yat_utility_aligner_
2#define _theplu_yat_utility_aligner_
3
4// $Id: Aligner.h 3207 2014-05-04 02:55:40Z peter $
5
6/*
7  Copyright (C) 2012 Peter Johansson
8  Copyright (C) 2013 Jari Häkkinen, Peter Johansson
9  Copyright (C) 2014 Peter Johansson
10
11  This file is part of the yat library, http://dev.thep.lu.se/yat
12
13  The yat library is free software; you can redistribute it and/or
14  modify it under the terms of the GNU General Public License as
15  published by the Free Software Foundation; either version 3 of the
16  License, or (at your option) any later version.
17
18  The yat library is distributed in the hope that it will be useful,
19  but WITHOUT ANY WARRANTY; without even the implied warranty of
20  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21  General Public License for more details.
22
23  You should have received a copy of the GNU General Public License
24  along with yat. If not, see <http://www.gnu.org/licenses/>.
25*/
26
27#include <boost/cstdint.hpp>
28
29#include <deque>
30#include <cstddef>
31#include <iosfwd>
32#include <vector>
33
34namespace theplu {
35namespace yat {
36namespace utility {
37
38  class Matrix;
39
40  /**
41     \brief Aligning two sequences
42
43     General class aligning two sequences described in a dot-matrix,
44     D, in which \f$ D(i,j) \f$ describes how similar element \c i in
45     the first sequence is with element \c j in the second sequence. A
46     path through this matrix corresponds to an alignment of the two
47     sequences, in which a diagonal step correspoinds to a match
48     between corresponding sequence elements, and a vertical or
49     horizontal step correspond correspond to inserting a gap into one
50     of the sequences.
51
52     \since New in yat 0.9
53   */
54  class Aligner
55  {
56  public:
57    /**
58       Enum used to describe alignment.
59
60       \see alignment
61     */
62    enum direction { none, right, down, diagonal };
63
64    /**
65       \brief Constructor
66
67       Same as Aligner(gap, open_gap, gap, open_gap)
68     */
69    explicit Aligner(double gap=0, double open_gap=0);
70
71    /**
72       \brief constructor
73
74       \param vertical_gap Penalty for extending a vertical gap. A
75       vertical gap means alignment consumes an element in second
76       element and not, in the first element, i.e., an element has
77       been inserted to the second element or equivalently an element
78       has been deleted in first sequence.
79
80       \param open_vertical_gap Penalty for open a vertical gap. Total
81       cost for a vertical gap is open_vertical_gap + N *
82       vertical_gap.
83
84       \param horizon_gap Penalty for extending a horizontal gap
85
86       \param open_horizon_gap Penalty for open a horizontal
87       gap. Total penalty for a horizontal gap is open_horizon_gap + N
88       * horizon_gap.
89     */
90    Aligner(double vertical_gap, double open_vertical_gap,
91            double horizon_gap, double open_horizon_gap);
92
93    /**
94       Initialize a score matrix with \f$ S_{k,0} = -
95       \textrm{open\_vertical\_gap} - k * \textrm{vertical\_gap} \f$
96       and \f$ S_{0,k} = - \textrm{open\_horizon\_gap} - k *
97       \textrm{horizon\_gap} \f$ and align using operator().
98
99       \return most lower right element of score matrix
100     */
101    double needleman_wunsch(const Matrix& d);
102
103    /**
104       Initialize a score matrix with all elements equal to zero,
105       align using operator().
106
107       \return max element in score matrix.
108
109       \see SmithWaterman
110     */
111    double smith_waterman(const Matrix& d);
112
113    /**
114       \brief calculate score matrix based on the \a dot matrix
115
116       The algorithm calculates a maximum \a score matrix as
117
118       \f$ S_{i,j} = \textrm{max} \{ S_{ij}; S_{i,j-1} - F(\textrm{gap}_V);
119       S_{i-1,j-1} + D_{i-1,j-1}; S_{i-1,j} - F(\textrm{gap}_H) \} \f$
120
121       where \f$ F(\textrm{gap}) \f$ is gap if there was already a
122       gap, and gap + open_gap otherwise.
123
124       To get the CIGAR to behave as expected, the each row in \a dot
125       corresponds to an element in reference sequence and each row in
126       \a dot corresponds to an element in query reference. If that is
127       transposed, insertions and deletions are swapped in CIGAR.
128     */
129    void operator()(const Matrix& dot, Matrix& score);
130
131    /**
132       If, for example, alignment(i,j) returns Aligner::diagonal,
133       implies that score(i,j) was maximized as score(i-1,j-1) +
134       dot(i-1, j-1), in other words, element i-1 in first sequence
135       was aligned with element j-1 in second sequence.
136    */
137    const direction& alignment(size_t i, size_t j) const;
138
139    /**
140       \brief Compact Idiosyncratic Gapped Alignment Report
141
142       A CIGAR is a representation of an alignment, an array of
143       operation/length where the operation and length is packed into
144       a uint32_t. So a CIGAR of e.g. '3M1D5M' means alignment is
145       three matches, one deletion, and five matches, which is
146       represented by an array of length 3.
147
148       \since new in yat 0.12
149     */
150    class Cigar
151    {
152    public:
153      /**
154         Erases all elements.
155       */
156      void clear(void);
157
158      /**
159         See defines BAM_CIGAR_*
160
161         \return operation part of element \a i
162       */
163      uint8_t op(size_t i) const;
164
165      /**
166         Translate the op(i) to a descriptive character of one of the
167         following "MIDNSHP=XB"
168
169         \return character describing operation
170         i
171       */
172      char opchr(size_t i) const;
173
174      /**
175         \return length of element i
176       */
177      uint32_t oplen(size_t i) const;
178
179      /**
180         Erase last \a len operation.
181       */
182      void pop_back(uint32_t len=1);
183
184      /**
185         Erase first \a len operation.
186       */
187      void pop_front(uint32_t len=1);
188
189      /**
190         Add an operation \a op of length \a len at back of cigar. If
191         last operation equals \a op, length of that element is
192         changed and the size() is not modified.
193       */
194      void push_back(uint8_t op, uint32_t len=1);
195
196      /**
197         Add an operation \a op of length \a len at front of cigar.
198       */
199      void push_front(uint8_t op, uint32_t len=1);
200
201      /**
202         \return cigar element \a i
203       */
204      uint32_t operator[](size_t i) const;
205
206      /**
207         \return number of elements in cigar
208       */
209      size_t size(void) const;
210    private:
211      std::deque<uint32_t> cigar_;
212
213      // using compiler generated copy
214      // Cigar(const Cigar& other);
215      // Cigar& operator=(const Cigar&);
216    };
217
218    /**
219       \since new in yat 0.12
220     */
221    const Cigar cigar(size_t i, size_t j) const;
222
223  private:
224    direction& directions(size_t i, size_t j);
225
226    size_t columns_;
227    double vertical_gap_;
228    double open_vertical_gap_;
229    double horizon_gap_;
230    double open_horizon_gap_;
231    std::vector<direction> alignment_;
232  };
233
234
235  /**
236     Output e.g. '5S44M5I98M' if the cigar has four elements. Each
237     element is output as the length followed by the character
238     descibing the operation (see opchr).
239
240     \relates Aligner::Cigar
241   */
242  std::ostream& operator<<(std::ostream& os, const Aligner::Cigar& cigar);
243
244}}} // of namespace utility, yat, and theplu
245
246#endif
Note: See TracBrowser for help on using the repository browser.