source: trunk/yat/utility/Aligner.h @ 3194

Last change on this file since 3194 was 3194, checked in by Peter, 9 years ago

add missing file. see r3173

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 4.8 KB
Line 
1#ifndef _theplu_yat_utility_aligner_
2#define _theplu_yat_utility_aligner_
3
4// $Id: Aligner.h 3194 2014-04-22 09:24:56Z peter $
5
6/*
7  Copyright (C) 2012 Peter Johansson
8  Copyright (C) 2013 Jari Häkkinen, Peter Johansson
9
10  This file is part of the yat library, http://dev.thep.lu.se/yat
11
12  The yat library is free software; you can redistribute it and/or
13  modify it under the terms of the GNU General Public License as
14  published by the Free Software Foundation; either version 3 of the
15  License, or (at your option) any later version.
16
17  The yat library is distributed in the hope that it will be useful,
18  but WITHOUT ANY WARRANTY; without even the implied warranty of
19  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20  General Public License for more details.
21
22  You should have received a copy of the GNU General Public License
23  along with yat. If not, see <http://www.gnu.org/licenses/>.
24*/
25
26#include <boost/cstdint.hpp>
27
28#include <cstddef>
29#include <iosfwd>
30#include <vector>
31
32namespace theplu {
33namespace yat {
34namespace utility {
35
36  class Matrix;
37
38  /**
39     \brief Aligning two sequences
40
41     General class aligning two sequences described in a dot-matrix,
42     D, in which \f$ D(i,j) \f$ describes how similar element \c i in
43     the first sequence is with element \c j in the second sequence. A
44     path through this matrix corresponds to an alignment of the two
45     sequences, in which a diagonal step correspoinds to a match
46     between corresponding sequence elements, and a vertical or
47     horizontal step correspond correspond to inserting a gap into one
48     of the sequences.
49
50     \since New in yat 0.9
51   */
52  class Aligner
53  {
54  public:
55    /**
56       Enum used to describe alignment.
57
58       \see alignment
59     */
60    enum direction { none, right, down, diagonal };
61
62    /**
63       \brief Constructor
64
65       Same as Aligner(gap, open_gap, gap, open_gap)
66     */
67    Aligner(double gap=0, double open_gap=0);
68
69    /**
70       \brief constructor
71
72       \param vertical_gap Penalty for extending a vertical gap. A
73       vertical gap means alignment consumes an element in second
74       element and not, in the first element, i.e., an element has
75       been inserted to the second element or equivalently an element
76       has been deleted in first sequence.
77
78       \param open_vertical_gap Penalty for open a vertical gap. Total
79       cost for a vertical gap is open_vertical_gap + N *
80       vertical_gap.
81
82       \param horizon_gap Penalty for extending a horizontal gap
83
84       \param open_horizon_gap Penalty for open a horizontal
85       gap. Total penalty for a horizontal gap is open_horizon_gap + N
86       * horizon_gap.
87     */
88    Aligner(double vertical_gap, double open_vertical_gap,
89            double horizon_gap, double open_horizon_gap);
90
91    /**
92       Initialize a score matrix with \f$ S_{k,0} = -
93       \textrm{open\_vertical\_gap} - k * \textrm{vertical\_gap} \f$
94       and \f$ S_{0,k} = - \textrm{open\_horizon\_gap} - k *
95       \textrm{horizon\_gap} \f$ and align using operator().
96
97       \return most lower right element of score matrix
98     */
99    double needleman_wunsch(const Matrix& d);
100
101    /**
102       Initialize a score matrix with all elements equal to zero,
103       align using operator().
104
105       \return max element in score matrix.
106     */
107    double smith_waterman(const Matrix& d);
108
109    /**
110       \brief calculate score matrix based on the \a dot matrix
111
112       The algorithm calculates a maximum \a score matrix as
113
114       \f$ S_{i,j} = \textrm{max} \{ S_{ij}; S_{i,j-1} - F(\textrm{gap}_V);
115       S_{i-1,j-1} + D_{i-1,j-1}; S_{i-1,j} - F(\textrm{gap}_H) \} \f$
116
117       where \f$ F(\textrm{gap}) \f$ is gap if there was already a
118       gap, and gap + open_gap otherwise.
119
120     */
121    void operator()(const Matrix& dot, Matrix& score);
122
123    /**
124       If, for example, alignment(i,j) returns Aligner::diagonal,
125       implies that score(i,j) was maximized as score(i-1,j-1) +
126       dot(i-1, j-1), in other words, element i-1 in first sequence
127       was aligned with element j-1 in second sequence.
128    */
129    const direction& alignment(size_t i, size_t j) const;
130
131    /**
132       \since new in yat 0.12
133     */
134    class Cigar
135    {
136    public:
137      void clear(void);
138      void push_back(uint8_t op, uint32_t len=1);
139      void push_front(uint8_t op, uint32_t len=1);
140      void pop_back(void);
141      void pop_front(void);
142      uint8_t op(size_t) const;
143      char opchr(size_t) const;
144      uint32_t oplen(size_t) const;
145      uint32_t operator[](size_t) const;
146      size_t size(void) const;
147    private:
148      // using compiler generated copy
149      // Cigar(const Cigar& other);
150      // Cigar& operator=(const Cigar&);
151    };
152
153    /**
154       \since new in yat 0.12
155     */
156    const Cigar cigar(size_t i, size_t j) const;
157
158  private:
159    direction& directions(size_t i, size_t j);
160
161    size_t columns_;
162    double vertical_gap_;
163    double open_vertical_gap_;
164    double horizon_gap_;
165    double open_horizon_gap_;
166    std::vector<direction> alignment_;
167  };
168
169
170  /**
171   */
172  std::ostream& operator<<(std::ostream& os, const Aligner::Cigar& cigar);
173
174}}} // of namespace utility, yat, and theplu
175
176#endif
Note: See TracBrowser for help on using the repository browser.