1 | // $Id: alignment.cc 2817 2012-08-29 00:38:17Z peter $ |
---|
2 | |
---|
3 | /* |
---|
4 | Copyright (C) 2005 Jari Häkkinen, Peter Johansson |
---|
5 | Copyright (C) 2006 Jari Häkkinen |
---|
6 | Copyright (C) 2007, 2008 Jari Häkkinen, Peter Johansson |
---|
7 | Copyright (C) 2012 Peter Johansson |
---|
8 | |
---|
9 | This file is part of the yat library, http://dev.thep.lu.se/yat |
---|
10 | |
---|
11 | The yat library is free software; you can redistribute it and/or |
---|
12 | modify it under the terms of the GNU General Public License as |
---|
13 | published by the Free Software Foundation; either version 3 of the |
---|
14 | License, or (at your option) any later version. |
---|
15 | |
---|
16 | The yat library is distributed in the hope that it will be useful, |
---|
17 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
18 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
---|
19 | General Public License for more details. |
---|
20 | |
---|
21 | You should have received a copy of the GNU General Public License |
---|
22 | along with yat. If not, see <http://www.gnu.org/licenses/>. |
---|
23 | */ |
---|
24 | |
---|
25 | #include "Suite.h" |
---|
26 | |
---|
27 | #include "yat/utility/Aligner.h" |
---|
28 | #include "yat/utility/Alignment.h" |
---|
29 | |
---|
30 | #include "yat/utility/Matrix.h" |
---|
31 | |
---|
32 | #include <gsl/gsl_cdf.h> |
---|
33 | |
---|
34 | #include <cassert> |
---|
35 | #include <cmath> |
---|
36 | #include <fstream> |
---|
37 | #include <iostream> |
---|
38 | #include <sstream> |
---|
39 | #include <string> |
---|
40 | #include <vector> |
---|
41 | |
---|
42 | using namespace theplu::yat; |
---|
43 | |
---|
44 | void read_vector(std::vector<double>& v,std::string& str) |
---|
45 | { |
---|
46 | std::istringstream s(str); |
---|
47 | while (!s.eof()) { |
---|
48 | double d; |
---|
49 | s >> d; |
---|
50 | if (!s.fail()) |
---|
51 | v.push_back(d); |
---|
52 | } |
---|
53 | } |
---|
54 | |
---|
55 | std::istream& operator>>(std::istream& s, |
---|
56 | std::vector<std::vector<double> >& m) |
---|
57 | { |
---|
58 | while (!s.eof()) { |
---|
59 | std::string row; |
---|
60 | char ch; |
---|
61 | while (((ch=s.get())!='\n') && s.good()) |
---|
62 | row+=ch; |
---|
63 | if (row.size() || s.good()) { // if stream good, read an empty peak set |
---|
64 | std::vector<double> v; |
---|
65 | read_vector(v,row); |
---|
66 | m.push_back(v); |
---|
67 | } |
---|
68 | } |
---|
69 | return s; |
---|
70 | } |
---|
71 | |
---|
72 | double match(const double x, const double y, const double s) |
---|
73 | { |
---|
74 | return 2*gsl_cdf_gaussian_Q(std::abs(x-y),sqrt(2)*s); |
---|
75 | } |
---|
76 | |
---|
77 | |
---|
78 | double score(const std::vector<double>& l1, |
---|
79 | const std::vector<double>& l2, |
---|
80 | const double sigma, |
---|
81 | theplu::yat::utility::Matrix& dot_matrix, |
---|
82 | std::vector<std::pair<size_t,size_t> >& path) |
---|
83 | { |
---|
84 | dot_matrix.resize(l1.size(),l2.size()); |
---|
85 | for (size_t i=0; i<l1.size(); i++) |
---|
86 | for (size_t j=0; j<l2.size(); j++) { |
---|
87 | assert(i<dot_matrix.rows()); |
---|
88 | assert(j<dot_matrix.columns()); |
---|
89 | dot_matrix(i,j)=match(l1[i],l2[j],sigma); |
---|
90 | } |
---|
91 | return theplu::yat::utility::NeedlemanWunsch(dot_matrix,path,0); |
---|
92 | } |
---|
93 | |
---|
94 | |
---|
95 | int main(int argc, char* argv[]) |
---|
96 | { |
---|
97 | test::Suite suite(argc, argv); |
---|
98 | |
---|
99 | std::ifstream s(test::filename("data/isoform.peaks").c_str()); |
---|
100 | std::vector<std::vector<double> > peaksets; |
---|
101 | s >> peaksets; |
---|
102 | |
---|
103 | for (size_t i=0; i<peaksets.size()-1; i++) |
---|
104 | for (size_t j=i+1; j<peaksets.size(); j++) { |
---|
105 | utility::Matrix dot_m; |
---|
106 | std::vector<std::pair<size_t,size_t> > path; |
---|
107 | score(peaksets[i], peaksets[j], 1.0, dot_m, path); |
---|
108 | } |
---|
109 | |
---|
110 | std::string a("AGGUUGUCCGUGGUGAGUUCGCA"); |
---|
111 | std::string b("GAGGUUGUCCGUGGUGAGUUCG"); |
---|
112 | utility::Matrix m(a.size(), b.size()); |
---|
113 | for (size_t j=0; j<a.size(); ++j) |
---|
114 | for (size_t k=0; k<b.size(); ++k) |
---|
115 | m(j,k) = (a[j]==b[k] ? 1 : -1000); |
---|
116 | double score = utility::Aligner(100, 100).smith_waterman(m); |
---|
117 | if (score!=21) |
---|
118 | suite.add(false); |
---|
119 | |
---|
120 | // testing ssearch |
---|
121 | if (utility::ssearch("Hello", "Hll", 0.0, 1.0)!=2){ |
---|
122 | suite.err() << "aligning 'Hello' and 'Hll' gives score " |
---|
123 | << utility::ssearch("Hello", "Hll", 0.0, 1.0) |
---|
124 | << " expected " << 2 << std::endl; |
---|
125 | suite.add(false); |
---|
126 | } |
---|
127 | if (utility::ssearch("Hello", "Peter said you can't say 'allo", 1, 1)!=3) |
---|
128 | suite.add(false); |
---|
129 | |
---|
130 | return suite.return_value(); |
---|
131 | } |
---|