Skip to content

Commit 4bd15b5

Browse files
authored
edit distance solution added
1 parent 89de114 commit 4bd15b5

File tree

2 files changed

+428
-0
lines changed

2 files changed

+428
-0
lines changed

edit_distance_backtracking.cpp

Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
#include <bits/stdc++.h>
2+
using namespace std;
3+
4+
5+
class Alignment {
6+
7+
string sx, sy, sa, sb;
8+
9+
vector<vector<int> > A;
10+
int sxL, syL;
11+
int cost = 0;
12+
13+
//insertion cost
14+
inline int insC() {
15+
return 4;
16+
}
17+
18+
//deletion cost
19+
inline int delC() {
20+
return 4;
21+
}
22+
23+
//modification cost
24+
inline int modC(char fr, char to) {
25+
if(fr == to)
26+
return 0;
27+
return 3;
28+
}
29+
30+
public:
31+
32+
//constructor
33+
Alignment(string s1, string s2) : sx(s1), sy(s2),
34+
sxL(s1.length()), syL(s2.length())
35+
{
36+
//allocating size to array needed
37+
A.resize(s2.length()+4, vector<int>(s1.length()+4, 0));
38+
}
39+
40+
//recurrence
41+
int rec(int i, int j) {
42+
return min(modC(sx[i-1], sy[j-1]) + A[i-1][j-1], min(delC() + A[i-1][j], insC() + A[i][j-1]));
43+
//i-1, j-1 in sx, sy b/c of 0-indexing in string
44+
}
45+
46+
//building array of cost
47+
void form_array() {
48+
49+
//initialising the base needed in dp
50+
//building up the first column, consider taking first string sx and second as null
51+
for(int i = 0; i <= sxL; i++) {
52+
A[i][0] = i*(delC());
53+
}
54+
55+
//building up the first row, consider taking first string null and second as string sy
56+
for(int i = 0; i <= syL; i++) {
57+
A[0][i] = i*(insC());
58+
}
59+
60+
//building up whole array from previously known values
61+
for(int i = 1; i <= sxL; i++) {
62+
for(int j = 1; j <= syL; j++) {
63+
64+
A[i][j] = rec(i, j);
65+
}
66+
}
67+
68+
cost = A[sxL][syL];
69+
70+
}
71+
72+
//finding the alignment
73+
void trace_back(int i, int j) {
74+
75+
while(true) {
76+
if(i == 0 || j == 0) {
77+
break;
78+
}
79+
//A[i][j] will have one of the three above values from which it is derived
80+
//so comapring from each one
81+
if(i >= 0 && j >= 0 && rec(i, j) == modC(sx[i-1], sy[j-1]) + A[i-1][j-1]) {
82+
sa.push_back(sx[i-1]);
83+
sb.push_back(sy[j-1]);
84+
i--, j--;
85+
}
86+
87+
else if((i-1) >= 0 && j >= 0 && rec(i, j) == delC() + A[i-1][j]) {
88+
sa.push_back(sx[i-1]); //0-indexing of string
89+
sb.push_back('-');
90+
i -= 1;
91+
}
92+
93+
else if(i >= 0 && (j-1) >= 0){
94+
sa.push_back('-'); //0-indexing of string
95+
sb.push_back(sy[j-1]);
96+
j -= 1;
97+
}
98+
}
99+
100+
if(i != 0) {
101+
while(i) {
102+
sa.push_back(sx[i-1]);
103+
sb.push_back('-');
104+
i--;
105+
}
106+
}
107+
108+
else {
109+
while(j) {
110+
sa.push_back('-');
111+
sb.push_back(sy[j-1]);
112+
j--;
113+
}
114+
}
115+
}
116+
117+
//returning the alignment
118+
pair<string, string> alignst() {
119+
//reversing the alignments because we have formed the
120+
//alignments from backward(see: trace_back, i, j started from m, n respectively)
121+
reverse(sa.begin(), sa.end());
122+
reverse(sb.begin(), sb.end());
123+
return make_pair(sa, sb);
124+
}
125+
126+
127+
//returning the cost
128+
int kyc() { return cost;}
129+
};
130+
131+
int main() {
132+
133+
134+
//converting sx to sy
135+
string sx, sy;
136+
sx = "GCCCTAGCG";
137+
sy = "GCGCAATG";
138+
139+
//standard input stream
140+
//cin >> sx >> sy;
141+
142+
pair<string, string> st;
143+
144+
Alignment dyn(sx, sy);
145+
dyn.form_array();
146+
dyn.trace_back(sx.length(), sy.length());
147+
st = dyn.alignst();
148+
//Alignments can be different for same strings but cost will be same
149+
150+
cout << "Alignments of the strings\n";
151+
cout << st.first << "\n";
152+
cout << st.second << "\n";
153+
cout << "Cost associated = ";
154+
cout << dyn.kyc() << "\n";
155+
156+
/* Alignments
157+
M - modification, D - deletion, I - insertion for converting string1 to string2
158+
159+
M M MD
160+
string1 - GCCCTAGCG
161+
string2 - GCGCAAT-G
162+
163+
*/
164+
165+
}

0 commit comments

Comments
 (0)