Skip to content

Commit f317332

Browse files
author
Takanori MAEHARA
committed
Robin-Scott's powerset construction for NFA-to-DFA conversion
1 parent 0ca8b1e commit f317332

1 file changed

Lines changed: 230 additions & 0 deletions

File tree

string/NFAtoDFA.cc

Lines changed: 230 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,230 @@
1+
//
2+
// Regex --> NFA --> DFA
3+
//
4+
// Description:
5+
// We are given a regex, eg, "a(b|c)d*e".
6+
// It construct NFA by recursive descent parsing,
7+
// and then construct DFA by powerset construction.
8+
//
9+
// Algorithm:
10+
// recursive descent parsing.
11+
// Robin-Scott's powerset construction.
12+
//
13+
// Complexity:
14+
// recursive descent parsiong: O(n)
15+
// powerset construction: O(2^n) in worst case.
16+
//
17+
// Verified:
18+
// SPOJ 10354: Count Strings
19+
//
20+
21+
#include <iostream>
22+
#include <queue>
23+
#include <vector>
24+
#include <unordered_map>
25+
#include <unordered_set>
26+
#include <map>
27+
#include <cstring>
28+
#include <set>
29+
#include <cstdio>
30+
#include <bitset>
31+
32+
#include <algorithm>
33+
#include <functional>
34+
35+
using namespace std;
36+
37+
#define fst first
38+
#define snd second
39+
#define all(c) ((c).begin()), ((c).end())
40+
#define TEST(s) if (!s) { cout << __LINE__ << " " << #s << endl; exit(-1); }
41+
42+
43+
44+
const int NFA_STATE = 128, DFA_STATE = 1000, ALPHA = 256;
45+
typedef bitset<NFA_STATE> subset;
46+
struct NFA {
47+
static int size;
48+
static vector<int> next[NFA_STATE][ALPHA];
49+
static int new_node() {
50+
for (int a = 0; a < ALPHA; ++a)
51+
next[size][a].clear();
52+
return size++;
53+
}
54+
static NFA symbol(char a) {
55+
int begin = new_node(), end = new_node();
56+
next[begin][a].push_back(end);
57+
return {begin, end};
58+
}
59+
static NFA unite(NFA x, NFA y) {
60+
int begin = new_node(), end = new_node();
61+
next[begin][0].push_back(x.begin);
62+
next[begin][0].push_back(y.begin);
63+
next[x.end][0].push_back(end);
64+
next[y.end][0].push_back(end);
65+
return {begin, end};
66+
}
67+
static NFA concat(NFA x, NFA y) {
68+
next[x.end][0].push_back(y.begin);
69+
return {x.begin, y.end};
70+
}
71+
static NFA star(NFA x) {
72+
int begin = new_node(), end = new_node();
73+
next[begin][0].push_back(x.begin);
74+
next[begin][0].push_back(end);
75+
next[x.end][0].push_back(x.begin);
76+
next[x.end][0].push_back(end);
77+
return {begin, end};
78+
}
79+
int begin, end;
80+
81+
void closure(int u, subset &x) {
82+
x[u] = 1;
83+
for (int v: next[u][0])
84+
if (!x[v])
85+
closure(v, x);
86+
}
87+
bool run(const char *s) {
88+
subset x;
89+
closure(begin, x);
90+
for (; *s; ++s) {
91+
subset y;
92+
for (int u = 0; u < size; ++u)
93+
if (x[u])
94+
for (int v: next[u][*s])
95+
closure(v, y);
96+
x = y;
97+
}
98+
return x[end];
99+
}
100+
};
101+
int NFA::size;
102+
vector<int> NFA::next[NFA_STATE][ALPHA];
103+
104+
NFA parse(const char *s) {
105+
function<NFA ()> regex, factor, term;
106+
regex = [&]() {
107+
NFA a = factor();
108+
if (*s == '|') { ++s; a = NFA::unite(a, regex()); }
109+
return a;
110+
};
111+
factor = [&]() {
112+
NFA a = term();
113+
if (*s == '*') { a = NFA::star(a); ++s; }
114+
if (*s && *s != '|' && *s != ')') a = NFA::concat(a, factor());
115+
return a;
116+
};
117+
term = [&]() {
118+
if (*s == '(') { ++s; NFA a = regex(); ++s; return a; }
119+
else { NFA a = NFA::symbol(*s); ++s; return a; }
120+
};
121+
return regex();
122+
}
123+
124+
125+
struct DFA {
126+
static int size, next[DFA_STATE][ALPHA];
127+
static int new_node() {
128+
memset(next[size], -1, sizeof(next[size]));
129+
return size++;
130+
}
131+
int begin, end[DFA_STATE];
132+
133+
bool run(const char *s) {
134+
int u = begin;
135+
for (; *s; ++s) {
136+
u = next[u][*s];
137+
if (u < 0) return false;
138+
}
139+
return end[u];
140+
}
141+
};
142+
int DFA::size = 0, DFA::next[DFA_STATE][ALPHA];
143+
144+
145+
DFA convert(NFA x) {
146+
DFA z;
147+
unordered_map<subset, int> states;
148+
vector<subset> process(1);
149+
x.closure(x.begin, process[0]);
150+
states[process[0]] = z.begin = z.new_node();
151+
while (!process.empty()) {
152+
auto S = process.back();
153+
process.pop_back();
154+
for (int a = 1; a < ALPHA; ++a) {
155+
subset T;
156+
for (int u = 0; u < x.size; ++u)
157+
if (S[u])
158+
for (int v: x.next[u][a])
159+
x.closure(v, T);
160+
if (T == 0) continue;
161+
if (!states.count(T)) {
162+
states[T] = z.new_node();
163+
z.end[states[T]] = T[x.end];
164+
process.push_back(T);
165+
}
166+
DFA::next[states[S]][a] = states[T];
167+
}
168+
}
169+
return z;
170+
}
171+
172+
173+
typedef long long ll;
174+
void mul(int n, ll A[], ll B[], ll M) {
175+
ll C[n*n];
176+
for (int i = 0; i < n*n; ++i) C[i] = 0;
177+
for (int i = 0; i < n; ++i)
178+
for (int k = 0; k < n; ++k)
179+
for (int j = 0; j < n; ++j)
180+
C[n*i+j] = (C[n*i+j] + A[n*i+k] * B[n*k+j]) % M;
181+
for (int i = 0; i < n*n; ++i) A[i] = C[i];
182+
}
183+
void mulvec(int n, ll A[], ll x[], ll M) {
184+
ll y[n];
185+
for (int i = 0; i < n; ++i) {
186+
y[i] = 0;
187+
for (int j = 0; j < n; ++j)
188+
y[i] = (y[i] + A[n*i+j] * x[j]) % M;
189+
}
190+
for (int i = 0; i < n; ++i) x[i] = y[i];
191+
}
192+
void powmul(int n, ll A[], ll k, ll x[], ll M) {
193+
for (; k > 0; k >>= 1) {
194+
if (k & 1) mulvec(n, A, x, M);
195+
mul(n, A, A, M);
196+
}
197+
}
198+
199+
const ll M = 1000000007;
200+
int main() {
201+
int ncase;
202+
scanf("%d", &ncase);
203+
for (int icase = 0; icase < ncase; ++icase) {
204+
DFA::size = NFA::size = 0;
205+
char s[1024];
206+
int l;
207+
scanf("%s %d", s, &l);
208+
DFA x = convert(parse(s));
209+
210+
ll A[x.size*x.size];
211+
for (int i = 0; i < x.size*x.size; ++i) A[i] = 0;
212+
213+
for (int i = 0; i < x.size; ++i)
214+
for (int a = 1; a <= ALPHA; ++a)
215+
if (x.next[i][a] >= 0)
216+
A[x.size*x.next[i][a]+i] = 1;
217+
218+
ll a[x.size];
219+
for (int i = 0; i < x.size; ++i) a[i] = 0;
220+
a[0] = 1;
221+
powmul(x.size, A, l, a, M);
222+
ll ans = 0;
223+
for (int i = 0; i < x.size; ++i) if (x.end[i]) {
224+
ans += a[i];
225+
if (ans >= M) ans -= M;
226+
}
227+
printf("%lld\n", ans);
228+
}
229+
}
230+

0 commit comments

Comments
 (0)