Source :
Code :
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Aho-Corasick algorithm | |
#include<bits/stdc++.h> | |
using namespace std; | |
const int N = 200005; | |
const int K = 26; | |
int node; | |
int Next[K][N], fail[N], num[N]; | |
bool endMark[N], vis[N]; | |
void init() | |
{ | |
node = 0; | |
for(int i = 0; i < K; i++) { | |
for(int j = 0; j < N; j++) { | |
Next[i][j] = 0; | |
} | |
} | |
for(int i = 0; i < N; i++) { | |
endMark[i] = false; | |
vis[i] = false; | |
fail[i] = 0; | |
num[i] = 0; | |
} | |
} | |
void Insert(string s) | |
{ | |
int v = 0; | |
for(int i = 0; i < s.size(); i++) { | |
int id = s[i] - 'a'; | |
if(!vis[Next[id][v]]) { | |
Next[id][v] = ++node; | |
vis[node] = true; | |
} | |
v = Next[id][v]; | |
} | |
endMark[v] = true; | |
} | |
void build_fail() | |
{ | |
queue<int>q; | |
for(int i = 0; i < K; i++) { | |
if(vis[Next[i][0]]) { | |
q.push(Next[i][0]); // root's child | |
} | |
} | |
while(!q.empty()) { | |
int u = q.front(); | |
q.pop(); | |
for(int i = 0; i < K; i++) { | |
int v = Next[i][u]; | |
if(!vis[v]) continue; | |
q.push(v); | |
int p = fail[u]; | |
while(p && vis[Next[i][p]]==0) { | |
p = fail[p]; | |
} | |
fail[v] = Next[i][p]; | |
} | |
} | |
} | |
void Search(string s) | |
{ | |
int v = 0; | |
for(int i = 0; i < s.size(); i++) { | |
int id = s[i] - 'a'; | |
while(v && !vis[Next[id][v]]) { | |
v = fail[v]; | |
} | |
v = Next[id][v]; | |
int tmp = v; | |
while(tmp) { | |
num[tmp]++; | |
tmp = fail[tmp]; | |
} | |
} | |
} | |
int main() | |
{ | |
init(); | |
string s; | |
cin>>s; | |
int n; | |
cin>>n; | |
while(n--) { | |
string a; | |
cin>>a; | |
Insert(a); | |
} | |
build_fail(); | |
Search(s); | |
for(int i = 0; i <= node; i++) { | |
if(endMark[i]) { | |
cout<<num[i]<<endl; // number of occurance of a word | |
} | |
} | |
return 0; | |
} |
1. Find all sub-stings from a given set in a text & how many times they occurs in the text. (Implemented above)
2. Find the lexicographical smallest string of a given length that does not match any given strings.
3. Find the shortest string contain all given strings.
4. Find lexicographical smallest string of length L containing K strings.
No comments:
Post a Comment