Source :
Code :
// O(n) | |
#include<bits/stdc++.h> | |
using namespace std; | |
#define ll long long | |
const int N = 100005; | |
struct state { | |
int len, link; | |
map<char, int> Next; | |
}; | |
state st[3 * N]; | |
int sz, last; | |
void init() | |
{ | |
st[0].len = 0; | |
st[0].link = -1; | |
sz = 0; | |
last = 0; | |
} | |
void SA_Extend(char ch) | |
{ | |
int curr = ++sz; | |
st[curr].len = st[last].len + 1; | |
int p = last; | |
while(p!=-1 && !st[p].Next.count(ch)) { | |
st[p].Next[ch] = curr; | |
p = st[p].link; | |
} | |
if(p==-1) { | |
st[curr].link = 0; | |
} | |
else { | |
int q = st[p].Next[ch]; | |
if(st[p].len + 1==st[q].len) { | |
st[curr].link = q; | |
} | |
else { | |
int clone = ++sz; | |
st[clone].len = st[p].len + 1; | |
st[clone].Next = st[q].Next; | |
st[clone].link = st[q].link; | |
while(p!=-1 && st[p].Next[ch]==q) { | |
st[p].Next[ch] = clone; | |
p = st[p].link; | |
} | |
st[q].link = st[curr].link = clone; | |
} | |
} | |
last = curr; | |
} | |
int main() | |
{ | |
init(); | |
string s; | |
cin>>s; | |
for(int i = 0; i < s.size(); i++) { | |
SA_Extend(s[i]); | |
} | |
return 0; | |
} |
Application :
1. Find pattern P in text T in O(|T| + |P|).
:: Build the automaton then loop through P and find if there exists transitions.
2. Find number of distinct sub-strings in S.
:: These can be calculated recursively by calculating for each node the number of different paths starting from that node. The number of different paths starting from a node is the sum of the corresponding numbers of its direct successors, plus 1 corresponding to the path that does not leave the node.
int dfs(int u) | |
{ | |
int &res = dp[u]; | |
if(res) return res; | |
res = 1; | |
for(auto ch : st[u].Next) { | |
int v = ch.second; | |
res += dfs(v); | |
} | |
return res; | |
} |
3. Total length of all distinct sub-strings in S.
:: From previous code we know value of dp[i]. Now, we will do same thing here ans[u] += ans[v] + dp[v].
4. The lexicography Kth sub-string in S. (only distinct sub-strings / all possible sub-strings)
:: (distinct) Pick the transition with smallest character then if the path length becomes K, we will return the character and take all visited characters from that path.
void Klex(int ver) | |
{ | |
for(char ch = 'a'; ch <= 'z'; ch++) { | |
if(st[ver].Next[ch]) { | |
path++; | |
if(path==k) { | |
ans.push_back(ch); | |
return; | |
} | |
Klex(st[ver].Next[ch]); | |
if(path==k) { | |
ans.push_back(ch); | |
return; | |
} | |
} | |
} | |
} |
:: (Not distinct) First we will find all occurrence of each sub-strings. Then we will Find Kth smallest sub-string. See this.
[Let cnt[v] is the For all the states v number of occurrence of the sub-strings associated with v.
Initially if state v was not cloned then cnt[v] = 1 , otherwise it’s 0.
We go through each state in descending order of length. and for each
state we do cnt[link[v]]+=cnt[v]; After that , if u is the state
corresponding to pattern P , then cnt[u] is the ans.]
5. (...)
No comments:
Post a Comment