// scl_后缀数组.cc
// Suffix Array (后缀数组) code templates
#include <algorithm>
#include <cstdio>
#include <cstring>
using namespace std;
// Capacity of every per-position array below (upper bound on the string length).
const int MAXLEN = 200100;
int n;// length of the string held in str
// id: suffix start positions in sorted order; rank: inverse of id (getHeight sets rank[id[i]] = i).
// NOTE(review): `rank` and `next` share their names with std::rank / std::next; together with
// `using namespace std` an unqualified use can be ambiguous on C++11+ toolchains.
int id[MAXLEN], rank[MAXLEN]; // suffix array, rank array
// head/next: per-bucket singly linked lists rebuilt in each suffixSort round;
// b/nb: current / newly computed bucket id of every suffix.
int head[MAXLEN], b[MAXLEN], nb[MAXLEN], next[MAXLEN];
int height[MAXLEN];
//height[i]=lcp(suffix(id[i-1]),suffix(id[i]))
char str[MAXLEN];// the input string
// Comparator for the initial sort in suffixSort: orders two start positions
// by the single character of str found at each of them.
bool cmp(const int &a, const int &b){
    const char left = str[a];
    const char right = str[b];
    return left < right;
}
//Suffix Sort 后缀排序,求出id,rank
void suffixSort(){
int i, j, k, h;
for (i = 0; i < n; i++) id[i] = i;
sort(id, id + n, cmp);
for (i = 0; i < n; i++)
if (i == 0 || str[id[i]] != str[id[i-1]]) b[id[i]] = i;
else b[id[i]] = b[id[i-1]];
for (h = 1; h < n; h <<= 1){
for (i = 0; i < n; i++) head[i] = next[i] = -1;
for (i = n - 1; i >= 0; i--) if (id[i]){
j = id[i] - h; if (j < 0) j += n;
next[j] = head[b[j]]; head[b[j]] = j;
}
j = n - h; next[j] = head[b[j]]; head[b[j]] = j;
for (i = k = 0; i < n; i++) if (head[i] >= 0)
for (j = head[i]; j >= 0; j = next[j]) id[k++] = j;
for (i = 0; i < n; i++) if (i > 0 && id[i] + h < n && id[i-1] + h < n
&& b[id[i]] == b[id[i-1]] && b[id[i] + h] == b[id[i-1]+h])
nb[id[i]] = nb[id[i-1]]; else nb[id[i]] = i;
for (i = 0; i < n; i++) b[i] = nb[i];
}
}
//求heighe数组,maxlcp为后缀中的最长公共前缀
void getHeight(){
int i, j, h;
height[0] = 0; maxlcp = 0;
for (i = 0; i < n; i++) rank[id[i]] = i;
for (h = 0, i = 0; i < n; i++)
if (rank[i] > 0){
j = id[rank[i]-1];
while (str[i+h] == str[j+h]) ++h;
height[rank[i]] = h;
maxlcp >?= h;
if (h > 0) --h;
}
}
//求两字符串最长公共子串
void LCS(){
gets(str);
int l1 = strlen(str);
str[l1] = '$';
gets(str + l1 + 1);
n = strlen(str);
suffixSort();
getHeight();
int ans = 0;
for (int i = 1; i < n; i++){
if (height[i] >= ans && id[i - 1] < l1 && id[i] > l1)
ans = height[i];
if (height[i] >= ans && id[i] < l1 && id[i - 1] > l1)
ans = height[i];
}
printf("%d\n", ans);
}
// Longest repeated substring (non-overlapping): feasibility test.
// Returns true if some run of consecutive suffixes in sorted order, all
// joined by height >= l, contains two suffixes whose lengths (and therefore
// start positions) differ by more than l.
// NOTE(review): two occurrences of length l are already disjoint when their
// starts differ by exactly l, so `>= l` looks like the right test for plain
// strings; the strict `>` is kept to preserve existing behavior - confirm.
bool check(int l){
    int tmin = n - id[0];
    int tmax = tmin;
    for (int i = 1; i < n; ++i) {
        const int len = n - id[i];        // length of the suffix at sorted slot i
        if (height[i] < l) {
            // The run is broken: start a new one at this suffix.
            tmin = tmax = len;
            continue;
        }
        // std::max / std::min replace the removed GCC operators `>?=` / `<?=`.
        tmax = std::max(tmax, len);
        tmin = std::min(tmin, len);
        if (tmax - tmin > l) return true;
    }
    return false;
}
// Binary search for the largest length accepted by check(), on the range
// [0, maxlcp]; the result is printed on its own line.
void calculate(){
    int lo = 0;              // treated as feasible without being tested
    int hi = maxlcp + 1;     // first length treated as infeasible
    while (hi - lo > 1) {
        const int mid = (lo + hi) / 2;
        if (check(mid)) {
            lo = mid;
        } else {
            hi = mid;
        }
    }
    printf("%d\n", lo);
}
//最长K重复子串(可重叠)
bool check(int l){
int i, t = (n - id[0] >= l);
for (i = 1; i < n; i++){
if (height[i] >= l && n - id[i] >= l) t++;
else t = (n - id[i] >= l);
if (t >= k) return true;
}
return false;
}
// Prints the largest length in [0, maxlcp] for which check() succeeds,
// found by bisection; length 0 is assumed to succeed.
void calculate(){
    int feasible = 0;
    for (int infeasible = maxlcp + 1; feasible + 1 < infeasible; ) {
        const int mid = (feasible + infeasible) / 2;
        if (check(mid)) feasible = mid;
        else infeasible = mid;
    }
    printf("%d\n", feasible);
}
// LCP query between the suffixes ranked x and y (see askLcp).
// r[k][i] = minimum of height[i .. i + 2^k - 1], with the window clipped at n.
int r[20][MAXLEN];
// Builds the table r from height[0..n-1], one level per power of two.
void rmq(){
    for (int i = 0; i < n; ++i) r[0][i] = height[i];
    for (int k = 1; (1 << k) <= n; ++k) {
        const int half = 1 << (k - 1);    // width of the two sub-windows
        for (int i = 0; i < n; ++i) {
            int best = r[k - 1][i];
            if (i + half < n && r[k - 1][i + half] < best) best = r[k - 1][i + half];
            r[k][i] = best;
        }
    }
}
int askLcp(int x, int y){
if (x > y) swap(x, y);
int k = 0;
while ((1 << k ) <= (y - x)) k++;
k--;
return min(r[k][x+1], r[k][y-(1<<k)+1]);
}
//mth Longest Common Substring
//poj2324
// Appends lines read from stdin to s (each followed by '\n') and advances n,
// stopping at end of input or at a line equal to endMarker.
// NOTE(review): gets() is unbounded and was removed from C++14. It is kept
// because the size of ss is not visible from here; switch to
// fgets(ss, <size of ss>, stdin) plus newline stripping once that is known.
static void appendLinesUntil(const char *endMarker){
    while (gets(ss) && strcmp(ss, endMarker)){
        strcpy(s + n, ss);
        n += strlen(ss);
        s[n++] = '\n';
        s[n] = '\0';
    }
}
// Reads one test case: the count m, then the two code bases, which are stored
// in s as "<TDP lines>~<JCN lines>"; l1 is the index of the '~' separator.
// Returns false when m is 0 or cannot be read (end of input).
bool init(){
    // Without the return-value check, end of input would silently reuse a stale m.
    if (scanf("%d", &m) != 1 || m == 0) return false;
    gets(ss); gets(ss);      // rest of the m line, then the line opening the first code base
    memset(s, 0, sizeof(s)); n = 0;
    appendLinesUntil("END TDP CODEBASE");
    l1 = n;
    s[n++] = '~';
    gets(ss);                // line opening the second code base
    appendLinesUntil("END JCN CODEBASE");
    return true;
}
// One reported match: start position in s and length.
// Sort order: longer matches first, ties broken by the smaller position.
struct Answer{
    int pos, len;
    bool operator < (const Answer &b) const{
        if (len > b.len) return true;
        if (len < b.len) return false;
        return pos < b.pos;
    }
} ans[MAXLEN];
// Number of entries of ans currently in use.
int num;
void calculate(){
int i, j, k, p, u, v;
memset(h, 0, sizeof(h));
for (i = 0; i < n; i++) if (i < n - 1 && id[i] < l1 && id[i+1] > l1){
k = MAXLEN;
for (j = i + 1; j < n; j++){
if (id[j] < l1) break;
if (height[j] < k) k = height[j]; h[j] = k;
}
i = j - 1;
}
for (i = n - 1; i > 0; i--) if (id[i] < l1 && id[i-1] > l1){
k = MAXLEN;
for (j = i - 1; j >= 0; j--){
if (id[j] < l1) break;
if (height[j+1] < k) k = height[j+1];
if (k > h[j]) h[j] = k;
}
i = j + 1;
}
num = 0;
for (i = 0; i < n; i++)
if (h[rank[i]] != 0 && (i == 0 || h[rank[i-1]] <= h[rank[i]])){
k = rank[i];
ans[num].pos = id[k]; ans[num].len = h[k];
num++;
}
sort(ans, ans + num);
if (cases) printf("\n");
printf("CASE %d\n", ++cases);
for (i = j = 0; i < num && j < m; i++, j++){
char ch = s[ans[i].pos + h[rank[ans[i].pos]]];
printf("INFRINGING SEGMENT %d LENGTH %d POSITION %d\n", j+1, ans[i].len, ans[i].pos - l1 - 1);
s[ans[i].pos + h[rank[ans[i].pos]]] = 0;
puts(s + ans[i].pos);
s[ans[i].pos + h[rank[ans[i].pos]]] = ch;
}
}
// Handles one test case: sort the suffixes, derive the height array,
// then compute and print the answer.
void solve(){
    suffixSort();   // fills id[]
    getHeight();    // fills rank[] and height[]
    calculate();    // prints the result
}
// Entry point: resets the case counter, then solves test cases for as long
// as init() reports that one was read.
int main(){
    for (cases = 0; init(); ) {
        solve();
    }
    return 0;
}
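/*
 * Keyboard shortcuts (left over from the web code viewer this file was
 * copied from; not part of the program):
 *   Copy code            Ctrl + C
 *   Search code          Ctrl + F
 *   Full-screen mode     F11
 *   Switch theme         Ctrl + Shift + D
 *   Show shortcuts       ?
 *   Increase font size   Ctrl + =
 *   Decrease font size   Ctrl + -
 */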