[SPOJ]LCS2

和LCS那题很像,但是现在有十个串了……

还是考虑构建一个串的SAM。然后考虑SAM上每个点,剩余若干个串都可能会覆盖一下这个点,我们要取最短的那个。

然后我们就直接把其他几个串都在SAM上跑一遍(方法类似于LCS)就行了,并且注意每个点对自己的祖先也是有贡献的。

代码:

#include <cstdio>
#include <cstring>
#include <cstdlib>
#include <cctype>
#include <algorithm>
#include <utility>
#include <queue>
#include <unordered_set>
const int maxn = 100005;
const int INF = 0x3f3f3f3f;
const int maxno = maxn << 1;
struct Node {
  Node *fa; int len; int ans[12];
  Node *ch[26];
  Node *lc, *rb;
};
Node pool[maxno];
Node *rt, *last;
Node *cnt;
void init_pool() {
  rt = last = cnt = pool;
  rt -> fa = NULL; rt -> len = 0;
  rt -> ans[0] = 0;
  // memset(rt -> ans, 0, sizeof(rt -> ans));
}
Node *alloc_node(int len = 0, Node *fa = NULL) {
  Node *ret = ++ cnt;
  ret -> len = len; ret -> fa = fa;
  // memset(ret -> ans, 0, sizeof(ret -> ans));
  ret -> ans[0] = len;
  return ret;
}

int idx(char c) {
  return c - 'a';
}
void insert(char cc) {
  int c = idx(cc);
  Node *np = alloc_node(last -> len + 1);
  Node *p = last;
  while(p != NULL && p -> ch[c] == NULL) {
    p -> ch[c] = np;
    p = p -> fa;
  }
  if(!p) {
    np -> fa = rt;
  } else {
    Node *q = p -> ch[c];
    if(q -> len == p -> len + 1) {
      np -> fa = q;
    } else {
      Node *nq = alloc_node(p -> len + 1, q -> fa);
      memcpy(nq -> ch, q -> ch, sizeof(q -> ch));
      q -> fa = np -> fa = nq;
      while(p != NULL && p -> ch[c] == q) {
        p -> ch[c] = nq;
        p = p -> fa;
      }
    }
  }
  last = np;
}
void dfs(int id, Node *u) {
  for(Node *c = u -> lc; c; c = c -> rb) {
    dfs(id, c);
    u -> ans[id] = std::max(u -> ans[id], c -> ans[id]);
  }
}
void run(int id, char *S) {
  int n = strlen(S);
  Node *u = rt;
  int len = 0;
  for(int i = 0; i < n; i ++) {
    int c = idx(S[i]);
    if(u -> ch[c]) {
      len ++; u = u -> ch[c];
    } else {
      Node *p = u;
      while(p != NULL && p -> ch[c] == NULL) p = p -> fa;
      if(p == NULL) {
        len = 0; u = rt;
      } else {
        len = p -> len + 1; u = p -> ch[c];
      }
    }
    u -> ans[id] = std::max(u -> ans[id], len);
  }
  dfs(id, rt);
}
int tot = 0;
int dfs_2(Node *u) {
  int ans = *std::min_element(u -> ans, u -> ans + tot + 1);
  for(Node *c = u -> lc; c; c = c -> rb) {
    ans = std::max(ans, dfs_2(c));
  }
  return ans;
}

int main() {
  static char buf[maxn];
  scanf("%s", buf);
  init_pool();
  int n = strlen(buf);
  for(int i = 0; i < n; i ++) {
    insert(buf[i]);
  }
  for(int i = 1; i <= (cnt - pool); i ++) {
    Node *u = pool + i;
    Node *f = u -> fa;
    u -> rb = f -> lc;
    f -> lc = u;
  }
  while(scanf("%s", buf) == 1) {
#ifdef LOCAL
    if(buf[0] == '-') break;
#endif
    run(++ tot, buf);
    // memset(buf, 0, sizeof(buf));
  }
  printf("%d\n", dfs_2(rt));
  return 0;
}

[SPOJ]LCS

震惊!SAM与AC自动机间的神秘关系,竟然是为了……

考虑到是子串嘛,我们建立\(A\)的后缀自动机,考虑把\(B\)放在上面进行匹配。然后考虑当前节点下面再加一个字符,如果有这个字符的转移那么直接走过去即可。如果没有呢?

观察到SAM上的父亲其实就是一个被儿子包含了的子串,所以这一步走不下去,可以考虑去找父亲解决。所以找到最低的有该转移的祖先,转移过去即可。

代码:

#include <cstdio>
#include <cstring>
#include <cstdlib>
#include <cctype>
#include <algorithm>
#include <utility>
#include <queue>
const int maxn = 250005;
const int maxsiz = 30000005;
char buf[maxsiz];
char *cur = buf;
void *alloc(size_t size) {
  if(cur - buf + size > maxsiz) {
    return malloc(size);
  } else {
    void *ret = (void*)cur;
    cur += size;
    return ret;
  }
}

int idx(char c) {
  return c - 'a';
}
struct Node {
  Node *fa;
  int len;
  Node *ch[26];
};
Node *alloc_node(int len = 0, Node *fa = NULL) {
  Node *ret = (Node*)alloc(sizeof(Node));
  ret -> len = len; ret -> fa = fa;
  memset(ret -> ch, 0, sizeof(ret -> ch));
  return ret;
}
Node *rt, *last;
void init_sam() {
  rt = alloc_node();
  last = rt;
}
void extend(char c) {
  int i = idx(c);
  Node *np = alloc_node(last -> len + 1);
  Node *p = last;
  while(p != NULL && p -> ch[i] == NULL) {
    p -> ch[i] = np;
    p = p -> fa;
  }
  if(p == NULL) {
    np -> fa = rt;
  } else {
    Node *q = p -> ch[i];
    if(q -> len == p -> len + 1) {
      np -> fa = q;
    } else {
      Node *nq = alloc_node(p -> len + 1, q -> fa);
      memcpy(nq -> ch, q -> ch, sizeof(q -> ch));
      np -> fa = q -> fa = nq;
      while(p != NULL && p -> ch[i] == q) {
        p -> ch[i] = nq;
        p = p -> fa;
      }
    }
  }
  last = np;
}

char A[maxn], B[maxn];
int search() {
  int n = strlen(B);
  Node *u = rt;
  int ans = 0, len = 0;
  for(int i = 0; i < n; i ++) {
    int c = idx(B[i]);
    if(u -> ch[c]) {
      u = u -> ch[c];
      len ++;
    } else {
      while(u != NULL && u -> ch[c] == NULL) u = u -> fa;
      if(u == NULL) {
        u = rt; len = 0;
      } else {
        len = u -> len + 1;
        u = u -> ch[c];
      }
    }
    ans = std::max(ans, len);
  }
  return ans;
}

int main() {
  scanf("%s%s", A, B);
  int n = strlen(A);
  init_sam();
  for(int i = 0; i < n; i ++) {
    extend(A[i]);
  }
  printf("%d\n", search());
  return 0;
}

[SPOJ]NSUBSTR

第一次做了一道有一定难度的SAM题……

考虑每个SAM上的结点\(x\),他的子串表示范围一定是一个区间\([Min(x), Max(x)]\),然后他的\(Right(x)\)的大小就是该点所代表的每个子串的出现次数。

那么考虑怎么搞出这个\(|Right(x)|\)。我们把每个“实点”(就是在SAM每一次插入后成为\(last\)的结点)的点权钦定为1,其他搞成0。然后每个点的\(|Right(x)|\)就是\(x\)在Right树上的子树的点权和。

然后接下来看上去可以直接做……但是区间chkmax这种黑暗操作,是不是要涉及毒瘤数据结构?

答案是否定的。任何短的子串都是某一个长子串的子串,所以说我们只需要在\(Max(x)\)那个地方chkmax,然后搞个后缀最大值即可。

代码:

#include <cstdio>
#include <cstring>
#include <cstdlib>
#include <cctype>
#include <algorithm>
#include <utility>
#include <queue>
const int maxn = 250005;
const int sigma_siz = 26;
struct Node {
  int maxl, rs;
  Node *fa;
  Node *ch[sigma_siz];
};
Node pool[maxn << 1]; Node *tot = pool;
Node *rt = tot, *last = tot;
void extend(int c) {
  Node *np = ++ tot;
  np -> maxl = last -> maxl + 1; np -> rs = 1;
  Node *p = last;
  while(p != NULL && p -> ch[c] == NULL) {
    p -> ch[c] = np;
    p = p -> fa;
  }
  if(p == NULL) {
    np -> fa = rt;
  } else {
    Node *q = p -> ch[c];
    if(q -> maxl == p -> maxl + 1) {
      np -> fa = q;
    } else { 
      Node *nq = ++ tot;
      nq -> maxl = p -> maxl + 1; nq -> fa = q -> fa;
      memcpy(nq -> ch, q -> ch, sizeof(q -> ch));
      q -> fa = np -> fa = nq;
      while(p != NULL && p -> ch[c] == q) {
        p -> ch[c] = nq;
        p = p -> fa;
      }
    }
  }
  last = np;
}

int first[maxn << 1];
int next[maxn << 2], to[maxn << 2];
int gcnt = 0;
void add_edge(int u, int v) {
  gcnt ++;
  next[gcnt] = first[u];
  first[u] = gcnt;
  to[gcnt] = v;
}
int len[maxn << 1], siz[maxn << 1];
int F[maxn];
void dfs(int x) {
  for(int i = first[x]; i; i = next[i]) {
    int v = to[i];
    dfs(v);
    siz[x] += siz[v];
  }
  F[len[x]] = std::max(F[len[x]], siz[x]);
}

int main() {
  static char S[maxn];
  scanf("%s", S);
  int n = strlen(S);
  for(int i = 0; i < n; i ++) {
    extend(S[i] - 'a');
  }
  for(int i = 0; i <= (tot - pool); i ++) {
    Node *p = pool + i;
    len[i] = p -> maxl;
    siz[i] = p -> rs;
    if(p -> fa) {
      int fa = (p -> fa) - pool;
      add_edge(fa, i);
    }
  }
  dfs(0);
  for(int i = n - 1; i >= 0; i --) {
    F[i] = std::max(F[i], F[i + 1]);
  }
  for(int i = 1; i <= n; i ++) {
    printf("%d\n", F[i]);
  }
  return 0;
}

[BZOJ 2588]Count on a tree

泥萌感觉我还能说什么……

这题就是DFS序套主席树,顺便带上倍增LCA,但有两大坑点:

  1. 存放数据不一定要用long long,但输入必须要。
  2. 输出数据蛋疼,最后一行行末没回车。

代码:

继续阅读