neal2018 / blog

some blogs and some code collections
Other
3 stars 0 forks source link

KMP #5

Open neal2018 opened 3 years ago

neal2018 commented 3 years ago
def kmp(txt: str, pat: str) -> None:
    # print all the match index
    if len(txt) < len(pat):
        return kmp(pat, txt)

    n = len(txt)
    m = len(pat)

    # find next list of `pat`
    # nex[i]: longest common prefix & suffix for pat[0]...pat[i]
    # the common prefix & suffix cannot be the whole string
    # e.g. pat = "ABABC", nex = [0, 0, 1, 2, 0]
    # e.g. pat = "ABABAC", nex = [0, 0, 1, 2, 3, 0]
    nex = [0]*m
    i = 1 # the current index to calculate
    # nex[0] = 0 to prevent loop
    j = 0 # the length of current common prefix & suffix
    while i < m:
        if pat[i] == pat[j]:
            j += 1
            nex[i] = j
            i += 1
        elif j != 0:
            # match fail, jump to next possible
            j = nex[j-1]
        else:
            i += 1

    # kmp
    i = 0 # index for txt
    j = 0 # index for pat
    while i < n:
        if txt[i] == pat[j]:
            j += 1
            i += 1
            if j == m:
                # found one match
                print(i-j)
                j = nex[j-1]
        elif j != 0:
            j = nex[j-1]
        else:
            i += 1

if __name__ == "__main__":
    txt = "ABABDABACDABABCABAB"
    pat = "ABACD"
    kmp(txt, pat)
neal2018 commented 3 years ago
#include <bits/stdc++.h>
using namespace std;

void kmp(string_view txt, string_view pat) {
  if (txt.size() < pat.size()) kmp(pat, txt);
  int n = int(txt.size()), m = int(pat.size());
  vector<int> nex(m);  // nex[i]: longest common prefix & suffix for pat[0..i]
  for (int i = 1, j = 0; i < m; i++) {
    while (j && pat[j] != pat[i]) j = nex[j - 1];
    if (pat[j] == pat[i]) j++;
    nex[i] = j;
  }
  for (int i = 0, j = 0; i < n; i++) {
    while (j && pat[j] != txt[i]) j = nex[j - 1];
    if (pat[j] == txt[i]) j++;
    if (j == m) {
      cout << i - m + 1 << endl;  // start index of pat
      j = nex[j - 1];
    }
  }
}

int main() {
  string s, p;
  cin >> s >> p;
  kmp(s, p);
  return 0;
}
vector<int> get_next(string_view pat) {
  int m = int(pat.size());
  vector<int> nex(m);  // nex[i]: LC prefix & suffix for pat[0..i]
  for (int i = 1, j = 0; i < m; i++) {
    while (j && pat[j] != pat[i]) j = nex[j - 1];
    if (pat[j] == pat[i]) j++;
    nex[i] = j;
  }
  return nex;
}

KMP optimized

auto get_next(string_view pat) {
  int m = int(pat.length());
  vector<int> nex(m), fail(m);
  for (int i = 1, j = 0; i < m; i++) {
    while (j && pat[j] != pat[i]) j = nex[j - 1];
    if (pat[j] == pat[i]) {
      j++;
    }
    nex[i] = (i < m - 1 && pat[i + 1] == pat[j]) ? fail[max(j - 1, 0)] : j;
  }
  return pair{nex, fail};
}
neal2018 commented 3 years ago

AC Automaton

struct ACAM {
  const int SZ = 26;
  vector<vector<int>> e = {vector<int>(SZ)};
  vector<int> fail = {0}, end = {0}, fast = {0};  // closest end

  int insert(string& s) {
    int p = 0;
    for (auto c : s) {
      c -= 'a';
      if (!e[p][c]) {
        e.emplace_back(SZ), fail.emplace_back();
        end.emplace_back(), fast.emplace_back();
        e[p][c] = int(e.size()) - 1;
      }
      p = e[p][c];
    }
    end[p]++;
    return p;
  }

  void build() {
    queue<int> q;
    for (int i = 0; i < SZ; i++) {
      if (e[0][i]) q.push(e[0][i]);
    }
    while (!q.empty()) {
      int p = q.front();
      q.pop();
      fast[p] = end[p] ? p : fast[fail[p]];
      for (int i = 0; i < SZ; i++) {
        if (e[p][i]) {
          fail[e[p][i]] = e[fail[p]][i];
          end[e[p][i]] += end[fail[e[p][i]]];
          q.push(e[p][i]);
        } else {
          e[p][i] = e[fail[p]][i];
        }
      }
    }
  }
};
neal2018 commented 2 years ago

Z function, lcp of s and s[i:]

#include <bits/stdc++.h>
using namespace std;
#define ll long long

vector<int> z_function(string_view s) {
  int n = (int)s.size();
  vector<int> z(n);
  for (int i = 1, l = 0, r = 0; i < n; ++i) {
    if (i <= r) z[i] = min(r - i + 1, z[i - l]);
    while (i + z[i] < n && s[z[i]] == s[i + z[i]]) ++z[i];
    if (i + z[i] - 1 > r) l = i, r = i + z[i] - 1;
  }
  return z;
}