KMP 算法

leetcode-28. 找出字符串中第一个匹配项的下标

KMP 算法是一种用于字符串匹配的算法，其名字来源于三位发明者 Knuth、Morris 和 Pratt 的姓氏首字母。在匹配过程中，它利用已经匹配过的部分信息，尽可能地减少不必要的比较次数，从而提高匹配的效率。KMP 算法的核心是构建 next 数组：next[i] 保存的是模式串中位置 i 之前的子串（即 str[0..i-1]）的最长相等真前缀与真后缀的长度（真前缀、真后缀不包括该子串本身），并约定 next[0] = -1。例如模式串 "aabaaac" 的 next 数组为 [-1, 0, 1, 0, 1, 2, 2]。在匹配的时候，当出现字符匹配失败时，我们可以通过 next 数组找到模式串中下一个要比较的位置，文本串的指针无需回退，模式串也无需从头开始重新匹配。KMP 算法的时间复杂度为 O(m+n)，其中 m 为模式串的长度，n 为文本串的长度。在文本串中查找模式串的匹配位置时，KMP 算法相对高效，尤其是在模式串较长的情况下。

计算字符串的next数组

![2023-6-20 16:19:39.png](20230620161930.png)

/**
 * Finds the first occurrence of `str2` as a contiguous substring of `str1`
 * using the Knuth-Morris-Pratt algorithm.
 *
 * @param {string} str1 - Text to search in.
 * @param {string} str2 - Pattern to search for.
 * @returns {number} Index in `str1` where the first match starts, or -1 when
 *   there is no match or either argument is null/undefined. An empty pattern
 *   matches at index 0.
 */
function KMP(str1, str2) {
  // The null checks must run before any `.length` access; otherwise a
  // null/undefined argument throws instead of yielding -1.
  if (str1 == null || str2 == null || str2.length > str1.length) return -1;
  // An empty pattern trivially matches at the start of the text.
  if (str2.length === 0) return 0;

  const nextArr = getNextArr(str2);
  let i1 = 0; // cursor in the text; never moves backwards
  let i2 = 0; // cursor in the pattern
  while (i1 < str1.length && i2 < str2.length) {
    if (str1[i1] === str2[i2]) {
      i1++;
      i2++;
    } else if (i2 === 0) {
      // Mismatch on the first pattern character: nothing to reuse, advance the text.
      i1++;
    } else {
      // Reuse the already matched prefix: fall back inside the pattern only.
      i2 = nextArr[i2];
    }
  }
  // The loop ends when either cursor ran off its string; the pattern was
  // fully consumed exactly when a match was found.
  return i2 === str2.length ? i1 - i2 : -1;

  /**
   * Builds the KMP fallback table for a non-empty pattern.
   * `next[i]` is the length of the longest proper prefix of `str[0..i-1]`
   * that is also a suffix of it; `next[0]` is -1 by convention.
   *
   * @param {string} str - Non-empty pattern.
   * @returns {number[]} Fallback table with one entry per pattern character.
   */
  function getNextArr(str) {
    if (str.length === 1) {
      return [-1];
    }
    const next = Array.from({ length: str.length }, () => 0);
    next[0] = -1;
    let n = 2; // position whose table entry is being computed
    let cn = 0; // candidate prefix length, compared against str[n - 1]
    while (n < next.length) {
      if (str[n - 1] === str[cn]) {
        // The prefix of length cn extends by one character.
        next[n++] = ++cn;
      } else if (cn > 0) {
        // Try the next shorter border of the current prefix.
        cn = next[cn];
      } else {
        // No border can be extended; entry is 0.
        next[n++] = 0;
      }
    }
    return next;
  }
}
// Demo: print the index at which "but" first occurs in "sadbutsad".
const demoIndex = KMP("sadbutsad", "but");
console.log(demoIndex);

JavaScript实现KMP算法

KMP 算法