Knuth-Morris-Pratt Algorithm in C++
KMP Algorithm for Pattern Searching - GeeksforGeeks | Knuth-Morris-Pratt Algorithm
Given a text txt[0..n-1] and a pattern pat[0..m-1], write a function search(char pat[], char txt[]) that prints all occurrences of pat[] in txt[]. You may assume that n > m.
The worst case complexity of the Naive algorithm is O(m(n-m+1)). The time complexity of KMP algorithm is O(n) in the worst case.
findPrefix(pattern, m, prefArray)
构造数组 lps:lps[i] 表示子串 pattern[0..i] 的最长真前缀(同时也是该子串的后缀)的长度,即以 pattern[i] 结尾、长度为 lps[i] 的子串恰好等于 pattern 的前 lps[i] 个字符。
Examples of lps[] construction:
For the pattern “AAAA”,
lps[] is [0, 1, 2, 3]
For the pattern “ABCDE”,
lps[] is [0, 0, 0, 0, 0]
For the pattern “AABAACAABAA”,
lps[] is [0, 1, 0, 1, 2, 0, 1, 2, 3, 4, 5]
For the pattern “AAACAAAAAC”,
lps[] is [0, 1, 2, 0, 1, 2, 3, 3, 3, 4]
For the pattern “AAABAAA”,
lps[] is [0, 1, 2, 0, 1, 2, 3]
求lps的伪代码:
findPrefix(pattern, m, prefArray)
Input − The pattern, the length of pattern and an array to store prefix location
Output − The array to store where prefixes are located
Begin
length := 0
prefArray[0] := 0
for each character index ‘i’ of pattern from 1 to m - 1, do
if pattern[i] = pattern[length], then
increase length by 1
prefArray[i] := length
else
if length ≠ 0 then
length := prefArray[length - 1]
decrease i by 1
else
prefArray[i] := 0
done
End
kmpAlgorithm(text, pattern)
在获得lps数组后求匹配字符串的伪代码
kmpAlgorithm(text, pattern)
Input: The main text, and the pattern, which will be searched
Output − The location where patterns are found
Begin
n := size of text
m := size of pattern
call findPrefix(pattern, m, prefArray)
i := 0, j := 0 (i indexes text, j indexes pattern)
while i < n, do
if text[i] = pattern[j], then
increase i and j by 1
if j = m, then
print the location (i-j) as there is the pattern
j := prefArray[j-1]
else if i < n AND pattern[j] ≠ text[i] then
if j ≠ 0 then
j := prefArray[j - 1]
else
increase i by 1
done
End
例子:
txt = "ABABDABACDABABCABAB";
pattern = "ABABCABAB"
lps={0,0,1,2,0,1,2,3,4}
i=0,j=0
ABABDABACDABABCABAB
ABABCABAB
...
i=3,j=3
ABABDABACDABABCABAB
ABABCABAB
i=4,j=4
ABABDABACDABABCABAB
ABABCABAB
0,0,1,2,0,1,2,3,4
0,1,2,3,4,5,6,7,8
i=4,j = lps[j - 1]=lps[3]=2
ABABDABACDABABCABAB
ABABCABAB
0,0,1,2,0,1,2,3,4
0,1,2,3,4,5,6,7,8
i=4,j = lps[j - 1]=lps[1]=0
ABABDABACDABABCABAB
ABABCABAB
0,0,1,2,0,1,2,3,4
0,1,2,3,4,5,6,7,8
i=i+1=5,j = 0
ABABDABACDABABCABAB
ABABCABAB
0,0,1,2,0,1,2,3,4
0,1,2,3,4,5,6,7,8
...
#include<algorithm>
#include <cstdio>
#include <cstring>
#include <iomanip>
#include<iostream>
#include <string>
#include<unordered_map>
#include<vector>
using namespace std;
// C++ program for implementation of KMP pattern searching
// algorithm
void computeLPSArray(char* pat, int M, int* lps);

// Prints every index of txt[] at which pat[] occurs, using the
// Knuth-Morris-Pratt algorithm.
//
// pat: NUL-terminated pattern to look for.
// txt: NUL-terminated text to scan.
//
// For each occurrence the text "Found pattern at index <k> " is written
// to stdout, where <k> is the 0-based start of the match in txt.
// Overlapping occurrences are all reported. An empty pattern prints nothing.
void KMPSearch(char* pat, char* txt)
{
    const int M = static_cast<int>(strlen(pat));
    const int N = static_cast<int>(strlen(txt));

    // An empty pattern has no LPS table: filling lps[0] and later reading
    // lps[j - 1] with j == 0 would both be out of bounds, so bail out early.
    if (M == 0)
        return;

    // lps[k] holds the length of the longest proper prefix of pat[0..k]
    // that is also a suffix of it. A vector is used because M is only
    // known at run time, and it releases its storage on every exit path.
    std::vector<int> lps(M);

    // Preprocess the pattern (calculate the lps[] table).
    computeLPSArray(pat, M, lps.data());

    int i = 0; // index into txt[]
    int j = 0; // index into pat[], i.e. number of pattern chars matched so far
    while (i < N) {
        if (pat[j] == txt[i]) {
            j++;
            i++;
        }

        if (j == M) {
            // Whole pattern matched; the match started j characters back.
            printf("Found pattern at index %d ", i - j);
            // Continue from the longest border so overlapping matches are found.
            j = lps[j - 1];
        }
        // Mismatch after j matching characters.
        else if (i < N && pat[j] != txt[i]) {
            // The first lps[j-1] characters are known to match already,
            // so the pattern is shifted without re-reading the text.
            if (j != 0)
                j = lps[j - 1];
            else
                i = i + 1;
        }
    }
}
// Fills lps[0..M-1] for the given pattern pat[0..M-1].
//
// pat: the pattern; only pat[0..M-1] is read.
// M:   number of pattern characters to process.
// lps: output array with room for at least M ints.
//
// On return lps[i] is the length of the longest proper prefix of
// pat[0..i] that is also a suffix of pat[0..i]. When M <= 0 nothing is
// written, so a zero-length output buffer is safe to pass.
void computeLPSArray(char* pat, int M, int* lps)
{
    // Nothing to compute for an empty pattern, and lps[] may have no slot 0.
    if (M <= 0)
        return;

    lps[0] = 0; // a single character has no proper prefix

    int len = 0; // length of the border currently being extended
    int i = 1;   // lps[i] is computed for i = 1 .. M-1
    while (i < M) {
        if (pat[i] == pat[len]) {
            // The current border grows by one character.
            len++;
            lps[i] = len;
            i++;
        }
        else if (len != 0) {
            // Fall back to the next shorter border and retry the same i
            // (e.g. pattern "AAACAAAA" at i = 7). i is deliberately not
            // advanced here.
            len = lps[len - 1];
        }
        else {
            // No border ends at position i.
            lps[i] = 0;
            i++;
        }
    }
}
// Demo entry point: searches a fixed sample text for a fixed sample
// pattern and lets KMPSearch print where the pattern occurs.
int main()
{
    char sampleText[] = "ABABDABACDABABCABAB";
    char samplePattern[] = "ABABCABAB"; // its lps table is {0,0,1,2,0,1,2,3,4}

    KMPSearch(samplePattern, sampleText);

    return 0;
}
编程题 | leetcode 214. 最短回文串
给定一个字符串 s,你可以通过在字符串前面添加字符将其转换为回文串。找到并返回可以用这种方式转换的最短回文串。
思路简述:求s1="oonjk"的翻转字符串s2="kjnoo",求s2的后缀与s1的前缀的匹配。即串接字符串s=s1+"#"+s2(原字符串s中不包含"#"字符),应用求lps的算法。
class Solution {
public:
    // Returns the shortest palindrome that can be made by adding
    // characters in front of s.
    //
    // Approach: build joined = s + "#" + reverse(s) and compute the KMP
    // lps table over it. The longest border of joined that is no longer
    // than s is the longest palindromic prefix of s; the characters of
    // reverse(s) not covered by that prefix are what must be prepended.
    std::string shortestPalindrome(std::string s) {
        const std::size_t sl = s.size();
        const std::string s2(s.rbegin(), s.rend());
        const std::string joined = s + "#" + s2;

        // lps[i]: length of the longest proper prefix of joined[0..i]
        // that is also a suffix of it.
        std::vector<std::size_t> lps(joined.size(), 0);
        std::size_t len = 0;
        std::size_t i = 1;
        while (i < joined.size()) {
            if (joined[i] == joined[len]) {
                lps[i++] = ++len;
            } else if (len != 0) {
                len = lps[len - 1];
            } else {
                ++i;
            }
        }

        // If s itself contains '#', the separator no longer stops a border
        // from running past s. Follow the border chain down to the longest
        // one that fits inside s, so that sl - keep can never wrap around.
        std::size_t keep = lps.back();
        while (keep > sl) {
            keep = lps[keep - 1];
        }

        // Prepend the part of reverse(s) that the palindromic prefix
        // does not already mirror.
        return s2.substr(0, sl - keep) + s;
    }
};
又解:
class Solution {
public:
    // Returns the shortest palindrome that can be made by adding
    // characters in front of s.
    //
    // Uses the "next" formulation of the KMP failure table over
    // s_new = s + "#" + reverse(s): for k > 0, next[k] is the length of
    // the longest proper prefix of s_new[0..k) that is also its suffix,
    // and next[0] is the sentinel -1.
    std::string shortestPalindrome(std::string s) {
        const int n = static_cast<int>(s.size());
        const std::string s2(s.rbegin(), s.rend());
        const std::string s_new = s + "#" + s2;
        const int total = static_cast<int>(s_new.size());

        std::vector<int> next(total + 1, 0);
        next[0] = -1;

        int i = -1; // end of the prefix being extended (-1: restart from scratch)
        int j = 0;  // end of the suffix being scanned
        while (j < total) {
            if (i == -1 || s_new[i] == s_new[j]) {
                ++i;
                ++j;
                next[j] = i;
            } else {
                i = next[i];
            }
        }

        // When s contains '#', the longest border of s_new may be longer
        // than s. Walk the border chain down to the longest one that fits
        // inside s so that n - keep stays non-negative.
        int keep = next[total];
        while (keep > n) {
            keep = next[keep];
        }

        // The leading part of reverse(s) that does not overlap the
        // palindromic prefix of s, followed by s, is the answer.
        return s2.substr(0, n - keep) + s;
    }
};