随机算法homework
分别用KMP、Monte Carlo和Las Vegas算法编写3个程序,并随机生成不少于5000对、长度较长且长度不等的01串X和Y(三个程序处理相同的串)
统计算法的执行时间、Monte Carlo算法的出错率,并根据运行结果对三种算法进行深入的比较
KMP算法
/**
 * Knuth-Morris-Pratt substring search over {@code char} arrays.
 *
 * <p>The pattern, the text and the failure table live in static fields that the
 * constructor overwrites, so the class holds exactly one search at a time:
 * construct it with the inputs, then call {@link #kmp()}.
 *
 * @author SJ
 * @date 2020/11/3
 */
public class KMP {
    /** Pattern to search for. */
    public static char[] pattern;
    /** Text that is searched. */
    public static char[] txt;
    /**
     * Failure table: {@code next[q]} is the length of the longest proper prefix of
     * {@code pattern[0..q]} that is also a suffix of it.
     */
    public static int[] next;

    /**
     * Installs the inputs of the next search into the static fields and allocates
     * a failure table of the pattern's length.
     *
     * @param pattern1 pattern to search for
     * @param txt1     text to search in
     */
    public KMP(char[] pattern1, char[] txt1) {
        pattern = pattern1;
        txt = txt1;
        next = new int[pattern.length];
    }

    /** Fills {@link #next} for the current pattern; does nothing for an empty pattern. */
    public static void makeNext() {
        int length = pattern.length;
        if (length == 0) {
            // Nothing to build; writing next[0] would be out of bounds.
            return;
        }
        next[0] = 0;
        for (int q = 1, k = 0; q < length; ++q) {
            // Fall back to shorter borders until one can be extended by pattern[q].
            while (k > 0 && pattern[q] != pattern[k]) {
                k = next[k - 1];
            }
            if (pattern[q] == pattern[k]) {
                k++;
            }
            next[q] = k;
        }
    }

    /**
     * Searches {@link #txt} for the first occurrence of {@link #pattern}.
     *
     * @return index in the text where the first match starts, {@code 0} for an
     *         empty pattern, or {@code -1} when the pattern does not occur
     */
    public static int kmp() {
        if (pattern.length == 0) {
            // The empty pattern matches at the very start (same convention as String.indexOf).
            return 0;
        }
        makeNext();
        for (int i = 0, j = 0; i < txt.length; ++i) {
            // j is the number of pattern characters currently matched.
            while (j > 0 && pattern[j] != txt[i]) {
                j = next[j - 1];
            }
            if (pattern[j] == txt[i]) {
                j++;
            }
            if (j == pattern.length) {
                // Full match ending at i: report where it starts in the text.
                return i - pattern.length + 1;
            }
        }
        return -1;
    }
}
Monte Carlo
/**
 * Monte Carlo (Karp-Rabin style) matching of binary strings.
 *
 * <p>A window of '0'/'1' characters is read as a binary number and reduced modulo
 * {@code p}; that remainder is its fingerprint. Two windows are reported as equal
 * as soon as their fingerprints are equal, so the answer may be a false positive.
 *
 * @author SJ
 * @date 2020/11/5
 */
public class MonteCarlo {

    /**
     * Computes the fingerprint of {@code chars[from..to]} (both ends inclusive).
     *
     * <p>The window is read most-significant bit first and reduced with Horner's
     * rule, so no power of two is ever materialised and nothing overflows however
     * long the window is.
     *
     * @param chars array of '0'/'1' characters
     * @param from  index of the first (most significant) bit of the window
     * @param to    index of the last (least significant) bit of the window
     * @param p     modulus
     * @return the window's value modulo {@code p}; {@code 0} when {@code from > to}
     */
    public static int getIP(char[] chars, int from, int to, int p) {
        long fingerprint = 0;
        for (int k = from; k <= to; k++) {
            fingerprint = (fingerprint * 2 + (chars[k] - '0')) % p;
        }
        return (int) fingerprint;
    }

    /**
     * Finds the first window of {@code txt} whose fingerprint equals the pattern's.
     *
     * @param txt     text made of '0'/'1' characters
     * @param pattern pattern made of '0'/'1' characters
     * @param p       modulus of the fingerprint (a randomly chosen prime)
     * @return start index of the first window with a matching fingerprint, or
     *         {@code -1} when no window matches or the pattern is longer than the
     *         text; a returned index is not verified and may be a false positive
     */
    public static int monteCarlo(char[] txt, char[] pattern, int p) {
        int patternLength = pattern.length;
        int lastStart = txt.length - patternLength;
        if (lastStart < 0) {
            // The pattern cannot fit anywhere in the text.
            return -1;
        }

        long patternIP = getIP(pattern, 0, patternLength - 1, p);
        long txtIP = getIP(txt, 0, patternLength - 1, p);

        // wp = 2^patternLength mod p: the weight the outgoing bit has after doubling.
        long wp = 1;
        for (int i = 0; i < patternLength; i++) {
            wp = wp * 2 % p;
        }

        // lastStart is inclusive so the final window is tested as well.
        for (int index = 0; index <= lastStart; index++) {
            if (patternIP == txtIP) {
                return index;
            }
            if (index < lastStart) {
                // Slide one position: double, drop the outgoing bit, append the incoming bit.
                long rolled = (txtIP * 2 - wp * (txt[index] - '0') + (txt[index + patternLength] - '0')) % p;
                // Java's % keeps the dividend's sign; bring the remainder back into [0, p).
                txtIP = rolled < 0 ? rolled + p : rolled;
            }
        }
        return -1;
    }
}
LasVegas
import java.util.Arrays;
/**
 * Las Vegas matching of binary strings: fingerprints filter the candidate
 * windows, and every fingerprint hit is confirmed character by character, so a
 * returned index is always a real occurrence.
 *
 * @author SJ
 * @date 2020/11/5
 */
public class LasVegas {

    /**
     * Finds the first occurrence of {@code pattern} in {@code txt}.
     *
     * @param txt     text made of '0'/'1' characters
     * @param pattern pattern made of '0'/'1' characters
     * @param p       modulus of the fingerprint (a randomly chosen prime)
     * @return start index of the first occurrence, or {@code -1} when the pattern
     *         does not occur or is longer than the text
     */
    public static int lasVegas(char[] txt, char[] pattern, int p) {
        int patternLength = pattern.length;
        int lastStart = txt.length - patternLength;
        if (lastStart < 0) {
            // The pattern cannot fit anywhere in the text.
            return -1;
        }

        // Fingerprints are computed locally so the result depends only on this class.
        long patternIP = fingerprint(pattern, patternLength, p);
        long txtIP = fingerprint(txt, patternLength, p);

        // wp = 2^patternLength mod p: the weight the outgoing bit has after doubling.
        long wp = 1;
        for (int i = 0; i < patternLength; i++) {
            wp = wp * 2 % p;
        }

        // lastStart is inclusive so the final window is tested as well.
        for (int index = 0; index <= lastStart; index++) {
            // Compare characters only on a fingerprint hit, in place and without copying.
            if (patternIP == txtIP && matchesAt(txt, index, pattern)) {
                return index;
            }
            if (index < lastStart) {
                // Slide one position: double, drop the outgoing bit, append the incoming bit.
                long rolled = (txtIP * 2 - wp * (txt[index] - '0') + (txt[index + patternLength] - '0')) % p;
                // Java's % keeps the dividend's sign; bring the remainder back into [0, p).
                txtIP = rolled < 0 ? rolled + p : rolled;
            }
        }
        return -1;
    }

    /** Returns the value of the first {@code length} bits of {@code chars} modulo {@code p} (Horner's rule). */
    private static long fingerprint(char[] chars, int length, int p) {
        long value = 0;
        for (int k = 0; k < length; k++) {
            value = (value * 2 + (chars[k] - '0')) % p;
        }
        return value;
    }

    /** Returns whether {@code pattern} occurs in {@code txt} starting at {@code start}. */
    private static boolean matchesAt(char[] txt, int start, char[] pattern) {
        for (int k = 0; k < pattern.length; k++) {
            if (txt[start + k] != pattern[k]) {
                return false;
            }
        }
        return true;
    }
}
生成随机串:
import java.util.Arrays;
import java.util.Random;
/**
 * Random test-data helpers: random 0/1 strings and random primes.
 *
 * @author SJ
 * @date 2020/11/5
 */
public class RandomUtil {
    /** Shared generator, so a new one is not created on every call. */
    private static final Random RANDOM = new Random();

    /**
     * Builds a random string of '0' and '1' characters.
     *
     * @param length number of characters to generate
     * @return a string of {@code length} random binary digits; empty when
     *         {@code length} is not positive
     */
    public static String makeRandomString(int length) {
        StringBuilder digits = new StringBuilder(Math.max(length, 0));
        for (int i = 0; i < length; i++) {
            // nextInt(2) yields 0 or 1 (the upper bound is exclusive).
            digits.append(RANDOM.nextInt(2));
        }
        return digits.toString();
    }

    /**
     * Tests primality by trial division.
     *
     * @param num candidate
     * @return {@code true} if {@code num} is prime; numbers below 2 are not prime
     */
    public static boolean isPrime(int num) {
        if (num < 2) {
            return false;
        }
        // i * i <= num includes the square root itself, so squares such as 4, 9, 25 are rejected.
        for (int i = 2; (long) i * i <= num; i++) {
            if (num % i == 0) {
                return false;
            }
        }
        return true;
    }

    /**
     * Picks a prime near a random point of the range: a random start is drawn from
     * {@code [from, to)}, then primes are searched downwards to {@code from} and,
     * failing that, upwards to {@code to}.
     *
     * @param from lower bound of the range (inclusive)
     * @param to   upper bound of the range
     * @return a prime in {@code [from, to]}
     * @throws IllegalArgumentException if {@code to <= from} or the range contains no prime
     */
    public static int makeRandomPrim(int from, int to) {
        int start = RANDOM.nextInt(to - from) + from;
        // Search downwards first, never leaving the range.
        for (int candidate = start; candidate >= from; candidate--) {
            if (isPrime(candidate)) {
                return candidate;
            }
        }
        // Nothing at or below the start: search upwards instead.
        for (int candidate = start + 1; candidate <= to; candidate++) {
            if (isPrime(candidate)) {
                return candidate;
            }
        }
        throw new IllegalArgumentException("no prime in [" + from + ", " + to + "]");
    }
}
先验证正确性:
/**
 * Small correctness check: runs the three matchers on one fixed text/pattern
 * pair for every prime modulus below 11 and prints the positions they report.
 *
 * @author SJ
 * @date 2020/11/6
 */
public class TestResult {
    public static void main(String[] args) {
        String txt = "10101110";
        String pattern = "1011";
        for (int p = 2; p < 11; p++) {
            // Skip non-primes instead of ending the loop at the first one,
            // so that every prime below 11 is exercised.
            if (!RandomUtil.isPrime(p)) {
                continue;
            }
            System.out.println("选择的素数:" + p);
            // KMP keeps its inputs in static fields; the constructor installs them.
            new KMP(pattern.toCharArray(), txt.toCharArray());
            int kmp = KMP.kmp();
            System.out.println("kmp位置:" + kmp);
            int i = MonteCarlo.monteCarlo(txt.toCharArray(), pattern.toCharArray(), p);
            System.out.println("monte carlo位置:" + i);
            int i1 = LasVegas.lasVegas(txt.toCharArray(), pattern.toCharArray(), p);
            System.out.println("las vegas位置:" + i1);
            System.out.println("------------------------------------------------------------");
        }
    }
}
测试结果:
"C:\Program Files\Java\jdk1.8.0_131\bin\java.exe"
选择的素数:2
kmp位置:2
monte carlo位置:1
las vegas位置:2
------------------------------------------------------------
选择的素数:3
kmp位置:2
monte carlo位置:1
las vegas位置:2
------------------------------------------------------------
选择的素数:4
kmp位置:2
monte carlo位置:2
las vegas位置:2
------------------------------------------------------------
选择的素数:5
kmp位置:2
monte carlo位置:2
las vegas位置:2
------------------------------------------------------------
Process finished with exit code 0
Las Vegas的结果总是对的;Monte Carlo的结果不一定对,素数的值越大,Monte Carlo的结果越准确。
比较效率:
/**
 * Benchmark: for each text/pattern length pair, generates random 0/1 string
 * pairs, runs KMP, Monte Carlo and Las Vegas on the same inputs, and prints the
 * elapsed time of each algorithm and how often the two randomized algorithms
 * disagree with KMP.
 *
 * @author SJ
 * @date 2020/11/6
 */
public class Test {
    public static void main(String[] args) {
        // Number of random (text, pattern) pairs per length configuration.
        int pairCount = 5000;
        int[] txtStringLength = {50, 500, 5000, 50000};
        int[] patternStringLength = {35, 80, 200, 500};
        // Inputs are kept as char arrays so no conversion happens inside the timed loops.
        char[][] txts = new char[pairCount][];
        char[][] patterns = new char[pairCount][];
        int[] prims = new int[pairCount];
        int[] kmpResult = new int[pairCount];
        int[] MCResult = new int[pairCount];
        int[] LVResult = new int[pairCount];

        for (int j = 0; j < txtStringLength.length; j++) {
            // Generate the inputs once; all three algorithms process the same pairs.
            for (int i = 0; i < pairCount; i++) {
                txts[i] = RandomUtil.makeRandomString(txtStringLength[j]).toCharArray();
                patterns[i] = RandomUtil.makeRandomString(patternStringLength[j]).toCharArray();
                prims[i] = RandomUtil.makeRandomPrim(2, 10000);
            }

            // KMP: its results are the reference the other two are compared against.
            System.out.println("进行" + pairCount + "次kmp运算,txt串长度为:" + txtStringLength[j] + ";pattern串长度为:" + patternStringLength[j]);
            long start = System.currentTimeMillis();
            for (int i = 0; i < pairCount; i++) {
                new KMP(patterns[i], txts[i]);
                kmpResult[i] = KMP.kmp();
            }
            long end = System.currentTimeMillis();
            System.out.println("花费时间:" + (end - start) + "毫秒");

            // Monte Carlo
            int McMiscount = 0;
            System.out.println("进行" + pairCount + "次蒙特卡洛运算,txt串长度为:" + txtStringLength[j] + ";pattern串长度为:" + patternStringLength[j]);
            long start1 = System.currentTimeMillis();
            for (int i = 0; i < pairCount; i++) {
                MCResult[i] = MonteCarlo.monteCarlo(txts[i], patterns[i], prims[i]);
                if (MCResult[i] != kmpResult[i]) {
                    McMiscount++;
                }
            }
            long end1 = System.currentTimeMillis();
            System.out.println("花费时间:" + (end1 - start1) + "毫秒");

            // Las Vegas
            int LvMiscount = 0;
            System.out.println("进行" + pairCount + "次lv运算,txt串长度为:" + txtStringLength[j] + ";pattern串长度为:" + patternStringLength[j]);
            long start2 = System.currentTimeMillis();
            for (int i = 0; i < pairCount; i++) {
                LVResult[i] = LasVegas.lasVegas(txts[i], patterns[i], prims[i]);
                if (LVResult[i] != kmpResult[i]) {
                    LvMiscount++;
                }
            }
            long end2 = System.currentTimeMillis();
            System.out.println("花费时间:" + (end2 - start2) + "毫秒");

            // Error rates: scaled by 100 so the printed number really is a percentage.
            System.out.println("Monte Carlo的误判率为:" + (100.0 * McMiscount / pairCount) + "%");
            System.out.println("Las Vegas的误判率为:" + (100.0 * LvMiscount / pairCount) + "%");
            System.out.println("------------------------------------------------------------------");
        }
    }
}
测试结果:
"C:\Program Files\Java\jdk1.8.0_131\bin\java.exe" ...t
进行5000次kmp运算,txt串长度为:50;pattern串长度为:35
花费时间:25毫秒
进行5000次蒙特卡洛运算,txt串长度为:50;pattern串长度为:35
花费时间:74毫秒
进行5000次lv运算,txt串长度为:50;pattern串长度为:35
花费时间:64毫秒
Monte Carlo的误判率为:0.006%
Las Vegas的误判率为:0.0%
------------------------------------------------------------------
进行5000次kmp运算,txt串长度为:500;pattern串长度为:80
花费时间:50毫秒
进行5000次蒙特卡洛运算,txt串长度为:500;pattern串长度为:80
花费时间:157毫秒
进行5000次lv运算,txt串长度为:500;pattern串长度为:80
花费时间:192毫秒
Monte Carlo的误判率为:0.0128%
Las Vegas的误判率为:0.0%
------------------------------------------------------------------
进行5000次kmp运算,txt串长度为:5000;pattern串长度为:200
花费时间:259毫秒
进行5000次蒙特卡洛运算,txt串长度为:5000;pattern串长度为:200
花费时间:1083毫秒
进行5000次lv运算,txt串长度为:5000;pattern串长度为:200
花费时间:2118毫秒
Monte Carlo的误判率为:0.0242%
Las Vegas的误判率为:0.0%
------------------------------------------------------------------
进行5000次kmp运算,txt串长度为:50000;pattern串长度为:500
花费时间:1911毫秒
进行5000次蒙特卡洛运算,txt串长度为:50000;pattern串长度为:500
花费时间:4270毫秒
进行5000次lv运算,txt串长度为:50000;pattern串长度为:500
花费时间:29006毫秒
Monte Carlo的误判率为:0.0342%
Las Vegas的误判率为:0.0%
------------------------------------------------------------------
Process finished with exit code 0
测试了很多次,Monte Carlo的误判率取决于素数的取值:素数的取值越大,Monte Carlo的误判率越趋近于0。
同时发现,素数的取值越大,程序的运行时间越长。
Las Vegas找到的总是正确的结果,但因为多了字符串比较的过程,所以花费的时间相对更长。
为了直观地看到Monte Carlo确实会出错,把素数的范围调到了10000以下。
对于我的测试数据来看,kmp的效率更高。