算法篇——随机算法例题

165 阅读6分钟

随机算法homework

分别用KMP、Monte Carlo和Las Vegas算法编写3个程序,并随机生成不小于5000对、长度较长且长度不等的01串X和Y(三个程序处理相同的串)。

统计算法的执行时间、Monte Carlo算法的出错率,并根据运行结果对三种算法进行深入的比较

KMP算法

/**
 * Knuth-Morris-Pratt substring search over character arrays.
 *
 * <p>The pattern, the text and the failure table live in static fields that the
 * constructor overwrites, so only one search can be configured at a time. Usage:
 * {@code new KMP(pattern, txt); int pos = KMP.kmp();}
 *
 * @author SJ
 * @date 2020/11/3
 */
public class KMP {
    /** Pattern to search for. */
    public static char[] pattern;
    /** Text to search in. */
    public static char[] txt;

    /**
     * Failure table: {@code next[q]} is the length of the longest proper prefix of
     * {@code pattern[0..q]} that is also a suffix of it.
     */
    public static int[] next;

    /**
     * Installs the pattern and text for the next call to {@link #kmp()}.
     *
     * @param pattern1 pattern to search for
     * @param txt1     text to search in
     */
    public KMP(char[] pattern1, char[] txt1) {
        pattern = pattern1;
        txt = txt1;
        next = new int[pattern.length];
    }

    /** Fills {@link #next} for the current {@link #pattern}. */
    public static void makeNext() {
        int length = pattern.length;
        // The fields are public, so the table may be stale if pattern was reassigned directly.
        if (next == null || next.length != length) {
            next = new int[length];
        }
        if (length == 0) {
            // Nothing to fill; writing next[0] would be out of bounds.
            return;
        }
        next[0] = 0;
        for (int q = 1, k = 0; q < length; ++q) {
            // Fall back to shorter borders until the next character extends one.
            while (k > 0 && pattern[q] != pattern[k]) {
                k = next[k - 1];
            }
            if (pattern[q] == pattern[k]) {
                k++;
            }
            next[q] = k;
        }
    }

    /**
     * Searches {@link #txt} for the first occurrence of {@link #pattern}.
     *
     * @return index in the text where the first match starts, {@code 0} for an empty
     *         pattern, or {@code -1} when the pattern does not occur
     */
    public static int kmp() {
        if (pattern.length == 0) {
            // An empty pattern matches at the very beginning.
            return 0;
        }
        makeNext();
        for (int i = 0, j = 0; i < txt.length; ++i) {
            while (j > 0 && pattern[j] != txt[i]) {
                j = next[j - 1];
            }
            if (pattern[j] == txt[i]) {
                j++;
            }
            if (j == pattern.length) {
                // Full match ending at i: report where it started.
                return i - pattern.length + 1;
            }
        }
        return -1;
    }
}

Monte Carlo

/**
 * Monte Carlo (fingerprint-based) substring search for strings of '0'/'1' characters.
 *
 * <p>A window of the text is reported as a match as soon as its fingerprint equals the
 * pattern's fingerprint; the characters themselves are never compared, so a reported
 * position may be a false positive.
 *
 * @author SJ
 * @date 2020/11/5
 */
public class MonteCarlo {
    /**
     * Computes the fingerprint of {@code chars[from..to]}: the binary number spelled by
     * those characters (most significant bit first), reduced modulo {@code p}.
     *
     * @param chars characters, expected to be '0' or '1'
     * @param from  index of the first character (inclusive)
     * @param to    index of the last character (inclusive); {@code to < from} yields 0
     * @param p     modulus, must be positive
     * @return the fingerprint, in {@code [0, p)}
     */
    public static int getIP(char[] chars, int from, int to, int p) {
        // Horner's rule keeps every intermediate value below 2*p + 1, so arbitrarily long
        // windows work, and the weight of each bit depends only on its position in the window.
        long fingerprint = 0;
        for (int k = from; k <= to; k++) {
            fingerprint = (fingerprint * 2 + (chars[k] - '0')) % p;
        }
        return (int) fingerprint;
    }

    /**
     * Finds the first window of {@code txt} whose fingerprint equals that of {@code pattern}.
     *
     * @param txt     text to search in
     * @param pattern pattern to search for
     * @param p       modulus for the fingerprints (a randomly chosen prime), must be positive
     * @return index of the first window with a matching fingerprint, or {@code -1} if there
     *         is none or the pattern is longer than the text
     */
    public static int monteCarlo(char[] txt, char[] pattern, int p) {
        int p_len = pattern.length;
        if (p_len > txt.length) {
            return -1;
        }
        int patternIP = getIP(pattern, 0, p_len - 1, p);
        long txtIP = getIP(txt, 0, p_len - 1, p);

        // wp = 2^p_len mod p: the weight the outgoing bit has after the window is doubled.
        long wp = 1;
        for (int i = 0; i < p_len; i++) {
            wp = wp * 2 % p;
        }

        int lastStart = txt.length - p_len;
        for (int index = 0; ; index++) {
            if (txtIP == patternIP) {
                return index;
            }
            if (index == lastStart) {
                // The final window has been tested; there is nothing left to slide onto.
                return -1;
            }
            // Slide one position right. floorMod keeps the result in [0, p) even when the
            // subtraction goes negative, which plain % would not.
            long shifted = txtIP * 2 - wp * (txt[index] - '0') + (txt[index + p_len] - '0');
            txtIP = Math.floorMod(shifted, (long) p);
        }
    }

}

LasVegas

import java.util.Arrays;

/**
 * Las Vegas substring search for strings of '0'/'1' characters.
 *
 * <p>Fingerprints are used only as a filter: whenever a window's fingerprint equals the
 * pattern's, the characters are compared one by one, so a reported position is always a
 * real match.
 *
 * @author SJ
 * @date 2020/11/5
 */
public class LasVegas {
    /**
     * Finds the first occurrence of {@code pattern} in {@code txt}.
     *
     * @param txt     text to search in
     * @param pattern pattern to search for
     * @param p       modulus for the fingerprints (a randomly chosen prime), must be positive
     * @return index where the first occurrence starts, or {@code -1} if the pattern does
     *         not occur or is longer than the text
     */
    public static int lasVegas(char[] txt, char[] pattern, int p) {
        int p_len = pattern.length;
        if (p_len > txt.length) {
            return -1;
        }
        long patternIP = fingerprint(pattern, p_len, p);
        long txtIP = fingerprint(txt, p_len, p);

        // wp = 2^p_len mod p: the weight the outgoing bit has after the window is doubled.
        long wp = 1;
        for (int i = 0; i < p_len; i++) {
            wp = wp * 2 % p;
        }

        int lastStart = txt.length - p_len;
        for (int index = 0; ; index++) {
            // Compare characters only on a fingerprint hit; a mismatch there is just a collision.
            if (txtIP == patternIP && matchesAt(txt, index, pattern)) {
                return index;
            }
            if (index == lastStart) {
                // The final window has been tested; there is nothing left to slide onto.
                return -1;
            }
            // Slide one position right. floorMod keeps the result in [0, p) even when the
            // subtraction goes negative, which plain % would not.
            long shifted = txtIP * 2 - wp * (txt[index] - '0') + (txt[index + p_len] - '0');
            txtIP = Math.floorMod(shifted, (long) p);
        }
    }

    /** Fingerprint of the first {@code length} characters: their binary value modulo {@code p}. */
    private static long fingerprint(char[] chars, int length, int p) {
        long value = 0;
        for (int k = 0; k < length; k++) {
            value = (value * 2 + (chars[k] - '0')) % p;
        }
        return value;
    }

    /** Returns whether {@code pattern} occurs in {@code txt} starting exactly at {@code offset}. */
    private static boolean matchesAt(char[] txt, int offset, char[] pattern) {
        for (int k = 0; k < pattern.length; k++) {
            if (txt[offset + k] != pattern[k]) {
                return false;
            }
        }
        return true;
    }

}

生成随机串:

import java.util.Arrays;
import java.util.Random;

/**
 * Helpers that generate the random test data: 0/1 strings and random primes.
 *
 * @author SJ
 * @date 2020/11/5
 */
public class RandomUtil {
    /** Shared generator, so repeated calls do not each create and seed a new one. */
    private static final Random RANDOM = new Random();

    /**
     * Builds a random string consisting only of the characters '0' and '1'.
     *
     * @param length number of characters to generate; values below 1 give an empty string
     * @return the generated string
     */
    public static String makeRandomString(int length) {
        StringBuilder stringBuilder = new StringBuilder(Math.max(length, 0));
        for (int i = 0; i < length; i++) {
            // nextInt(2) yields 0 or 1 (upper bound exclusive).
            stringBuilder.append(RANDOM.nextInt(2));
        }
        return stringBuilder.toString();
    }

    /**
     * Tests primality by trial division.
     *
     * @param num candidate value
     * @return {@code true} if {@code num} is prime; values below 2 are never prime
     */
    public static boolean isPrime(int num) {
        if (num < 2) {
            return false;
        }
        // The bound must be inclusive, otherwise squares of primes (4, 9, 25, ...) slip
        // through. The long multiplication avoids overflow for num close to Integer.MAX_VALUE.
        for (int i = 2; (long) i * i <= num; i++) {
            if (num % i == 0) {
                return false;
            }
        }
        return true;
    }

    /**
     * Picks a prime in {@code [from, to)}: a random starting point is drawn from the range
     * and the nearest prime at or below it is returned; if there is none down to
     * {@code from}, the nearest prime above the starting point is returned instead.
     * The primes of the range are therefore not equally likely.
     *
     * @param from lower bound (inclusive)
     * @param to   upper bound (exclusive), must be greater than {@code from}
     * @return a prime {@code q} with {@code from <= q < to}
     * @throws IllegalArgumentException if {@code to <= from} or the range contains no prime
     */
    public static int makeRandomPrim(int from, int to) {
        int start = RANDOM.nextInt(to - from) + from;
        for (int candidate = start; candidate >= from; candidate--) {
            if (isPrime(candidate)) {
                return candidate;
            }
        }
        for (int candidate = start + 1; candidate < to; candidate++) {
            if (isPrime(candidate)) {
                return candidate;
            }
        }
        throw new IllegalArgumentException("no prime in [" + from + ", " + to + ")");
    }
}

先验证正确性:

/**
 * Small sanity check: runs the three matchers on one fixed text/pattern pair for every
 * prime modulus below 11 and prints the position each of them reports.
 *
 * @author SJ
 * @date 2020/11/6
 */
public class TestResult {
    public static void main(String[] args) {
        String txt = "10101110";
        String pattern = "1011";
        for (int p = 2; p < 11; p++) {
            // Skip composite values instead of ending the loop at the first one, so that
            // every prime in the range is exercised.
            if (!RandomUtil.isPrime(p)) {
                continue;
            }
            System.out.println("选择的素数:" + p);
            new KMP(pattern.toCharArray(), txt.toCharArray());
            int kmp = KMP.kmp();
            System.out.println("kmp位置:" + kmp);
            int i = MonteCarlo.monteCarlo(txt.toCharArray(), pattern.toCharArray(), p);
            System.out.println("monte carlo位置:" + i);
            int i1 = LasVegas.lasVegas(txt.toCharArray(), pattern.toCharArray(), p);
            System.out.println("las vegas位置:" + i1);
            System.out.println("------------------------------------------------------------");
        }
    }
}

测试结果:

"C:\Program Files\Java\jdk1.8.0_131\bin\java.exe"
选择的素数:2
kmp位置:2
monte carlo位置:1
las vegas位置:2
------------------------------------------------------------
选择的素数:3
kmp位置:2
monte carlo位置:1
las vegas位置:2
------------------------------------------------------------
选择的素数:4
kmp位置:2
monte carlo位置:2
las vegas位置:2
------------------------------------------------------------
选择的素数:5
kmp位置:2
monte carlo位置:2
las vegas位置:2
------------------------------------------------------------

Process finished with exit code 0

Las Vegas的结果总是对的;Monte Carlo的结果不一定对,素数的值越大,Monte Carlo的结果越准确。

比较效率:

/**
 * Benchmark driver: for each text/pattern length configuration it generates a batch of
 * random 0/1 string pairs, runs KMP, Monte Carlo and Las Vegas over the same pairs, and
 * prints the elapsed time of each algorithm plus how often the two randomized algorithms
 * disagree with KMP.
 *
 * @author SJ
 * @date 2020/11/6
 */
public class Test {
    public static void main(String[] args) {
        int MaxCapacity = 5000;
        int[] txtStringLength = {50, 500, 5000, 50000};
        int[] patternStringLength = {35, 80, 200, 500};
        char[][] txts = new char[MaxCapacity][];
        char[][] patterns = new char[MaxCapacity][];
        int[] prims = new int[MaxCapacity];

        int[] kmpResult = new int[MaxCapacity];
        int[] MCResult = new int[MaxCapacity];
        int[] LVResult = new int[MaxCapacity];

        for (int j = 0; j < txtStringLength.length; j++) {
            // Generate the inputs up front, already as char arrays, so that the timed loops
            // below measure only the search itself.
            for (int i = 0; i < MaxCapacity; i++) {
                txts[i] = RandomUtil.makeRandomString(txtStringLength[j]).toCharArray();
                patterns[i] = RandomUtil.makeRandomString(patternStringLength[j]).toCharArray();
                prims[i] = RandomUtil.makeRandomPrim(2, 10000);
            }
            String sizes = "txt串长度为:" + txtStringLength[j] + ";pattern串长度为:" + patternStringLength[j];

            // KMP: its results are the reference the other two are compared against.
            System.out.println("进行" + MaxCapacity + "次kmp运算," + sizes);
            long start = System.nanoTime();
            for (int i = 0; i < MaxCapacity; i++) {
                new KMP(patterns[i], txts[i]);
                kmpResult[i] = KMP.kmp();
            }
            long end = System.nanoTime();
            System.out.println("花费时间:" + (end - start) / 1_000_000 + "毫秒");

            // Monte Carlo
            int McMiscount = 0;
            System.out.println("进行" + MaxCapacity + "次蒙特卡洛运算," + sizes);
            long start1 = System.nanoTime();
            for (int i = 0; i < MaxCapacity; i++) {
                MCResult[i] = MonteCarlo.monteCarlo(txts[i], patterns[i], prims[i]);
                if (MCResult[i] != kmpResult[i]) {
                    McMiscount++;
                }
            }
            long end1 = System.nanoTime();
            System.out.println("花费时间:" + (end1 - start1) / 1_000_000 + "毫秒");

            // Las Vegas
            int LvMiscount = 0;
            System.out.println("进行" + MaxCapacity + "次lv运算," + sizes);
            long start2 = System.nanoTime();
            for (int i = 0; i < MaxCapacity; i++) {
                LVResult[i] = LasVegas.lasVegas(txts[i], patterns[i], prims[i]);
                if (LVResult[i] != kmpResult[i]) {
                    LvMiscount++;
                }
            }
            long end2 = System.nanoTime();
            System.out.println("花费时间:" + (end2 - start2) / 1_000_000 + "毫秒");

            // Error rates. The value is followed by a percent sign, so the fraction has to
            // be scaled by 100 first.
            System.out.println("Monte Carlo的误判率为:" + 100.0 * McMiscount / MaxCapacity + "%");
            System.out.println("Las Vegas的误判率为:" + 100.0 * LvMiscount / MaxCapacity + "%");
            System.out.println("------------------------------------------------------------------");
        }
    }

}

测试结果:

"C:\Program Files\Java\jdk1.8.0_131\bin\java.exe" ...t
进行5000次kmp运算,txt串长度为:50;pattern串长度为:35
花费时间:25毫秒
进行5000次蒙特卡洛运算,txt串长度为:50;pattern串长度为:35
花费时间:74毫秒
进行5000次lv运算,txt串长度为:50;pattern串长度为:35
花费时间:64毫秒
Monte Carlo的误判率为:0.006%
Las Vegas的误判率为:0.0%
------------------------------------------------------------------
进行5000次kmp运算,txt串长度为:500;pattern串长度为:80
花费时间:50毫秒
进行5000次蒙特卡洛运算,txt串长度为:500;pattern串长度为:80
花费时间:157毫秒
进行5000次lv运算,txt串长度为:500;pattern串长度为:80
花费时间:192毫秒
Monte Carlo的误判率为:0.0128%
Las Vegas的误判率为:0.0%
------------------------------------------------------------------
进行5000次kmp运算,txt串长度为:5000;pattern串长度为:200
花费时间:259毫秒
进行5000次蒙特卡洛运算,txt串长度为:5000;pattern串长度为:200
花费时间:1083毫秒
进行5000次lv运算,txt串长度为:5000;pattern串长度为:200
花费时间:2118毫秒
Monte Carlo的误判率为:0.0242%
Las Vegas的误判率为:0.0%
------------------------------------------------------------------
进行5000次kmp运算,txt串长度为:50000;pattern串长度为:500
花费时间:1911毫秒
进行5000次蒙特卡洛运算,txt串长度为:50000;pattern串长度为:500
花费时间:4270毫秒
进行5000次lv运算,txt串长度为:50000;pattern串长度为:500
花费时间:29006毫秒
Monte Carlo的误判率为:0.0342%
Las Vegas的误判率为:0.0%
------------------------------------------------------------------

Process finished with exit code 0

测试了很多次,Monte Carlo的误判率取决于素数的取值:素数的取值越大,Monte Carlo的误判率越趋近于0。

同时发现,素数的取值越大,程序的运行时间越长。

Las Vegas找到的总是正确的结果,但因为多了字符串比较的过程,所以花费的时间相对更长。

为了直观地看到Monte Carlo确实会出错,所以把素数的范围调到了10000以下。

对于我的测试数据来看,kmp的效率更高。