抽奖算法: 别名算法、蓄水池算法

1,100 阅读3分钟

别名算法,将概率图形化为不同区域,通过随机数抽奖。

/**
 * An implementation of the alias method using Vose's algorithm. The alias method allows for
 * efficient sampling of random values from a discrete probability distribution (i.e. rolling a
 * loaded die) in O(1) time each after O(n) preprocessing time.
 *
 * <p>For a complete writeup on the alias method, including the intuition and important proofs,
 * please see the article "Darts, Dice, and Coins: Sampling from a Discrete Distribution" at
 *
 * <p>http://www.keithschwarz.com/darts-dice-coins/
 *
 * <p>Original author: Keith Schwarz (htiek@cs.stanford.edu)
 *
 * <p>The tables are built once in the constructor and never modified afterwards. When no
 * generator is supplied, every call to {@link #next()} draws from the calling thread's
 * {@link ThreadLocalRandom}; when a generator is supplied, {@link #next()} uses exactly that
 * instance.
 */
public final class AliasMethod {
    /**
     * The caller-supplied random number generator, or {@code null} when samples should be drawn
     * from {@link ThreadLocalRandom#current()} at call time.
     */
    private final Random random;

    /**
     * For column {@code i}, the outcome returned when the biased coin toss for that column fails.
     */
    private final int[] alias;

    /**
     * For column {@code i}, the chance (scaled so that 1/n maps to 1.0) of returning {@code i}
     * itself rather than {@code alias[i]}.
     */
    private final double[] probability;

    /**
     * Constructs a new AliasMethod to sample from a discrete distribution and hand back outcomes
     * based on the probability distribution.
     *
     * <p>Given as input a list of probabilities corresponding to outcomes 0, 1, ..., n - 1, this
     * constructor creates the probability and alias tables needed to efficiently sample from this
     * distribution. Random numbers are taken from {@link ThreadLocalRandom#current()} on each call
     * to {@link #next()}, so the instance never holds on to another thread's generator.
     *
     * @param probabilities The list of probabilities.
     * @throws IllegalArgumentException if {@code probabilities} is null or empty, or contains a
     *                                  null, NaN, infinite or negative entry
     */
    public AliasMethod(List<Double> probabilities) {
        this(probabilities, null, false);
    }

    /**
     * Constructs a new AliasMethod to sample from a discrete distribution and hand back outcomes
     * based on the probability distribution.
     *
     * <p>Given as input a list of probabilities corresponding to outcomes 0, 1, ..., n - 1, along
     * with the random number generator that should be used as the underlying generator, this
     * constructor creates the probability and alias tables needed to efficiently sample from this
     * distribution.
     *
     * @param probabilities The list of probabilities.
     * @param random        The random number generator
     * @throws IllegalArgumentException if either argument is null, if {@code probabilities} is
     *                                  empty, or if it contains a null, NaN, infinite or negative
     *                                  entry
     */
    public AliasMethod(List<Double> probabilities, Random random) {
        this(probabilities, random, true);
    }

    /**
     * Shared constructor body: validates the inputs and builds the probability and alias tables.
     *
     * @param probabilities  The list of probabilities; it is copied and never modified.
     * @param random         The generator to use, or {@code null} to use the calling thread's
     *                       {@link ThreadLocalRandom} when sampling.
     * @param randomRequired Whether a null {@code random} must be rejected.
     */
    private AliasMethod(List<Double> probabilities, Random random, boolean randomRequired) {
        /* Begin by doing basic structural checks on the inputs. */
        if (probabilities == null || (randomRequired && random == null)) {
            throw new IllegalArgumentException("param is null.");
        }
        if (probabilities.isEmpty()) {
            throw new IllegalArgumentException("Probability vector must be nonempty.");
        }

        /* Take a snapshot of the input, since the algorithm rewrites the values as it goes. */
        final Double[] snapshot = probabilities.toArray(new Double[0]);
        final int n = snapshot.length;
        final double[] remaining = new double[n];
        for (int i = 0; i < n; ++i) {
            final Double p = snapshot[i];
            /* Reject entries that would silently corrupt the tables. */
            if (p == null || p.isNaN() || p.isInfinite() || p < 0.0) {
                throw new IllegalArgumentException(
                        "Probability at index " + i + " must be a finite, non-negative number: " + p);
            }
            remaining[i] = p;
        }

        /* Allocate space for the probability and alias tables. */
        this.probability = new double[n];
        this.alias = new int[n];

        /* Store the underlying generator (null means "use ThreadLocalRandom per call"). */
        this.random = random;

        /* Compute the average probability and cache it for later use. */
        final double average = 1.0 / n;

        /* Create two stacks to act as worklists as we populate the tables. */
        final Deque<Integer> small = new ArrayDeque<>();
        final Deque<Integer> large = new ArrayDeque<>();

        /* Entries below the average go to the small stack; all others go to the large stack. */
        for (int i = 0; i < n; ++i) {
            if (remaining[i] >= average) {
                large.push(i);
            } else {
                small.push(i);
            }
        }

        /* As a note: in the mathematical specification of the algorithm, we
         * will always exhaust the small list before the big list.  However,
         * due to floating point inaccuracies, this is not necessarily true.
         * Consequently, this loop (which pairs small and large elements) has
         * to check that both stacks are nonempty.
         */
        while (!small.isEmpty() && !large.isEmpty()) {
            /* Get the index of the small and the large probabilities. */
            final int less = small.pop();
            final int more = large.pop();

            /* These probabilities have not yet been scaled up to be such that
             * 1/n is given weight 1.0.  We do this here instead.
             */
            probability[less] = remaining[less] * n;
            alias[less] = more;

            /* The larger entry donates whatever the smaller one lacked to fill its column. */
            remaining[more] = (remaining[more] + remaining[less]) - average;

            /* Re-file the larger entry according to what it has left. */
            if (remaining[more] >= average) {
                large.push(more);
            } else {
                small.push(more);
            }
        }

        /* At this point, everything is in one stack, which means that the
         * remaining probabilities should all be 1/n.  Due to numerical issues
         * we can't be sure which stack holds the entries, so we empty both.
         *
         * When the input probabilities sum to slightly less than 1 (precision
         * lost converting fractions to decimals) or slightly more than 1
         * (rounding), the leftovers are simply set to 1, so the effective
         * distribution shifts slightly.
         */
        while (!small.isEmpty()) {
            probability[small.pop()] = 1.0;
        }
        while (!large.isEmpty()) {
            probability[large.pop()] = 1.0;
        }
    }

    /**
     * Samples a value from the underlying distribution.
     *
     * @return A random value sampled from the underlying distribution.
     */
    public int next() {
        /* Resolve the generator on the calling thread; a ThreadLocalRandom must not be cached. */
        final Random rng = (random != null) ? random : ThreadLocalRandom.current();

        /* Generate a fair die roll to determine which column to inspect. */
        final int column = rng.nextInt(probability.length);

        /* Generate a biased coin toss to determine which option to pick. */
        final boolean coinToss = rng.nextDouble() < probability[column];

        /* Based on the outcome, return either the column or its alias. */
        return coinToss ? column : alias[column];
    }
}

蓄水池算法:适用于样本总数事先未知、需要以相等的概率从中抽取若干样本的场景,例如从所有参加活动的人里随机抽出几个。

private static final String ADD_ITEM_LUA =

        "local poolKey = KEYS[1]; \n" +

        "local countingKey = KEYS[2]; \n" +

        "local poolSize = tonumber(ARGV[1]); \n" +

        "local item = ARGV[2]; \n" +

        "local expire = ARGV[4]; \n" +

        "local uvCounting = tonumber(redis.call('incr', countingKey)); \n" + // 当前一共有多数人流经池子

        "local curPoolSize = tonumber(redis.call('llen', poolKey)); \n" + // 蓄水池(结果集)里有多少人

        "if curPoolSize < poolSize then \n" + // 结果集不足n个人,直接加到池子里

        " redis.call('lpush', poolKey, item); \n" +

        " redis.call('expire', poolKey, expire); \n" +

        " redis.call('expire', countingKey, expire); \n" +

        " return {'true'}; \n" +

        "end \n" +

        "local rseed = ARGV[3]; \n" + // 随机种子

        "math.randomseed(rseed); \n" +

        "local randomIndex = math.random(0,uvCounting - 1); " +

        "if randomIndex < poolSize then \n" + // 随机移除一个元素,把当前元素塞进去

        "  local randomItem = redis.call('lindex', poolKey, randomIndex); \n" +

        "  redis.call('lrem', poolKey, 1, randomItem); \n" +

        "  redis.call('lpush', poolKey, 1, item); \n" +

        "end" +

        "redis.call('expire', poolKey, expire); \n" +

        "redis.call('expire', countingKey, expire); \n" +

        "return {'true'}; \n"

        ;