别名算法(Alias Method):将离散概率分布图形化为若干等宽的区域(列),抽奖时先用随机数选出一列,再在该列内按比例二选一得到结果。
- 斯坦福论文及实现地址:http://www.keithschwarz.com/darts-dice-coins/
- 通俗解释:blog.csdn.net/haolexiao/a…
- 实现
/**
 * An implementation of the alias method using Vose's algorithm.
 *
 * <p>The alias method allows efficient sampling of random values from a discrete probability
 * distribution (i.e. rolling a loaded die) in O(1) time per sample after O(n) preprocessing.
 *
 * <p>For a complete writeup on the alias method, including the intuition and important proofs,
 * please see the article "Darts, Dice, and Coins: Sampling from a Discrete Distribution" at
 *
 * <p>http://www.keithschwarz.com/darts-dice-coins/
 *
 * <p>Author: Keith Schwarz (htiek@cs.stanford.edu)
 */
public final class AliasMethod {

    /**
     * The caller-supplied random number generator, or {@code null} when none was supplied.
     * In the {@code null} case {@link #next()} obtains {@link ThreadLocalRandom#current()} on
     * every call, because a ThreadLocalRandom instance must not be cached and then used from
     * a different thread.
     */
    private final Random random;

    /** alias[i] is the outcome returned when the biased coin toss for column i fails. */
    private final int[] alias;

    /** probability[i] is the chance (scaled so that 1/n maps to 1.0) of keeping column i. */
    private final double[] probability;

    /**
     * Constructs a new AliasMethod that samples outcomes 0, 1, ..., n - 1 according to the
     * given probabilities, drawing randomness from the calling thread's
     * {@link ThreadLocalRandom}.
     *
     * @param probabilities The list of probabilities.
     * @throws IllegalArgumentException if the list is null or empty, or contains an entry that
     *     is null, NaN, infinite or negative
     */
    public AliasMethod(List<Double> probabilities) {
        this(probabilities, null, false);
    }

    /**
     * Constructs a new AliasMethod that samples outcomes 0, 1, ..., n - 1 according to the
     * given probabilities, using the supplied generator as the source of randomness.
     *
     * @param probabilities The list of probabilities.
     * @param random The random number generator
     * @throws IllegalArgumentException if either argument is null, the list is empty, or the
     *     list contains an entry that is null, NaN, infinite or negative
     */
    public AliasMethod(List<Double> probabilities, Random random) {
        this(probabilities, random, true);
    }

    /**
     * Shared constructor that validates the input and builds the probability and alias tables.
     *
     * @param probabilities The list of probabilities.
     * @param random The generator to store; may be null only when {@code randomRequired} is false
     * @param randomRequired whether a null {@code random} must be rejected
     */
    private AliasMethod(List<Double> probabilities, Random random, boolean randomRequired) {
        /* Basic structural checks on the inputs. */
        if (probabilities == null || (randomRequired && random == null)) {
            throw new IllegalArgumentException("param is null.");
        }
        if (probabilities.isEmpty()) {
            throw new IllegalArgumentException("Probability vector must be nonempty.");
        }

        final int n = probabilities.size();

        /* Take a primitive working copy: the algorithm mutates the values, and the caller's
         * list must stay untouched. Invalid entries are rejected here instead of silently
         * producing corrupt tables (or a bare NullPointerException) later on.
         */
        final double[] remaining = new double[n];
        for (int i = 0; i < n; ++i) {
            Double p = probabilities.get(i);
            if (p == null || p.isNaN() || p.isInfinite() || p < 0.0) {
                throw new IllegalArgumentException(
                        "Probabilities must be non-null, finite and non-negative.");
            }
            remaining[i] = p;
        }

        this.random = random;
        this.probability = new double[n];
        this.alias = new int[n];

        /* The average probability; entries at or above it are "large", the rest "small". */
        final double average = 1.0 / n;

        /* Two stacks acting as worklists while the tables are populated. */
        Deque<Integer> small = new ArrayDeque<>();
        Deque<Integer> large = new ArrayDeque<>();
        for (int i = 0; i < n; ++i) {
            if (remaining[i] >= average) {
                large.push(i);
            } else {
                small.push(i);
            }
        }

        /* Mathematically the small list is always exhausted before the large one, but floating
         * point inaccuracies can break that, so both lists are checked on every iteration.
         */
        while (!small.isEmpty() && !large.isEmpty()) {
            int less = small.pop();
            int more = large.pop();

            /* Scale so that a probability of 1/n is given weight 1.0. */
            probability[less] = remaining[less] * n;
            alias[less] = more;

            /* The large entry donated (average - remaining[less]) to fill this column. */
            remaining[more] = (remaining[more] + remaining[less]) - average;

            if (remaining[more] >= average) {
                large.push(more);
            } else {
                small.push(more);
            }
        }

        /* Everything left should be exactly 1/n. When the input sums to slightly less than 1
         * (precision lost converting fractions to decimals) or slightly more than 1 (rounding),
         * the leftovers are simply treated as 1.0, which shifts the effective probabilities a
         * little. Numerical issues mean either stack may hold the leftovers, so drain both.
         */
        while (!small.isEmpty()) {
            probability[small.pop()] = 1.0;
        }
        while (!large.isEmpty()) {
            probability[large.pop()] = 1.0;
        }
    }

    /**
     * Samples a value from the underlying distribution.
     *
     * @return A random value sampled from the underlying distribution.
     */
    public int next() {
        /* Resolve the generator per call so a ThreadLocalRandom is never shared across threads. */
        final Random rng = (random != null) ? random : ThreadLocalRandom.current();
        /* A fair die roll picks the column to inspect. */
        int column = rng.nextInt(probability.length);
        /* A biased coin toss decides between the column itself and its alias. */
        boolean coinToss = rng.nextDouble() < probability[column];
        return coinToss ? column : alias[column];
    }
}
蓄水池抽样算法:适用于样本总数事先未知、需要从中等概率抽取固定数量样本的场景,例如从所有参加活动的人中随机抽出几个。
/**
 * Redis Lua script implementing one step of reservoir sampling.
 *
 * <p>KEYS[1] is the list holding the reservoir (result set); KEYS[2] is the counter of how many
 * items have flowed past so far. ARGV[1] is the reservoir capacity, ARGV[2] the incoming item,
 * ARGV[3] the random seed and ARGV[4] the TTL applied to both keys.
 *
 * <p>While the reservoir holds fewer than ARGV[1] items the new item is simply pushed. Once it
 * is full, an index is drawn uniformly from [0, counter - 1]; if it falls inside the reservoir
 * the element at that index is overwritten by the new item. Both keys have their TTL refreshed
 * on every call, and the script always returns {'true'}.
 */
private static final String ADD_ITEM_LUA =
        "local poolKey = KEYS[1]; \n" +
        "local countingKey = KEYS[2]; \n" +
        "local poolSize = tonumber(ARGV[1]); \n" +
        "local item = ARGV[2]; \n" +
        "local rseed = tonumber(ARGV[3]); \n" + // random seed supplied by the caller
        "local expire = ARGV[4]; \n" +
        "local uvCounting = tonumber(redis.call('incr', countingKey)); \n" + // how many items have flowed past the pool so far
        "local curPoolSize = tonumber(redis.call('llen', poolKey)); \n" + // how many items the reservoir currently holds
        "if curPoolSize < poolSize then \n" + // reservoir not full yet: add the item directly
        "  redis.call('lpush', poolKey, item); \n" +
        "else \n" +
        "  math.randomseed(rseed); \n" +
        "  local randomIndex = math.random(0, uvCounting - 1); \n" +
        "  if randomIndex < poolSize then \n" + // overwrite the chosen slot with the current item
        "    redis.call('lset', poolKey, randomIndex, item); \n" +
        "  end \n" +
        "end \n" +
        "redis.call('expire', poolKey, expire); \n" +
        "redis.call('expire', countingKey, expire); \n" +
        "return {'true'}; \n"
        ;