在网上搜索到了几个版本的布隆过滤器(Bloom Filter)实现,在此记录一下。
版本1
<?php
// One filter key per calendar day: "BF@" followed by the date as Ymd.
$redisKey = 'BF@' . date('Ymd');
// 2^32 bits; SETBIT offsets must stay below 2^32, which is the size limit of a Redis string.
$mod = 2 ** 32;
// Number of hash functions, i.e. bits touched per element.
$k = 8;
$bf = new Bf($redisKey, $mod, $k);
// add() inserts the string as part of the call: the first call for a given
// string returns false, calling again with the same string returns true.
var_dump($bf->add('zhangsan'));
/**
 * Bloom filter whose bit array is stored in a single Redis string key.
 *
 * Every element is mapped to $k bit offsets (BKDR hash with $k different
 * seeds, each reduced modulo $m) and the bits are written with SETBIT inside
 * one pipelined round trip.
 */
class Bf
{
    /** @var mixed Redis client obtained from MyRedis::instance() */
    public $redis;
    /** @var string Redis key holding the bit array */
    public $key;
    /** @var int Size of the bit array in bits; every hash is taken modulo this value */
    public $m;
    /** @var int Number of hash functions, i.e. bits written per element */
    public $k;

    /**
     * @param string $key Redis key used for the bit array.
     * @param int    $m   Bit array size, 1..4294967296.
     * @param int    $k   Number of hash functions, at least 1.
     *
     * @throws InvalidArgumentException When $m or $k is out of range.
     */
    public function __construct($key, $m, $k)
    {
        // `new` discards whatever a constructor returns, so invalid arguments
        // have to be rejected with an exception; `return false` would still
        // hand the caller an object with no Redis client attached.
        if ($m > 4294967296) {
            error_log('ERROR: m over 4294967296');
            throw new InvalidArgumentException('m over 4294967296');
        }
        if ($m < 1) {
            // $m is used as a modulus in add(); zero would divide by zero.
            throw new InvalidArgumentException('m must be at least 1');
        }
        if ($k < 1) {
            // With no hash functions add() would report every element as present.
            throw new InvalidArgumentException('k must be at least 1');
        }

        $this->key = $key;
        $this->m = $m;
        $this->k = $k;
        $this->redis = MyRedis::instance();
    }

    /**
     * Tests whether $e is already in the filter and inserts it in the same call.
     *
     * @param mixed $e Element; it is cast to string before hashing.
     *
     * @return bool false means $e was not in the filter (it has now been added);
     *              true means all of its bits were already set.
     *
     * @throws RuntimeException When the pipeline does not return a reply array.
     */
    public function add($e)
    {
        $e = (string)$e;

        $this->redis->multi(Redis::PIPELINE);
        for ($i = 0; $i < $this->k; $i++) {
            $offset = self::BKDRHash($e, self::getBKDRHashSeed($i)) % $this->m;
            // SETBIT replies with the value the bit had before it was set.
            $this->redis->setbit($this->key, $offset, 1);
        }

        // Only the exec() round trip is timed; the commands above are merely queued.
        $t1 = microtime(true);
        $rt = $this->redis->exec();
        $t2 = microtime(true);

        if (!is_array($rt)) {
            throw new RuntimeException('redis pipeline exec did not return a reply array');
        }

        $cost = round(($t2 - $t1) * 1000, 3) . 'ms';
        // Sum of the previous bit values: equals k only if every bit was already 1.
        $c = array_sum($rt);
        error_log('[' . date('Y-m-d H:i:s', time()) . '] DEBUG: redis-time-spent=' . $cost . ' entry=' . $e . ' c=' . $c);

        // Compare as integers so a numeric-string $k does not defeat the strict check.
        return (int)$c === (int)$this->k;
    }

    /**
     * Removes this filter's Redis key (only $this->key, not the whole database).
     *
     * @return mixed Whatever the Redis client's delete() returns.
     */
    public function flushall()
    {
        return $this->redis->delete($this->key);
    }

    /**
     * Returns the BKDR seed for hash function number $n.
     *
     * Seed 0 is 31; for any other $n the seed is the ($n + 2)-digit number made
     * of alternating digits 1 and 3 starting with 1 (131, 1313, 13131, ...).
     *
     * @param int $n Zero-based index of the hash function.
     *
     * @return int
     */
    public static function getBKDRHashSeed($n)
    {
        if ($n === 0) {
            return 31;
        }

        $digits = $n + 2;
        $seed = 0;
        for ($i = 0; $i < $digits; $i++) {
            // Odd positions append a 3, even positions append a 1.
            $seed = $seed * 10 + ($i % 2 ? 3 : 1);
        }

        return $seed;
    }

    /**
     * BKDR string hash reduced to 31 bits.
     *
     * NOTE(review): the running product is passed through floatval() before it
     * is masked, so products above 2^53 lose low-order bits. The arithmetic is
     * deliberately left untouched so that offsets already written to Redis
     * remain valid.
     *
     * @param string $str  Input string, hashed byte by byte.
     * @param int    $seed Multiplier, see getBKDRHashSeed().
     *
     * @return int Value in the range 0..0x7FFFFFFF.
     */
    public static function BKDRHash($str, $seed)
    {
        $hash = 0;
        $len = strlen($str);
        for ($i = 0; $i < $len; $i++) {
            $hash = ((floatval($hash * $seed) & 0x7FFFFFFF) + ord($str[$i])) & 0x7FFFFFFF;
        }

        return ($hash & 0x7FFFFFFF);
    }
}
?>
版本2
/**
 * Bloom filter kept in the Redis bitmap stored under the key "bloom".
 *
 * Each item is mapped to $hash_count bit offsets in the range 0..$size-1;
 * add() sets those bits and has() reports whether all of them are set.
 */
class BloomFilter {
    /** @var mixed Redis client obtained from MyRedis::instance() */
    private $_redis;
    /** @var int Number of bits in the filter; every hash is taken modulo this value */
    private $_size;
    /** @var int Number of hash functions applied to each item */
    private $_hashCount;

    const KEY_BLOOM = 'bloom';

    /**
     * @param int $size       Number of bits in the filter, at least 1.
     * @param int $hash_count Number of hash functions per item, at least 1.
     *
     * @throws InvalidArgumentException When either argument is below 1.
     */
    public function __construct($size, $hash_count) {
        if ($size < 1) {
            // $size is the modulus in hash(); zero would divide by zero.
            throw new InvalidArgumentException('size must be at least 1');
        }
        if ($hash_count < 1) {
            // With no hash functions has() would report every item as present.
            throw new InvalidArgumentException('hash_count must be at least 1');
        }

        $this->_size = $size;
        $this->_hashCount = $hash_count;
        $this->_redis = MyRedis::instance();
    }

    /**
     * Sets the bits that belong to $item, using one pipelined round trip.
     *
     * @param mixed $item Value to insert; it is concatenated into the hash input.
     */
    public function add($item) {
        $this->_redis->multi(Redis::PIPELINE);
        for ($index = 0; $index < $this->_hashCount; $index++) {
            $this->_redis->setbit(self::KEY_BLOOM, $this->hash($item, $index), 1);
        }
        $this->_redis->exec();
    }

    /**
     * Reports whether every bit that belongs to $item is set.
     *
     * @param mixed $item Value to look up.
     *
     * @return bool false if at least one bit is 0, true otherwise.
     *
     * @throws RuntimeException When the pipeline does not return a reply array.
     */
    public function has($item) {
        $this->_redis->multi(Redis::PIPELINE);
        for ($index = 0; $index < $this->_hashCount; $index++) {
            $this->_redis->getbit(self::KEY_BLOOM, $this->hash($item, $index));
        }
        $result = $this->_redis->exec();

        if (!is_array($result)) {
            throw new RuntimeException('redis pipeline exec did not return a reply array');
        }

        foreach ($result as $bit) {
            // Cast so that 0, "0", false and null replies all count as an unset bit.
            if ((int)$bit === 0) {
                return false;
            }
        }

        return true;
    }

    /**
     * Bit offset for hash function number $index: crc32 of the md5 hex digest
     * of "m" . $index . $item, reduced modulo the filter size.
     *
     * @param mixed $item
     * @param int   $index Zero-based index of the hash function.
     *
     * @return int Offset in the range 0..$this->_size - 1.
     */
    private function hash($item, $index) {
        return abs(crc32(md5('m' . $index . $item))) % $this->_size;
    }
}
// Demo: a 100000-bit filter with 3 hash functions; insert one value, then look it up.
$bf = new BloomFilter(100000, 3);
$bf->add('value');
echo $bf->has('value') ? "has" : "nopes";