基于 Redis 的 PHP 版 Bloom Filter

1,875 阅读1分钟

网上搜索了几个版本的布隆过滤器实现,记录一波。

版本一

<?php

// The bitmap key carries the current date (Ymd), so each day uses its own key.
$redisKey = 'BF@' . date('Ymd');
// Bitmap size in bits; the original note describes 2^32 as the Redis string max length.
$mod = 2 ** 32;
// Number of hash functions applied to every entry.
$k = 8;

$bf = new Bf($redisKey, $mod, $k);

// add() inserts the string as a side effect: the first call for a given string
// returns false, and a later call with the same string returns true.
var_dump($bf->add("zhangsan"));

/**
 * Bloom filter stored in a single Redis bitmap.
 *
 * Each entry is hashed $k times with BKDR hashes that use different seeds;
 * every hash value is reduced modulo $m and used as a bit offset in the
 * bitmap stored under $key.
 */
class Bf
{
    /** Upper bound accepted for $m (2^32 bits). */
    const MAX_BITS = 4294967296;

    /** @var mixed Redis client returned by MyRedis::instance(). */
    public $redis;
    /** @var string Redis key holding the bitmap. */
    public $key;
    /** @var int|float Bitmap size in bits (the modulus applied to every hash). */
    public $m;
    /** @var int Number of hash functions applied per entry. */
    public $k;

    /**
     * @param string    $key Redis key of the bitmap
     * @param int|float $m   bitmap size in bits, 1 .. 4294967296
     * @param int       $k   number of hash functions, at least 1
     * @throws InvalidArgumentException when $m or $k is out of range
     */
    public function __construct($key, $m, $k)
    {
        // A constructor cannot "return false": the caller would still receive a
        // half-initialised object. Reject bad arguments with an exception instead.
        if ($m > self::MAX_BITS) {
            error_log('ERROR: m over 4294967296');
            throw new InvalidArgumentException('m over 4294967296');
        }
        if ($m < 1) {
            // A zero modulus would make "$hash % $this->m" fail in add().
            throw new InvalidArgumentException('m must be at least 1');
        }
        if ((int)$k < 1) {
            // With no hash functions add() would report every entry as present.
            throw new InvalidArgumentException('k must be at least 1');
        }

        $this->key = $key;
        $this->m = $m;
        // Stored as int so the strict comparison in add() works for numeric strings too.
        $this->k = (int)$k;
        $this->redis = MyRedis::instance();
    }

    /**
     * Test-and-insert: sets all k bits for $e and reports whether they were
     * all set already.
     *
     * @param mixed $e entry; cast to string before hashing
     * @return bool false means e was not in the filter (it has now been added); true means in.
     * @throws RuntimeException when the pipeline does not return an array of replies
     */
    public function add($e)
    {
        $e = (string)$e;

        $this->redis->multi(Redis::PIPELINE);
        for ($i = 0; $i < $this->k; $i++) {
            $offset = self::BKDRHash($e, self::getBKDRHashSeed($i)) % $this->m;
            // SETBIT replies with the bit value before it was set.
            $this->redis->setbit($this->key, $offset, 1);
        }

        $t1 = microtime(true);
        $rt = $this->redis->exec();
        $t2 = microtime(true);

        if (!is_array($rt)) {
            throw new RuntimeException('redis pipeline exec failed');
        }

        $cost = round(($t2 - $t1) * 1000, 3) . 'ms';
        // Sum of the previous bit values: equals k only if every bit was already 1.
        $c = array_sum($rt);
        error_log('[' . date('Y-m-d H:i:s') . '] DEBUG: redis-time-spent=' . $cost . ' entry=' . $e . ' c=' . $c);

        return $c === $this->k;
    }

    /**
     * Deletes the bitmap key, emptying the filter.
     *
     * @return mixed whatever the client's del() returns
     */
    public function flushall()
    {
        // del() is the canonical phpredis method; delete() is a deprecated alias.
        return $this->redis->del($this->key);
    }

    /**
     * Returns the seed for the n-th hash function: 31 for n = 0, otherwise a
     * number of n + 2 alternating digits 1,3,1,3,... (131, 1313, 13131, ...).
     *
     * @param int $n zero-based hash function index
     * @return int|float the seed (the digit string grows with $n, so very
     *                   large $n exceeds the integer range)
     */
    public static function getBKDRHashSeed($n)
    {
        if ($n === 0) {
            return 31;
        }

        $digits = $n + 2;
        $seed = 0;
        for ($i = 0; $i < $digits; $i++) {
            // Odd positions append 3, even positions append 1.
            $seed = $seed * 10 + (($i % 2) ? 3 : 1);
        }

        return $seed;
    }

    /**
     * BKDR string hash truncated to 31 bits.
     *
     * @param string    $str  bytes to hash
     * @param int|float $seed multiplier, see getBKDRHashSeed()
     * @return int value in 0 .. 0x7FFFFFFF
     */
    public static function BKDRHash($str, $seed)
    {
        $hash = 0;
        $len = strlen($str);
        for ($i = 0; $i < $len; $i++) {
            // NOTE(review): floatval() rounds products above 2^53, so the low bits
            // kept by the mask are approximate for large seeds. The expression is
            // left unchanged so bit positions in existing bitmaps stay valid.
            $hash = ((floatval($hash * $seed) & 0x7FFFFFFF) + ord($str[$i])) & 0x7FFFFFFF;
        }

        return ($hash & 0x7FFFFFFF);
    }
}


?>

版本二

/**
 * Bloom filter stored in a Redis bitmap, with separate add() and has() calls.
 *
 * Bit positions are derived from crc32(md5('m' . index . item)) reduced
 * modulo the filter size.
 */
class BloomFilter {

    /** Default Redis key of the bitmap. */
    const KEY_BLOOM = 'bloom';

    /** Redis client returned by MyRedis::instance(). */
    private $_redis;

    /** Number of bits in the filter (modulus applied to every hash). */
    private $_size;

    /** Number of hash functions applied per item. */
    private $_hashCount;

    /** Redis key this instance reads and writes. */
    private $_key;


    /**
     * @param int    $size       number of bits, 1 .. 4294967296
     * @param int    $hash_count number of hash functions, at least 1
     * @param string $key        Redis key of the bitmap; defaults to KEY_BLOOM
     * @throws InvalidArgumentException when $size or $hash_count is out of range
     */
    public function __construct($size, $hash_count, $key = self::KEY_BLOOM) {
        if ((int)$size < 1 || $size > 4294967296) {
            // Size 0 would make the modulo in hash() fail.
            throw new InvalidArgumentException('size must be between 1 and 4294967296');
        }
        if ((int)$hash_count < 1) {
            // With no hash functions has() would answer true for every item.
            throw new InvalidArgumentException('hash_count must be at least 1');
        }

        $this->_size = (int)$size;
        $this->_hashCount = (int)$hash_count;
        $this->_key = $key;
        $this->_redis = MyRedis::instance();
    }


    /**
     * Sets every bit position derived from $item, in one pipeline.
     *
     * @param mixed $item value to insert (concatenated into the hash input)
     */
    public function add($item) {
        $this->_redis->multi(Redis::PIPELINE);
        for ($index = 0; $index < $this->_hashCount; $index++) {
            $this->_redis->setbit($this->_key, $this->hash($item, $index), 1);
        }
        $this->_redis->exec();
    }


    /**
     * Reports whether every bit position derived from $item is set.
     *
     * @param mixed $item value to look up
     * @return bool false if any bit is not set; true otherwise
     * @throws RuntimeException when the pipeline does not return an array of replies
     */
    public function has($item) {
        $this->_redis->multi(Redis::PIPELINE);
        for ($index = 0; $index < $this->_hashCount; $index++) {
            $this->_redis->getbit($this->_key, $this->hash($item, $index));
        }
        $result = $this->_redis->exec();

        if (!is_array($result)) {
            throw new RuntimeException('redis pipeline exec failed');
        }

        foreach ($result as $bit) {
            // Any reply other than 1 (0, false, null) means the bit is not confirmed set.
            if ((int)$bit !== 1) {
                return false;
            }
        }

        return true;
    }


    /**
     * Maps ($item, $index) to a bit offset in 0 .. size - 1.
     *
     * @param mixed $item  value being hashed
     * @param int   $index hash function index, mixed into the hash input
     * @return int bit offset
     */
    private function hash($item, $index) {
        return abs(crc32(md5('m' . $index . $item))) % $this->_size;
    }

}


// Build a filter of 100000 bits with 3 hash functions, insert one value,
// then print whether the filter reports it as present.
$bf = new BloomFilter(100000, 3);
$bf->add('value');

echo $bf->has('value') ? "has" : "nopes";