import hashlib
class BloomFilter:
    """A simple Bloom filter backed by a list of 0/1 flags.

    A Bloom filter answers "possibly present" or "definitely absent":
    ``contains`` may return a false positive but never a false negative
    for an item that was previously passed to ``add``.
    """

    def __init__(self, size, num_hashes):
        """Initialise the Bloom filter.

        :param size: number of slots in the bit array; must be >= 1
        :param num_hashes: number of hash functions to use; must be >= 1
        :raises ValueError: if ``size`` or ``num_hashes`` is less than 1
        """
        # A zero size would make the modulo in _hashes divide by zero, and a
        # negative one would silently create an empty bit array.
        if size < 1:
            raise ValueError("size must be a positive integer")
        # With no hash functions nothing is ever recorded and every lookup
        # would vacuously succeed.
        if num_hashes < 1:
            raise ValueError("num_hashes must be a positive integer")
        self.size = size
        self.num_hashes = num_hashes
        self.bit_array = [0] * size

    def _hashes(self, item):
        """Return ``num_hashes`` bit-array indexes for ``item``.

        Each index is the MD5 digest of the hash-function number prepended
        to ``item``, reduced modulo ``size``. MD5 is used here only to
        spread values, not for security.

        :param item: the string to hash
        :return: list of indexes, each in ``range(self.size)``
        """
        return [
            int(hashlib.md5((str(i) + item).encode('utf-8')).hexdigest(), 16)
            % self.size
            for i in range(self.num_hashes)
        ]

    def add(self, item):
        """Add ``item`` to the filter by setting each of its slots to 1."""
        for index in self._hashes(item):
            self.bit_array[index] = 1

    def contains(self, item):
        """Check whether ``item`` may be in the filter.

        :return: ``False`` if any slot for ``item`` is unset (the item was
            never added); ``True`` if all are set (the item was added, or
            its slots were all set by other items).
        """
        return all(self.bit_array[index] != 0 for index in self._hashes(item))

    def __contains__(self, item):
        """Support ``item in bloom_filter``; equivalent to ``contains``."""
        return self.contains(item)
if __name__ == "__main__":
    # Demo: insert three fruits, dump the raw bit array, then query two
    # inserted items and one that was never inserted.
    bloom = BloomFilter(size=1000, num_hashes=5)
    for fruit in ("apple", "banana", "grape"):
        bloom.add(fruit)

    print(bloom.bit_array)

    for candidate in ("apple", "banana", "orange"):
        print(f"{candidate} in filter?", bloom.contains(candidate))