在python中使用布隆过滤器

24 min read
from bitarray import bitarray
import mmh3

class BloomFilter:
    
    def __init__(self, size, num_hashes):
        self.size = size
        self.num_hashes = num_hashes
        self.bit_array = bitarray(size)
        self.bit_array.setall(0)
    
    def add(self, string):
        for seed in range(self.num_hashes):
            result = mmh3.hash(string, seed) % self.size
            self.bit_array[result] = 1
    
    def __contains__(self, string):
        for seed in range(self.num_hashes):
            result = mmh3.hash(string, seed) % self.size
            if self.bit_array[result] == 0:
                return False
        return True

使用布隆过滤器的例子:

# 初始化布隆过滤器,设置大小为10,哈希函数数量为2
bf = BloomFilter(10, 2)
# 添加元素
bf.add('Python')
bf.add('Java')
bf.add('C++')
bf.add('Ruby')

# 检查元素是否存在
print('Python' in bf)
print('Swift' in bf)