from pybloom_live import BloomFilter, ScalableBloomFilter
def test_bf():
error_rate = 0.01
capacity = 5000
bf = BloomFilter(capacity, 0.01)
items = list(range(capacity))
for i in items:
bf.add(i)
test_items = list(range(bf.num_bits))
false_positive_count = 0
for i in test_items:
if i in bf and i not in items:
false_positive_count += 1
p = false_positive_count / len(test_items)
print(p)
assert p < error_rate
def test_sbf():
error_rate = 0.01
sbf = ScalableBloomFilter(100, error_rate, 2)
items = list(range(5000))
for i in items:
sbf.add(i)
test_items = list(range(sum(f.num_bits for f in sbf.filters)))
false_positive_count = 0
for i in test_items:
if i in sbf and i not in items:
false_positive_count += 1
p = false_positive_count / len(test_items)
print(p)
assert p < error_rate
I try to do regression test for error rate for BloomFilter and ScalableBloomFilter.
For simple BloomFilter, the error_rate is in expected <0.01.
But for ScalableBloomFilter, error_rate always around 0.02 (expected 0.01)
In test, I allocated test_items in the total size of underlying bit arrays, if filter returned True, but item not in original inserted set, I thought it's a false positive item.
I try to do regression test for error rate for BloomFilter and ScalableBloomFilter.
For simple BloomFilter, the error_rate is in expected
<0.01
.But for ScalableBloomFilter, error_rate always around 0.02 (expected 0.01)
In test, I allocated
test_items
in the total size of underlying bit arrays, if filter returned True, but item not in original inserted set, I thought it's a false positive item.Wondering whether my test approach is right?
Thanks.