The BTL C/C++ Common Bloom filters for bioinformatics projects, as well as any APIs created for other programming languages.
Fast Bloom filter loading using the rolling hash function.
#include "BloomFilter.hpp"
#include <vector>
#include <string>
#include "vendor/ntHashIterator.hpp"
using namespace std;
int main(int argc, char** argv)
{
/* test sequence */
const string seq = "TAGAATCACCCAAAGA";
/* k-mer size */
const unsigned k = 5;
/* number of Bloom filter hash functions */
const unsigned numHashes = 4;
/* size of Bloom filter (in bits) */
const unsigned size = 1000;
//Building the filter
{
/* init Bloom filter */
BloomFilter bloom(size, numHashes, k);
/* init rolling hash state and compute hash values for first k-mer */
ntHashIterator itr(seq, numHashes, k);
while (itr != itr.end()) {
bloom.insert(*itr);
++itr;
}
/* store the bloom filter */
bloom.storeFilter("filterPathname.bf");
}
//After building
{
/* load the bloom filter */
BloomFilter bloom("filterPathname.bf");
/* query the bloom filter */
/* init rolling hash state and compute hash values for first k-mer */
ntHashIterator itr(seq, numHashes, k);
while (itr != itr.end()) {
bloom.contains(*itr);
++itr;
}
}
return 0;
}
Fast Counting Bloom filter loading using the rolling hash function.
#include "CountingBloomFilter.hpp"
#include <vector>
#include <string>
#include "vendor/ntHashIterator.hpp"
using namespace std;
int main(int argc, char** argv)
{
/* test sequence */
const string seq = "TAGAATCACCCAAAGA";
/* k-mer size */
const unsigned k = 5;
/* number of Bloom filter hash functions */
const unsigned numHashes = 4;
/* size of Bloom filter (in bytes) */
size_t size = 1000;
/* counts to threshold bloom filter on*/
const unsigned threshold = 1;
//Building the filter
{
/* init Bloom filter */
CountingBloomFilter<uint8_t> bloom(size, numHashes, k, threshold);
/* init rolling hash state and compute hash values for first k-mer */
ntHashIterator itr(seq, numHashes, k);
while (itr != itr.end()) {
bloom.insert(*itr);
++itr;
}
/* store the bloom filter */
bloom.storeFilter("filterPathname.bf");
}
//After building
{
/* load the bloom filter */
CountingBloomFilter<uint8_t> bloom("filterPathname.bf", threshold);
/* query the bloom filter */
/* init rolling hash state and compute hash values for first k-mer */
ntHashIterator itr(seq, numHashes, k);
while (itr != itr.end()) {
bloom.contains(*itr);
++itr;
}
}
return 0;
}
BloomFilter.hpp
: main Bloom filter classCountingBloomFilter.hpp
: Counting Bloom filter classRollingHashIterator.h
: Enable rolling hashing on a stringRollingHash.h
: rolling hash interface (required by RollingHashIterator.h
)rolling.h
: rolling hash function (required by BloomFilter.hpp
and RollingHash.h
)Tests/Unit
: unit testsTests/AdHoc
: ad-hoc testsThe unit tests may be compiled and run with:
$ ./autogen.sh
$ ./configure
$ make check
To see more detailed output for the individual tests, run the binaries in Tests/Unit
from the command line. (The ad-hoc tests in Tests/AdHoc
may also be run in this way.)
This projects uses:
The specification of the Bloom filter file format is as follows: