Skip to content

Commit

Permalink
More efficient bitsliced rolling Bloom filter
Browse files Browse the repository at this point in the history
This patch changes the implementation from one that stores 16 2-bit integers
in each uint32_t to one that stores the first bit of 64 2-bit integers in
one uint64_t and the second bit in another. This allows for 450x faster
refreshing and 2.2x faster average speed.
  • Loading branch information
sipa committed Apr 28, 2016
1 parent aa62b68 commit 1953c40
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 26 deletions.
40 changes: 27 additions & 13 deletions src/bloom.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -234,14 +234,18 @@ CRollingBloomFilter::CRollingBloomFilter(unsigned int nElements, double fpRate)
*/
uint32_t nFilterBits = (uint32_t)ceil(-1.0 * nHashFuncs * nMaxElements / log(1.0 - exp(logFpRate / nHashFuncs)));
data.clear();
/* We store up to 16 'bits' per data element. */
data.resize((nFilterBits + 15) / 16);
/* For each data element we need to store 2 bits. If both bits are 0, the
* bit is treated as unset. If the bits are (01), (10), or (11), the bit is
* treated as set in generation 1, 2, or 3 respectively.
* These bits are stored in separate integers: position P corresponds to bit
* (P & 63) of the integers data[(P >> 6) * 2] and data[(P >> 6) * 2 + 1]. */
data.resize(((nFilterBits + 63) / 64) << 1);
reset();
}

/* Similar to CBloomFilter::Hash */
inline unsigned int CRollingBloomFilter::Hash(unsigned int nHashNum, const std::vector<unsigned char>& vDataToHash) const {
return MurmurHash3(nHashNum * 0xFBA4C795 + nTweak, vDataToHash) % (data.size() * 16);
static inline uint32_t RollingBloomHash(unsigned int nHashNum, uint32_t nTweak, const std::vector<unsigned char>& vDataToHash) {
/* Returns the 32-bit MurmurHash3 of vDataToHash, with the seed derived from
 * the hash-function index and the filter's tweak: nHashNum * 0xFBA4C795 + nTweak.
 * No range reduction is applied here; the callers in insert() and contains()
 * use the low 6 bits of the result as the bit index and
 * (h >> 6) % data.size() as the word position. */
return MurmurHash3(nHashNum * 0xFBA4C795 + nTweak, vDataToHash);
}

void CRollingBloomFilter::insert(const std::vector<unsigned char>& vKey)
Expand All @@ -252,18 +256,25 @@ void CRollingBloomFilter::insert(const std::vector<unsigned char>& vKey)
if (nGeneration == 4) {
nGeneration = 1;
}
uint64_t nGenerationMask1 = -(uint64_t)(nGeneration & 1);
uint64_t nGenerationMask2 = -(uint64_t)(nGeneration >> 1);
/* Wipe old entries that used this generation number. */
for (uint32_t p = 0; p < data.size() * 16; p++) {
if (get(p) == nGeneration) {
put(p, 0);
}
for (uint32_t p = 0; p < data.size(); p += 2) {
uint64_t p1 = data[p], p2 = data[p + 1];
uint64_t mask = (p1 ^ nGenerationMask1) | (p2 ^ nGenerationMask2);
data[p] = p1 & mask;
data[p + 1] = p2 & mask;
}
}
nEntriesThisGeneration++;

for (int n = 0; n < nHashFuncs; n++) {
uint32_t h = Hash(n, vKey);
put(h, nGeneration);
uint32_t h = RollingBloomHash(n, nTweak, vKey);
int bit = h & 0x3F;
uint32_t pos = (h >> 6) % data.size();
/* The lowest bit of pos is ignored, and set to zero for the first bit, and to one for the second. */
data[pos & ~1] = (data[pos & ~1] & ~(((uint64_t)1) << bit)) | ((uint64_t)(nGeneration & 1)) << bit;
data[pos | 1] = (data[pos | 1] & ~(((uint64_t)1) << bit)) | ((uint64_t)(nGeneration >> 1)) << bit;
}
}

Expand All @@ -276,8 +287,11 @@ void CRollingBloomFilter::insert(const uint256& hash)
bool CRollingBloomFilter::contains(const std::vector<unsigned char>& vKey) const
{
for (int n = 0; n < nHashFuncs; n++) {
uint32_t h = Hash(n, vKey);
if (get(h) == 0) {
uint32_t h = RollingBloomHash(n, nTweak, vKey);
int bit = h & 0x3F;
uint32_t pos = (h >> 6) % data.size();
/* If the relevant bit is not set in either data[pos & ~1] or data[pos | 1], the filter does not contain vKey */
if (!(((data[pos & ~1] | data[pos | 1]) >> bit) & 1)) {
return false;
}
}
Expand All @@ -295,7 +309,7 @@ void CRollingBloomFilter::reset()
nTweak = GetRand(std::numeric_limits<unsigned int>::max());
nEntriesThisGeneration = 0;
nGeneration = 1;
for (std::vector<uint32_t>::iterator it = data.begin(); it != data.end(); it++) {
for (std::vector<uint64_t>::iterator it = data.begin(); it != data.end(); it++) {
*it = 0;
}
}
13 changes: 1 addition & 12 deletions src/bloom.h
Original file line number Diff line number Diff line change
Expand Up @@ -135,20 +135,9 @@ class CRollingBloomFilter
int nEntriesPerGeneration;
int nEntriesThisGeneration;
int nGeneration;
std::vector<uint32_t> data;
std::vector<uint64_t> data;
unsigned int nTweak;
int nHashFuncs;

unsigned int Hash(unsigned int nHashNum, const std::vector<unsigned char>& vDataToHash) const;

inline int get(uint32_t position) const {
return (data[(position >> 4) % data.size()] >> (2 * (position & 0xF))) & 0x3;
}

inline void put(uint32_t position, uint32_t val) {
uint32_t& cell = data[(position >> 4) % data.size()];
cell = (cell & ~(((uint32_t)3) << (2 * (position & 0xF)))) | (val << (2 * (position & 0xF)));
}
};

#endif // BITCOIN_BLOOM_H
5 changes: 4 additions & 1 deletion src/test/bloom_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -514,11 +514,14 @@ BOOST_AUTO_TEST_CASE(rolling_bloom)
if (i >= 100)
BOOST_CHECK(rb1.contains(data[i-100]));
rb1.insert(data[i]);
BOOST_CHECK(rb1.contains(data[i]));
}

// Insert 999 more random entries:
for (int i = 0; i < 999; i++) {
rb1.insert(RandomData());
std::vector<unsigned char> d = RandomData();
rb1.insert(d);
BOOST_CHECK(rb1.contains(d));
}
// Sanity check to make sure the filter isn't just filling up:
nHits = 0;
Expand Down

0 comments on commit 1953c40

Please sign in to comment.