Skip to content

Commit

Permalink
Make CRollingBloomFilter set nTweak for you
Browse files Browse the repository at this point in the history
While CBloomFilter is usually used with an explicitly set nTweak,
CRollingBloomFilter is only used internally. Requiring every caller to
set nTweak is error-prone and redundant; better to have the class handle
that for you with a high-quality randomness source.

Additionally when clearing the filter it makes sense to change nTweak as
well to recover from a bad setting, e.g. due to insufficient randomness
at initialization, so the clear() method is replaced by a reset() method
that sets a new, random, nTweak value.
  • Loading branch information
petertodd authored and sipa committed Jul 27, 2015
1 parent a3d65fe commit d2d7ee0
Show file tree
Hide file tree
Showing 5 changed files with 29 additions and 12 deletions.
19 changes: 15 additions & 4 deletions src/bloom.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include "hash.h"
#include "script/script.h"
#include "script/standard.h"
#include "random.h"
#include "streams.h"

#include <math.h>
Expand Down Expand Up @@ -121,6 +122,12 @@ void CBloomFilter::clear()
isEmpty = true;
}

void CBloomFilter::reset(unsigned int nNewTweak)
{
clear();
nTweak = nNewTweak;
}

bool CBloomFilter::IsWithinSizeConstraints() const
{
return vData.size() <= MAX_BLOOM_FILTER_SIZE && nHashFuncs <= MAX_HASH_FUNCS;
Expand Down Expand Up @@ -217,7 +224,8 @@ CRollingBloomFilter::CRollingBloomFilter(unsigned int nElements, double fpRate,
// inserted, so at least one always contains the last nElements
// inserted.
nBloomSize = nElements * 2;
nInsertions = 0;

reset(nTweak);
}

void CRollingBloomFilter::insert(const std::vector<unsigned char>& vKey)
Expand Down Expand Up @@ -254,9 +262,12 @@ bool CRollingBloomFilter::contains(const uint256& hash) const
return contains(data);
}

void CRollingBloomFilter::clear()
void CRollingBloomFilter::reset(unsigned int nNewTweak)
{
b1.clear();
b2.clear();
if (!nNewTweak)
nNewTweak = GetRand(std::numeric_limits<unsigned int>::max());

b1.reset(nNewTweak);
b2.reset(nNewTweak);
nInsertions = 0;
}
12 changes: 9 additions & 3 deletions src/bloom.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ class CBloomFilter
bool contains(const uint256& hash) const;

void clear();
void reset(unsigned int nNewTweak);

//! True if the size is <= MAX_BLOOM_FILTER_SIZE and the number of hash functions is <= MAX_HASH_FUNCS
//! (catch a filter which was just deserialized which was too big)
Expand All @@ -103,22 +104,27 @@ class CBloomFilter

/**
* RollingBloomFilter is a probabilistic "keep track of most recently inserted" set.
* Construct it with the number of items to keep track of, and a false-positive rate.
* Construct it with the number of items to keep track of, and a false-positive
* rate. Unlike CBloomFilter, by default nTweak is set to a cryptographically
* secure random value for you. Similarly rather than clear() the method
* reset() is provided, which also changes nTweak to decrease the impact of
* false-positives.
*
* contains(item) will always return true if item was one of the last N things
* insert()'ed ... but may also return true for items that were not inserted.
*/
class CRollingBloomFilter
{
public:
CRollingBloomFilter(unsigned int nElements, double nFPRate, unsigned int nTweak);
CRollingBloomFilter(unsigned int nElements, double nFPRate,
unsigned int nTweak = 0);

void insert(const std::vector<unsigned char>& vKey);
void insert(const uint256& hash);
bool contains(const std::vector<unsigned char>& vKey) const;
bool contains(const uint256& hash) const;

void clear();
void reset(unsigned int nNewTweak = 0);

private:
unsigned int nBloomSize;
Expand Down
2 changes: 1 addition & 1 deletion src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4812,7 +4812,7 @@ bool SendMessages(CNode* pto, bool fSendTrickle)
{
// Periodically clear addrKnown to allow refresh broadcasts
if (nLastRebroadcast)
pnode->addrKnown.clear();
pnode->addrKnown.reset();

// Rebroadcast our address
AdvertizeLocal(pnode);
Expand Down
2 changes: 1 addition & 1 deletion src/net.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2060,7 +2060,7 @@ unsigned int SendBufferSize() { return 1000*GetArg("-maxsendbuffer", 1*1000); }

CNode::CNode(SOCKET hSocketIn, const CAddress& addrIn, const std::string& addrNameIn, bool fInboundIn) :
ssSend(SER_NETWORK, INIT_PROTO_VERSION),
addrKnown(5000, 0.001, insecure_rand()),
addrKnown(5000, 0.001),
setInventoryKnown(SendBufferSize() / 1000)
{
nServices = 0;
Expand Down
6 changes: 3 additions & 3 deletions src/test/bloom_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -469,7 +469,7 @@ static std::vector<unsigned char> RandomData()
BOOST_AUTO_TEST_CASE(rolling_bloom)
{
// last-100-entry, 1% false positive:
CRollingBloomFilter rb1(100, 0.01, 0);
CRollingBloomFilter rb1(100, 0.01, 1);

// Overfill:
static const int DATASIZE=399;
Expand Down Expand Up @@ -500,7 +500,7 @@ BOOST_AUTO_TEST_CASE(rolling_bloom)
BOOST_CHECK(nHits < 175);

BOOST_CHECK(rb1.contains(data[DATASIZE-1]));
rb1.clear();
rb1.reset(1);
BOOST_CHECK(!rb1.contains(data[DATASIZE-1]));

// Now roll through data, make sure last 100 entries
Expand All @@ -527,7 +527,7 @@ BOOST_AUTO_TEST_CASE(rolling_bloom)
BOOST_CHECK(nHits < 100);

// last-1000-entry, 0.01% false positive:
CRollingBloomFilter rb2(1000, 0.001, 0);
CRollingBloomFilter rb2(1000, 0.001, 1);
for (int i = 0; i < DATASIZE; i++) {
rb2.insert(data[i]);
}
Expand Down

0 comments on commit d2d7ee0

Please sign in to comment.