Skip to content

Commit 6a2a543

Browse files
committed
feat!: strict bloom filter format + internal refactoring
1 parent 7378fb8 commit 6a2a543

File tree

6 files changed

+386
-344
lines changed

6 files changed

+386
-344
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ add: build
2020
./ecloop add -f data/btc-puzzles-hash -t 4 -r 8000:ffffff
2121

2222
mul: build
23-
cat data.txt | ./ecloop mul -f _check_1.txt -t 4 -a cu
23+
cat misc/data.txt | ./ecloop mul -f misc/wd_filter.txt -t 4 -a cu -q -o /dev/null
2424

2525
blf: build
2626
rm -rf /tmp/test.blf; cat data/btc-puzzles-hash | ./ecloop blf-gen -n $(n) -o /tmp/test.blf

lib/bench.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
#include "addr.c"
44
#include "ecc.c"
5-
#include "util.c"
5+
#include "utils.c"
66

77
void print_res(char *label, u64 stime, u64 iters) {
88
double dt = MAX((tsnow() - stime), 1) / 1000.0;

lib/util.c

Lines changed: 0 additions & 29 deletions
This file was deleted.

lib/utils.c

Lines changed: 337 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,337 @@
1+
#pragma once
2+
3+
#include <math.h>
4+
#include <pthread.h>
5+
#include <stdbool.h>
6+
#include <stddef.h>
7+
#include <stdio.h>
8+
#include <stdlib.h>
9+
#include <string.h>
10+
#include <time.h>
11+
#include <unistd.h>
12+
13+
typedef __uint128_t u128;
14+
typedef unsigned long long u64;
15+
typedef unsigned int u32;
16+
typedef unsigned char u8;
17+
18+
typedef char hex40[41]; // rmd160 hex string
19+
typedef char hex64[65]; // sha256 hex string
20+
typedef u32 h160_t[5];
21+
22+
// MARK: helpers
23+
24+
u64 tsnow() {
25+
struct timespec ts;
26+
// clock_gettime(CLOCK_MONOTONIC, &ts);
27+
clock_gettime(CLOCK_REALTIME, &ts);
28+
return ts.tv_sec * 1000 + ts.tv_nsec / 1e6;
29+
}
30+
31+
bool strendswith(const char *str, const char *suffix) {
32+
size_t str_len = strlen(str);
33+
size_t suffix_len = strlen(suffix);
34+
return (str_len >= suffix_len) && (strcmp(str + str_len - suffix_len, suffix) == 0);
35+
}
36+
37+
// MARK: args
38+
39+
typedef struct args_t {
40+
int argc;
41+
const char **argv;
42+
} args_t;
43+
44+
bool args_bool(args_t *args, const char *name) {
45+
for (int i = 1; i < args->argc; ++i) {
46+
if (strcmp(args->argv[i], name) == 0) return true;
47+
}
48+
return false;
49+
}
50+
51+
u64 args_int(args_t *args, const char *name, int def) {
52+
for (int i = 1; i < args->argc - 1; ++i) {
53+
if (strcmp(args->argv[i], name) == 0) {
54+
return strtoull(args->argv[i + 1], NULL, 10);
55+
}
56+
}
57+
return def;
58+
}
59+
60+
char *arg_str(args_t *args, const char *name) {
61+
for (int i = 1; i < args->argc; ++i) {
62+
if (strcmp(args->argv[i], name) == 0) {
63+
if (i + 1 < args->argc) return (char *)args->argv[i + 1];
64+
}
65+
}
66+
return NULL;
67+
}
68+
69+
// MARK: queue
70+
71+
typedef struct queue_item_t {
72+
void *data_ptr;
73+
struct queue_item_t *next;
74+
} queue_item_t;
75+
76+
typedef struct queue_t {
77+
size_t capacity;
78+
size_t size;
79+
bool done;
80+
queue_item_t *head;
81+
queue_item_t *tail;
82+
pthread_mutex_t lock;
83+
pthread_cond_t cond_put;
84+
pthread_cond_t cond_get;
85+
} queue_t;
86+
87+
void queue_init(queue_t *q, size_t capacity) {
88+
q->capacity = capacity;
89+
q->size = 0;
90+
q->done = false;
91+
q->head = NULL;
92+
q->tail = NULL;
93+
pthread_mutex_init(&q->lock, NULL);
94+
pthread_cond_init(&q->cond_put, NULL);
95+
pthread_cond_init(&q->cond_get, NULL);
96+
}
97+
98+
void queue_done(queue_t *q) {
99+
pthread_mutex_lock(&q->lock);
100+
q->done = true;
101+
pthread_cond_broadcast(&q->cond_get);
102+
pthread_mutex_unlock(&q->lock);
103+
}
104+
105+
void queue_put(queue_t *q, void *data_ptr) {
106+
pthread_mutex_lock(&q->lock);
107+
if (q->done) {
108+
pthread_mutex_unlock(&q->lock);
109+
return;
110+
}
111+
112+
while (q->size == q->capacity) {
113+
pthread_cond_wait(&q->cond_put, &q->lock);
114+
}
115+
116+
queue_item_t *item = malloc(sizeof(queue_item_t));
117+
item->data_ptr = data_ptr;
118+
item->next = NULL;
119+
120+
if (q->tail != NULL) q->tail->next = item;
121+
else q->head = item;
122+
123+
q->tail = item;
124+
q->size += 1;
125+
126+
pthread_cond_signal(&q->cond_get);
127+
pthread_mutex_unlock(&q->lock);
128+
}
129+
130+
void *queue_get(queue_t *q) {
131+
pthread_mutex_lock(&q->lock);
132+
while (q->size == 0 && !q->done) {
133+
pthread_cond_wait(&q->cond_get, &q->lock);
134+
}
135+
136+
if (q->size == 0) {
137+
pthread_mutex_unlock(&q->lock);
138+
return NULL;
139+
}
140+
141+
queue_item_t *item = q->head;
142+
q->head = item->next;
143+
if (!q->head) q->tail = NULL;
144+
145+
void *data_ptr = item->data_ptr;
146+
free(item);
147+
q->size -= 1;
148+
149+
pthread_cond_signal(&q->cond_put);
150+
pthread_mutex_unlock(&q->lock);
151+
return data_ptr;
152+
}
153+
154+
// MARK: bloom filter
155+
156+
#define BLF_MAGIC 0x45434246 // FourCC: ECBF
157+
#define BLF_VERSION 1
158+
159+
typedef struct blf_t {
160+
size_t size;
161+
u64 *bits;
162+
} blf_t;
163+
164+
static inline void blf_setbit(blf_t *blf, size_t idx) {
165+
blf->bits[idx % (blf->size * 64) / 64] |= (u64)1 << (idx % 64);
166+
}
167+
168+
static inline bool blf_getbit(blf_t *blf, u64 idx) {
169+
return (blf->bits[idx % (blf->size * 64) / 64] & ((u64)1 << (idx % 64))) != 0;
170+
}
171+
172+
void blf_add(blf_t *blf, const h160_t hash) {
173+
u64 a1 = (u64)hash[0] << 32 | hash[1];
174+
u64 a2 = (u64)hash[2] << 32 | hash[3];
175+
u64 a3 = (u64)hash[4] << 32 | hash[0];
176+
u64 a4 = (u64)hash[1] << 32 | hash[2];
177+
u64 a5 = (u64)hash[3] << 32 | hash[4];
178+
179+
u8 shifts[4] = {24, 28, 36, 40};
180+
for (size_t i = 0; i < 4; ++i) {
181+
u8 S = shifts[i];
182+
blf_setbit(blf, a1 << S | a2 >> S);
183+
blf_setbit(blf, a2 << S | a3 >> S);
184+
blf_setbit(blf, a3 << S | a4 >> S);
185+
blf_setbit(blf, a4 << S | a5 >> S);
186+
blf_setbit(blf, a5 << S | a1 >> S);
187+
}
188+
}
189+
190+
bool blf_has(blf_t *blf, const h160_t hash) {
191+
u64 a1 = (u64)hash[0] << 32 | hash[1];
192+
u64 a2 = (u64)hash[2] << 32 | hash[3];
193+
u64 a3 = (u64)hash[4] << 32 | hash[0];
194+
u64 a4 = (u64)hash[1] << 32 | hash[2];
195+
u64 a5 = (u64)hash[3] << 32 | hash[4];
196+
197+
u8 shifts[4] = {24, 28, 36, 40};
198+
for (size_t i = 0; i < 4; ++i) {
199+
u8 S = shifts[i];
200+
if (!blf_getbit(blf, a1 << S | a2 >> S)) return false;
201+
if (!blf_getbit(blf, a2 << S | a3 >> S)) return false;
202+
if (!blf_getbit(blf, a3 << S | a4 >> S)) return false;
203+
if (!blf_getbit(blf, a4 << S | a5 >> S)) return false;
204+
if (!blf_getbit(blf, a5 << S | a1 >> S)) return false;
205+
}
206+
207+
return true;
208+
}
209+
210+
bool blf_save(const char *filepath, blf_t *blf) {
211+
FILE *file = fopen(filepath, "wb");
212+
if (file == NULL) {
213+
fprintf(stderr, "failed to open output file\n");
214+
exit(1);
215+
}
216+
217+
u32 blf_magic = BLF_MAGIC;
218+
u32 blg_version = BLF_VERSION;
219+
220+
if (fwrite(&blf_magic, sizeof(blf_magic), 1, file) != 1) {
221+
fprintf(stderr, "failed to write bloom filter magic\n");
222+
return false;
223+
};
224+
225+
if (fwrite(&blg_version, sizeof(blg_version), 1, file) != 1) {
226+
fprintf(stderr, "failed to write bloom filter version\n");
227+
return false;
228+
}
229+
230+
if (fwrite(&blf->size, sizeof(blf->size), 1, file) != 1) {
231+
fprintf(stderr, "failed to write bloom filter size\n");
232+
return false;
233+
}
234+
235+
if (fwrite(blf->bits, sizeof(u64), blf->size, file) != blf->size) {
236+
fprintf(stderr, "failed to write bloom filter bits\n");
237+
return false;
238+
}
239+
240+
fclose(file);
241+
return true;
242+
}
243+
244+
bool blf_load(const char *filepath, blf_t *blf) {
245+
FILE *file = fopen(filepath, "rb");
246+
if (file == NULL) {
247+
fprintf(stderr, "failed to open input file\n");
248+
return false;
249+
}
250+
251+
u32 blf_magic, blf_version;
252+
size_t size;
253+
254+
bool is_ok = true;
255+
is_ok = is_ok && fread(&blf_magic, sizeof(blf_magic), 1, file) == 1;
256+
is_ok = is_ok && fread(&blf_version, sizeof(blf_version), 1, file) == 1;
257+
is_ok = is_ok && fread(&size, sizeof(size), 1, file) == 1;
258+
if (!is_ok) {
259+
fprintf(stderr, "failed to read bloom filter header\n");
260+
return false;
261+
}
262+
263+
if (blf_magic != BLF_MAGIC || blf_version != BLF_VERSION) {
264+
fprintf(stderr, "invalid bloom filter version; create a new filter with blf-gen command\n");
265+
return false;
266+
}
267+
268+
u64 *bits = calloc(size, sizeof(u64));
269+
if (fread(bits, sizeof(u64), size, file) != size) {
270+
fprintf(stderr, "failed to read bloom filter bits\n");
271+
return false;
272+
}
273+
274+
fclose(file);
275+
blf->size = size;
276+
blf->bits = bits;
277+
return true;
278+
}
279+
280+
void __blf_gen_usage(args_t *args) {
281+
printf("Usage: %s blf-gen -n <count> -o <file>\n", args->argv[0]);
282+
printf("Generate a bloom filter from a list of hex-encoded hash160 values passed to stdin.\n");
283+
printf("\nOptions:\n");
284+
printf(" -n <count> - Number of hashes to add.\n");
285+
printf(" -o <file> - File to write bloom filter (must have a .blf extension).\n");
286+
exit(1);
287+
}
288+
289+
void blf_gen(args_t *args) {
290+
u64 n = args_int(args, "-n", 0);
291+
if (n == 0) {
292+
fprintf(stderr, "[!] missing filter size (-n <number>)\n");
293+
return __blf_gen_usage(args);
294+
}
295+
296+
char *filepath = arg_str(args, "-o");
297+
if (filepath == NULL) {
298+
fprintf(stderr, "[!] missing output file (-o <file>)\n");
299+
return __blf_gen_usage(args);
300+
}
301+
302+
blf_t blf = {.size = 0, .bits = NULL};
303+
if (access(filepath, F_OK) == 0) blf_load(filepath, &blf);
304+
305+
// https://hur.st/bloomfilter/?n=500M&p=1000000&m=&k=20
306+
u64 r = 1e9;
307+
double p = 1.0 / (double)r;
308+
u64 m = (u64)(n * log(p) / log(1.0 / pow(2.0, log(2.0))));
309+
double mb = (double)m / 8 / 1024 / 1024;
310+
printf("creating bloom filter: n = %'llu | p = 1:%'llu | m = %'llu (%'.1f MB)\n", n, r, m, mb);
311+
312+
blf.size = (m + 63) / 64;
313+
blf.bits = calloc(blf.size, sizeof(u64));
314+
315+
u64 count = 0;
316+
hex40 line;
317+
while (fgets(line, sizeof(line), stdin) != NULL) {
318+
if (strlen(line) != sizeof(line) - 1) continue;
319+
320+
h160_t hash;
321+
for (size_t j = 0; j < sizeof(line) - 1; j += 8) {
322+
sscanf(line + j, "%8x", &hash[j / 8]);
323+
}
324+
325+
count += 1;
326+
blf_add(&blf, hash);
327+
}
328+
329+
printf("added %'llu items; saving to %s\n", count, filepath);
330+
331+
if (!blf_save(filepath, &blf)) {
332+
fprintf(stderr, "failed to save bloom filter\n");
333+
exit(1);
334+
}
335+
336+
free(blf.bits);
337+
}

0 commit comments

Comments
 (0)