/* Licensed under BSD-MIT - see LICENSE file for details */ #include "invbloom.h" #include #include #include /* "We will show that hash_count values of 3 or 4 work well in practice" From: Eppstein, David, et al. "What's the difference?: efficient set reconciliation without prior context." ACM SIGCOMM Computer Communication Review. Vol. 41. No. 4. ACM, 2011. http://conferences.sigcomm.org/sigcomm/2011/papers/sigcomm/p218.pdf */ #define NUM_HASHES 3 struct invbloom *invbloom_new_(const tal_t *ctx, size_t id_size, size_t n_elems, u32 salt) { struct invbloom *ib = tal(ctx, struct invbloom); if (ib) { ib->n_elems = n_elems; ib->id_size = id_size; ib->salt = salt; ib->singleton = NULL; ib->count = tal_arrz(ib, s32, n_elems); ib->idsum = tal_arrz(ib, u8, id_size * n_elems); if (!ib->count || !ib->idsum) ib = tal_free(ib); } return ib; } void invbloom_singleton_cb_(struct invbloom *ib, void (*cb)(struct invbloom *, size_t bucket, bool, void *), void *data) { ib->singleton = cb; ib->singleton_data = data; } static size_t hash_bucket(const struct invbloom *ib, const void *id, size_t i) { return hash((const char *)id, ib->id_size, ib->salt+i*7) % ib->n_elems; } static u8 *idsum_ptr(const struct invbloom *ib, size_t bucket) { return (u8 *)ib->idsum + bucket * ib->id_size; } static void check_for_singleton(struct invbloom *ib, size_t bucket, bool before) { if (!ib->singleton) return; if (ib->count[bucket] != 1 && ib->count[bucket] != -1) return; ib->singleton(ib, bucket, before, ib->singleton_data); } static void add_to_bucket(struct invbloom *ib, size_t n, const u8 *id) { size_t i; u8 *idsum = idsum_ptr(ib, n); check_for_singleton(ib, n, true); ib->count[n]++; for (i = 0; i < ib->id_size; i++) idsum[i] ^= id[i]; check_for_singleton(ib, n, false); } static void remove_from_bucket(struct invbloom *ib, size_t n, const u8 *id) { size_t i; u8 *idsum = idsum_ptr(ib, n); check_for_singleton(ib, n, true); ib->count[n]--; for (i = 0; i < ib->id_size; i++) idsum[i] ^= id[i]; check_for_singleton(ib, n, false); } void invbloom_insert(struct invbloom *ib, const void *id) { unsigned int i; for (i = 0; i < NUM_HASHES; i++) add_to_bucket(ib, hash_bucket(ib, id, i), id); } void invbloom_delete(struct invbloom *ib, const void *id) { unsigned int i; for (i = 0; i < NUM_HASHES; i++) remove_from_bucket(ib, hash_bucket(ib, id, i), id); } static bool all_zero(const u8 *mem, size_t size) { unsigned int i; for (i = 0; i < size; i++) if (mem[i]) return false; return true; } bool invbloom_get(const struct invbloom *ib, const void *id) { unsigned int i; for (i = 0; i < NUM_HASHES; i++) { size_t h = hash_bucket(ib, id, i); u8 *idsum = idsum_ptr(ib, h); if (ib->count[h] == 0 && all_zero(idsum, ib->id_size)) return false; if (ib->count[h] == 1) return (memcmp(idsum, id, ib->id_size) == 0); } return false; } static void *extract(const tal_t *ctx, struct invbloom *ib, int count) { size_t i; /* FIXME: this makes full extraction O(n^2). */ for (i = 0; i < ib->n_elems; i++) { void *id; if (ib->count[i] != count) continue; id = tal_dup_arr(ctx, u8, idsum_ptr(ib, i), ib->id_size, 0); return id; } return NULL; } void *invbloom_extract(const tal_t *ctx, struct invbloom *ib) { void *id; id = extract(ctx, ib, 1); if (id) invbloom_delete(ib, id); return id; } void *invbloom_extract_negative(const tal_t *ctx, struct invbloom *ib) { void *id; id = extract(ctx, ib, -1); if (id) invbloom_insert(ib, id); return id; } void invbloom_subtract(struct invbloom *ib1, const struct invbloom *ib2) { size_t i; assert(ib1->n_elems == ib2->n_elems); assert(ib1->id_size == ib2->id_size); assert(ib1->salt == ib2->salt); for (i = 0; i < ib1->n_elems; i++) check_for_singleton(ib1, i, true); for (i = 0; i < ib1->n_elems * ib1->id_size; i++) ib1->idsum[i] ^= ib2->idsum[i]; for (i = 0; i < ib1->n_elems; i++) { ib1->count[i] -= ib2->count[i]; check_for_singleton(ib1, i, false); } } bool invbloom_empty(const struct invbloom *ib) { size_t i; for (i = 0; i < ib->n_elems; i++) { if (ib->count[i]) return false; if (!all_zero(idsum_ptr(ib, i), ib->id_size)) return false; } return true; }