ref: babf901b4a508c3ec5d1f89655f10377bbdf9637
dir: /appl/lib/bloomfilter.b/
implement Bloomfilter;
include "sys.m";
sys: Sys;
include "sets.m";
sets: Sets;
Set: import sets;
include "keyring.m";
keyring: Keyring;
include "bloomfilter.m";
init()
{
sys = load Sys Sys->PATH;
sets = load Sets Sets->PATH;
sets->init();
keyring = load Keyring Keyring->PATH;
}
Bits: adt {
d: array of byte;
n: int; # bits used
get: fn(bits: self ref Bits, n: int): int;
};
filter(d: array of byte, logm, k: int): Set
{
if(logm < 3 || logm > 30)
raise "invalid bloom filter size";
nb := 1 << logm;
f := array[nb / 8 + 1] of {* => byte 0}; # one extra zero to make sure set's not inverted.
bits := hashbits(d, logm * k);
while(k--){
v := bits.get(logm);
f[v >> 3] |= byte 1 << (v & 7);
}
return sets->bytes2set(f);
}
hashbits(data: array of byte, n: int): ref Bits
{
d := array[((n + 7) / 8)] of byte;
digest := array[Keyring->SHA1dlen] of byte;
state := keyring->sha1(data, len data, nil, nil);
extra := array[2] of byte;
e := 0;
for(i := 0; i < len d; i += Keyring->SHA1dlen){
extra[0] = byte e;
extra[1] = byte (e>>8);
e++;
state = keyring->sha1(extra, len extra, digest, state);
if(i + Keyring->SHA1dlen > len d)
digest = digest[0:len d - i];
d[i:] = digest;
}
return ref Bits(d, 0);
}
# XXX could be more efficient.
Bits.get(bits: self ref Bits, n: int): int
{
d := bits.d;
v := 0;
nb := bits.n;
for(i := 0; i < n; i++){
j := nb + i;
if(int d[j >> 3] & (1 << (j & 7)))
v |= (1 << i);
}
bits.n += n;
return v;
}