ref: c3cfd06564ab868d1e78bf5fe8fe6fac07202de9
dir: /sys/src/cmd/venti/srv/fmtbloom.c/
#include "stdinc.h"
#include "dat.h"
#include "fns.h"
Bloom b;
void
usage(void)
{
	fprint(2, "usage: fmtbloom [-s size] [-n nblocks | -N nhash] file\n");
	threadexitsall(0);
}
void
threadmain(int argc, char *argv[])
{
	Part *part;
	char *file;
	vlong bits, size, size2;
	int nhash;
	vlong nblocks;
	
	ventifmtinstall();
	statsinit();
	size = 0;
	nhash = 0;
	nblocks = 0;
	ARGBEGIN{
	case 'n':
		if(nhash || nblocks)
			usage();
		nblocks = unittoull(EARGF(usage()));
		break;
	case 'N':
		if(nhash || nblocks)
			usage();
		nhash = unittoull(EARGF(usage()));
		if(nhash > BloomMaxHash){
			fprint(2, "maximum possible is -N %d", BloomMaxHash);
			usage();
		}
		break;
	case 's':
		size = unittoull(ARGF());
		if(size == ~0)
			usage();
		break;
	default:
		usage();
		break;
	}ARGEND
	if(argc != 1)
		usage();
	file = argv[0];
	part = initpart(file, ORDWR|ODIRECT);
	if(part == nil)
		sysfatal("can't open partition %s: %r", file);
	if(size == 0)
		size = part->size;
	
	if(size < 1024*1024)
		sysfatal("bloom filter too small");
	if(size > MaxBloomSize){
		fprint(2, "warning: not using entire %,lld bytes; using only %,lld bytes\n",
			size, (vlong)MaxBloomSize);
		size = MaxBloomSize;
	}
	if(size&(size-1)){
		for(size2=1; size2<size; size2*=2)
			;
		size = size2/2;
		fprint(2, "warning: size not a power of 2; only using %lldMB\n", size/1024/1024);
	}
	if(nblocks){
		/*
		 * no use for more than 32 bits per block
		 * shoot for less than 64 bits per block
		 */
		size2 = size;
		while(size2*8 >= nblocks*64)
			size2 >>= 1;
		if(size2 != size){
			size = size2;
			fprint(2, "warning: using only %lldMB - not enough blocks to warrant more\n",
				size/1024/1024);
		}
		/*
		 * optimal is to use ln 2 times as many hash functions as we have bits per blocks.  
		 */
		bits = (8*size)/nblocks;
		nhash = bits*7/10;
		if(nhash > BloomMaxHash)
			nhash = BloomMaxHash;
	}
	if(!nhash)
		nhash = BloomMaxHash;
	if(bloominit(&b, size, nil) < 0)
		sysfatal("bloominit: %r");
	b.nhash = nhash;
	bits = nhash*10/7;
	nblocks = (8*size)/bits;
	fprint(2, "fmtbloom: using %lldMB, %d hashes/score, best up to %,lld blocks\n", size/1024/1024, nhash, nblocks);
	b.data = vtmallocz(size);
	b.part = part;
	if(writebloom(&b) < 0)
		sysfatal("writing %s: %r", file);
	threadexitsall(0);
}