#include "stdinc.h" #include "dat.h" #include "fns.h" Bloom b; void usage(void) { fprint(2, "usage: fmtbloom [-s size] [-n nblocks | -N nhash] file\n"); threadexitsall(0); } void threadmain(int argc, char *argv[]) { Part *part; char *file; vlong bits, size, size2; int nhash; vlong nblocks; ventifmtinstall(); statsinit(); size = 0; nhash = 0; nblocks = 0; ARGBEGIN{ case 'n': if(nhash || nblocks) usage(); nblocks = unittoull(EARGF(usage())); break; case 'N': if(nhash || nblocks) usage(); nhash = unittoull(EARGF(usage())); if(nhash > BloomMaxHash){ fprint(2, "maximum possible is -N %d", BloomMaxHash); usage(); } break; case 's': size = unittoull(ARGF()); if(size == ~0) usage(); break; default: usage(); break; }ARGEND if(argc != 1) usage(); file = argv[0]; part = initpart(file, ORDWR|ODIRECT); if(part == nil) sysfatal("can't open partition %s: %r", file); if(size == 0) size = part->size; if(size < 1024*1024) sysfatal("bloom filter too small"); if(size > MaxBloomSize){ fprint(2, "warning: not using entire %,lld bytes; using only %,lld bytes\n", size, (vlong)MaxBloomSize); size = MaxBloomSize; } if(size&(size-1)){ for(size2=1; size2= nblocks*64) size2 >>= 1; if(size2 != size){ size = size2; fprint(2, "warning: using only %lldMB - not enough blocks to warrant more\n", size/1024/1024); } /* * optimal is to use ln 2 times as many hash functions as we have bits per blocks. */ bits = (8*size)/nblocks; nhash = bits*7/10; if(nhash > BloomMaxHash) nhash = BloomMaxHash; } if(!nhash) nhash = BloomMaxHash; if(bloominit(&b, size, nil) < 0) sysfatal("bloominit: %r"); b.nhash = nhash; bits = nhash*10/7; nblocks = (8*size)/bits; fprint(2, "fmtbloom: using %lldMB, %d hashes/score, best up to %,lld blocks\n", size/1024/1024, nhash, nblocks); b.data = vtmallocz(size); b.part = part; if(writebloom(&b) < 0) sysfatal("writing %s: %r", file); threadexitsall(0); }