ref: c84d8f133f761569af206b6d76ffeee015ccd3b8
dir: /sys/src/cmd/venti/srv/buildbuck.c/
#include "stdinc.h"
#include "dat.h"
#include "fns.h"
/*
 * An IEStream is a sorted list of index entries.
 */
struct IEStream
{
	Part	*part;
	u64int	off;		/* read position within part */
	u64int	n;		/* number of valid ientries left to read */
	u32int	size;		/* allocated space in buffer */
	u8int	*buf;
	u8int	*pos;		/* current place in buffer */
	u8int	*epos;		/* end of valid buffer contents */
};
IEStream*
initiestream(Part *part, u64int off, u64int clumps, u32int size)
{
	IEStream *ies;
/* out of memory? */
	ies = MKZ(IEStream);
	ies->buf = MKN(u8int, size);
	ies->epos = ies->buf;
	ies->pos = ies->epos;
	ies->off = off;
	ies->n = clumps;
	ies->size = size;
	ies->part = part;
	return ies;
}
void
freeiestream(IEStream *ies)
{
	if(ies == nil)
		return;
	free(ies->buf);
	free(ies);
}
/*
 * Return the next IEntry (still packed) in the stream.
 */
static u8int*
peekientry(IEStream *ies)
{
	u32int n, nn;
	n = ies->epos - ies->pos;
	if(n < IEntrySize){
		memmove(ies->buf, ies->pos, n);
		ies->epos = &ies->buf[n];
		ies->pos = ies->buf;
		nn = ies->size;
		if(nn > ies->n * IEntrySize)
			nn = ies->n * IEntrySize;
		nn -= n;
		if(nn == 0)
			return nil;
//fprint(2, "peek %d from %llud into %p\n", nn, ies->off, ies->epos);
		if(readpart(ies->part, ies->off, ies->epos, nn) < 0){
			seterr(EOk, "can't read sorted index entries: %r");
			return nil;
		}
		ies->epos += nn;
		ies->off += nn;
	}
	return ies->pos;
}
/*
 * Compute the bucket number for the given IEntry.
 * Knows that the score is the first thing in the packed
 * representation.
 */
static u32int
iebuck(Index *ix, u8int *b, IBucket *ib, IEStream *ies)
{
	USED(ies);
	USED(ib);
	return hashbits(b, 32) / ix->div;
}
/*
 * Fill ib with the next bucket in the stream.
 */
u32int
buildbucket(Index *ix, IEStream *ies, IBucket *ib, uint maxdata)
{
	IEntry ie1, ie2;
	u8int *b;
	u32int buck;
	buck = TWID32;
	ib->n = 0;
	while(ies->n){
		b = peekientry(ies);
		if(b == nil)
			return TWID32;
/* fprint(2, "b=%p ies->n=%lld ib.n=%d buck=%d score=%V\n", b, ies->n, ib->n, iebuck(ix, b, ib, ies), b); */
		if(ib->n == 0)
			buck = iebuck(ix, b, ib, ies);
		else{
			if(buck != iebuck(ix, b, ib, ies))
				break;
			if(ientrycmp(&ib->data[(ib->n - 1)* IEntrySize], b) == 0){
				/*
				 * guess that the larger address is the correct one to use
				 */
				unpackientry(&ie1, &ib->data[(ib->n - 1)* IEntrySize]);
				unpackientry(&ie2, b);
				seterr(EOk, "duplicate index entry for score=%V type=%d", ie1.score, ie1.ia.type);
				ib->n--;
				if(ie1.ia.addr > ie2.ia.addr)
					memmove(b, &ib->data[ib->n * IEntrySize], IEntrySize);
			}
		}
		if((ib->n+1)*IEntrySize > maxdata){
			seterr(EOk, "bucket overflow");
			return TWID32;
		}
		memmove(&ib->data[ib->n * IEntrySize], b, IEntrySize);
		ib->n++;
		ies->n--;
		ies->pos += IEntrySize;
	}
	return buck;
}