code: mafs

ref: 5c21eca829380c28769f79beabb7cc51f1787f97
dir: /extents.c/

View raw version
#include <u.h>
#include <libc.h>
#include "dat.h"
#include "fns.h"
#include "extents.h"

/*
 * This allocator allocates disk blocks. 
 *
 * All disk blocks are split into a sequence of blocks of variable size.
 * Each such sequence of blocks is called an extent. Each extent begins
 * with a Tag that denotes it's length in blocks (including the Tag).
 *
 * Free extents (Free*) have a tag of TFree.  They are kept in a binary tree
 * (p->freeroot) traversible by walking ->left and ->right.  Each node of
 * the binary tree is a pointer to a circular doubly-linked list (next, prev)
 * of blocks of identical size.  Blocks are added to this ``tree of lists''
 * by pooladd(), and removed by pooldel().
 *
 * When freed, adjacent extents are coalesced to create larger extents when
 * possible.
 */
/*
 * Verbosity induces the dumping of the pool via p->print at each lock operation.
 * By default, only one line is logged for each alloc, free, and realloc.
 */

Extent *sortbysize(Extents *es, Extent *e);
void	showextent(char *pre, Extent *e);

Extent *
smallest(Extents *es)
{
	Extent *e;

	if(es == nil || es->lru == nil)
		return nil;
	for(e = es->lru; e!=nil && e->small != nil; e=e->small)
		;
	return e;
}

Extent *
biggest(Extents *es)
{
	Extent *e;

	if(es == nil || es->lru == nil)
		return nil;
	for(e = es->lru; e!=nil && e->big != nil; e=e->big)
		;
	return e;
}

Extent *
lowest(Extents *es)
{
	Extent *e;

	if(es == nil || es->lru == nil)
		return nil;
	for(e = es->lru; e!= nil && e->low != nil; e=e->low)
		;
	return e;
}

Extent *
highest(Extents *es)
{
	Extent *e;

	if(es == nil || es->lru == nil)
		return nil;
	for(e = es->lru; e!=nil && e->high != nil; e=e->high)
		;
	return e;
}

Extent *
addsize(Extents *es, Extent *e)
{
	Extent *dsmall, *fbig, *f, *d;

	if(chatty9p > 5)
		print("	+size %llud .. %llud\n", e->blkno, e->blkno+e->len-1);

	for(f = d = smallest(es);
		f != nil;
		f = f->big){
		if(f->len > e->len)
			break;
		if(f->len == e->len && f->blkno > e->blkno)
			break;
		d = f;
	}

	/* at the highest size */
	if(f == nil){
		/* d nil => d e nil */
		d->big = e;
		e->small = d;
	}else{
		/* d f => d e f */
		fbig = f;
		dsmall = fbig->small;
		if(dsmall != nil)
			dsmall->big = e;
		e->small = dsmall;
		e->big = fbig;
		fbig->small = e;
	}
	es->lru = e;
	return e;
}

Extent *
addextent(Extents *es, Extent *e, u64 blkno, u64 len)
{
	Extent *c;

	c = emalloc(sizeof(Extent));
	c->blkno = blkno;
	c->len = len;
	es->n++;
	if(chatty9p > 5)
		print("	+%llud .. %llud\n", blkno, blkno+len-1);

	if(blkno < e->blkno){
		/* e->low e
			e->low c e
		  */
		if(e->low != nil)
			e->low->high = c;
		c->low = e->low;
		e->low = c;
		c->high = e;
		return addsize(es, c);
	}
	if(blkno > e->blkno){
		/* e e->high
			e c e->high
		  */
		if(e->high != nil)
			e->high->low = c;
		c->high = e->high;
		e->high = c;
		c->low = e;
		return addsize(es, c);
	}
	print("addextent: should not be here e->blkno"
			" %llud .. %llud blkno %llud len %llud\n",
			e->blkno, e->blkno+e->len-1, blkno, len);
	abort();
	return nil;
}

/*
	in d e f g
		e = e+f
	becomes d e g
 */
Extent *
mergenext(Extents *es, Extent *e)
{
	Extent *f, *g, *gbig, *esmall;

	if(e == nil)
		return e;

	/* at the highest extent */
	if(e->high == nil)
		return e;
	if(e->blkno + e->len == e->high->blkno){
		f = e->high;
		if(f != nil)
			g = f->high;
		else
			g = nil;
		/* skipping f */
		e->len += f->len;
		if(g != nil)
			g->low = e;
		e->high = g;

		/* remove e from the ordered size list */
		gbig = f->big;
		esmall = f->small;
		if(esmall != nil)
			esmall->big = gbig;
		if(gbig != nil)
			gbig->small = esmall;

		free(f);
		es->n--;
		es->lru = e;
		return e;
	}
	return e;
}

/*
	in c d e f
		d = d+e
	becomes c d f
 */
Extent *
mergeprevious(Extents *es, Extent *e)
{
	Extent *d, *f, *dsmall, *fbig;

	if(e == nil)
		return e;

	/* at the lowest extent */
	if(e->low == nil)
		return e;
	if(e->low->blkno+e->low->len == e->blkno){
		d = e->low;
		f = e->high;
		/* skipping e */
		if(d != nil){
			d->len += e->len;
			d->high = f;
		}
		if(f != nil)
			f->low = d;

		/* remove e from the ordered size list */
		dsmall = e->small;
		fbig = e->big;
		if(dsmall != nil)
			dsmall->big = fbig;
		if(fbig != nil)
			fbig->small = dsmall;

		free(e);
		es->n--;
		es->lru = d;
		return d;
	}
	return e;
}

Extent *
increment(Extents *es, Extent *e, u64 blkno, u64 len)
{
	if(e == nil)
		return e;
	es->lru = e;
	if(blkno+len == e->blkno){
		e->blkno = blkno;
		e->len += len;
		return sortbysize(es, mergeprevious(es, e));
	}
	if(blkno == e->blkno+e->len){
		e->len += len;
		return sortbysize(es, mergenext(es, e));
	}
	print("increment: should not be here"
		" e->blkno %llud .. %llud blkno %llud len %llud\n",
		e->blkno, e->blkno+e->len-1, blkno, len);
	abort();
	return e;
}

int
belongs(Extent *e, u64 blkno, u64 len)
{
	if(e == nil)
		sysfatal("belongs: e == nil");
	if(blkno+len == e->blkno || blkno == e->blkno+e->len)
		return 0;
	if(blkno >= e->blkno && blkno <= e->blkno+e->len)
		panic("belongs: inconsistent"
				" e->blkno %llud e->len %llud"
				" blkno %llud len %llud\n",
				e->blkno, e->len, blkno, len);
	return blkno+len-e->blkno;
}

/*
print("between e->prev %llud .. %llud and e %llud .. %llud\n",
	 e->prev->blkno, e->prev->blkno+e->prev->n-1,
	 e->blkno, e->blkno+e->len-1);
 */
Extent *
doadd(Extents *es, u64 blkno, u64 len)
{
	int dir;
	Extent *e;

	if(es == nil)
		panic("add es == nil");
	if(es->lru == nil){
		e = emalloc(sizeof(Extent));
		e->low = e->high = e->small = e->big = nil;
		e->blkno = blkno;
		e->len = len;
		es->lru = e;
		es->n = 1;
		return e;
	}

	/* using the previously used extent */
	e = es->lru;
	dir = belongs(es->lru, blkno, len);
	if(chatty9p > 5){
		print(" 	belongs(e %llud %llud blkno %llud .. %llud) %d\n",
			 e->blkno, e->blkno+e->len-1, blkno, blkno+len-1, dir);
	}
	if(dir == 0)
		return increment(es, e, blkno, len);
	else if(dir < 0){
		if(e->low == nil)
			/* at the lowest extent, add a prev */
			return addextent(es, e, blkno, len);
		else if(e->low->blkno+e->low->len == blkno)
			/* belongs in the ->low extent */
			return increment(es, e->low, blkno, len);
		else if(e->low->blkno+e->low->len < blkno)
			/* between the e->low and e */
			return addextent(es, e->low, blkno, len);
		else{
			/* search from e->low */
			es->lru = e->low;
			return doadd(es, blkno, len);
		}
	}else if(dir > 0){
		if(e->high == nil)
			/* at the highest extent, add the next */
			return addextent(es, e, blkno, len);
		else if(blkno+len == e->high->blkno)
			/* belongs in the ->high extent */
			return increment(es, e->high, blkno, len);
		else if(blkno+len < e->high->blkno)
			/* between e and e->high */
			return addextent(es, e, blkno, len);
		else{
			es->lru = e->high;
			/* search from e->high */
			return doadd(es, blkno, len);
		}
	}
	/* dir == 0 */
	sysfatal("add: should not be here"
		 " e->blkno %llud .. %llud blkno %llud len %llud\n",
		  e->blkno, e->blkno+e->len-1, blkno, len);
	return e;
}

Extent *
add(Extents *es, u64 blkno, u64 len)
{
	Extent *e;

	if(chatty9p > 5){
		showextents("		before\n", es);
		print(" +%llud %llud\n", blkno, len);
	}
	e = doadd(es, blkno, len);
	es->lru = e;
	if(chatty9p > 5)
		showextents("		after\n", es);
	return e;
}

u64
pluck(Extents *es, Extent *e)
{
	Extent *dlow, *dsmall, *fhigh, *fbig;
	u64 blkno;

	if(es == nil || e == nil || es->lru == nil)
		panic("pluck(): should not happen");

	/* if e is the only entry in es */
	if(e->small == nil && e->big == nil){
		es->lru = nil;
		es->n = 0;
		blkno = e->blkno;
		free(e);
		return blkno;
	}

	/* there are atleast 2 elements in the list */
	/* d e f => d f */
	dsmall = e->small;
	fbig = e->big;
	dlow = e->low;
	fhigh = e->high;

	/* d e nil => d nil */
	if(fhigh == nil){
		dlow->high = nil;
		es->lru = dlow;
	}
	if(fbig == nil)
		dsmall->big = nil;

	/* nil e f => nil e */
	if(dlow == nil){
		fhigh->low = nil;
		es->lru = fhigh;
	}
	if(dsmall == nil)
		fbig->small = nil;

	if(dlow != nil && fhigh != nil){
		dlow->high = fhigh;
		fhigh->low = dlow;
		es->lru = fhigh;
	}
	if(dsmall != nil && fbig != nil){
		dsmall->big = fbig;
		fbig->small = dsmall;
	}
	blkno = e->blkno;
	free(e);
	return blkno;
}

u64
slice(Extents *es, Extent *e, u64 len)
{
	u64 oldblkno, oldlen;

	if(es == nil || e == nil || es->lru == nil || len == 0 || e->len <= len)
		panic("pluck(): should not happen");
	oldlen = e->len;
	oldblkno = pluck(es, e);
	add(es, oldblkno+len, oldlen - len);
	return oldblkno;
}

/* allocate n blocks and return that block number */
u64
balloc(Extents *es, u64 n)
{
	Extent *e;
	u64 blkno;
	char msg[64];

	if(es == nil)
		panic("balloc: es == nil");
	blkno = 0;
	qlock(&es->el);
	if(chatty9p > 5){
		snprint(msg, 64, "balloc() %llud blocks:\n", n);
		showextents(msg, es);
	}
	for(e = smallest(es); e != nil && e->len < n; e = e->big)
		;
	if(e == nil){
		snprint(msg, 64, "balloc() %llud blocks:\n", n);
		showextents(msg, es);
		panic("balloc: out of free blocks");
	}
	else if(e->len == n)
		blkno = pluck(es, e);
	else /* found something bigger */
		blkno = slice(es, e, n);

	qunlock(&es->el);
	return blkno;
}

/* reallocate n blocks to nnew blocks and return that block number
   It is upto the caller to copy the contents and bfree() the old
   block number if the returned block number <> old block number.
   Not providing brealloc() as we would need access to the contents
   to copy stuff over.
 */

/* free n blocks allocated at block number */
void
bfree(Extents *es, u64 blkno, u64 len)
{
	if(es == nil)
		panic("bfree: es == nil");
	qlock(&es->el);
	add(es, blkno, len);
	qunlock(&es->el);
}

/* count the total number of free blocks */
u64
nfrees(Extents *es)
{
	u64 n = 0;
	Extent *e;

	if(es == nil)
		panic("nfrees: es == nil");
	qlock(&es->el);
	for(e = smallest(es); e != nil; e = e->big){
		n += e->len;
	}
	qunlock(&es->el);
	return n;
}

/* string length when the extents are written as a string */
s32
sizeofextents(Extents *es)
{
	u64 n, used;
	s8 tmp[128];
	Extent *e;

	qlock(&es->el);
	used = 0;
	for(e = smallest(es); e != nil; e = e->big){
		n = snprint(tmp, 128, "%llud .. %llud\n",
						e->blkno, e->blkno+e->len-1);
		if(n == 128)
			panic("saveextents(): increase tmp size");
		used += n;
	}
	// keep it locked?
	qunlock(&es->el);
	return used;
}
/* write to *buf return the length written */
s32
saveextents(Extents *es, s8 *buf, u32 nbuf)
{
	u64 n, used;
	s8 tmp[128];
	Extent *e;
	s32 ret;

	qlock(&es->el);
	used = 0;
	for(e = smallest(es); e != nil; e = e->big){
		n = snprint(tmp, 128, "%llud .. %llud\n",
						e->blkno, e->blkno+e->len-1);
		if(n == 128)
			panic("saveextents(): increase tmp size");
		if(used+n > nbuf){
			ret = -1;	/* increase buf size */
			goto end;
		}
		snprint(buf+used, nbuf-used, "%s", tmp);
		used += n;
	}
	ret = used;
	// keep it locked?
end:
	qunlock(&es->el);
	return ret;
}

/* load the extents from buf of length nbuf */
/* make this be called multiple times to add more extents - not needed now */
/* assumes that the input is in ascending order of block numbers */
s32
loadextents(Extents *es, s8 *buf, u32 nbuf)
{
	s8 *p, *ep;
	u64 sblkno, eblkno, nblocks;

	p = buf;
	if(es->lru != nil || es->n != 0){
		panic("extents already loaded.\n"
			"	TODO make loadextents() be called multiple times\n");
	}
	while(*p != 0 && p-buf < nbuf){
		sblkno = strtoull(p, &ep, 10);
		if(p == ep)
			panic("could not read");
		p = ep;
		p += 4; /* skip over ' .. ' */
		eblkno = strtoull(p, &ep, 10);
		if(p == ep)
			panic("could not read");
		nblocks = eblkno - sblkno+1;
		p = ep;
		p++; /* to skip over the new line */
		bfree(es, sblkno, nblocks);
	}
	return es->n;
}

Extent *
sortbelowbysize(Extents *es, Extent *e)
{
	Extent *c, *d, *f;

	if(es == nil)
		panic("sortbysize es == nil");
	if(e == nil)
		return e;

	/* only 1 item, nothing to sort */
	if(e->small == nil && e->big == nil){
		es->lru = e;
		return e;
	}

	/* nothing to sort, at the smallest item */
	if(e->small == nil)
		return e;
	for(d=e->small;
		d != nil;
		d = d->small){
		if(d->len > e->len ||
				(d->len == e->len && d->blkno > e->blkno)){
			/* c d e f => c e d f */
			c = d->small;
			if(c != nil)
				c->big = e;
			f = e->big;
			if(f != nil)
				f->small = d;
			d->big = f;
			e->small = c;
			e->big = d;
			d->small = e;
		}
	}
	return e;
}

Extent *
sortabovebysize(Extents *es, Extent *e)
{
	Extent *d, *f, *g;

	if(es == nil)
		panic("sortbysize es == nil");
	if(e == nil)
		return e;

	/* only 1 item, nothing to sort */
	if(e->small == nil && e->big == nil){
		es->lru = e;
		return e;
	}

	/* nothing to sort, at the biggest item */
	if(e->big == nil)
		return e;
	for(f=e->big;
		f != nil;
		f = f->big){
		if(f->len < e->len ||
				(f->len == e->len && f->blkno < e->blkno)){
			/* d e f g => d f e g */
			g = f->big;
			if(g != nil)
				g->small = e;
			d = e->small;
			if(d != nil)
				d->big = f;
			f->small = d;
			f->big = e;
			e->small = f;
			e->big = g;
		}
	}
	return e;
}

Extent *
sortbysize(Extents *es, Extent *e)
{
	Extent *c;

	if(es == nil)
		panic("sortbysize es == nil");
	if(e == nil)
		return e;

	/* only 1 item, nothing to sort */
	if(e->small == nil && e->big == nil){
		es->lru = e;
		return e;
	}

	/* sort below */
	c = sortbelowbysize(es, e);
	return sortabovebysize(es, c);
}

void
showextent(char *pre, Extent *e)
{
	print("%s small %8#p	low %8#p	e %8#p %llud %llud	high %8#p	big %8#p",
			pre, e->small, e->low, e, e->blkno, e->len, e->high, e->big);
}

void
showextents(char *msg, Extents *es)
{
	Extent *e;

	print("%s", msg);
	for(e = lowest(es); e != nil; e = e->high){
		print("	%llud .. %llud", e->blkno, e->blkno+e->len-1);
//		showextent("	", e);
		print("\n");
	}
	print("			ordered by size\n");
	for(e = smallest(es); e != nil; e = e->big){
		print("	%llud .. %llud has %llud blocks", e->blkno, e->blkno+e->len-1, e->len);
//		showextent("	", e);
		print("\n");
	}
}

void
showblocknos(Extents *es)
{
	Extent *e;
	u64 i;

	for(e = lowest(es); e != nil; e = e->high)
		for(i = e->blkno; i<e->blkno+e->len; i++)
			print("%llud\n", i);
}

void *
emalloc(u32 sz)
{
	void *v;

	if((v = mallocz(sz, 1)) == nil)
		sysfatal("emalloc: %r");

	setmalloctag(v, getcallerpc(&sz));
	return v;
}

s8 *
estrdup(s8 *s)
{
	s8 *p;

	p = strdup(s);
	if(p == nil)
		sysfatal("estrdup: %r");
	setmalloctag(p, getcallerpc(&s));
	return p;
}

s8
find(Extents *es, u64 bno)
{
	Extent *e;

	if(es == nil || es->lru == nil)
		return 0;
	for(e = lowest(es); e != nil; e=e->high){
		if(e->blkno == bno)
			return 1;
		if(e->blkno < bno && bno < e->blkno+e->len)
			return 1;
	}
	return 0;
}