code: mafs

Download patch

ref: c41caa3b068fca0e58bed789548f75c7222e8f46
parent: db24bb2913d859d67a2a428480488dff4de9d3ae
author: 9ferno <gophone2015@gmail.com>
date: Tue Oct 25 15:03:27 EDT 2022

use extents to manage memory usage too

--- a/all.h
+++ b/all.h
@@ -10,6 +10,25 @@
 #include "dat.h"
 #include "extents.h"
 
+enum
+{
+	/*
+	https://cs.stackexchange.com/questions/11029/why-is-it-best-to-use-a-prime-number-as-a-mod-in-a-hashing-function
+		using small numbers for testing
+		using Nbuckets=2 for testing.
+
+	Size of buffer cache is approximately (Nbuckets * Ncollisions * Rawblocksize).
+
+	If you have RAM to spare, increase the Nbuckets instead of
+	Ncollisions as the hash index lookup is faster than searching
+	through a linked list.
+	 */
+	Nbuckets = 256*KiB,	/* number of Hiob, hash buckets */
+	Ncollisions = 4,	/* soft limit after which we start reusing older Iobuf's */
+	Npendingwrites	= MiB,	/* write throttling */
+	Nmemunits = 8*MiB,	/* total memunit usage */
+};
+
 typedef	struct	Hiob	Hiob;
 typedef	struct	Iobuf	Iobuf;
 
@@ -66,11 +85,14 @@
 };
 
 extern	u32	nbuckets;		/* n hash buckets for i/o */
-extern	Extents frees;		/* free extents */
+extern	Extents frees;		/* extents of free blocks on the disk */
 
 #include "fns.h"
 
 /* Iobuf routines - contents of the blocks in memory */
+void	initmemunitpool(u64 nunits);
+u8 *allocmemunit(void);
+void	freememunit(u8 *m);
 int		checktag(Iobuf *p, u16 tag, u64 qpath);
 Iobuf*	getbuf(u64 blkno, u8 readonly, u8 freshalloc);
 Iobuf*	getbufchk(u64 blkno, u8 readonly, int tag, u64 qpath);
--- a/dat.h
+++ b/dat.h
@@ -10,6 +10,11 @@
 
 enum
 {
+	KiB	= 1024ULL,	/* Kibibytes */
+	MiB	= KiB*KiB,	/* Mibibytes */
+	GiB	= KiB*MiB,	/* Gibibytes */
+	TiB	= KiB*GiB,	/* Tibibytes */
+
 	Nusers	= 32,	/* max. number of users */
 	None	= 0,	/* user ID for "none" */
 	Noworld	= 9999,	/* conventional id for "noworld" group */
@@ -19,25 +24,6 @@
 	Msec	= 1000ULL,
 	MAXRPC	= 8192ULL,
 	Nbkp	= 2,
-	KiB	= 1024ULL,	/* Kibibytes */
-	MiB	= KiB*KiB,	/* Mibibytes */
-	GiB	= KiB*MiB,	/* Gibibytes */
-	TiB	= KiB*GiB,	/* Tibibytes */
-
-	/*
-	https://cs.stackexchange.com/questions/11029/why-is-it-best-to-use-a-prime-number-as-a-mod-in-a-hashing-function
-		using small numbers for testing
-		using Nbuckets=2 for testing.
-
-	Size of buffer cache is approximately (Nbuckets * Ncollisions * Rawblocksize).
-
-	If you have RAM to spare, increase the Nbuckets instead of
-	Ncollisions as the hash index lookup is faster than searching
-	through a linked list.
-	 */
-	Nbuckets = 256*KiB,	/* number of Hiob */
-	Ncollisions = 4,	/* soft limit after which we start reusing older Iobuf's */
-	Npendingwrites	= MiB,	/* write throttling */
 
 	/* Qpnone is the Tag.path of free blocks/extents(Tfree),
 		and zero'ed out dentry blocks */
--- a/extents.c
+++ b/extents.c
@@ -5,57 +5,26 @@
 #include "extents.h"
 
 /*
- * This allocator allocates disk blocks. 
+ * Extents are used to manage memory space and disk space. The space is
+ * is split into units of the same size.
  *
- * All disk blocks are split into a sequence of blocks of variable size.
- * Each such sequence of blocks is called an extent. Each extent begins
- * with a Tag that denotes it's length in blocks (including the Tag).
- *
- * Free extents (Free*) have a tag of TFree.  They are kept in a binary tree
- * (p->freeroot) traversible by walking ->left and ->right.  Each node of
- * the binary tree is a pointer to a circular doubly-linked list (next, prev)
- * of blocks of identical size.  Blocks are added to this ``tree of lists''
- * by pooladd(), and removed by pooldel().
- *
- * When freed, adjacent extents are coalesced to create larger extents when
- * possible.
+ * All space is split into a sequence of units. Each such sequence of units
+ * is called an Extent. The data structure Extents is used to contain this
+ * information. Space is added to the Extents using bfree() and allocated
+ * using balloc(). When freed, adjacent extents are coalesced to create a
+ * large extent, if they are continuous.
  */
 
-Extent *sortbysize(Extents *es, Extent *e);
 void	showextent(int fd, char *pre, Extent *e);
 
 Extent *
-smallest(Extents *es)
-{
-	Extent *e;
-
-	if(es == nil || es->lru == nil)
-		return nil;
-	for(e = es->lru; e!=nil && e->small != nil; e=e->small)
-		;
-	return e;
-}
-
-Extent *
-biggest(Extents *es)
-{
-	Extent *e;
-
-	if(es == nil || es->lru == nil)
-		return nil;
-	for(e = es->lru; e!=nil && e->big != nil; e=e->big)
-		;
-	return e;
-}
-
-Extent *
 lowest(Extents *es)
 {
 	Extent *e;
 
-	if(es == nil || es->lru == nil)
+	if(es == nil || es->lru == nil || es->head == nil)
 		return nil;
-	for(e = es->lru; e!= nil && e->low != nil; e=e->low)
+	for(e = es->head; e!= nil && e->low != nil; e=e->low)
 		;
 	return e;
 }
@@ -73,67 +42,33 @@
 }
 
 Extent *
-addsize(Extents *es, Extent *e)
+addextent(Extents *es, Extent *e, u64 start, u64 len)
 {
-	Extent *dsmall, *fbig, *f, *d;
-
-	if(chatty9p > 7)
-		print("	+size %llud .. %llud\n", e->blkno, e->blkno+e->len-1);
-
-	for(f = d = smallest(es);
-		f != nil;
-		f = f->big){
-		if(f->len > e->len)
-			break;
-		if(f->len == e->len && f->blkno > e->blkno)
-			break;
-		d = f;
-	}
-
-	/* at the highest size */
-	if(f == nil){
-		/* d nil => d e nil */
-		d->big = e;
-		e->small = d;
-	}else{
-		/* d f => d e f */
-		fbig = f;
-		dsmall = fbig->small;
-		if(dsmall != nil)
-			dsmall->big = e;
-		e->small = dsmall;
-		e->big = fbig;
-		fbig->small = e;
-	}
-	es->lru = e;
-	return e;
-}
-
-Extent *
-addextent(Extents *es, Extent *e, u64 blkno, u64 len)
-{
 	Extent *c;
 
 	c = emalloc(sizeof(Extent));
-	c->blkno = blkno;
+	c->start = start;
 	c->len = len;
 	es->n++;
 	if(chatty9p > 7)
-		print("	+%llud .. %llud\n", blkno, blkno+len-1);
+		print("	+%llud .. %llud\n", start, start+len-1);
 
-	if(blkno < e->blkno){
-		/* e->low e
+	if(start < e->start){
+		/* e->low e =>
 			e->low c e
 		  */
 		if(e->low != nil)
 			e->low->high = c;
+		else
+			es->head = c;
 		c->low = e->low;
 		e->low = c;
 		c->high = e;
-		return addsize(es, c);
+		es->lru = c;
+		return c;
 	}
-	if(blkno > e->blkno){
-		/* e e->high
+	if(start > e->start){
+		/* e e->high =>
 			e c e->high
 		  */
 		if(e->high != nil)
@@ -141,11 +76,12 @@
 		c->high = e->high;
 		e->high = c;
 		c->low = e;
-		return addsize(es, c);
+		es->lru = c;
+		return c;
 	}
-	print("addextent: should not be here e->blkno"
-			" %llud .. %llud blkno %llud len %llud\n",
-			e->blkno, e->blkno+e->len-1, blkno, len);
+	print("addextent: should not be here e->start"
+			" %llud .. %llud start %llud len %llud\n",
+			e->start, e->start+e->len-1, start, len);
 	abort();
 	return nil;
 }
@@ -158,7 +94,7 @@
 Extent *
 mergenext(Extents *es, Extent *e)
 {
-	Extent *f, *g, *gbig, *esmall;
+	Extent *f, *g;
 
 	if(e == nil)
 		return e;
@@ -166,7 +102,7 @@
 	/* at the highest extent */
 	if(e->high == nil)
 		return e;
-	if(e->blkno + e->len == e->high->blkno){
+	if(e->start + e->len == e->high->start){
 		f = e->high;
 		if(f != nil)
 			g = f->high;
@@ -178,14 +114,6 @@
 			g->low = e;
 		e->high = g;
 
-		/* remove e from the ordered size list */
-		gbig = f->big;
-		esmall = f->small;
-		if(esmall != nil)
-			esmall->big = gbig;
-		if(gbig != nil)
-			gbig->small = esmall;
-
 		free(f);
 		es->n--;
 		es->lru = e;
@@ -202,7 +130,7 @@
 Extent *
 mergeprevious(Extents *es, Extent *e)
 {
-	Extent *d, *f, *dsmall, *fbig;
+	Extent *d, *f;
 
 	if(e == nil)
 		return e;
@@ -210,7 +138,7 @@
 	/* at the lowest extent */
 	if(e->low == nil)
 		return e;
-	if(e->low->blkno+e->low->len == e->blkno){
+	if(e->low->start+e->low->len == e->start){
 		d = e->low;
 		f = e->high;
 		/* skipping e */
@@ -221,14 +149,6 @@
 		if(f != nil)
 			f->low = d;
 
-		/* remove e from the ordered size list */
-		dsmall = e->small;
-		fbig = e->big;
-		if(dsmall != nil)
-			dsmall->big = fbig;
-		if(fbig != nil)
-			fbig->small = dsmall;
-
 		free(e);
 		es->n--;
 		es->lru = d;
@@ -238,49 +158,49 @@
 }
 
 Extent *
-increment(Extents *es, Extent *e, u64 blkno, u64 len)
+increment(Extents *es, Extent *e, u64 start, u64 len)
 {
 	if(e == nil)
 		return e;
 	es->lru = e;
-	if(blkno+len == e->blkno){
-		e->blkno = blkno;
+	if(start+len == e->start){
+		e->start = start;
 		e->len += len;
-		return sortbysize(es, mergeprevious(es, e));
+		return mergeprevious(es, e);
 	}
-	if(blkno == e->blkno+e->len){
+	if(start == e->start+e->len){
 		e->len += len;
-		return sortbysize(es, mergenext(es, e));
+		return mergenext(es, e);
 	}
 	print("increment: should not be here"
-		" e->blkno %llud .. %llud blkno %llud len %llud\n",
-		e->blkno, e->blkno+e->len-1, blkno, len);
+		" e->start %llud .. %llud start %llud len %llud\n",
+		e->start, e->start+e->len-1, start, len);
 	abort();
 	return e;
 }
 
 int
-belongs(Extent *e, u64 blkno, u64 len)
+belongs(Extent *e, u64 start, u64 len)
 {
 	if(e == nil)
 		sysfatal("belongs: e == nil");
-	if(blkno+len == e->blkno || blkno == e->blkno+e->len)
+	if(start+len == e->start || start == e->start+e->len)
 		return 0;
-	if(blkno >= e->blkno && blkno <= e->blkno+e->len)
+	if(start >= e->start && start <= e->start+e->len)
 		panic("belongs: inconsistent"
-				" e->blkno %llud e->len %llud"
-				" blkno %llud len %llud\n",
-				e->blkno, e->len, blkno, len);
-	return blkno+len-e->blkno;
+				" e->start %llud e->len %llud"
+				" start %llud len %llud\n",
+				e->start, e->len, start, len);
+	return start+len-e->start;
 }
 
 /*
 print("between e->prev %llud .. %llud and e %llud .. %llud\n",
-	 e->prev->blkno, e->prev->blkno+e->prev->n-1,
-	 e->blkno, e->blkno+e->len-1);
+	 e->prev->start, e->prev->start+e->prev->n-1,
+	 e->start, e->start+e->len-1);
  */
 Extent *
-doadd(Extents *es, u64 blkno, u64 len)
+doadd(Extents *es, u64 start, u64 len)
 {
 	int dir;
 	Extent *e;
@@ -289,10 +209,10 @@
 		panic("add es == nil");
 	if(es->lru == nil){
 		e = emalloc(sizeof(Extent));
-		e->low = e->high = e->small = e->big = nil;
-		e->blkno = blkno;
+		e->low = e->high = nil;
+		e->start = start;
 		e->len = len;
-		es->lru = e;
+		es->lru = es->head = e;
 		es->n = 1;
 		return e;
 	}
@@ -299,61 +219,61 @@
 
 	/* using the previously used extent */
 	e = es->lru;
-	dir = belongs(es->lru, blkno, len);
+	dir = belongs(es->lru, start, len);
 	if(chatty9p > 7){
-		print(" 	belongs(e %llud %llud blkno %llud .. %llud) %d\n",
-			 e->blkno, e->blkno+e->len-1, blkno, blkno+len-1, dir);
+		print(" 	belongs(e %llud %llud start %llud .. %llud) %d\n",
+			 e->start, e->start+e->len-1, start, start+len-1, dir);
 	}
 	if(dir == 0)
-		return increment(es, e, blkno, len);
+		return increment(es, e, start, len);
 	else if(dir < 0){
 		if(e->low == nil)
 			/* at the lowest extent, add a prev */
-			return addextent(es, e, blkno, len);
-		else if(e->low->blkno+e->low->len == blkno)
+			return addextent(es, e, start, len);
+		else if(e->low->start+e->low->len == start)
 			/* belongs in the ->low extent */
-			return increment(es, e->low, blkno, len);
-		else if(e->low->blkno+e->low->len < blkno)
+			return increment(es, e->low, start, len);
+		else if(e->low->start+e->low->len < start)
 			/* between the e->low and e */
-			return addextent(es, e->low, blkno, len);
+			return addextent(es, e->low, start, len);
 		else{
 			/* search from e->low */
 			es->lru = e->low;
-			return doadd(es, blkno, len);
+			return doadd(es, start, len);
 		}
 	}else if(dir > 0){
 		if(e->high == nil)
 			/* at the highest extent, add the next */
-			return addextent(es, e, blkno, len);
-		else if(blkno+len == e->high->blkno)
+			return addextent(es, e, start, len);
+		else if(start+len == e->high->start)
 			/* belongs in the ->high extent */
-			return increment(es, e->high, blkno, len);
-		else if(blkno+len < e->high->blkno)
+			return increment(es, e->high, start, len);
+		else if(start+len < e->high->start)
 			/* between e and e->high */
-			return addextent(es, e, blkno, len);
+			return addextent(es, e, start, len);
 		else{
 			es->lru = e->high;
 			/* search from e->high */
-			return doadd(es, blkno, len);
+			return doadd(es, start, len);
 		}
 	}
 	/* dir == 0 */
 	sysfatal("add: should not be here"
-		 " e->blkno %llud .. %llud blkno %llud len %llud\n",
-		  e->blkno, e->blkno+e->len-1, blkno, len);
+		 " e->start %llud .. %llud start %llud len %llud\n",
+		  e->start, e->start+e->len-1, start, len);
 	return e;
 }
 
 Extent *
-add(Extents *es, u64 blkno, u64 len)
+add(Extents *es, u64 start, u64 len)
 {
 	Extent *e;
 
 /*	if(chatty9p > 7){
 		showextents(2, "		before\n", es);
-		fprint(2, " +%llud %llud\n", blkno, len);
+		fprint(2, " +%llud %llud\n", start, len);
 	}*/
-	e = doadd(es, blkno, len);
+	e = doadd(es, start, len);
 	es->lru = e;
 /*	if(chatty9p > 7)
 		showextents(2, "		after\n", es);*/
@@ -363,25 +283,23 @@
 static u64
 pluck(Extents *es, Extent *e)
 {
-	Extent *dlow, *dsmall, *fhigh, *fbig;
-	u64 blkno;
+	Extent *dlow, *fhigh;
+	u64 start;
 
 	if(es == nil || e == nil || es->lru == nil)
 		panic("pluck(): should not happen");
 
 	/* if e is the only entry in es */
-	if(e->small == nil && e->big == nil){
-		es->lru = nil;
+	if(e->low == nil && e->high == nil){
+		es->lru = es->head = nil;
 		es->n = 0;
-		blkno = e->blkno;
+		start = e->start;
 		free(e);
-		return blkno;
+		return start;
 	}
 
 	/* there are atleast 2 elements in the list */
 	/* d e f => d f */
-	dsmall = e->small;
-	fbig = e->big;
 	dlow = e->low;
 	fhigh = e->high;
 
@@ -390,16 +308,12 @@
 		dlow->high = nil;
 		es->lru = dlow;
 	}
-	if(fbig == nil)
-		dsmall->big = nil;
 
-	/* nil e f => nil e */
+	/* nil e f => nil f */
 	if(dlow == nil){
 		fhigh->low = nil;
-		es->lru = fhigh;
+		es->lru = es->head = fhigh;
 	}
-	if(dsmall == nil)
-		fbig->small = nil;
 
 	if(dlow != nil && fhigh != nil){
 		dlow->high = fhigh;
@@ -406,26 +320,22 @@
 		fhigh->low = dlow;
 		es->lru = fhigh;
 	}
-	if(dsmall != nil && fbig != nil){
-		dsmall->big = fbig;
-		fbig->small = dsmall;
-	}
-	blkno = e->blkno;
+	start = e->start;
 	free(e);
-	return blkno;
+	return start;
 }
 
 u64
 slice(Extents *es, Extent *e, u64 len)
 {
-	u64 oldblkno, oldlen;
+	u64 oldstart, oldlen;
 
 	if(es == nil || e == nil || es->lru == nil || len == 0 || e->len <= len)
 		panic("pluck(): should not happen");
 	oldlen = e->len;
-	oldblkno = pluck(es, e);
-	add(es, oldblkno+len, oldlen - len);
-	return oldblkno;
+	oldstart = pluck(es, e);
+	add(es, oldstart+len, oldlen - len);
+	return oldstart;
 }
 
 /* allocate n blocks and return that block number */
@@ -433,18 +343,21 @@
 balloc(Extents *es, u64 n)
 {
 	Extent *e;
-	u64 blkno;
+	u64 start;
 	// char msg[64];
 
 	if(es == nil)
 		panic("balloc: es == nil");
-	blkno = 0;
-	qlock(&es->el);
+	start = 0;
+	qlock(&es->lck);
+	if(es->n == 0)
+		rsleep(&es->isempty);
 /*	if(chatty9p > 7){
 		snprint(msg, 64, "balloc() %llud blocks:\n", n);
 		showextents(2, msg, es);
 	}*/
-	for(e = smallest(es); e != nil && e->len < n; e = e->big)
+	/* TODO get rid of this call to the lowest by using Extent *head */
+	for(e = lowest(es); e != nil && e->len < n; e = e->high)
 		;
 	if(e == nil){
 		// snprint(msg, 64, "balloc() %llud blocks:\n", n);
@@ -452,12 +365,12 @@
 		panic("balloc: out of free blocks");
 	}
 	else if(e->len == n)
-		blkno = pluck(es, e);
+		start = pluck(es, e);
 	else /* found something bigger */
-		blkno = slice(es, e, n);
+		start = slice(es, e, n);
 
-	qunlock(&es->el);
-	return blkno;
+	qunlock(&es->lck);
+	return start;
 }
 
 /* reallocate n blocks to nnew blocks and return that block number
@@ -469,13 +382,15 @@
 
 /* free n blocks allocated at block number */
 void
-bfree(Extents *es, u64 blkno, u64 len)
+bfree(Extents *es, u64 start, u64 len)
 {
 	if(es == nil)
 		panic("bfree: es == nil");
-	qlock(&es->el);
-	add(es, blkno, len);
-	qunlock(&es->el);
+	qlock(&es->lck);
+	add(es, start, len);
+	if(es->n == 1)
+		rwakeup(&es->isempty);
+	qunlock(&es->lck);
 }
 
 /* count the total number of free blocks */
@@ -487,11 +402,11 @@
 
 	if(es == nil)
 		panic("nfrees: es == nil");
-	qlock(&es->el);
-	for(e = smallest(es); e != nil; e = e->big){
+	qlock(&es->lck);
+	for(e = lowest(es); e != nil; e = e->high){
 		n += e->len;
 	}
-	qunlock(&es->el);
+	qunlock(&es->lck);
 	return n;
 }
 
@@ -503,17 +418,17 @@
 	s8 tmp[128];
 	Extent *e;
 
-	qlock(&es->el);
+	qlock(&es->lck);
 	used = 0;
-	for(e = smallest(es); e != nil; e = e->big){
+	for(e = lowest(es); e != nil; e = e->high){
 		n = snprint(tmp, 128, "%llud .. %llud\n",
-						e->blkno, e->blkno+e->len-1);
+						e->start, e->start+e->len-1);
 		if(n == 128)
 			panic("saveextents(): increase tmp size");
 		used += n;
 	}
 	// keep it locked?
-	qunlock(&es->el);
+	qunlock(&es->lck);
 	return used;
 }
 /* write to *buf return the length written */
@@ -525,11 +440,11 @@
 	Extent *e;
 	s32 ret;
 
-	qlock(&es->el);
+	qlock(&es->lck);
 	used = 0;
-	for(e = smallest(es); e != nil; e = e->big){
+	for(e = lowest(es); e != nil; e = e->high){
 		n = snprint(tmp, 128, "%llud .. %llud\n",
-						e->blkno, e->blkno+e->len-1);
+						e->start, e->start+e->len-1);
 		if(n == 128)
 			panic("saveextents(): increase tmp size");
 		if(used+n > nbuf){
@@ -542,7 +457,7 @@
 	ret = used;
 	// keep it locked?
 end:
-	qunlock(&es->el);
+	qunlock(&es->lck);
 	return ret;
 }
 
@@ -553,7 +468,7 @@
 loadextents(Extents *es, s8 *buf, u32 nbuf)
 {
 	s8 *p, *ep;
-	u64 sblkno, eblkno, nblocks;
+	u64 sstart, estart, nblocks;
 
 	p = buf;
 	if(es->lru != nil || es->n != 0){
@@ -561,128 +476,27 @@
 			"	TODO make loadextents() be called multiple times\n");
 	}
 	while(*p != 0 && p-buf < nbuf){
-		sblkno = strtoull(p, &ep, 10);
+		sstart = strtoull(p, &ep, 10);
 		if(p == ep)
 			panic("could not read");
 		p = ep;
 		p += 4; /* skip over ' .. ' */
-		eblkno = strtoull(p, &ep, 10);
+		estart = strtoull(p, &ep, 10);
 		if(p == ep)
 			panic("could not read");
-		nblocks = eblkno - sblkno+1;
+		nblocks = estart - sstart+1;
 		p = ep;
 		p++; /* to skip over the new line */
-		bfree(es, sblkno, nblocks);
+		bfree(es, sstart, nblocks);
 	}
 	return es->n;
 }
 
-Extent *
-sortbelowbysize(Extents *es, Extent *e)
-{
-	Extent *c, *d, *f;
-
-	if(es == nil)
-		panic("sortbysize es == nil");
-	if(e == nil)
-		return e;
-
-	/* only 1 item, nothing to sort */
-	if(e->small == nil && e->big == nil){
-		es->lru = e;
-		return e;
-	}
-
-	/* nothing to sort, at the smallest item */
-	if(e->small == nil)
-		return e;
-	for(d=e->small;
-		d != nil;
-		d = d->small){
-		if(d->len > e->len ||
-				(d->len == e->len && d->blkno > e->blkno)){
-			/* c d e f => c e d f */
-			c = d->small;
-			if(c != nil)
-				c->big = e;
-			f = e->big;
-			if(f != nil)
-				f->small = d;
-			d->big = f;
-			e->small = c;
-			e->big = d;
-			d->small = e;
-		}
-	}
-	return e;
-}
-
-Extent *
-sortabovebysize(Extents *es, Extent *e)
-{
-	Extent *d, *f, *g;
-
-	if(es == nil)
-		panic("sortbysize es == nil");
-	if(e == nil)
-		return e;
-
-	/* only 1 item, nothing to sort */
-	if(e->small == nil && e->big == nil){
-		es->lru = e;
-		return e;
-	}
-
-	/* nothing to sort, at the biggest item */
-	if(e->big == nil)
-		return e;
-	for(f=e->big;
-		f != nil;
-		f = f->big){
-		if(f->len < e->len ||
-				(f->len == e->len && f->blkno < e->blkno)){
-			/* d e f g => d f e g */
-			g = f->big;
-			if(g != nil)
-				g->small = e;
-			d = e->small;
-			if(d != nil)
-				d->big = f;
-			f->small = d;
-			f->big = e;
-			e->small = f;
-			e->big = g;
-		}
-	}
-	return e;
-}
-
-Extent *
-sortbysize(Extents *es, Extent *e)
-{
-	Extent *c;
-
-	if(es == nil)
-		panic("sortbysize es == nil");
-	if(e == nil)
-		return e;
-
-	/* only 1 item, nothing to sort */
-	if(e->small == nil && e->big == nil){
-		es->lru = e;
-		return e;
-	}
-
-	/* sort below */
-	c = sortbelowbysize(es, e);
-	return sortabovebysize(es, c);
-}
-
 void
 showextent(int fd, char *pre, Extent *e)
 {
-	fprint(fd, "%s small %8#p	low %8#p	e %8#p %llud %llud	high %8#p	big %8#p",
-			pre, e->small, e->low, e, e->blkno, e->len, e->high, e->big);
+	fprint(fd, "%s	low %8#p	e %8#p %llud %llud	high %8#p",
+			pre, e->low, e, e->start, e->len, e->high);
 }
 
 void
@@ -692,16 +506,10 @@
 
 	fprint(fd, "%s", msg);
 	for(e = lowest(es); e != nil; e = e->high){
-		fprint(fd, "	%llud .. %llud", e->blkno, e->blkno+e->len-1);
+		fprint(fd, "	%llud .. %llud", e->start, e->start+e->len-1);
 //		showextent(fd, "	", e);
 		fprint(fd, "\n");
 	}
-	fprint(fd, "			ordered by size\n");
-	for(e = smallest(es); e != nil; e = e->big){
-		fprint(fd, "	%llud .. %llud has %llud blocks", e->blkno, e->blkno+e->len-1, e->len);
-//		showextent(fd, "	", e);
-		fprint(fd, "\n");
-	}
 }
 
 void
@@ -711,7 +519,7 @@
 	u64 i;
 
 	for(e = lowest(es); e != nil; e = e->high)
-		for(i = e->blkno; i<e->blkno+e->len; i++)
+		for(i = e->start; i<e->start+e->len; i++)
 			fprint(fd, "%llud\n", i);
 }
 
@@ -747,10 +555,16 @@
 	if(es == nil || es->lru == nil)
 		return 0;
 	for(e = lowest(es); e != nil; e=e->high){
-		if(e->blkno == bno)
+		if(e->start == bno)
 			return 1;
-		if(e->blkno < bno && bno < e->blkno+e->len)
+		if(e->start < bno && bno < e->start+e->len)
 			return 1;
 	}
 	return 0;
+}
+
+void
+initextents(Extents *es)
+{
+	es->isempty.l = &es->lck;
 }
--- a/extents.h
+++ b/extents.h
@@ -12,15 +12,16 @@
 2. If no extent of the len we need is available, then break up the smallest extent.
 */
 struct Extent {
-	struct Extent *low, *high;	/* sorted by block number */
-	struct Extent *small, *big;/* sorted by the number of blocks in this extent */
-	u64 blkno;					/* block number */
-	u64 len; 					/* in blocks */
+	struct Extent *low, *high;	/* sorted by start */
+	u64 start;					/* where this extent starts from */
+	u64 len; 					/* how many units in this extent */
 };
 struct Extents {
 	Extent *lru;	/* least recently used extent */
-	QLock el;
+	Extent *head;	/* find the first block in a jiffy */
+	QLock lck;
 	u32 n;			/* number of extents */
+	Rendez isempty; 	/* fully used, nothing available */
 };
 
 extern int chatty9p;
@@ -41,4 +42,4 @@
 u64	 nfrees(Extents *es);
 
 Extent *lowest(Extents *es);
-
+void	initextents(Extents *es);
--- a/iobuf.c
+++ b/iobuf.c
@@ -2,9 +2,45 @@
 
 u32  nbuckets = 0;	/* nbuckets derived from -m or Nbuckets */
 Hiob *hiob = nil;	/* array of nbuckets */
-Extents frees = {0};
+Extents frees = {0};/* extents of free blocks on the disk */
 
 /*
+	extents of Rawblocksize units of memory used to store
+	the disk block contents in memory for the buffer cache
+	and write queue
+ */
+Extents memunits = {0};
+u8 *memunitpool = 0;
+
+void
+initmemunitpool(u64 nunits)
+{
+	memunitpool = sbrk(nunits * Rawblocksize);
+	initextents(&memunits);
+}
+
+u8 *
+allocmemunit(void)
+{
+	u64 m;
+
+	m = balloc(&memunits, 1);
+	if(m == 0)
+		panic("allocmemunit: no memory available");
+
+	return memunitpool+m;
+}
+
+void
+freememunit(u8 *m)
+{
+	if(m == 0)
+		panic("freememunit: m == 0\n");
+	bfree(&memunits, m-memunitpool, 1);
+}
+
+
+/*
    add an Iobuf to the collisions lru linked list
    hp must be locked
  */
@@ -27,7 +63,7 @@
 		p->back = p;
 	}
 	p->blkno = 0;
-	p->xiobuf = emalloc9p(Rawblocksize);
+	p->xiobuf = allocmemunit();
 	return p;
 }
 
--- a/mafs.c
+++ b/mafs.c
@@ -33,7 +33,7 @@
 static void
 usage(void)
 {
-	fprint(2, "usage: mafs [-Ds] [-r service] [-n service] [-h nbuckets] [-w npendingwrites] [-a announce-string]... fsfile\n");
+	fprint(2, "usage: mafs [-Ds] [-r service] [-n service] [-h nbuckets] [-w npendingwrites] [-m nmemunits] [-a announce-string]... fsfile\n");
 	exits("usage");
 }
 
@@ -44,6 +44,7 @@
 	int doream, stdio, netc;
 	char buf[Namelen];
 	int pid, ctl;
+	u64 nmemunits;
 
 	progname = "mafs";
 	procname = "init";
@@ -57,6 +58,7 @@
 
 	nbuckets = Nbuckets;
 	npendingwrites = Npendingwrites;
+	nmemunits = Nmemunits;
 	sfd = -1;
 	doream = stdio = netc = 0;
 
@@ -70,8 +72,9 @@
 	default:	usage();
 	case 'D':	chatty9p++; break;
 	case 'f':	devfile = ARGF(); break;
-	case 'h':	nbuckets = atoi(EARGF(usage())); break;
-	case 'w':	npendingwrites = atoi(EARGF(usage())); break;
+	case 'h':	nbuckets = atoll(EARGF(usage())); break;
+	case 'w':	npendingwrites = atoll(EARGF(usage())); break;
+	case 'm':	nmemunits = atoll(EARGF(usage())); break;
 	case 'r':
 		doream = 1;
 		/* fall through */
@@ -109,6 +112,8 @@
 
 	formatinit();
 
+	initmemunitpool(nmemunits);
+	initextents(&frees);
 	tlocks = emalloc9p(NTLOCK * sizeof *tlocks);
 	initwriter();
 	iobufinit();
--- a/reconcile.c
+++ b/reconcile.c
@@ -128,7 +128,7 @@
 		return;
 	}
 	for(e = lowest(u->es); e != nil; e=e->high){
-		for(bno = e->blkno; bno<e->blkno+e->len; bno++){
+		for(bno = e->start; bno<e->start+e->len; bno++){
 			if(find(f->es, bno) == 1)
 				print(" %llud", bno);
 		}
--- a/writer.c
+++ b/writer.c
@@ -25,8 +25,8 @@
 	Wbuf *prev, *next;
 	Iobuf *iobuf;	/* pointer to the used Iobuf in the buffer cache */
 	union{
-		u8	payload;	/* "real" contents */
-		Content io;	/* cast'able to contents */
+		u8	*payload;	/* "real" contents */
+		Content *io;	/* cast'able to contents */
 	};
 };
 
@@ -114,11 +114,12 @@
 		dprint("putwrite start p->blkno %llud\n", b->blkno);
 		stats();
 	}
-	if(drts.n > Npendingwrites)
+	if(drts.n > npendingwrites)
 		rsleep(&drts.isfull);
-	w = emalloc9p(sizeof(Wbuf)+Rawblocksize);
+	w = emalloc9p(sizeof(Wbuf));
 	w->blkno = b->blkno;
-	memcpy(&w->io, b->io, Rawblocksize);
+	w->payload = allocmemunit();
+	memcpy(w->io, b->io, Rawblocksize);
 	if(drts.head == nil){
 		drts.head = drts.tail = w;
 	}else{
@@ -153,13 +154,13 @@
 	}
 	if(chatty9p > 4)
 		dprint("dowrite p->blkno %llud locked\n", p->blkno);
-	p->io.dirty = 1;
-	if((n = devwrite(p->blkno, &p->payload)) != Rawblocksize){
+	p->io->dirty = 1;
+	if((n = devwrite(p->blkno, p->payload)) != Rawblocksize){
 		dprint("%s\n", errstring[Esystem]);
 		panic("error writing block %llud: %llud bytes: %r\n",
 				p->blkno, n);
 	}
-	p->io.dirty = 0;
+	p->io->dirty = 0;
 	devwritedirtyclear(p->blkno);
 	n = p->blkno;
 	if(chatty9p > 4)
@@ -171,6 +172,7 @@
 		bfree(&frees, p->iobuf->blkno, 1);
 	}
 	wunlock(p->iobuf);
+	freememunit(p->payload);
 	free(p);
 }