code: plan9front

Download patch

ref: 843b2168c6a41b5fd91a7e9993d14dc0c186d4ca
parent: 90d8e4d9442cd3219211f12a2cc66ee5418c6b57
author: Ori Bernstein <ori@eigenstate.org>
date: Sun May 26 20:42:36 EDT 2024

gefs: tweak block address heuristic to sequentialize allocation

When churning through data blocks and writing out of order, we would
copy blocks and release them often, leading to gaps at the back of
the file. Retrying an allocation could also lead to allocating a block
in the same arena as the metadata.

This would leave gaps in the allocated files, which would cause seeks
when accessing the data later.

This set of changes allocates blocks that are expected to be accessed
sequentially from the start of the arena, and blocks that are expected
to be accessed out of order at the end of the arena.

For full data blocks (ie, where the write is at the end of the block),
we assume that the accesses will be sequential. For partial data blocks,
when the write offset is not at the end of the data block, we assume
that future appends are inbound, and we allocate the block in the non
sequential part of the arena. As a result, when we eventuall fill the
block, we will allocate the sequential block.

This doesn't help us too much if we have writes to files interleaved,
or we overwrite the same section of a file over and over, but it's
better than nothing.

--- a/sys/src/cmd/gefs/blk.c
+++ b/sys/src/cmd/gefs/blk.c
@@ -7,8 +7,8 @@
 #include "fns.h"
 #include "atomic.h"
 
-static vlong	blkalloc_lk(Arena*);
-static vlong	blkalloc(int, uint);
+static vlong	blkalloc_lk(Arena*, int);
+static vlong	blkalloc(int, uint, int);
 static void	blkdealloc_lk(Arena*, vlong);
 static Blk*	initblk(Blk*, vlong, vlong, int);
 
@@ -286,7 +286,7 @@
 	 * and chaining.
 	 */
 	if(lb == nil || lb->logsz >= Logspc - Logslop){
-		o = blkalloc_lk(a);
+		o = blkalloc_lk(a, 0);
 		if(o == -1)
 			error(Efull);
 		nl = mklogblk(a, o);
@@ -430,7 +430,7 @@
 		nexterror();
 	}
 	for(i = 0; i < nblks; i++){
-		blks[i] = blkalloc_lk(a);
+		blks[i] = blkalloc_lk(a, 1);
 		if(blks[i] == -1)
 			error(Efull);
 	}
@@ -495,14 +495,15 @@
  * the alloc log.
  */
 static vlong
-blkalloc_lk(Arena *a)
+blkalloc_lk(Arena *a, int seq)
 {
-	Avltree *t;
 	Arange *r;
 	vlong b;
 
-	t = a->free;
-	r = (Arange*)t->root;
+	if(seq)
+		r = (Arange*)avlmin(a->free);
+	else
+		r = (Arange*)avlmax(a->free);
 	if(!usereserve && a->size - a->used <= a->reserve)
 		return -1;
 	if(r == nil)
@@ -515,11 +516,16 @@
 	 * the sort order because the tree
 	 * covers disjoint ranges
 	 */
-	b = r->off;
-	r->len -= Blksz;
-	r->off += Blksz;
+	if(seq){
+		b = r->off;
+		r->len -= Blksz;
+		r->off += Blksz;
+	}else{
+		r->len -= Blksz;
+		b = r->off + r->len;
+	}
 	if(r->len == 0){
-		avldelete(t, r);
+		avldelete(a->free, r);
 		free(r);
 	}
 	a->used += Blksz;
@@ -548,7 +554,7 @@
 }
 
 static vlong
-blkalloc(int ty, uint hint)
+blkalloc(int ty, uint hint, int seq)
 {
 	Arena *a;
 	vlong b;
@@ -575,7 +581,7 @@
 		qunlock(a);
 		nexterror();
 	}
-	b = blkalloc_lk(a);
+	b = blkalloc_lk(a, seq);
 	if(b == -1){
 		qunlock(a);
 		poperror();
@@ -633,13 +639,28 @@
 }
 
 Blk*
-newblk(Tree *t, int ty, vlong hint)
+newdblk(Tree *t, vlong hint, int seq)
 {
 	vlong bp;
 	Blk *b;
 
-	bp = blkalloc(ty, hint);
+	bp = blkalloc(Tdat, hint, seq);
 	b = cachepluck();
+	initblk(b, bp, t->memgen, Tdat);
+	b->alloced = getcallerpc(&t);
+	tracex("newblk" , b->bp, Tdat, -1);
+	return b;
+
+}
+
+Blk*
+newblk(Tree *t, int ty)
+{
+	vlong bp;
+	Blk *b;
+
+	bp = blkalloc(ty, 0, 0);
+	b = cachepluck();
 	initblk(b, bp, t->memgen, ty);
 	b->alloced = getcallerpc(&t);
 	tracex("newblk" , b->bp, ty, -1);
@@ -651,7 +672,7 @@
 {
 	Blk *r;
 
-	if((r = newblk(t, b->type, 0)) == nil)
+	if((r = newblk(t, b->type)) == nil)
 		return nil;
 
 	tracex("dup" , b->bp, b->type, t->gen);
--- a/sys/src/cmd/gefs/fns.h
+++ b/sys/src/cmd/gefs/fns.h
@@ -35,8 +35,9 @@
 
 void*	emalloc(usize, int);
 
-Blk*	newblk(Tree *, int, vlong);
-Blk*	dupblk(Tree *, Blk*);
+Blk*	newdblk(Tree*, vlong, int);
+Blk*	newblk(Tree*, int);
+Blk*	dupblk(Tree*, Blk*);
 Blk*	getroot(Tree*, int*);
 Blk*	getblk(Bptr, int);
 Blk*	holdblk(Blk*);
--- a/sys/src/cmd/gefs/fs.c
+++ b/sys/src/cmd/gefs/fs.c
@@ -471,6 +471,7 @@
 	char buf[Kvmax];
 	vlong fb, fo;
 	Blk *b, *t;
+	int seq;
 	Tree *r;
 	Bptr bp;
 	Kvp kv;
@@ -482,7 +483,11 @@
 	PACK64(m->k+1, f->qpath);
 	PACK64(m->k+9, fb);
 
-	b = newblk(f->mnt->root, Tdat, f->qpath);
+	if(fo+n >= Blksz)
+		seq = 1;
+	else
+		seq = 0;
+	b = newdblk(f->mnt->root, f->qpath, seq);
 	t = nil;
 	r = f->mnt->root;
 	if(btlookup(r, m, &kv, buf, sizeof(buf))){
--- a/sys/src/cmd/gefs/ream.c
+++ b/sys/src/cmd/gefs/ream.c
@@ -286,7 +286,7 @@
 		loadlog(a, a->loghd);
 	}
 
-	if((mb = newblk(mnt->root, Tleaf, 0)) == nil)
+	if((mb = newblk(mnt->root, Tleaf)) == nil)
 		sysfatal("ream: allocate root: %r");
 	holdblk(mb);
 	initroot(mb);
@@ -296,9 +296,9 @@
 	mnt->root->ht = 1;
 	mnt->root->bp = mb->bp;
 
-	if((ab = newblk(adm->root, Tleaf, 0)) == nil)
+	if((ab = newblk(adm->root, Tleaf)) == nil)
 		sysfatal("ream: allocate root: %r");
-	if((ub = newblk(adm->root, Tdat, 0)) == nil)
+	if((ub = newdblk(adm->root, 0, 1)) == nil)
 		sysfatal("ream: allocate root: %r");
 	holdblk(ab);
 	holdblk(ub);
@@ -322,7 +322,7 @@
 	 * a single snap block that the tree will insert
 	 * into, and take a snapshot as the initial state.
 	 */
-	if((tb = newblk(mnt->root, Tleaf, 0)) == nil)
+	if((tb = newblk(mnt->root, Tleaf)) == nil)
 		sysfatal("ream: allocate snaps: %r");
 	holdblk(tb);
 	initsnap(tb, mb, ab);
--- a/sys/src/cmd/gefs/snap.c
+++ b/sys/src/cmd/gefs/snap.c
@@ -596,7 +596,7 @@
 		nexterror();
 	}
 	if(dl->ins == nil || Logspc - dl->ins->logsz < Logslop){
-		b = newblk(&fs->snap, Tdlist, 0);
+		b = newblk(&fs->snap, Tdlist);
 		if(dl->ins != nil){
 			enqueue(dl->ins);
 			dropblk(dl->ins);
--- a/sys/src/cmd/gefs/tree.c
+++ b/sys/src/cmd/gefs/tree.c
@@ -482,7 +482,7 @@
 	 */
 	full = 0;
 	spc = Leafspc - blkfill(b);
-	n = newblk(t, b->type, 0);
+	n = newblk(t, b->type);
 	assert(i >= 0 && j >= 0);
 	while(i < b->nval || j < up->hi){
 		if(i >= b->nval)
@@ -573,7 +573,7 @@
 	Msg m, u;
 
 	b = p->b;
-	n = newblk(t, b->type, 0);
+	n = newblk(t, b->type);
 	for(i = 0; i < b->nval; i++){
 		if(pp != nil && i == p->midx){
 			copyup(n, pp, nil);
@@ -657,8 +657,8 @@
 		efreeblk(t, r);
 		nexterror();
 	}
-	l = newblk(t, b->type, 0);
-	r = newblk(t, b->type, 0);
+	l = newblk(t, b->type);
+	r = newblk(t, b->type);
 
 	d = l;
 	i = 0;
@@ -770,8 +770,8 @@
 		efreeblk(t, r);
 		nexterror();
 	}
-	l = newblk(t, b->type, 0);
-	r = newblk(t, b->type, 0);
+	l = newblk(t, b->type);
+	r = newblk(t, b->type);
 	d = l;
 	copied = 0;
 	halfsz = (2*b->nval + b->valsz)/2;
@@ -820,7 +820,7 @@
 	Msg m;
 	int i;
 
-	d = newblk(t, a->type, 0);
+	d = newblk(t, a->type);
 	for(i = 0; i < a->nval; i++){
 		getval(a, i, &m);
 		setval(d, &m);
@@ -904,8 +904,8 @@
 		efreeblk(t, r);
 		nexterror();
 	}
-	l = newblk(t, a->type, 0);
-	r = newblk(t, a->type, 0);
+	l = newblk(t, a->type);
+	r = newblk(t, a->type);
 	d = l;
 	cp = 0;
 	sp = -1;
@@ -1088,7 +1088,7 @@
 	}
 	if(pp->nl != nil && pp->nr != nil){
 		rp = &path[0];
-		rp->nl = newblk(t, Tpivot, 0);
+		rp->nl = newblk(t, Tpivot);
 		rp->npull = pp->npull;
 		rp->pullsz = pp->pullsz;
 		copyup(rp->nl, pp, nil);