code: plan9front

Download patch

ref: f91852c653e99e121bf152f6ab614e7bff9fbc97
parent: dd4b1abbe6b81e4055a40c9dcf6080804452535a
author: Ori Bernstein <ori@eigenstate.org>
date: Fri Jun 7 10:41:15 EDT 2024

gefs: improve flow of blocks/frees

This change covers three improvements:
- inline the limbo entry into the objects
  being freed, meaning we don't need to
  allocate, and thus can't fail to free
  an object when we're out of memory
- Preallocate Bfree objects, for the same
  reason above.

--- a/sys/src/cmd/gefs/blk.c
+++ b/sys/src/cmd/gefs/blk.c
@@ -819,16 +819,17 @@
 }
 
 void
-limbo(Bfree *f)
+limbo(int op, Limbo *l)
 {
-	Bfree *p;
+	Limbo *p;
 	ulong ge;
 
+	l->op = op;
 	while(1){
 		ge = agetl(&fs->epoch);
 		p = agetp(&fs->limbo[ge]);
-		f->next = p;
-		if(acasp(&fs->limbo[ge], p, f)){
+		l->next = p;
+		if(acasp(&fs->limbo[ge], p, l)){
 			aincl(&fs->nlimbo, 1);
 			break;
 		}
@@ -836,8 +837,24 @@
 }
 
 void
-freeblk(Tree *t, Blk *b, Bptr bp)
+freeblk(Tree *t, Blk *b)
 {
+	if(t == &fs->snap || (t != nil && b->bp.gen < t->memgen)){
+		tracex("killb", b->bp, getcallerpc(&t), -1);
+		killblk(t, b->bp);
+		return;
+	}
+	b->freed = getcallerpc(&t);
+	tracex("freeb", b->bp, getcallerpc(&t), -1);
+	setflag(b, Blimbo);
+	holdblk(b);
+	assert(b->ref > 1);
+	limbo(DFblk, b);
+}
+
+void
+freebp(Tree *t, Bptr bp)
+{
 	Bfree *f;
 
 	if(t == &fs->snap || (t != nil && bp.gen < t->memgen)){
@@ -845,18 +862,17 @@
 		killblk(t, bp);
 		return;
 	}
-
 	tracex("freeb", bp, getcallerpc(&t), -1);
-	f = emalloc(sizeof(Bfree), 0);
-	f->op = DFblk;
+
+	qlock(&fs->bfreelk);
+	while(fs->bfree == nil)
+		rsleep(&fs->bfreerz);
+	f = fs->bfree;
+	fs->bfree = (Bfree*)f->next;
+	qunlock(&fs->bfreelk);
+
 	f->bp = bp;
-	f->b = nil;
-	if(b != nil){
-		setflag(b, Blimbo);
-		b->freed = getcallerpc(&t);
-		f->b = holdblk(b);
-	}
-	limbo(f);
+	limbo(DFbp, f);
 }
 
 void
@@ -889,7 +905,7 @@
 	for(i = 0; i < fs->nworker; i++){
 		e = agetl(&fs->lepoch[i]);
 		if((e & Eactive) && e != (ge | Eactive)){
-			if(delay < 100)
+			if(delay < 1000)
 				delay++;
 			else
 				fprint(2, "stalled epoch %lx [worker %d]\n", e, i);
@@ -903,7 +919,9 @@
 epochclean(void)
 {
 	ulong c, e, ge;
-	Bfree *p, *n;
+	Limbo *p, *n;
+	Blk *b;
+	Bfree *f;
 	Arena *a;
 	Qent qe;
 	int i;
@@ -926,28 +944,39 @@
 		n = p->next;
 		switch(p->op){
 		case DFtree:
-			free(p->t);
+			free(p);
 			break;
 		case DFmnt:
-			free(p->m);
+			free(p);
 			break;
+		case DFbp:
+			f = (Bfree*)p;
+			a = getarena(f->bp.addr);
+			qe.op = Qfree;
+			qe.bp = f->bp;
+			qe.b = nil;
+			qput(a->sync, qe);
+			qlock(&fs->bfreelk);
+			f->next = fs->bfree;
+			fs->bfree = f;
+			rwakeup(&fs->bfreerz);
+			qunlock(&fs->bfreelk);
+			break;
 		case DFblk:
-			a = getarena(p->bp.addr);
+			b = (Blk*)p;
+			a = getarena(b->bp.addr);
 			qe.op = Qfree;
-			qe.bp = p->bp;
+			qe.bp = b->bp;
 			qe.b = nil;
+			setflag(b, Bfreed);
+			clrflag(b, Blimbo);
 			qput(a->sync, qe);
-			if(p->b != nil){
-				clrflag(p->b, Blimbo);
-				setflag(p->b, Bfreed);
-				dropblk(p->b);
-			}
+			dropblk(b);
 			break;
 		default:
 			abort();
 		}
 		aincl(&fs->nlimbo, -1);
-		free(p);
 	}
 }
 
@@ -1007,6 +1036,8 @@
 		assert(fs->syncing > 0);
 	else
 		abort();
+	if(qe.b != nil)
+		assert(qe.b->ref > 0);
 	qlock(&q->lk);
 	qe.qgen = agetv(&fs->qgen);
 	while(q->nheap == q->heapsz)
@@ -1080,12 +1111,16 @@
 		switch(qe.op){
 		case Qfree:
 			tracex("qfreeb", qe.bp, qe.qgen, -1);
+			/*
+			 * we shouldn't have a block in a free op,
+			 * the frees go into the queue just to ensure
+			 * write/reuse ordering.
+			 */
+			assert(qe.b == nil);
 			a = getarena(qe.bp.addr);
 			qlock(a);
 			cachedel(qe.bp.addr);
 			blkdealloc_lk(a, qe.bp.addr);
-			if(qe.b != nil)
-				dropblk(qe.b);
 			qunlock(a);
 			break;
 		case Qfence:
--- a/sys/src/cmd/gefs/dat.h
+++ b/sys/src/cmd/gefs/dat.h
@@ -10,6 +10,7 @@
 typedef struct Kvp	Kvp;
 typedef struct Xdir	Xdir;
 typedef struct Bptr	Bptr;
+typedef struct Limbo	Limbo;
 typedef struct Bfree	Bfree;
 typedef struct Scan	Scan;
 typedef struct Dent	Dent;
@@ -325,6 +326,18 @@
 	AOrclose,
 };
 
+enum {
+	DFblk,
+	DFbp,
+	DFmnt,
+	DFtree,
+};
+
+struct Limbo {
+	Limbo	*next;
+	int	op;
+};
+
 struct Bptr {
 	vlong	addr;
 	uvlong	hash;
@@ -412,6 +425,8 @@
 };
 
 struct Tree {
+	Limbo;
+
 	/* in-memory */
 	Lock	lk;
 	long	memref;	/* number of in-memory references to this */
@@ -430,19 +445,9 @@
 	vlong	base;	/* base snapshot */
 };
 
-enum {
-	DFblk,
-	DFmnt,
-	DFtree,
-};
-
 struct Bfree {
-	Bfree	*next;
-	int	op;
-	Mount	*m;
-	Tree	*t;
-	Blk	*b;
-	Bptr	bp;
+	Limbo;
+	Bptr bp;
 };
 
 struct User {
@@ -516,6 +521,7 @@
 	QLock	synclk;
 	Rendez	syncrz;
 
+	QLock	mountlk;
 	Mount	*mounts;
 	Mount	*snapmnt;
 	Lock	connlk;
@@ -529,7 +535,7 @@
 	long	nworker;
 	long	epoch;
 	long	lepoch[32];
-	Bfree	*limbo[3];
+	Limbo	*limbo[3];
 	long	nlimbo;
 
 	Syncq	syncq[32];
@@ -563,6 +569,11 @@
 	usize	ccount;
 	usize	cmax;
 
+	/* preallocated deferred frees */
+	QLock	bfreelk;
+	Rendez	bfreerz;
+	Bfree	*bfree;
+
 	RWLock	flushq[Nflushtab];
 	int	flushop[Nflushtab];
 
@@ -622,6 +633,7 @@
 };
 
 struct Mount {
+	Limbo;
 	Lock;
 	Mount	*next;
 	long	ref;
@@ -710,6 +722,7 @@
 };
 
 struct Blk {
+	Limbo;
 	/* cache entry */
 	Blk	*cnext;
 	Blk	*cprev;
--- a/sys/src/cmd/gefs/fns.h
+++ b/sys/src/cmd/gefs/fns.h
@@ -60,8 +60,9 @@
 void	epochend(int);
 void	epochwait(void);
 void	epochclean(void);
-void	limbo(Bfree*);
-void	freeblk(Tree*, Blk*, Bptr);
+void	limbo(int op, Limbo*);
+void	freeblk(Tree*, Blk*);
+void	freebp(Tree*, Bptr);
 int	logbarrier(Arena *, vlong);
 void	dlappend(Dlist *dl, Bptr);
 void	killblk(Tree*, Bptr);
--- a/sys/src/cmd/gefs/fs.c
+++ b/sys/src/cmd/gefs/fs.c
@@ -623,6 +623,7 @@
 		return fs->snapmnt;
 	}
 
+	qlock(&fs->mountlk);
 	for(mnt = agetp(&fs->mounts); mnt != nil; mnt = mnt->next){
 		if(strcmp(name, mnt->name) == 0){
 			ainc(&mnt->ref);
@@ -633,6 +634,7 @@
 	if((mnt = mallocz(sizeof(*mnt), 1)) == nil)
 		error(Enomem);
 	if(waserror()){
+		qunlock(&fs->mountlk);
 		free(mnt);
 		nexterror();
 	}
@@ -648,6 +650,7 @@
 	poperror();
 
 Out:
+	qunlock(&fs->mountlk);
 	return mnt;
 }
 
@@ -655,21 +658,19 @@
 clunkmount(Mount *mnt)
 {
 	Mount *me, **p;
-	Bfree *f;
 
 	if(mnt == nil)
 		return;
 	if(adec(&mnt->ref) == 0){
+		qlock(&fs->mountlk);
 		for(p = &fs->mounts; (me = *p) != nil; p = &me->next){
 			if(me == mnt)
 				break;
 		}
 		assert(me != nil);
-		f = emalloc(sizeof(Bfree), 0);
-		f->op = DFmnt;
-		f->m = mnt;
 		*p = me->next;
-		limbo(f);
+		limbo(DFmnt, me);
+		qunlock(&fs->mountlk);
 	}
 }
 
@@ -2160,7 +2161,7 @@
 		if(waserror()){
 			if(!fs->rdonly)
 				for(j = 0; j < i; j++)
-					freeblk(t, nil, bp[j]);
+					freebp(t, bp[j]);
 			nexterror();
 		}
 		n = writeb(f, &kv[i], &bp[i], p, o, c, f->dent->length);
@@ -2400,7 +2401,7 @@
 		}
 	}
 	if(rb.gen > pred)
-		freeblk(nil, nil, rb);
+		freebp(nil, rb);
 	dropblk(b);
 }
 
@@ -2429,7 +2430,7 @@
 			break;
 		bp = unpackbp(s.kv.v, s.kv.nv);
 		if(bp.gen > t->pred)
-			freeblk(nil, nil, bp);
+			freebp(nil, bp);
 		qlock(&fs->mutlk);
 		epochclean();
 		qunlock(&fs->mutlk);
@@ -2503,7 +2504,7 @@
 					epochstart(id);
 					b = getblk(bp, 0);
 					nb = b->logp;
-					freeblk(nil, b, b->bp);
+					freeblk(nil, b);
 					dropblk(b);
 					epochend(id);
 					epochclean();
--- a/sys/src/cmd/gefs/main.c
+++ b/sys/src/cmd/gefs/main.c
@@ -28,6 +28,7 @@
 int	nogroupid	= 9999;
 int	admid		= -1;
 Blk	*blkbuf;
+Bfree	*bfbuf;
 Errctx	**errctx;
 
 void
@@ -156,6 +157,7 @@
 static void
 initfs(vlong cachesz)
 {
+	Bfree *f, *g;
 	Blk *b;
 
 	if((fs = mallocz(sizeof(Gefs), 1)) == nil)
@@ -167,6 +169,7 @@
 	}
 	fs->lrurz.l = &fs->lrulk;
 	fs->syncrz.l = &fs->synclk;
+	fs->bfreerz.l = &fs->bfreelk;
 	fs->noauth = noauth;
 	fs->cmax = cachesz/Blksz;
 	if(fs->cmax > (1<<30))
@@ -181,12 +184,23 @@
 	if((fs->dlcache = mallocz(fs->dlcmax*sizeof(Dlist*), 1)) == nil)
 		sysfatal("malloc: %r");
 
+	bfbuf = sbrk(fs->cmax * sizeof(Bfree));
+	if(bfbuf == (void*)-1)
+		sysfatal("sbrk: %r");
+
+	g = nil;
+	for(f = bfbuf; f != bfbuf+fs->cmax; f++){
+		f->bp = Zb;
+		f->next = g;
+		g = f;
+	}
+	fs->bfree = g;
+
 	blkbuf = sbrk(fs->cmax * sizeof(Blk));
 	if(blkbuf == (void*)-1)
 		sysfatal("sbrk: %r");
 	for(b = blkbuf; b != blkbuf+fs->cmax; b++){
-		b->bp.addr = -1;
-		b->bp.hash = -1;
+		b->bp = Zb;
 		b->magic = Magic;
 		lrutop(b);
 	}
--- a/sys/src/cmd/gefs/snap.c
+++ b/sys/src/cmd/gefs/snap.c
@@ -189,10 +189,11 @@
 		bp = b->logp;
 		qe.op = Qfree;
 		qe.bp = b->bp;
-		qe.b = b;
+		qe.b = nil;
 		a = getarena(qe.bp.addr);
 		qput(a->sync, qe);
 		traceb("dlfreeb", qe.bp);
+		dropblk(b);
 	}
 }
 
@@ -542,14 +543,9 @@
 void
 closesnap(Tree *t)
 {
-	Bfree *f;
-
 	if(t == nil || adec(&t->memref) != 0)
 		return;
-	f = malloc(sizeof(Bfree));
-	f->op = DFtree;
-	f->t = t;
-	limbo(f);
+	limbo(DFtree, t);
 }
 
 void
--- a/sys/src/cmd/gefs/tree.c
+++ b/sys/src/cmd/gefs/tree.c
@@ -29,7 +29,7 @@
 
 #define efreeblk(t, b) do { \
 	if(b != nil) \
-		freeblk(t, b, b->bp); \
+		freeblk(t, b); \
 	} while(0)
 
 static void
@@ -517,7 +517,7 @@
 			|| m.op == Oinsert
 			|| m.op == Odelete){
 				bp = unpackbp(v.v, v.nv);
-				freeblk(t, nil, bp);
+				freebp(t, bp);
 			}
 			ok = apply(&v, &m, buf, sizeof(buf));
 			goto Copyloop;
@@ -541,7 +541,7 @@
 				|| m.op == Oinsert
 				|| m.op == Odelete){
 					bp = unpackbp(v.v, v.nv);
-					freeblk(t, nil, bp);
+					freebp(t, bp);
 				}
 				p->pullsz += msgsz(&m);
 				ok = apply(&v, &m, buf, sizeof(buf));
@@ -701,7 +701,7 @@
 			|| m.op == Oinsert
 			|| m.op == Odelete){
 				bp = unpackbp(v.v, v.nv);
-				freeblk(t, nil, bp);
+				freebp(t, bp);
 			}
 			ok = apply(&v, &m, buf, sizeof(buf));
 			goto Copyloop;
@@ -725,7 +725,7 @@
 				|| m.op == Oinsert
 				|| m.op == Odelete){
 					bp = unpackbp(v.v, v.nv);
-					freeblk(t, nil, bp);
+					freebp(t, bp);
 				}
 				p->pullsz += msgsz(&m);
 				ok = apply(&v, &m, buf, sizeof(buf));
@@ -1105,9 +1105,9 @@
 
 	for(p = path; p != path + npath; p++){
 		if(p->b != nil)
-			freeblk(t, p->b, p->b->bp);
+			freeblk(t, p->b);
 		if(p->m != nil)
-			freeblk(t, p->b, p->m->bp);
+			freeblk(t, p->b);
 		dropblk(p->b);
 		dropblk(p->nl);
 		dropblk(p->nr);
@@ -1211,7 +1211,7 @@
 	t->dirty = 1;
 	unlock(&t->lk);
 
-	freeblk(t, b, b->bp);
+	freeblk(t, b);
 	dropblk(b);
 	dropblk(r);
 }