ref: f91852c653e99e121bf152f6ab614e7bff9fbc97
parent: dd4b1abbe6b81e4055a40c9dcf6080804452535a
author: Ori Bernstein <ori@eigenstate.org>
date: Fri Jun 7 10:41:15 EDT 2024
gefs: improve flow of blocks/frees This change covers three improvements: - inline the limbo entry into the objects being freed, meaning we don't need to allocate, and thus can't fail to free an object when we're out of memory - Preallocate Bfree objects, for the same reason above.
--- a/sys/src/cmd/gefs/blk.c
+++ b/sys/src/cmd/gefs/blk.c
@@ -819,16 +819,17 @@
}
void
-limbo(Bfree *f)
+limbo(int op, Limbo *l)
{
- Bfree *p;
+ Limbo *p;
ulong ge;
+ l->op = op;
while(1){
ge = agetl(&fs->epoch);
p = agetp(&fs->limbo[ge]);
- f->next = p;
- if(acasp(&fs->limbo[ge], p, f)){
+ l->next = p;
+ if(acasp(&fs->limbo[ge], p, l)){
aincl(&fs->nlimbo, 1);
break;
}
@@ -836,8 +837,24 @@
}
void
-freeblk(Tree *t, Blk *b, Bptr bp)
+freeblk(Tree *t, Blk *b)
{
+ if(t == &fs->snap || (t != nil && b->bp.gen < t->memgen)){
+ tracex("killb", b->bp, getcallerpc(&t), -1);
+ killblk(t, b->bp);
+ return;
+ }
+ b->freed = getcallerpc(&t);
+ tracex("freeb", b->bp, getcallerpc(&t), -1);
+ setflag(b, Blimbo);
+ holdblk(b);
+ assert(b->ref > 1);
+ limbo(DFblk, b);
+}
+
+void
+freebp(Tree *t, Bptr bp)
+{
Bfree *f;
if(t == &fs->snap || (t != nil && bp.gen < t->memgen)){
@@ -845,18 +862,17 @@
killblk(t, bp);
return;
}
-
tracex("freeb", bp, getcallerpc(&t), -1);
- f = emalloc(sizeof(Bfree), 0);
- f->op = DFblk;
+
+ qlock(&fs->bfreelk);
+ while(fs->bfree == nil)
+ rsleep(&fs->bfreerz);
+ f = fs->bfree;
+ fs->bfree = (Bfree*)f->next;
+ qunlock(&fs->bfreelk);
+
f->bp = bp;
- f->b = nil;
- if(b != nil){
- setflag(b, Blimbo);
- b->freed = getcallerpc(&t);
- f->b = holdblk(b);
- }
- limbo(f);
+ limbo(DFbp, f);
}
void
@@ -889,7 +905,7 @@
for(i = 0; i < fs->nworker; i++){
e = agetl(&fs->lepoch[i]);
if((e & Eactive) && e != (ge | Eactive)){
- if(delay < 100)
+ if(delay < 1000)
delay++;
else
fprint(2, "stalled epoch %lx [worker %d]\n", e, i);
@@ -903,7 +919,9 @@
epochclean(void)
{
ulong c, e, ge;
- Bfree *p, *n;
+ Limbo *p, *n;
+ Blk *b;
+ Bfree *f;
Arena *a;
Qent qe;
int i;
@@ -926,28 +944,39 @@
n = p->next;
switch(p->op){
case DFtree:
- free(p->t);
+ free(p);
break;
case DFmnt:
- free(p->m);
+ free(p);
break;
+ case DFbp:
+ f = (Bfree*)p;
+ a = getarena(f->bp.addr);
+ qe.op = Qfree;
+ qe.bp = f->bp;
+ qe.b = nil;
+ qput(a->sync, qe);
+ qlock(&fs->bfreelk);
+ f->next = fs->bfree;
+ fs->bfree = f;
+ rwakeup(&fs->bfreerz);
+ qunlock(&fs->bfreelk);
+ break;
case DFblk:
- a = getarena(p->bp.addr);
+ b = (Blk*)p;
+ a = getarena(b->bp.addr);
qe.op = Qfree;
- qe.bp = p->bp;
+ qe.bp = b->bp;
qe.b = nil;
+ setflag(b, Bfreed);
+ clrflag(b, Blimbo);
qput(a->sync, qe);
- if(p->b != nil){
- clrflag(p->b, Blimbo);
- setflag(p->b, Bfreed);
- dropblk(p->b);
- }
+ dropblk(b);
break;
default:
abort();
}
aincl(&fs->nlimbo, -1);
- free(p);
}
}
@@ -1007,6 +1036,8 @@
assert(fs->syncing > 0);
else
abort();
+ if(qe.b != nil)
+ assert(qe.b->ref > 0);
qlock(&q->lk);
qe.qgen = agetv(&fs->qgen);
while(q->nheap == q->heapsz)
@@ -1080,12 +1111,16 @@
switch(qe.op){
case Qfree:
tracex("qfreeb", qe.bp, qe.qgen, -1);
+ /*
+ * we shouldn't have a block in a free op,
+ * the frees go into the queue just to ensure
+ * write/reuse ordering.
+ */
+ assert(qe.b == nil);
a = getarena(qe.bp.addr);
qlock(a);
cachedel(qe.bp.addr);
blkdealloc_lk(a, qe.bp.addr);
- if(qe.b != nil)
- dropblk(qe.b);
qunlock(a);
break;
case Qfence:
--- a/sys/src/cmd/gefs/dat.h
+++ b/sys/src/cmd/gefs/dat.h
@@ -10,6 +10,7 @@
typedef struct Kvp Kvp;
typedef struct Xdir Xdir;
typedef struct Bptr Bptr;
+typedef struct Limbo Limbo;
typedef struct Bfree Bfree;
typedef struct Scan Scan;
typedef struct Dent Dent;
@@ -325,6 +326,18 @@
AOrclose,
};
+enum {
+ DFblk,
+ DFbp,
+ DFmnt,
+ DFtree,
+};
+
+struct Limbo {
+ Limbo *next;
+ int op;
+};
+
struct Bptr {
vlong addr;
uvlong hash;
@@ -412,6 +425,8 @@
};
struct Tree {
+ Limbo;
+
/* in-memory */
Lock lk;
long memref; /* number of in-memory references to this */
@@ -430,19 +445,9 @@
vlong base; /* base snapshot */
};
-enum {
- DFblk,
- DFmnt,
- DFtree,
-};
-
struct Bfree {
- Bfree *next;
- int op;
- Mount *m;
- Tree *t;
- Blk *b;
- Bptr bp;
+ Limbo;
+ Bptr bp;
};
struct User {
@@ -516,6 +521,7 @@
QLock synclk;
Rendez syncrz;
+ QLock mountlk;
Mount *mounts;
Mount *snapmnt;
Lock connlk;
@@ -529,7 +535,7 @@
long nworker;
long epoch;
long lepoch[32];
- Bfree *limbo[3];
+ Limbo *limbo[3];
long nlimbo;
Syncq syncq[32];
@@ -563,6 +569,11 @@
usize ccount;
usize cmax;
+ /* preallocated deferred frees */
+ QLock bfreelk;
+ Rendez bfreerz;
+ Bfree *bfree;
+
RWLock flushq[Nflushtab];
int flushop[Nflushtab];
@@ -622,6 +633,7 @@
};
struct Mount {
+ Limbo;
Lock;
Mount *next;
long ref;
@@ -710,6 +722,7 @@
};
struct Blk {
+ Limbo;
/* cache entry */
Blk *cnext;
Blk *cprev;
--- a/sys/src/cmd/gefs/fns.h
+++ b/sys/src/cmd/gefs/fns.h
@@ -60,8 +60,9 @@
void epochend(int);
void epochwait(void);
void epochclean(void);
-void limbo(Bfree*);
-void freeblk(Tree*, Blk*, Bptr);
+void limbo(int op, Limbo*);
+void freeblk(Tree*, Blk*);
+void freebp(Tree*, Bptr);
int logbarrier(Arena *, vlong);
void dlappend(Dlist *dl, Bptr);
void killblk(Tree*, Bptr);
--- a/sys/src/cmd/gefs/fs.c
+++ b/sys/src/cmd/gefs/fs.c
@@ -623,6 +623,7 @@
return fs->snapmnt;
}
+ qlock(&fs->mountlk);
for(mnt = agetp(&fs->mounts); mnt != nil; mnt = mnt->next){
if(strcmp(name, mnt->name) == 0){
ainc(&mnt->ref);
@@ -633,6 +634,7 @@
if((mnt = mallocz(sizeof(*mnt), 1)) == nil)
error(Enomem);
if(waserror()){
+ qunlock(&fs->mountlk);
free(mnt);
nexterror();
}
@@ -648,6 +650,7 @@
poperror();
Out:
+ qunlock(&fs->mountlk);
return mnt;
}
@@ -655,21 +658,19 @@
clunkmount(Mount *mnt)
{
Mount *me, **p;
- Bfree *f;
if(mnt == nil)
return;
if(adec(&mnt->ref) == 0){
+ qlock(&fs->mountlk);
for(p = &fs->mounts; (me = *p) != nil; p = &me->next){
if(me == mnt)
break;
}
assert(me != nil);
- f = emalloc(sizeof(Bfree), 0);
- f->op = DFmnt;
- f->m = mnt;
*p = me->next;
- limbo(f);
+ limbo(DFmnt, me);
+ qunlock(&fs->mountlk);
}
}
@@ -2160,7 +2161,7 @@
if(waserror()){
if(!fs->rdonly)
for(j = 0; j < i; j++)
- freeblk(t, nil, bp[j]);
+ freebp(t, bp[j]);
nexterror();
}
n = writeb(f, &kv[i], &bp[i], p, o, c, f->dent->length);
@@ -2400,7 +2401,7 @@
}
}
if(rb.gen > pred)
- freeblk(nil, nil, rb);
+ freebp(nil, rb);
dropblk(b);
}
@@ -2429,7 +2430,7 @@
break;
bp = unpackbp(s.kv.v, s.kv.nv);
if(bp.gen > t->pred)
- freeblk(nil, nil, bp);
+ freebp(nil, bp);
qlock(&fs->mutlk);
epochclean();
qunlock(&fs->mutlk);
@@ -2503,7 +2504,7 @@
epochstart(id);
b = getblk(bp, 0);
nb = b->logp;
- freeblk(nil, b, b->bp);
+ freeblk(nil, b);
dropblk(b);
epochend(id);
epochclean();
--- a/sys/src/cmd/gefs/main.c
+++ b/sys/src/cmd/gefs/main.c
@@ -28,6 +28,7 @@
int nogroupid = 9999;
int admid = -1;
Blk *blkbuf;
+Bfree *bfbuf;
Errctx **errctx;
void
@@ -156,6 +157,7 @@
static void
initfs(vlong cachesz)
{
+ Bfree *f, *g;
Blk *b;
if((fs = mallocz(sizeof(Gefs), 1)) == nil)
@@ -167,6 +169,7 @@
}
fs->lrurz.l = &fs->lrulk;
fs->syncrz.l = &fs->synclk;
+ fs->bfreerz.l = &fs->bfreelk;
fs->noauth = noauth;
fs->cmax = cachesz/Blksz;
if(fs->cmax > (1<<30))
@@ -181,12 +184,23 @@
if((fs->dlcache = mallocz(fs->dlcmax*sizeof(Dlist*), 1)) == nil)
sysfatal("malloc: %r");
+ bfbuf = sbrk(fs->cmax * sizeof(Bfree));
+ if(bfbuf == (void*)-1)
+ sysfatal("sbrk: %r");
+
+ g = nil;
+ for(f = bfbuf; f != bfbuf+fs->cmax; f++){
+ f->bp = Zb;
+ f->next = g;
+ g = f;
+ }
+ fs->bfree = g;
+
blkbuf = sbrk(fs->cmax * sizeof(Blk));
if(blkbuf == (void*)-1)
sysfatal("sbrk: %r");
for(b = blkbuf; b != blkbuf+fs->cmax; b++){
- b->bp.addr = -1;
- b->bp.hash = -1;
+ b->bp = Zb;
b->magic = Magic;
lrutop(b);
}
--- a/sys/src/cmd/gefs/snap.c
+++ b/sys/src/cmd/gefs/snap.c
@@ -189,10 +189,11 @@
bp = b->logp;
qe.op = Qfree;
qe.bp = b->bp;
- qe.b = b;
+ qe.b = nil;
a = getarena(qe.bp.addr);
qput(a->sync, qe);
traceb("dlfreeb", qe.bp);
+ dropblk(b);
}
}
@@ -542,14 +543,9 @@
void
closesnap(Tree *t)
{
- Bfree *f;
-
if(t == nil || adec(&t->memref) != 0)
return;
- f = malloc(sizeof(Bfree));
- f->op = DFtree;
- f->t = t;
- limbo(f);
+ limbo(DFtree, t);
}
void
--- a/sys/src/cmd/gefs/tree.c
+++ b/sys/src/cmd/gefs/tree.c
@@ -29,7 +29,7 @@
#define efreeblk(t, b) do { \
if(b != nil) \
- freeblk(t, b, b->bp); \
+ freeblk(t, b); \
} while(0)
static void
@@ -517,7 +517,7 @@
|| m.op == Oinsert
|| m.op == Odelete){
bp = unpackbp(v.v, v.nv);
- freeblk(t, nil, bp);
+ freebp(t, bp);
}
ok = apply(&v, &m, buf, sizeof(buf));
goto Copyloop;
@@ -541,7 +541,7 @@
|| m.op == Oinsert
|| m.op == Odelete){
bp = unpackbp(v.v, v.nv);
- freeblk(t, nil, bp);
+ freebp(t, bp);
}
p->pullsz += msgsz(&m);
ok = apply(&v, &m, buf, sizeof(buf));
@@ -701,7 +701,7 @@
|| m.op == Oinsert
|| m.op == Odelete){
bp = unpackbp(v.v, v.nv);
- freeblk(t, nil, bp);
+ freebp(t, bp);
}
ok = apply(&v, &m, buf, sizeof(buf));
goto Copyloop;
@@ -725,7 +725,7 @@
|| m.op == Oinsert
|| m.op == Odelete){
bp = unpackbp(v.v, v.nv);
- freeblk(t, nil, bp);
+ freebp(t, bp);
}
p->pullsz += msgsz(&m);
ok = apply(&v, &m, buf, sizeof(buf));
@@ -1105,9 +1105,9 @@
for(p = path; p != path + npath; p++){
if(p->b != nil)
- freeblk(t, p->b, p->b->bp);
+ freeblk(t, p->b);
if(p->m != nil)
- freeblk(t, p->b, p->m->bp);
+ freeblk(t, p->b);
dropblk(p->b);
dropblk(p->nl);
dropblk(p->nr);
@@ -1211,7 +1211,7 @@
t->dirty = 1;
unlock(&t->lk);
- freeblk(t, b, b->bp);
+ freeblk(t, b);
dropblk(b);
dropblk(r);
}