code: mafs

ref: 7038b412045ad003366804789e7fecaf0552f77c
dir: /9p.c/

View raw version
#include "all.h"

/*
 * Doubly linked list of exclusive-use locks. Entries are appended by
 * fscreate()/fsopen() and unlinked by fsdestroyfid(); every access in
 * this file is made while holding the QLock tlock.
 */
Tlock	*tlockhead = nil, *tlocktail = nil;
QLock	tlock;
/* buffer fswalk1() copies the current error string into before returning it */
s8 err[ERRMAX];

extern u32 mpsrvpid;	/* assigned the srv() process pid in fsstart() */
extern u8 noauth;	/* non-zero: fsauth() refuses and fsattach() skips authattach() */
static u64 lastflushtime = 0;	/* nsec() timestamp recorded by fsflush() */
static RWLock flushlck;	/* write-locked (canwlock) by fsflush() around flushold() */

/*
 * Allocate the per-fid Aux and record where the fid currently points:
 *	dblkno	block number of the fid's dentry
 *	uid	id of the attached user
 *	pdblkno	block number of the parent's dentry
 *	pqpath	qid path of the parent
 *	preli	relative index of this entry within the parent
 * The control-message buffer starts out empty. Any other Aux fields are
 * left as emalloc9p() returned them.
 */
Aux*
newaux(u64 dblkno, u16 uid, u64 pdblkno, u64 pqpath, u64 preli)
{
	Aux *aux;

	aux = emalloc9p(sizeof *aux);

	/* no pending ctl message yet */
	aux->ctlmsg = nil;
	aux->nctlmsg = 0;

	/* location of the entry and of its parent */
	aux->uid = uid;
	aux->dblkno = dblkno;
	aux->preli = preli;
	aux->pdblkno = pdblkno;
	aux->pqpath = pqpath;
	return aux;
}

/*
 * Release an Aux made by newaux() together with its ctl message buffer.
 * A nil argument is ignored.
 */
void
freeaux(Aux *a)
{
	if(a != nil){
		free(a->ctlmsg);	/* free(nil) is a no-op */
		free(a);
	}
}

/*
 * Tauth handler: hand the request to auth9p() unless authentication
 * has been switched off, in which case the request is refused.
 */
static void
fsauth(Req *req)
{
	if(noauth == 0){
		auth9p(req);
		return;
	}
	respond(req, errstring[Eauthmsg]);
}

/*
 * Tattach handler: bind the fid to the root directory entry.
 * When authentication is enabled, authattach() must succeed first.
 * The user name in the request must map to an id > 0, otherwise the
 * attach fails with Enouser.
 */
static void
fsattach(Req *req)
{
	short uid;
	Iobuf *rootbuf;
	Fid *fid;

	if(noauth == 0 && authattach(req) < 0)
		return;

	if(waserror()){
		responderror(req);
		return;
	}
	uid = lookupid(req->ifcall.uname);
	if(uid <= 0)
		error(errstring[Enouser]);

	rootbuf = egetmetachk(Bdroot, Breadonly, Tdentry, Qproot);
	if(waserror()){
		/* give the root dentry back before propagating the error */
		putbuf(rootbuf, 0);
		nexterror();
	}

	fid = req->fid;
	fid->aux = newaux(Bdroot, uid, 0, 0, 0);
	fid->qid = (Qid){Qproot, rootbuf->d->version, QTDIR};
	poperror();
	putbuf(rootbuf, 0);
	req->ofcall.qid = fid->qid;
	poperror();
	respond(req, nil);
}

/*
 * walk1 callback handed to walkandclone(): move fid one path element,
 * to name, updating fid->qid and the location stored in fid->aux.
 *
 * Returns nil on success or an error string. Errors raised with
 * error() are copied into the file-level buffer err and that buffer is
 * returned.
 * NOTE(review): err is shared by every caller; with several worker
 * processes walking at once the returned text may be overwritten —
 * confirm whether that matters.
 *
 * ".." on the root stays on the root. Walking into a child needs exec
 * permission on the directory and, when the child is itself a
 * directory, on the child too.
 */
static char*
fswalk1(Fid *fid, char *name, void*)
{
	Aux *aux;
	Dentry *d, *chd, *p;
	Iobuf *dbuf, *cbuf, *pbuf;
	u64 chreli, blkno;
	s8 v;
	Qid qid;

	if(shuttingdown)
		return nil;
	if((fid->qid.type&QTDIR) == 0)
		return errstring[Enotdir];

	if(waserror()){
		rerrstr(err, ERRMAX);
		return err;
	}

	aux = fid->aux;
	if(strcmp(name, "..") == 0){
		if(fid->qid.path == Qproot){
			/*
			 * nothing to do, but the waserror() above must be
			 * balanced before leaving
			 */
			poperror();
			return nil;
		}
		if(chatty9p > 1)
			dprint("fswalk1 .. fid->qid.path %llud aux->dblkno %llud aux->pdblkno %llud\n",
					fid->qid.path, aux->dblkno, aux->pdblkno);
		pbuf = egetmetachk(aux->pdblkno, Breadonly, Tdentry, aux->pqpath);

		p = pbuf->d;
		qid = (Qid){p->path, p->version, (p->mode&DMDIR) ? QTDIR : QTFILE};
		aux->dblkno = pbuf->blkno; /* preli in aux will be wrong, issue? */
		aux->pdblkno = p->pdblkno;
		aux->pqpath = p->pqpath;
		aux->preli = p->preli;
		putbuf(pbuf, 0);
		fid->qid = qid;
		poperror();
		return nil;
	}

	/* assuming that it will ever be here only for directories */
	/* some directory, find the child with name or idx */
	dbuf = egetmetachk(aux->dblkno, Breadonly, Tdentry, fid->qid.path);
	if(waserror()){
		putbuf(dbuf, 0);
		nexterror();
	}
	d = dbuf->d;

	if(canaccess(aux->uid, d, DMEXEC) == 0)
		error(errstring[Eperm]);

	/* negative: lookup failed, zero: no such name */
	v = searchnames(d, name, &chreli);
	if(v < 0)
		error(errstring[Einvread]);
	if(v == 0)
		error(errstring[Enotfound]);

	if((blkno = rel2abs(d, chreli)) == 0)
		error(errstring[Ephase]);

	cbuf = egetmeta(blkno, Breadonly, Bused);
	if(waserror()){
		putbuf(cbuf, 0);
		nexterror();
	}
	chd = cbuf->d;
	checktag(cbuf, 1, Tdentry, chd->qpath);
	if((chd->mode&DMDIR) && canaccess(aux->uid, chd, DMEXEC) == 0)
		error(errstring[Eperm]);

	qid = (Qid){chd->path, chd->version, (chd->mode&DMDIR) ? QTDIR : QTFILE};
	aux->dblkno = blkno;
	aux->pdblkno = chd->pdblkno;/* or, aux->dblkno */
	aux->pqpath = chd->pqpath;	/* or, d->pqpath */
	aux->preli = chreli;		/* or, chd->preli */

	/* unwind in reverse order of acquisition */
	poperror();
	putbuf(cbuf, 0);
	poperror();
	putbuf(dbuf, 0);
	fid->qid = qid;
	poperror();
	return nil;
}

/*
 * clone callback handed to walkandclone(): give newfid its own copy of
 * the location held by oldfid. Returns nil on success, or "bad fid"
 * when oldfid carries no Aux. Does nothing once shutdown has begun.
 */
static char*
fsclone(Fid *oldfid, Fid *newfid, void*)
{
	Aux *src;

	if(shuttingdown)
		return nil;
	src = oldfid->aux;
	if(src != nil){
		newfid->aux = newaux(src->dblkno, src->uid, src->pdblkno,
					src->pqpath, src->preli);
		return nil;
	}
	return "bad fid";
}

/*
 * Twalk handler: walkandclone() drives the walk, calling fsclone() to
 * duplicate the fid and fswalk1() for each path element.
 */
static void
fswalk(Req *r)
{
	walkandclone(r, fswalk1, fsclone, nil);
}

/*
 * destroyfid callback: release everything attached to a fid.
 *  - auth fids are handed to authdestroy();
 *  - if the fid holds an exclusive-use lock, its Tlock entry is
 *    unlinked from the tlock list and freed;
 *  - the fid's dentry is fetched writable and, when it is a live
 *    dentry with pending append data, flushed ("allocate on close");
 *  - finally the Aux is freed.
 */
static void
fsdestroyfid(Fid *fid)
{
	Aux *aux;
	Tlock *t;
	Iobuf *dbuf;

	if((fid->qid.type & QTAUTH) != 0){
		authdestroy(fid);
		return;
	}
	aux = fid->aux;
	if(aux == nil)
		return;

	if(aux->tlocked == 1){
		qlock(&tlock);
		if(tlocktail == nil){
			qunlock(&tlock);
			panic("locked but tlock queue is empty\n");
			return;
		}
		for(t = tlockhead; t != nil; t = t->next){
			if(t->dblkno != aux->dblkno || t->qpath != fid->qid.path)
				continue;
			/* unlink t, fixing up the list ends where needed */
			if(t->prev != nil)
				t->prev->next = t->next;
			if(t->next != nil)
				t->next->prev = t->prev;
			if(tlocktail == t)
				tlocktail = t->prev;
			if(tlockhead == t)
				tlockhead = t->next;
			aux->tlocked = 0;
			free(t);
			break;
		}
		qunlock(&tlock);
	}

	/* allocate on close */
	dbuf = getmeta(aux->dblkno, Bwritable, Bused);
	if(dbuf != nil){
		if(dbuf->xiobuf == nil ||
			dbuf->xiobuf[0] != Tdentry ||
			dbuf->d->path == Qpnone ||
			dbuf->append == nil)
			putbuf(dbuf, 0);	/* nothing pending, just release */
		else
			flush(dbuf);
	}

	freeaux(aux);
}

/*
 * Fill the Dir *dir from dentry d.
 *	appendsize	bytes not yet counted in d->size, added to the length
 *	name, namelen	the entry's name (copied with strncpy, namelen bytes)
 *	buf		nil, or caller storage of at least
 *			namelen+1 + 3*(Userlen+1) bytes
 * With buf == nil the name is allocated with emalloc9p() and username()
 * is called with the (zeroed) Dir fields as destination. Otherwise the
 * name and the three user names are laid out back to back in buf.
 * Directories always report length 0. atime is the current time.
 */
static void
fsmkdir(Dentry *d, Dir *dir, char *buf, u64 appendsize, s8 *name, u16 namelen)
{
	char *ids;
	int isdir;

	isdir = (d->mode&DMDIR) != 0;

	memset(dir, 0, sizeof(*dir));
	dir->qid = (Qid){d->path, d->version, isdir ? QTDIR : QTFILE};
	dir->mode = (d->mode & 0777) | (dir->qid.type << 24);
	dir->atime = time(nil);
	dir->mtime = d->mtime/Nsec; /* ns to seconds */
	if(isdir)
		dir->length = 0;
	else
		dir->length = d->size+appendsize;

	if(buf != nil){
		/* layout: name '\0' | uid | gid | muid, each id Userlen+1 wide */
		ids = buf + namelen+1;
		memset(buf, 0,  (long)namelen+1 + 3 * (Userlen+1));
		strncpy(buf, name, (long)namelen);
		dir->name = buf;
		dir->uid = username(d->uid, ids);
		dir->gid = username(d->gid, ids + Userlen+1);
		dir->muid = username(d->muid, ids + 2 * (Userlen+1));
		return;
	}

	dir->name = emalloc9p((long)namelen+1);	/* TODO is this a leak? */
	strncpy(dir->name, name, (long)namelen);
	dir->uid = username(d->uid, dir->uid);
	dir->gid = username(d->gid, dir->gid);
	dir->muid = username(d->muid, dir->muid);
}

/*
 * Tstat handler: build req->d from the fid's dentry.
 *
 * The name is not stored in the dentry itself: for the root it is "/",
 * for everything else it is read from the parent directory with
 * readname(), which is why the parent dentry is fetched first.
 * A zeroed-out slot (path == Qpnone) is reported as Ephase.
 */
static void
fsstat(Req *req)
{
	Dentry *d, *pd;
	Iobuf *dbuf, *pdbuf;
	Aux *aux;
	s8 *name;
	u16 namelen;

	name = nil;
	pdbuf = dbuf = nil;
	pd = nil;
	if(waserror()){
		responderror(req);
		return;
	}
	aux = (Aux*)req->fid->aux;
	if(req->fid->qid.path != Qproot){
		pdbuf = egetmetachk(aux->pdblkno, Breadonly, Tdentry, aux->pqpath);
		if(waserror()){
			/*
			 * this handler owns the parent buffer; the child
			 * buffer is released by its own handler below
			 */
			putbuf(pdbuf, 0);
			nexterror();
		}
		pd = pdbuf->d;
	}

	dbuf = egetmetachk(aux->dblkno, Breadonly,
						Tdentry, req->fid->qid.path);
	if(waserror()){
		putbuf(dbuf, 0);
		nexterror();
	}
	d = dbuf->d;

	/* nothing to do for already zero'ed out slots */
	if(d->path == Qpnone){
		error(errstring[Ephase]);
/*	}else if(d->flags&Dsys){
		error(errstring[Esys]); */
	}else{
		if(req->fid->qid.path == Qproot){
			name = emalloc9p(2);
			name[0] = '/';
			name[1] = '\0';
			namelen = (u16)1;
		}else{
			readname(pd, d->preli, &name, &namelen);
		}
		/* buf == nil: fsmkdir allocates the strings placed in req->d */
		fsmkdir(d, &req->d, nil, dbuf->appendsize, name, namelen);
	}

	if(name)
		free(name);
	/* unwind in reverse order: child, then parent (if any), then outer */
	poperror();
	putbuf(dbuf, 0);
	if(pdbuf){
		poperror();
		putbuf(pdbuf, 0);
	}
	poperror();
	respond(req, nil);
}

/*
 * Tread handler.
 *
 * Auth fids and the ctl file are delegated to authread()/ctlread().
 * Directories: each call returns at most one directory entry. aux->dri
 * is the cursor into the directory's children; it starts at Nsys,
 * skips system entries and zeroed-out slots, and is reset to 0 once
 * the end is reached (a zero-length reply) or convD2M() returns 0.
 * Files: the data comes from readfile().
 */
static void
fsread(Req *req)
{
	s32 n;
	Iobuf *dbuf, *cbuf;
	Dentry *d, *ch;
	s8 *sbuf, *name;
	Dir dir;
	Aux *aux;
	u64 blkno;
	u16 namelen;

	if(waserror()){
		responderror(req);
		return;
	}

	if(shuttingdown)
		error(errstring[Eshutdown]);

	if(req->fid->qid.type == QTAUTH){
		poperror();
		authread(req);
		return;
	}else if(req->fid->qid.path == Qpctl){
		poperror();
		ctlread(req);
		return;
	}
	aux = (Aux*)req->fid->aux;
	if((req->fid->qid.type & QTDIR) != 0){
		/* using this loop to skip over zero'ed out blocks or system files */

		dbuf = egetmetachk(aux->dblkno, Breadonly, Tdentry, req->fid->qid.path);
		if(waserror()){
			putbuf(dbuf, 0);
			nexterror();
		}
		d = dbuf->d;

		if(canaccess(aux->uid, d, DMEXEC) == 0)
			error(errstring[Eaccess]);

		cbuf = nil;
		if(aux->dri < Nsys)
			aux->dri = Nsys;
		do{
			/* drop the candidate rejected by the previous iteration */
			if(cbuf){
				poperror();
				putbuf(cbuf, 0);
			}
			if((blkno = rel2abs(d, aux->dri++)) == 0){
				/* ran off the end of the directory */
				poperror();
				putbuf(dbuf, 0);
				req->ofcall.offset = req->ifcall.offset;
				req->ofcall.count = 0;
				aux->dri = 0;
				poperror();
				respond(req, nil);
				return;
			}

			cbuf = egetmeta(blkno, Breadonly, Bused);
			if(waserror()){
				putbuf(cbuf, 0);
				nexterror();
			}
			ch = cbuf->d;
			checktag(cbuf, 1, Tdentry, ch->path);

		}while(ch->flags&Fsys || ch->tag == Tdentry && ch->path == Qpnone);

		name = nil;
		readname(d, ch->preli, &name, &namelen);
		sbuf = emalloc9p(namelen + 1+ 3*(Userlen+1));
		fsmkdir(ch, &dir, sbuf, cbuf->appendsize, name, namelen);
		/*
		 * fsmkdir() copied the name into sbuf, so the string handed
		 * back by readname() is no longer needed; fsstat() frees it
		 * the same way
		 */
		free(name);
		req->ofcall.count = n = convD2M(&dir, (u8*)req->ofcall.data, req->ifcall.count);
		free(sbuf);
		req->ofcall.offset = req->ifcall.offset+n;
		if(n == 0)
			aux->dri = 0;
		if(cbuf){
			poperror();
			putbuf(cbuf, 0);
		}
		if(dbuf){
			poperror();
			putbuf(dbuf, 0);
		}
		poperror();
		respond(req, nil);
		return;
	}

	n = readfile(aux->dblkno, req->fid->qid.path,
				 req->ofcall.data, req->ifcall.count,
				 req->ifcall.offset);
	req->ofcall.count = n;
	req->ofcall.offset = req->ifcall.offset+n;
	poperror();
	respond(req, nil);
}

/*
 * Report whether s carries no text: returns 1 for a nil pointer or a
 * zero-length string, 0 otherwise.
 */
int
emptystr(char *s)
{
	return s == nil || *s == '\0';
}

/*
 * Twstat handler: apply the changes requested in req->d to the fid's
 * dentry. Handled here: rename, mtime, mode (the directory bit may not
 * change) and gid. Requests that set atime, length or uid are
 * rejected, as is any wstat on the ctl file, on a read-only file
 * system, during shutdown, or from the user None.
 *
 * The caller needs write access to the entry. The changes themselves
 * are applied only when the caller owns the entry or leads its group.
 * NOTE(review): a caller who passes canaccess() but is neither owner
 * nor group leader gets a success reply with nothing changed — confirm
 * whether an error is intended.
 */
static void
fswstat(Req *req)
{
	Dentry *d, *pd;
	Iobuf *dbuf, *pdbuf;
	s16 gid;
	Aux *aux;
	u8 namechange;

	if(waserror()){
		responderror(req);
		return;
	}

	if(shuttingdown)
		error(errstring[Eshutdown]);
	if(readonly)
		error(errstring[Eronly]);
	if(req->fid->qid.path == Qpctl)
		error(errstring[Einval]);
	/* NOTE(review): this tests d.type against QTDIR, not d.mode/d.qid.type — confirm */
	if((req->d.type & QTDIR) > 0 && req->d.length != ~0)
		error(errstring[Einval]);

	aux = (Aux*)req->fid->aux;
	if(aux->uid == None)
		error(errstring[Eperm]);

	/* TODO allow changing the length as per stat(5) */
	if(req->d.atime != ~0 ||  req->d.length  != ~0)
		error(errstring[Eperm]);

	/* stat(5) says that this is illegal */
	if(emptystr(req->d.uid) == 0)
		error(errstring[Einval]);

	/* TODO cwfs/9p2.c, hjfs/fs2.c and kfs64.b do more here. Get that stuff in. */
	/* the parent dentry is fetched only for a rename; updatename() is given it below */
	pdbuf = nil; pd = nil;
	if(emptystr(req->d.name) == 0){
		namechange = 1;
		pdbuf = egetmetachk(aux->pdblkno, Bwritable,
							Tdentry, aux->pqpath);
		if(waserror()){
			putbuf(pdbuf, 0);
			nexterror();
		}
		pd = pdbuf->d;
	}else
		namechange = 0;

	dbuf = egetmetachk(aux->dblkno, Bwritable,
						Tdentry, req->fid->qid.path);
	if(waserror()){
		putbuf(dbuf, 0);
		nexterror();
	}
	d = dbuf->d;
	if(canaccess(aux->uid, d, DMWRITE) == 0)
		error(errstring[Eaccess]);

	/* invalid to change the directory bit stat(5) */
	if(req->d.mode != ~0 &&
			(req->d.mode&DMDIR) != (d->mode&DMDIR))
		error(errstring[Einval]);
	if(d->uid == aux->uid ||
		leadgroup(aux->uid, d->gid) == 1){
		if(namechange){
			d->muid = aux->uid;
			updatename(pd, aux->uid, aux->preli, req->d.name); /* TODO check status */
			d->mtime = nsec();
		}
		/* an explicit mtime (in seconds) overrides the one set by the rename above */
		if(req->d.mtime != ~0){
			d->mtime = req->d.mtime*Nsec;
			d->muid = aux->uid;
		}
		if(req->d.mode != ~0 &&
			(req->d.mode&DMDIR) == (d->mode&DMDIR) &&
			req->d.mode != d->mode)
			d->mode = req->d.mode;

		/* TODO gid setting needs more fine tuning to align with stat(5) */
		/* NOTE(review): fsattach() treats lookupid() <= 0 as "no such user"; here only 0 is rejected — confirm */
		if(emptystr(req->d.gid) == 0 &&
			(gid = lookupid(req->d.gid)) != 0){
			d->gid = gid;
		}
	}

	/* unwind in reverse order: the entry first, then the parent (if fetched) */
	poperror();
	putbuf(dbuf, 1);
	if(pdbuf){
		poperror();
		putbuf(pdbuf, 0);	/* no changes to the parent directory entry per-se */
	}
	poperror();
	respond(req, nil);
	return;	
}

/*
 * Twrite handler.
 * Auth fids and the ctl file are delegated to authwrite()/ctlwrite();
 * paths below Qproot and directories are refused. Everything else goes
 * through writefile(), whose negative results are mapped to errors:
 * -3 to Efull, -1 and -2 to Ephase.
 * After a successful reply, when no srv() process pid is recorded
 * (mpsrvpid == 0) and the last flush is older than Nrefresh, the write
 * also triggers fsflush().
 */
static void
fswrite(Req *req)
{
	s32 nwritten;
	Aux *aux;
	Fid *fid;

	if(waserror()){
		responderror(req);
		return;
	}

	if(shuttingdown)
		error(errstring[Eshutdown]);
	if(readonly)
		error(errstring[Eronly]);

	fid = req->fid;
	if(fid->qid.type == QTAUTH){
		poperror();
		authwrite(req);
		return;
	}
	if(fid->qid.path == Qpctl){
		poperror();
		ctlwrite(req);
		return;
	}
	if(fid->qid.path < Qproot)
		error(errstring[Eaccess]);
	if((fid->qid.type & QTDIR) != 0)
		error(errstring[Einval]);

	aux = (Aux*)fid->aux;
	nwritten = writefile(aux->dblkno, fid->qid.path,
					aux->uid,
					req->ifcall.data, req->ifcall.count,
					req->ifcall.offset);
	/* error() does not return: control resumes in the handler above */
	if(nwritten == -3)
		error(errstring[Efull]);
	if(nwritten == -1 || nwritten == -2)
		error(errstring[Ephase]);

	req->ofcall.count = nwritten;
	req->ofcall.offset = req->ifcall.offset+nwritten;
	poperror();
	respond(req, nil);

	if(shuttingdown == 0 && mpsrvpid == 0 &&
		lastflushtime < nsec()-Nrefresh)
		fsflush();
}

/*
 * Tremove handler: drop the entry's name from its parent with rmname()
 * and then remove the entry itself with rmdirectory() or rmfile().
 * Refused with Eperm for the user None, for paths below Qproot and for
 * fids without a usable Aux.
 * NOTE(review): no canaccess() check on the parent is made here (as
 * fscreate() does) — presumably rmname() enforces it; verify.
 */
static void
fsremove(Req *req)
{
	Fid *fid;
	Aux *aux;
	Iobuf *parent;

	if(waserror()){
		responderror(req);
		return;
	}

	if(shuttingdown)
		error(errstring[Eshutdown]);
	if(readonly)
		error(errstring[Eronly]);

	fid = req->fid;
	aux = fid->aux;
	if(aux == nil || aux->uid == None ||
		fid->qid.path < Qproot || aux->dblkno == 0)
		error(errstring[Eperm]);

	parent = egetmetachk(aux->pdblkno, Bwritable, Tdentry, aux->pqpath);
	if(waserror()){
		putbuf(parent, 0);
		nexterror();
	}
	rmname(parent->d, aux->uid, aux->preli);

	if((fid->qid.type & QTDIR) == 0)
		rmfile(fid->qid.path, aux->dblkno);
	else
		rmdirectory(fid->qid.path, aux->dblkno);

	poperror();
	putbuf(parent, 0);
	poperror();
	respond(req, nil);
}

/*
 * Check whether permission bits perm allow uid to open a file owned by
 * fileuid/filegid with open mode omode (only its low two bits are
 * used). Returns 1 when every bit required by the mode is granted,
 * 0 otherwise; it does not raise an error itself.
 *
 * The bits that apply to uid are shifted into the owner position and
 * compared with the mask for the mode: 0400 read, 0200 write,
 * 0600 read/write, 0100 exec.
 * NOTE(review): the group bits are selected when ingroup() returns 0 —
 * confirm that 0 is ingroup()'s "is a member" result.
 */
int
permcheck(u16 fileuid, u16 filegid, u16 uid, ulong perm, int omode)
{
	static int access[] = { 0400, 0200, 0600, 0100 };
	ulong need;

	if(uid != fileuid){
		if(ingroup(uid, filegid, 0) == 0)
			perm <<= 3;
		else
			perm <<= 6;
	}
	/* owner: perm is used unshifted */

	need = access[omode&3];
	return (need&perm) == need;
}

/*
 * Tcreate handler: read the Req.ifcall's perm, name and mode
 * and build the Fid.omode.
 *
 * The fid must be a directory the caller can write to. Two cases:
 *  - the name already exists: the existing entry is opened and, for
 *    regular files with path >= Qpusers, truncated;
 *  - otherwise a new dentry is set up, either in a fresh block or in a
 *    zeroed-out slot found at the relative index searchnames()
 *    reported, and the name is added to the parent. A new directory
 *    also gets three extra dentries from allocdentries(), handed to
 *    newnames().
 * On success the fid is moved onto the child. With DMEXCL set in perm
 * the new file is created exclusive-use locked.
 */
static void
fscreate(Req *req)
{
	Iobuf *dbuf, *cbuf, *iobufs[3];
	u64 reli, blkno;
	Dentry *dchild, *dparent;
	u64 qpath;	/* zero'ed dentry that can be reused */
	Aux *aux;
	u32 perm;
	Fid *fid;
	Tlock *t;
	Qid qid;
	s8 found;

	if(waserror()){
		responderror(req);
		return;
	}

	if(shuttingdown)
		error(errstring[Eshutdown]);

	fid = req->fid;
	aux = fid->aux;
	/* aux is validated just below, so do not dereference a nil one here */
	if(chatty9p >2 && aux != nil)
		dprint("fscreate aux 0x%p aux->uid %d fid->qid.path %llud aux->dblkno %llud\n",
				aux, aux->uid, fid->qid.path, aux->dblkno);
	if(aux == nil ||
		aux->uid == None ||
		fid->qid.path < Qproot ||
		aux->dblkno == 0)
		error(errstring[Eaccess]);

	if((fid->qid.type & QTDIR) == 0)
		error(errstring[Enotdir]);

	if(req->ifcall.name == nil ||
		strlen(req->ifcall.name) == 0 ||
		req->ifcall.name[0] == '/' ||
		checkname9p2(req->ifcall.name) == 0)
		error(errstring[Ebadname]);

	if(readonly)
		error(errstring[Eronly]);

	perm = req->ifcall.perm;
	dbuf = egetmetachk(aux->dblkno, Bwritable, Tdentry, fid->qid.path);
	if(waserror()){
		putbuf(dbuf, 0);
		nexterror();
	}
	dparent = dbuf->d;
	if(canaccess(aux->uid, dparent, DMWRITE) == 0)
		error(errstring[Eperm]);

	fid->omode = OREAD;
	switch(req->ifcall.mode & 7) {
		case OREAD:
		case OEXEC:
			fid->omode = OREAD;
			break;
		case OWRITE:
			fid->omode = OWRITE;
			break;
		case ORDWR:	
			fid->omode = OREAD+OWRITE;
			break;
		default:
			error(errstring[Emode]);
	}

	/* as in fswalk1(): negative means the lookup itself failed */
	found = searchnames(dparent, req->ifcall.name, &reli);
	if(found < 0)
		error(errstring[Einvread]);
	if(found){
		/* found with the same name */
		blkno = rel2abs(dparent, reli);	
		if(blkno == 0)
			error(errstring[Ephase]);

		cbuf = egetmeta(blkno, Breadonly, Bused);
		if(waserror()){
			putbuf(cbuf, 0);
			nexterror();
		}
		dchild = cbuf->d;

		if(dchild->qpath == Qpnone || dchild->tag != Tdentry)
			error(errstring[Ephase]);

		if(canaccess(aux->uid, dchild, DMWRITE) == 0)
			error(errstring[Eaccess]);

		/* NOTE(review): this compares the open mode, not perm, against DMDIR — confirm */
		if((req->ifcall.mode&DMDIR) != (dchild->mode&DMDIR)){
			if(dchild->mode&DMDIR)
				error(errstring[Edexist]);
			else
				error(errstring[Eexist]);
		}

		/* req->ifcall.name matched, truncate file and use it */
		if(permcheck(dchild->uid, dchild->gid, aux->uid, perm, ORDWR) == 0)
			error(errstring[Eperm]);
		dchild->muid = aux->uid;
		fid->qid = (Qid){dchild->qpath, dchild->version,
							(req->ifcall.mode&DMDIR) ? QTDIR : QTFILE};
		aux->dblkno = blkno;
		aux->pdblkno = dchild->pdblkno;
		aux->pqpath = dchild->pqpath;
		aux->preli = reli;
		aux->dri = 0;
		req->ofcall.qid = fid->qid;
		req->ofcall.iounit = Iounit;
		/*
			save Iobuf of the child so truncate can open
			it with a wlock(), if needed
		 */
		poperror();
		putbuf(cbuf, 0);
		if(req->ofcall.qid.path >= Qpusers &&
			req->ofcall.qid.type == QTFILE)
			truncatefile(req->ofcall.qid.path, blkno, aux->uid);
		poperror();
		putbuf(dbuf, 0);
		poperror();
		respond(req, nil);
		return;
	}

	/* add a new one at reli */
	/* nothing found, create */
	/* TODO if(reli >= Maxreli)
			error(errstring[Etoobig]); */

	if(perm&DMDIR){
		if((req->ifcall.mode & OTRUNC) ||
			(perm & DMAPPEND) ||
			(fid->omode & OWRITE))
			error(errstring[Einval]);
	}

	/* blkno == 0: no block at reli yet; otherwise reuse the zeroed slot */
	blkno = rel2abs(dparent, reli);	
	qpath = newqpaths((perm&DMDIR)?4:1);
	if(blkno == 0){
		cbuf = allocmeta(Tdentry, qpath);
		if(waserror()){
			freeblockbuf(cbuf);
			nexterror();
		}
	}else{
		cbuf = egetmetachk(blkno, Bwritable, Tdentry, Qpnone);
		if(waserror()){
			putbuf(cbuf, 0);
			nexterror();
		}
	}
	cbuf->d->qpath = cbuf->d->path = qpath;

	dchild = cbuf->d;
	dchild->size = 0;
	dchild->pdblkno = dbuf->blkno;
	dchild->pqpath = dparent->path;
	dchild->preli = reli;
	dchild->mtime = nsec();
	dchild->uid = dchild->muid = aux->uid;
	dchild->gid = dparent->gid;
	dchild->version = 0;

	/* TODO DMAPPEND attributes? */
	if(perm&DMAPPEND)
		dchild->mode |= DMAPPEND;
	if(perm&DMEXCL)
		dchild->mode |= DMEXCL;
	qid = (Qid){dchild->path, 0, (perm&DMDIR) ? QTDIR : QTFILE};
	aux->dblkno = cbuf->blkno;
	aux->pdblkno = dchild->pdblkno;
	aux->pqpath = dchild->pqpath;
	aux->preli = reli;
	aux->dri = 0;
	if(perm&DMEXCL){
		/* register the exclusive-use lock at the tail of the list */
		t = emalloc9p(sizeof(Tlock));
		t->time = nsec();
		t->qpath = dchild->path;
		t->dblkno = aux->dblkno;
		qlock(&tlock);
		if(tlocktail == nil){
			tlocktail = tlockhead = t;
		}else{
			tlocktail->next = t;
			t->prev = tlocktail;
			tlocktail = t;
		}
		qunlock(&tlock);
		aux->tlocked = 1;
	}else
		aux->tlocked = 0;
	if(perm&DMDIR){
		if(allocdentries(3, iobufs, qpath+1) < 3)
			error(errstring[Efull]);
		if(waserror()){
			truncatefile(iobufs[0]->d->qpath, iobufs[0]->blkno, aux->uid);
			truncatefile(iobufs[1]->d->qpath, iobufs[1]->blkno, aux->uid);
			truncatefile(iobufs[2]->d->qpath, iobufs[2]->blkno, aux->uid);
			nexterror();
		}
		dchild->mode = DMDIR | (perm & (~0777 | (dparent->mode & 0777)));
		newnames(dchild, aux->uid, cbuf->blkno, iobufs);
	}else
		dchild->mode = perm & (~0666 | (dparent->mode & 0666));
	if(blkno == 0){
		blkno = cbuf->blkno; USED(blkno);
		/* only add it to the directory dentry if we are adding a new dentry block
			if we are reusing a zero'ed out slot, it already exists in the
			directory dentry */
		addrelative(dparent, dbuf->blkno, reli, cbuf->blkno);
		if(addname(dparent, aux->uid, reli, req->ifcall.name) == 0)
			error(errstring[Ephase]);
	}else if(updatename(dparent, aux->uid, reli, req->ifcall.name) == 0)
		error(errstring[Ephase]);

	fid->qid = qid;
	req->ofcall.qid = qid;
	req->ofcall.iounit = Iounit;
	/* the directory case pushed one extra handler above */
	if(perm&DMDIR)
		poperror();
	poperror();
	putbuf(cbuf, 1);	/* save Iobuf of the content */
	poperror();
	putbuf(dbuf, 1);
	poperror();
	respond(req, nil);
	return;
}

/*
 * Topen handler: read the Req.ifcall.mode and build the Fid.omode based
 * on the dentry.
 *
 * Checks made: no modifying mode on a read-only file system; OTRUNC
 * and ORCLOSE need write access; directories may only be opened
 * without ORCLOSE/OTRUNC/OWRITE/ORDWR and need exec access.
 * OTRUNC is ignored on append-only files. An exclusive-use (DMEXCL)
 * file can have one opener at a time: the open fails with
 * "DMEXCL locked" while a Tlock entry exists for it, otherwise one is
 * added. With OTRUNC the file is truncated after the dentry buffer has
 * been released.
 */
static void
fsopen(Req *req)
{
	u8 mode, omode;
	Fid *fid;
	Iobuf *dbuf;
	Aux *aux;
	Dentry *d;
	Tlock *t;

	mode = req->ifcall.mode;
	fid = req->fid;
	aux = req->fid->aux;
	omode = 0;
	if(chatty9p > 1)
		dprint("fsopen fid->qid.path %llud mode %d readonly %d\n",
				 fid->qid.path, mode, readonly);

	if(waserror()){
		responderror(req);
		return;
	}
	if(readonly && (mode & (ORCLOSE | OTRUNC | OWRITE | ORDWR)) != 0)
		error(errstring[Einval]);

	dbuf = egetmetachk(aux->dblkno, Breadonly, Tdentry, fid->qid.path);
	if(waserror()){
		putbuf(dbuf, 0);
		nexterror();
	}
	d = dbuf->d;

	if((mode & OTRUNC) != 0 &&
		canaccess(aux->uid, d, DMWRITE) == 0)
		error(errstring[Eperm]);
	if((mode & ORCLOSE) != 0)
		if(canaccess(aux->uid, d, DMWRITE) == 0)
		error(errstring[Eperm]);
	if((d->mode & DMAPPEND) != 0)
		mode &= ~OTRUNC;
	if((d->mode & DMDIR) != 0){
		if((mode & (ORCLOSE | OTRUNC | OWRITE | ORDWR)) != 0)
			error(errstring[Einval]);
		if(canaccess(aux->uid, d, DMEXEC) == 0)
			error(errstring[Eperm]);
	}
	if((d->mode & DMEXCL) != 0){
		qlock(&tlock);
		if(tlocktail != nil){
			for(t = tlockhead; t != nil; t = t->next){
				if(t->dblkno == ((Aux*)fid->aux)->dblkno &&
					t->qpath == fid->qid.path){
					qunlock(&tlock);
					/*
					 * fail through the error stack so that
					 * dbuf is released and both handlers
					 * are unwound before responding
					 */
					error("DMEXCL locked");
				}
			}
		}
		t = emalloc9p(sizeof(Tlock));
		t->time = nsec();
		t->qpath = fid->qid.path;
		t->dblkno = aux->dblkno;
		if(tlocktail == nil){
			tlocktail = tlockhead = t;
		}else{
			tlocktail->next = t;
			t->prev = tlocktail;
			tlocktail = t;
		}
		qunlock(&tlock);
		aux->tlocked = 1;
	}
	switch(mode & OEXEC){
	case ORDWR:
		omode |= ORDWR;
		/* falls through: OWRITE is set for ORDWR as well (presumably intentional, cf. OREAD+OWRITE in fscreate) */
	case OWRITE:
		omode |= OWRITE;
		break;
	case OEXEC:
	case OREAD:
		omode |= OREAD;
		break;
	}

	if((mode & ORCLOSE) != 0)
		omode |= ORCLOSE;

	fid->qid = (Qid){d->path, d->version, (d->mode&DMDIR) ? QTDIR : QTFILE};
	fid->omode = omode;
	req->ofcall.iounit = Iounit;
	req->ofcall.qid = fid->qid;

	poperror();
	putbuf(dbuf, 0);
	if((mode & OTRUNC) == 0){
		poperror();
		respond(req, nil);
		return;
	}
	/* truncate file */
	truncatefile(fid->qid.path, aux->dblkno, aux->uid);
	poperror();
	respond(req, nil);
	return;
}

/* below is from nemo's Pg 252 */
typedef	struct	Buffer	Buffer;
typedef	struct	Work	Work;

/*
 * One queued unit of work: a request and the handler to run on it.
 * A Work with f == nil is what get() returns during shutdown; work()
 * stops its loop on it.
 */
struct Work
{
	void (*f)(Req *r);	/* handler, e.g. fsread */
	Req *r;			/* request passed to f */
};

/*
 * Bounded ring of Work shared between put() (producer) and get()
 * (consumers). All fields are accessed with lck held; both Rendez are
 * tied to lck by initworks().
 */
struct Buffer
{
	QLock lck;
	Work works[Nworks];
	u16 hd, tl, nworks;	/* next slot to take, next slot to fill, slots in use */
	Rendez isfull;	/* throttling */
	Rendez isempty; /* workers do not have to keep polling to find work */
};

/* the single work queue used by the multi process server */
Buffer buf;

/*
 * Take the next Work off the ring, sleeping on isempty while there is
 * none. Returns a Work with f == nil when shutdown has started before
 * the call or while waiting.
 *
 * The emptiness test is re-evaluated after every wakeup: another
 * consumer may have taken the item that caused the wakeup, and
 * dequeuing from an empty ring would hand back a stale slot and wrap
 * nworks below zero.
 */
Work
get(Buffer *b)
{
	Work w;

	if(shuttingdown)
		return (Work){nil,nil};
	qlock(&b->lck);
	while(b->nworks == 0){
		rsleep(&b->isempty);
		if(shuttingdown){
			qunlock(&b->lck);
			return (Work){nil,nil};
		}
	}
	w = b->works[b->hd];
	b->hd = (b->hd+1) % Nworks;
	b->nworks--;
	/* the ring was full until now: let one blocked producer continue */
	if(b->nworks == Nworks-1)
		rwakeup(&b->isfull);
	qunlock(&b->lck);
	return w;
}

static void stats(void);
/*
 * Queue request r to be run by handler f on one of the workers,
 * sleeping on isfull while the ring has no free slot.
 *
 * Once shutdown has started the request is answered with Eshutdown
 * and is NOT queued: respond() has completed it, so it must not be
 * handed to a worker afterwards. Sleeping workers are woken so they
 * can notice the shutdown.
 */
void
put(Buffer *b, void (*f)(Req *r), Req *r)
{
	Work  w;

	if(shuttingdown){
		respond(r, errstring[Eshutdown]);
		qlock(&b->lck);
		rwakeupall(&b->isempty);
		qunlock(&b->lck);
		return;
	}
	w.f = f;
	w.r = r;
	qlock(&b->lck);
	/* re-check after every wakeup: another producer may have filled the slot */
	while(b->nworks == Nworks){
		rsleep(&b->isfull);
		if(shuttingdown){
			qunlock(&b->lck);
			respond(r, errstring[Eshutdown]);
			return;
		}
	}
	b->works[b->tl] = w;
	b->tl = (b->tl+1) % Nworks;
	b->nworks++;
	/* the ring was empty until now: wake one idle worker */
	if(b->nworks == 1)
		rwakeup(&b->isempty);
	qunlock(&b->lck);
	if(chatty9p > 1)
		stats();
}

/*
 * Per-worker bookkeeping, indexed by the id given to work():
 *	pid	the worker's process id, 0 when the slot has no running worker
 *	w	the Work currently being run, {nil, nil} while idle
 */
struct
{
	u32 pid;
	Work w;
} worker[Nworkers];	/* keeps track of running procs to flush */

/*
 * Body of a worker process: register in worker[id], then keep taking
 * Work from b and running it until get() hands back a nil handler.
 * The slot's pid is cleared on the way out.
 */
void
work(Buffer *b, int id)
{
	Work job;

	worker[id].pid = getpid();
	for(job = get(b); job.f != nil; job = get(b)){
		worker[id].w = job;		/* mark busy */
		job.f(job.r);
		worker[id].w = (Work){nil, nil};	/* idle again */
	}
	worker[id].pid = 0;
}

/*
 * Reset a Buffer to the empty state: everything, including the lock,
 * is zeroed and both Rendez are attached to the Buffer's own QLock.
 */
void
initworks(Buffer *b)
{
	memset(b, 0, sizeof *b);
	b->isfull.l = &b->lck;
	b->isempty.l = &b->lck;
}

/*
 * Interrupt every idle worker other than the calling process and clear
 * its pid. Workers that are in the middle of a request are left alone.
 * Returns the number of workers still busy.
 */
int
stopworkers(void)
{
	int i, busy;

	busy = 0;
	for(i = 0; i < Nworkers; i++){
		if(worker[i].w.f != nil){
			busy++;
			continue;
		}
		if(worker[i].pid > 0 && worker[i].pid != getpid()){
			// rwakeup(&buf.isempty); TODO why can't I get this to work?
			postnote(PNPROC, worker[i].pid, "interrupt");
			worker[i].pid = 0;
		}
	}
	return busy;
}

/*
 * Debug aid: print how many worker slots are unused (pid == 0), idle
 * and busy, together with the state of the work ring.
 */
static void
stats(void)
{
	int i, unused, idle, busy;

	unused = idle = busy = 0;
	for(i = 0; i < Nworkers; i++){
		if(worker[i].pid == 0)
			unused++;
		else if(worker[i].w.f != nil)
			busy++;
		else
			idle++;
	}
	dprint("Nworkers %d inv %d idle %d working %d buf nworks %d hd %d tl %d\n",
			Nworkers, unused, idle, busy, buf.nworks, buf.hd, buf.tl);
}

/*
	there are 2 ways to shutdown:
	1. by unmounting and removing the /srv/mfs_service file (can be rm and unmount too)
	2. by writing halt to /a/ctl file. unmount() the file system to keep it clean.

	In the first instance, the srv() process is driving the shutdown. It calls fsend().
		rm /srv/mfs_service file does not wait for fsend() to finish. Hence, there
		is no way to ensure that any pending writes have been flushed to the disk.
		If the system is shutdown or restarted immediately, there is a high possibility
		that the filesystem will be in an inconsistent state. This is more probable
		when disk/mafs (asynchronous writes) is used.
	In the second instance, fsend() is called by the worker process. It does not return
		until all the pending writes have been flushed to the disk. It also removes the
		/srv/mfs_service file and also stops the srv() process. Hence, this is the
		preferred approach to shutting down the file system.

	There is no way to unmount() automatically on shutdown. The mount() and unmount()
		calls are client driven and it is not the responsibility of the server to find
		all the clients that mounted it. Just shutdown and let the respective clients
		deal with their mess.
 */
/* TODO need to track open fids and close them */
/*
 * Bring the file system down: set shuttingdown, keep calling sync()
 * (once a second) until it reports no locked blocks, call fsok(1),
 * wake any workers sleeping on the empty queue, save the free list
 * with savefrees() and close the device.
 */
void
shutdown(void)
{
	u64 n;
/*	User *u, *v; */

	if(chatty9p > 1)
		dprint("shutdown\n");
	/* from here on the handlers refuse new work */
	shuttingdown = 1;
	while((n=sync()) > 0){
		// if(chatty9p > 1)
		dprint("shutdown: sync() finds %llud locked blocks\n", n);
		sleep(1000);
	}
	fsok(1);
//	showextents(&frees);
//	dprint("&buf.isempty %#p\n", &buf.isempty);
	/* multi process server: wake the workers so get() can return its nil Work */
	if(mpsrvpid){
		qlock(&buf.lck);
		rwakeupall(&buf.isempty);
		qunlock(&buf.lck);
	}
/*	if(synchronouswrites == 0){
		while((n=pendingwrites())>0){
			if(chatty9p > 1)
			dprint("shutdown: pendingwrites %llud of write queue\n", n);
			sleep(1000);
		}
	}
*/
	savefrees();
/*	if(synchronouswrites == 0){
		while((n=pendingwrites())>0){
			if(chatty9p > 1)
			dprint("shutdown: pendingwrites %llud of frees\n", n);
			sleep(1000);
		}
		stopwriter();
	}
*/
	/* free users, why bother? leave it alone */
/*	u = t->newrs;
	while(u != nil){
		v = u->next;
		free(u);
		u = v;
	}
	t->newrs = nil;*/

	if(chatty9p > 1)
		dprint("shutdown: exiting\n");
	/* chkqunlock(&superlock); exit while holding the lock */

	close(devfd);
}

void
fsflush(void)
{
	u64 now;

	/* this loop is to avoid balloc() calling it immediately
		again and again */
	while(lastflushtime > (now=nsec())-Nrefresh)
		sleep((lastflushtime-(now-Nrefresh))/Nsec);
	if(canwlock(&flushlck)){
		flushold();
		wunlock(&flushlck);
	}
	lastflushtime = now;
}

/*
 * Fork a helper process (sharing memory with the parent) and return in
 * the parent. The child names itself and then either
 *  - syncer != 0: calls fsflush() every Nrefresh until shutdown, or
 *  - syncer == 0: serves requests from b as worker number id,
 * and exits when that is over.
 */
void
startproc(Buffer *b, int id, u8 syncer)
{
	char name[128];
	int pid;
	// Errenv env = {0};

	pid = rfork(RFPROC|RFMEM|RFFDG);
	if(pid == -1)
		panic("can't fork");
	else if(pid != 0)
		return;		/* parent */

	/* child from here on */
	// *envpp = &env;
	if(chatty9p > 1)
		dprint("child %d pid: %d\n", id, getpid());

	snprint(name, sizeof name,
		syncer ? "%s flusher %d" : "%s worker %d", service, id);
	procsetname(name);

	if(syncer == 0)
		work(b, id);
	else
		while(shuttingdown == 0){
			fsflush();
			sleep(Nrefresh*Msec/Nsec);
		}

	if(chatty9p)
		dprint("%s process exited\n", name);
	exits(nil);
}

/*
 * Srv.start hook of the multi process server: remember the srv()
 * process pid, reset the work queue, then start Nworkers worker
 * processes followed by one flusher process (id Nworkers).
 */
void
fsstart(Srv *)
{
	int id;

	mpsrvpid = getpid();
	initworks(&buf);
	if(chatty9p > 1)
		dprint("srv() parent process pid: %d\n", mpsrvpid);

	id = 0;
	while(id < Nworkers){
		startproc(&buf, id, 0);
		id++;
	}
	startproc(&buf, id, 1);
}

/*
 * Srv.end hook: run shutdown() unless a shutdown is already under way.
 * Read the comment above shutdown() to understand.
 */
void
fsend(Srv *)
{
	if(shuttingdown != 0)
		return;
	shutdown();
	/* no need to bother removing /srv/service
		as we do for cmdhalt in ctlwrite() of ctl.c
		as the srv() takes care of that cleanup */
}

/* multi process Topen: queue the request for a worker to run fsopen() */
void
mfsopen(Req *r)
{
	put(&buf, fsopen, r);
}

/* multi process Tcreate: queue the request for a worker to run fscreate() */
void
mfscreate(Req *r)
{
	put(&buf, fscreate, r);
}

/*
 * multi process Tread: reads on auth fids are served right here, all
 * others are queued for a worker to run fsread().
 */
void
mfsread(Req *r)
{
	/*
		auth should run on the srv process and not the worker.
		It returns an auth botch error when the worker does it.
	 */
	if(r->fid->qid.type != QTAUTH){
		put(&buf, fsread, r);
		return;
	}
	authread(r);
}

/*
 * multi process Twrite: writes on auth fids are served right here, all
 * others are queued for a worker to run fswrite().
 */
void
mfswrite(Req *r)
{
	/*
		auth should run on the srv process and not the worker.
		It returns an auth botch error when the worker does it.
	 */
	if(r->fid->qid.type != QTAUTH){
		put(&buf, fswrite, r);
		return;
	}
	authwrite(r);
}

/* multi process Tstat: queue the request for a worker to run fsstat() */
void
mfsstat(Req *r)
{
	put(&buf, fsstat, r);
}

/* multi process Twstat: queue the request for a worker to run fswstat() */
void
mfswstat(Req *r)
{
	put(&buf, fswstat, r);
}

/* multi process Tremove: queue the request for a worker to run fsremove() */
void
mfsremove(Req *r)
{
	put(&buf, fsremove, r);
}

/* multi process Twalk: queue the request for a worker to run fswalk() */
void
mfswalk(Req *r)
{
	put(&buf, fswalk, r);
}

/*
 * multi process server: auth, attach and destroyfid use the fs*
 * handlers directly; every other request goes through an mfs* wrapper
 * that queues it for a worker process. The workers are started by the
 * start hook.
 */
Srv mpsrv = {
	.auth = fsauth,
	.attach = fsattach,
	.destroyfid = fsdestroyfid,
	.msize = Iounit,

	.open = mfsopen,
	.create = mfscreate,
	.read = mfsread,
	.write = mfswrite,
	.stat = mfsstat,
	.wstat = mfswstat,
	.walk = mfswalk,
	.remove = mfsremove,

	.start = fsstart,
	.end = fsend,
};

/*
 * usrv is a uni process server: every request is handled by calling
 * the fs* handler directly; there is no start hook and no work queue.
 * start9p() runs it on file descriptors 0 and 1.
 */
Srv usrv = {
	.auth = fsauth,
	.attach = fsattach,
	.destroyfid = fsdestroyfid,
	.msize = Iounit,

	.open = fsopen,
	.create = fscreate,
	.read = fsread,
	.write = fswrite,
	.stat = fsstat,
	.wstat = fswstat,
	.walk = fswalk,
	.remove = fsremove,

	.end = fsend,
};

/*
08:08 < joe7> is there a way to interact with the stdin of a process using srv() from rc? I understand that it runs the srv() loop between the fd's 0 and 1.
08:13 < joe7> I wrote an userspace file server and I want to test it without using postmountsrv (avoiding the pipe interface).
08:45 < cinap_lenrek> {yourfileserver <[0=1] &} | echo 0 >/srv/service
08:46 < cinap_lenrek> then you should be able to mount /srv/service
08:47 < cinap_lenrek> the <>{} operator might also work
08:50 < cinap_lenrek> mount <{ramfs -i <[0=1]} /n/ram
08:50 < cinap_lenrek> works.
 */
/*
 * Start serving 9P.
 *	stdio != 0	run the uni process server on fds 0 and 1
 *	stdio == 0	post the multi process server under the name in
 *			service; failure to post is fatal
 */
void
start9p(int stdio)
{
	int sfd;

	if(stdio == 0){
		sfd = postsrv(&mpsrv, service);
		if(sfd < 0)
			sysfatal("postsrv: %r");
		close(sfd);
		return;
	}
	usrv.infd = 0;
	usrv.outfd = 1;
	srv(&usrv);
}

/*
static int
dir9p2(Dir* dir, Dentry* dentry, void* strs)
{
	char *op, *p;

	memset(dir, 0, sizeof(Dir));
	mkqid(dir->qid, dentry, 1);
	dir->mode = (dir->qid.type<<24)|(dentry->mode & 0777);
	dir->mtime = dentry->mtime;
	dir->length = dentry->size;

	op = p = strs;
	dir->name = p;
	p += sprint(p, "%s", dentry->name)+1;

	dir->uid = p;
	uidtostr(p, dentry->uid);
	p += strlen(p)+1;

	dir->gid = p;
	uidtostr(p, dentry->gid);
	p += strlen(p)+1;

	dir->muid = p;
	strcpy(p, "");
	p += strlen(p)+1;

	return p-op;
} */

/*
	Size of the file whose directory entry is in block dblkno
	(checked against qpath): the size recorded in the Dentry plus
	the bytes still sitting in the append buffer.
	The sum is computed as u64 and returned through the s32 return type.
 */
s32
readfilesize(u64 dblkno, u64 qpath)
{
	Iobuf *ent;
	u64 total;

	ent = egetmetachk(dblkno, Breadonly, Tdentry, qpath);
	total = ent->d->size;
	total += ent->appendsize;
	putbuf(ent, 0);
	return total;
}
/*
	Copy at most rbufsize bytes of the file, starting at offset, into rbuf.

	dblkno/qpath identify the directory entry block of the file.
	The file contents are taken from three places:
		the Dentry itself, when the whole file is <= Ddatasize,
		the data blocks, for everything below d->size,
		the append buffer, for what lies beyond d->size.

	Returns the number of bytes copied, 0 when offset is at or past
	the end of the file. The Dentry buffer is released on both the
	normal and the error path.
 */
s32
readfile(u64 dblkno, u64 qpath, char *rbuf, s32 rbufsize, u64 offset)
{
	Dentry *de;
	s64 want, sent, total;
	s32 n;
	Iobuf *dbuf, *blk;
	u64 blksize, pos, within;

	sent = 0;
	dbuf = egetmetachk(dblkno, Breadonly, Tdentry, qpath);
	if(waserror()){
		putbuf(dbuf, 0);
		nexterror();
	}
	de = dbuf->d;
	total = de->size+dbuf->appendsize;

	if(offset >= total){
		/* at or past the end of the file, nothing to send */
	}else if(total <= Ddatasize){
		/* small file, the contents are in the Dentry */
		n = min(total-offset, rbufsize);
		memcpy(rbuf, de->buf+offset, n);
		sent = n;
	}else{
		/* never send more than what is left in the file */
		if(total - offset <= rbufsize)
			want = total - offset;
		else
			want = rbufsize;

		/* the part that has made it to data blocks */
		sent = 0;
		while(sent < want && offset+sent < de->size){
			pos = offset+sent;
			within = pos%Maxdatablocksize;
			blk = getdatablkat(de, pos/Maxdatablocksize);
			if(blk->len == Maxdatablockunits)
				blksize = Maxdatablocksize;
			else
				blksize = blk->io->len*Blocksize -Ddataidssize;
			n = min(blksize-within, want-sent);
			memcpy(rbuf+sent, blk->io->buf+within, n);
			sent += n;
			putbuf(blk, 0);
		}

		/* the part that is still in the append buffer */
		if(dbuf->append && sent < want && offset+sent < de->size+dbuf->appendsize){
			/* assuming that rbufsize < Maxdatablocksize */
			n = min(de->size+dbuf->appendsize-(offset+sent), want-sent);
			memcpy(rbuf+sent, dbuf->append+(offset+sent-de->size), n);
			sent += n;
		}
	}

	poperror();
	putbuf(dbuf, 0);
	return sent;
}

/*
	only for updating existing data

	Copy-on-write replacement of data that already is in a data block:
	a new extent is allocated, the old extent is copied into it, the
	caller's bytes are laid over the copy and the Dentry is pointed at
	the new extent.

	d		Dentry of the file
	dblkno		block number of that Dentry
	wbuf, wbufsize	the new data
	offset		file offset of the new data; offset+wbufsize must
			not exceed d->size (panics otherwise)
	oldbufp		on success receives the still held buffer of the
			replaced extent; the caller frees it after the
			Dentry has been written

	Only writes up to the end of the extent containing offset.
	Returns the number of bytes taken from wbuf, 0 when there is
	nothing to do. Errors are raised through the error stack; the
	new extent is freed and the old buffer released before the
	error is passed on.
 */
s32
update(Dentry *d, u64 dblkno, char *wbuf, s32 wbufsize, u64 offset, Iobuf **oldbufp)
{
	Iobuf *oldbuf, *newbuf;
	s32 howmuch;
	u64 oldblkno, newblkno, to, nblocks, nblockssize;

	if(d == nil || wbuf == nil || wbufsize == 0)
		return 0;
	if(offset+wbufsize > d->size)
		panic("update(): should not be here\n"
				"	offset %llud wbufsize %d d->size %llud",
				offset, wbufsize, d->size);

	/*	get the old data	 */
	oldblkno = rel2abs(d, offset/Maxdatablocksize);
	if(offset/Maxdatablocksize < d->size/Maxdatablocksize){
		/* not the last extent, so it is a full one */
		nblocks = Maxdatablockunits;
		nblockssize = Maxdatablocksize;
	}else{
		/* the last extent can be shorter */
		nblocks = nlastdatablocks(d->size);
		nblockssize = nblocks*Blocksize -Ddataidssize;
	}

	oldbuf = egetbufchk(oldblkno, nblocks, Bwritable, Tdata, d->path, getcallerpc(&d));

	/* allocate new blocks to copy on write */
	newbuf = allocblocks(nblocks, Tdata, d->path);
	newblkno = newbuf->blkno;
	newbuf->io->dblkno = dblkno;
	newbuf->io->flags = d->flags;
	if(waserror()){
		/* do not leave the old extent held when unwinding */
		putbuf(oldbuf, 0);
		freeblocks(newblkno, nblocks, Tdata, d->path);
		nexterror();
	}

	/* put the old contents in these new blocks */
	if(newbuf->xiobuf+(newbuf->io->len*Blocksize)-sizeof(u64) < newbuf->io->buf+nblockssize)
		panic("update put old contents in new blocks overstepping the bounds");
	memcpy(newbuf->io->buf, oldbuf->io->buf, nblockssize);

	/*
		update with the new contents.
		They have to go into the new blocks: the old blocks are
		freed by the caller and the Dentry will point to the new ones.
	 */
	to = offset%Maxdatablocksize;
	howmuch = min(nblockssize-to, wbufsize);
	if(newbuf->xiobuf+(newbuf->io->len*Blocksize)-sizeof(u64) < newbuf->io->buf+to+howmuch)
		panic("update with new contents overstepping the bounds");
	memcpy(newbuf->io->buf+to, wbuf, howmuch);

	putbuf(newbuf, 1);

	/* add the newly allocated blocks to the Dentry */
	addrelative(d, dblkno, offset/Maxdatablocksize, newblkno);
	poperror();
	*oldbufp = oldbuf; /* freeblockbuf(oldbuf); */
	return howmuch;
}

/*
	Write one full extent (Maxdatablocksize bytes) out of the append
	buffer into newly allocated data blocks and add it to the Dentry.

	Scenarios:
	1. last block is full or no last block
	2. last block is partially full

	dbuf	buffer of the Dentry block, owner of append and appendsize
	d	the Dentry of the file
	dblkno	block number of that Dentry
	oldbufp	in scenario 2 receives the still held buffer of the replaced
		last extent, to be freed after the Dentry is written;
		not touched in scenario 1

	Returns 1 when an extent was written, 0 when neither scenario
	applies (not enough data for a full extent). Errors are raised
	through the error stack after freeing the newly allocated blocks.
 */
s32
writefullappend(Iobuf *dbuf, Dentry *d, u64 dblkno, Iobuf **oldbufp)
{
	Iobuf *oldbuf, *newbuf;
	s32 howmuch;
	u64 newblkno, newbufsize, lastdatablksize, oldblkno;

	/* last data block is full or there is no last block.
		write out the append
	 */
	if(d->size%Maxdatablocksize == 0 &&
		dbuf->appendsize >= Maxdatablocksize){

		/* allocate new blocks */
		newbuf = allocblocks(Maxdatablockunits,Tdata, d->path);
		newblkno = newbuf->blkno;
		newbuf->io->dblkno = dblkno;
		newbuf->io->flags = d->flags;
		if(waserror()){
			/* give the new blocks back before passing the error on */
			freeblocks(newblkno, Maxdatablockunits, Tdata, d->path);
			nexterror();
		}

		/* add the contents of append to those new blocks */
		if(newbuf->xiobuf+(newbuf->io->len*Blocksize)-sizeof(u64) < newbuf->io->buf+Maxdatablocksize)
			panic("writefullappend contents overstepping the bounds");
		memcpy(newbuf->io->buf, dbuf->append, Maxdatablocksize);

		putbuf(newbuf, 1);

		/* add this newly allocated blocks to the Dentry */
		addrelative(d, dblkno, d->size/Maxdatablocksize, newblkno);
		poperror();
		d->size += Maxdatablocksize;
		/*
			NOTE(review): unlike scenario 2 nothing is moved to the
			front of append here; that is only right when appendsize
			never exceeds Maxdatablocksize -- confirm with the callers.
		 */
		dbuf->appendsize -= Maxdatablocksize;
		return 1;

	}else if(d->size%Maxdatablocksize > 0 &&
			(d->size%Maxdatablocksize)+dbuf->appendsize >= Maxdatablocksize){

		/* last data block is not full.
			write a full last data block and leave the rest in append
		 */

		/* allocate new blocks */
		newbuf = allocblocks(Maxdatablockunits, Tdata, d->path);
		newblkno = newbuf->blkno;
		newbuf->io->dblkno = dblkno;
		newbuf->io->flags = d->flags;
		newbufsize = 0;
		if(waserror()){
			/* give the new blocks back before passing the error on */
			freeblocks(newblkno, Maxdatablockunits, Tdata, d->path);
			nexterror();
		}

		/* read from the last block */
		/* copy that to the new allocated blocks */
		oldbuf = nil;
		if(lastdatablksize=d->size%Maxdatablocksize){
			/* partial block, above is = not == */
			/* always taken in this branch, the else if above checked for > 0 */
			oldblkno = rel2abs(d, d->size/Maxdatablocksize);
			oldbuf = egetbufchk(oldblkno, nlastdatablocks(lastdatablksize),
								Bwritable, Tdata, d->path, getcallerpc(&dbuf));
			if(newbuf->xiobuf+(newbuf->io->len*Blocksize)-sizeof(u64) < newbuf->io->buf+lastdatablksize)
				panic("writefullappend last block overstepping the bounds");
			memcpy(newbuf->io->buf, oldbuf->io->buf, lastdatablksize);
			newbufsize = lastdatablksize;
		}

		/* add the contents of append to those new blocks */
		/* howmuch is what it takes to fill the extent up */
		howmuch = Maxdatablocksize-newbufsize;
		if(newbuf->xiobuf+(newbuf->io->len*Blocksize)-sizeof(u64) < newbuf->io->buf+newbufsize+howmuch)
			panic("writefullappend contents overstepping the bounds");
		memcpy(newbuf->io->buf+newbufsize, dbuf->append, howmuch);

		/* move the left over append stuff to the front */
		memmove(dbuf->append, dbuf->append+howmuch, dbuf->appendsize-howmuch);

		dbuf->appendsize -= howmuch;
		putbuf(newbuf, 1);

		/* add this newly allocated blocks to the Dentry */
		/* d->size is not updated yet, so this is the index of the old last extent */
		addrelative(d, dblkno, d->size/Maxdatablocksize, newblkno);
		poperror();

		/* free the old last blocks after the dentry is written */
		if(oldbuf)
			/* freeblockbuf(oldbuf); */
			*oldbufp = oldbuf;
		d->size += howmuch;
		return 1;
	}
	return 0;
}

/*
	Write everything that is in the append buffer out to data blocks.

	Scenarios:
	1. last block is full or no last block
	2. last block is partially full

	A full extent, if there is one, is written by writefullappend()
	first. What is left goes into a newly allocated last extent, in
	scenario 2 together with the contents of the old last extent.

	dbuf	buffer of the Dentry block, owner of append and appendsize
	dblkno	block number of that Dentry
	oldbufp	set to nil on entry (unless there is nothing to write);
		when an old last extent was replaced it receives the still
		held buffer of that extent, to be freed after the Dentry
		is written

	Returns 1 on success, also when there is nothing to write, and -1
	when the old last extent could not be read. Other failures are
	raised through the error stack after freeing the new blocks.
 */
s32
writeallappend(Iobuf *dbuf, u64 dblkno, Iobuf **oldbufp)
{
	Iobuf *oldbuf, *newbuf;
	u64 newblkno, newbufsize, lastdatablksize, oldblkno, newblocks;
	s32 rv;
	Dentry *d;

	d = dbuf->d;
	if(dbuf->appendsize == 0)
		return 1;

	rv = 0;
	*oldbufp = oldbuf = nil;
	if((d->size%Maxdatablocksize)+dbuf->appendsize >= Maxdatablocksize){
		rv = writefullappend(dbuf, d, dblkno, oldbufp);
		if(rv < 0)
			return rv;
	}

	if(dbuf->appendsize == 0)
		return 1;

	/* last data block is full or there is no last block. */
	if(d->size%Maxdatablocksize == 0){

		/* allocate new blocks */
		newblocks = nlastdatablocks(dbuf->appendsize);
		newbuf = allocblocks(newblocks,Tdata, d->path);
		newblkno = newbuf->blkno;
		newbuf->io->dblkno = dblkno;
		newbuf->io->flags = dbuf->d->flags;
		if(waserror()){
			/* give the new blocks back before passing the error on */
			freeblocks(newblkno, newblocks, Tdata, d->path);
			nexterror();
		}

		/* add the contents of append to those new blocks */
		if(newbuf->xiobuf+(newbuf->io->len*Blocksize)-sizeof(u64)
			 < newbuf->io->buf+dbuf->appendsize)
			panic("writeallappend contents overstepping the bounds");
		memcpy(newbuf->io->buf, dbuf->append, dbuf->appendsize);

		putbuf(newbuf, 1);

		/* add this newly allocated blocks to the Dentry */
		addrelative(d, dblkno, d->size/Maxdatablocksize, newblkno);
		poperror();
		d->size += dbuf->appendsize;
		dbuf->appendsize = 0;
		return 1;
	}

	/*
		a partial last extent is left: writefullappend() did not run
		or did nothing, so it cannot have handed back an old buffer
	 */
	if(*oldbufp != nil)
		panic("writeallappend *oldbufp != nil should not be happening");
	/* allocate new blocks */
	newblocks = nlastdatablocks((d->size%Maxdatablocksize) +dbuf->appendsize);
	newbuf = allocblocks(newblocks, Tdata, d->path);
	newblkno = newbuf->blkno;
	newbufsize = 0;
	newbuf->io->dblkno = dblkno;
	newbuf->io->flags = dbuf->d->flags;
	if(waserror()){
		/* give the new blocks back before passing the error on */
		freeblocks(newblkno, newblocks, Tdata, d->path);
		nexterror();
	}

	/* read from the last block and copy that to the newly allocated blocks */
	if(lastdatablksize=d->size%Maxdatablocksize){
		/* partial block, above is = not == */
		oldblkno = rel2abs(d, d->size/Maxdatablocksize);
		oldbuf = getbufchk(oldblkno, nlastdatablocks(lastdatablksize),
							Bwritable, Tdata, d->path, getcallerpc(&dbuf));
		if(oldbuf == nil){
			/*
				take our label off the error stack before leaving,
				a later error must not jump into this dead frame.
				Popped first so that an error out of freeblocks()
				does not run the handler and free a second time.
			 */
			poperror();
			freeblocks(newblkno, newblocks, Tdata, d->path);
			return -1;
		}
		if(newbuf->xiobuf+(newbuf->io->len*Blocksize)-sizeof(u64)
			 < newbuf->io->buf+lastdatablksize)
			panic("writeallappend last block overstepping the bounds");
		memcpy(newbuf->io->buf, oldbuf->io->buf, lastdatablksize);
		newbufsize = lastdatablksize;
	}

	/* add the contents of append to those new blocks */
	/* writefullappend() takes care of bigger sizes */
	if(newbuf->xiobuf+(newbuf->io->len*Blocksize)-sizeof(u64)
		 < newbuf->io->buf+newbufsize+dbuf->appendsize)
		panic("writeallappend add overstepping the bounds");
	memcpy(newbuf->io->buf+newbufsize, dbuf->append, dbuf->appendsize);
	newbuf->io->len = newblocks;

	/* nothing left in append to move to the front */

	putbuf(newbuf, 1);

	/* add this newly allocated blocks to the Dentry */
	addrelative(d, dblkno, d->size/Maxdatablocksize, newblkno);
	poperror();

	/* free the old last blocks after the dentry is written */
	if(oldbuf)
		/* freeblockbuf(oldbuf); */
		*oldbufp = oldbuf;
	d->size += dbuf->appendsize;
	dbuf->appendsize = 0;
	return 1;
}

/*
	Write wbufsize bytes from wbuf at offset into the file whose
	Dentry is in block dblkno (checked against qpath). uid is
	recorded as the muid of the file.

	3 scenarios
		offset < filesize && offset+wbufsize <= filesize
			replacing the data in existing blocks
		offset <= filesize && offset+wbufsize > filesize
			replacing the data in existing blocks
			add data to new blocks
		offset > filesize
			new blank blocks until offset
			data blocks from offset until offset+wbufsize

	Files of up to Ddatasize bytes are kept in the Dentry itself.
	Bigger ones collect new data in the append buffer of the Dentry
	buffer and get it flushed to data blocks one extent at a time.

	Whenever a step replaces an extent (oldbuf is handed back) the
	Dentry is written first, then the old extent is freed and the
	whole thing starts over at Writefileagain with what is left.

	Returns the number of bytes written. Errors are raised through
	the error stack after releasing the Dentry buffer.
 */
s32
writefile(u64 dblkno, u64 qpath, s16 uid, char *wbuf, s32 wbufsize, u64 offset)
{
	Dentry *d;
	s64 written;
	s32 n;
	Iobuf *dbuf, *oldbuf;
	u8 dowrite;

	written = 0;
Writefileagain:
	oldbuf = nil;
	dbuf = egetmetachk(dblkno, Bwritable, Tdentry, qpath);
	if(waserror()){
		putbuf(dbuf, 0);
		nexterror();
	}
	d = dbuf->d;
	d->muid = uid;
	dowrite = 0;
	// odentry(d);

	// little data, stuff it in the Dentry
	if(d->size+dbuf->appendsize<=Ddatasize && offset+wbufsize <= Ddatasize){
		if(dbuf->xiobuf+(dbuf->io->len*Blocksize)-sizeof(u64)
			 < (u8*)dbuf->d->buf+offset+wbufsize)
			panic("writefile little data overstepping the bounds");
		memcpy(d->buf+offset, wbuf, wbufsize);
		if(offset+wbufsize > d->size)
			d->size = offset+wbufsize;
		written = wbufsize;
		dowrite = 1;
		goto writeend;
	}
	// more data, relocate it to a proper data block
	if(d->size > 0 &&
		d->size+dbuf->appendsize <= Ddatasize &&
		offset+wbufsize > Ddatasize){
		if(dbuf->append == nil){
			dbuf->append = allocmemunits(Maxdatablockunits);
			dbuf->appendsize = 0;
		}
		/* what was in the Dentry becomes the start of the append buffer */
		memcpy(dbuf->append, d->buf, d->size);
		memset(d->buf, 0, Ddatasize);
		dbuf->appendsize = d->size;
		d->size = 0;
		dowrite = 1;
	}

	/* stop as soon as an old extent has to be freed, see writeend */
	while(oldbuf == nil && written < wbufsize){

		/* written is an s64, hence %lld */
		if(chatty9p > 1)
			dprint("writefile(): d->size %llud dbuf->appendsize %llud"
					" offset %llud written %lld offset+written %llud wbufsize %ld\n",
					d->size, dbuf->appendsize,
					offset, written, offset+written, wbufsize);

		/* all the below functions only write upto the end of an extent.
		   Hence, the need for a loop to keep repeating.
		 */
		if((d->size%Maxdatablocksize) +dbuf->appendsize >= Maxdatablocksize){
			/* a full extent is waiting in append, flush it */
			dowrite = 1;
			if(chatty9p > 1)
				dprint("writefile(): writefullappend\n");
			writefullappend(dbuf, d, dblkno, &oldbuf);

		}else if(offset > d->size+dbuf->appendsize){
			/* new blank blocks until offset
				fill blank data upto offset */

			if(chatty9p > 1)
				dprint("writefile(): blank blocks until offset\n");

			if(dbuf->append == nil){
				dbuf->append = allocmemunits(Maxdatablockunits);
				dbuf->appendsize = 0;
			}
			n = min(Maxdatablocksize, offset-d->size);
			/*
				the hole has to read back as zeros: clear the part of
				append that is being added, it can still hold bytes
				of earlier appends that were moved to the front.
			 */
			if(dbuf->appendsize < n)
				memset(dbuf->append+dbuf->appendsize, 0, n-dbuf->appendsize);
			dbuf->appendsize = n;

		}else if(offset+written < d->size){
			/* replacing existing data
				no change to file size */

			if(chatty9p > 1)
				dprint("writefile(): replace existing data\n");

			n = update(d, dblkno,
							wbuf+written, /* from where */
							/* how much */
							min(wbufsize-written, d->size-(offset+written)),
							offset+written,	/* to where */
							&oldbuf);
			if(chatty9p > 1)
				dprint("writefile(): update returned %d\n", n);
			written += n;
			dowrite = 1;

		}else if(offset+written >= d->size &&
					offset+written < d->size+dbuf->appendsize){
			/* changing append contents */
			if(chatty9p > 1)
				dprint("writefile(): change append contents\n");
			n = min(d->size+dbuf->appendsize-(offset+written),wbufsize-written);
			memcpy(dbuf->append+(offset+written-d->size), wbuf+written, n);
			written += n;

		}else if(offset+written >= d->size+dbuf->appendsize){
			/* assuming that wbufsize <= append */
			/* append data, changes file size
				data blocks from offset until offset+wbufsize */
			if(chatty9p > 1)
				dprint("writefile(): append\n");
			if(dbuf->append == nil){
				dbuf->append = allocmemunits(Maxdatablockunits);
				dbuf->appendsize = 0;
			}
			n = min(Maxdatablocksize-dbuf->appendsize, wbufsize-written);
			memcpy(dbuf->append+dbuf->appendsize, wbuf+written, n);
			dbuf->appendsize += n;
			written += n;
		}
		else
			panic("writefile: should not be here");

	}

writeend:
	d->mtime = nsec();
	if(dowrite)
		d->version++;
	poperror();
	putbuf(dbuf, dowrite);
	if(oldbuf){
		/* the Dentry is written, now the replaced extent can go */
		freeblockbuf(oldbuf);
		goto Writefileagain;
	}

	/* How will this work for partial writes? */
	if(chatty9p > 1)
		dprint("writefile(): written %lld\n", written);
	return written;
}

/*
	emalloc9p() for callers that have the size as an u32.
 */
void *
malloc9p(u32 sz)
{
	ulong nbytes;

	nbytes = (ulong)sz;
	return emalloc9p(nbytes);
}