code: 9ferno

Download patch

ref: 693f5ff94696f7386d9adb6310b5cebb954bd64f
parent: b1eca83bfc6cb960855d9e5b6457e710e335284e
author: 9ferno <gophone2015@gmail.com>
date: Tue Nov 9 02:15:32 EST 2021

import 9front devmnt.c

--- a/include/kernel.h
+++ b/include/kernel.h
@@ -31,14 +31,14 @@
 extern	int	kmount(int, int, char*, int, char*);
 extern	int	kopen(char*, int);
 extern	int	kpipe(int[2]);
-extern	long	kpread(int, void*, long, vlong);
-extern	long	kread(int, void*, long);
+extern	s32	kpread(int, void*, s32, s64);
+extern	s32	kread(int, void*, s32);
 extern	int	kremove(char*);
 extern	vlong	kseek(int, vlong, int);
 extern	int	kstat(char*, uchar*, int);
 extern	int	kunmount(char*, char*);
-extern	long	kpwrite(int, void*, long, vlong);
-extern	long	kwrite(int, void*, long);
+extern	s32	kpwrite(int, void*, s32, s64);
+extern	s32	kwrite(int, void*, s32);
 extern	int	kwstat(char*, uchar*, int);
 extern	int	klisten(char*, char*);
 extern	int	kannounce(char*, char*);
--- a/os/pc64/errstr.h
+++ b/os/pc64/errstr.h
@@ -1,6 +1,7 @@
 char Enoerror[] = "no error";
 char Emount[] = "inconsistent mount";
 char Eunmount[] = "not mounted";
+char Eismtpt[] = "is a mount point";
 char Eunion[] = "not in union";
 char Emountrpc[] = "mount rpc error";
 char Eshutdown[] = "mounted device shut down";
--- a/os/pc64/ff.s
+++ b/os/pc64/ff.s
@@ -6,6 +6,7 @@
 ff outputs to screen now.
 But, the input needs to be fixed.
 make this into a devff like device that reads commands and outputs the result.
+replace variable with value (as in open firmware), to avoid exposing addresses
 
  ff kernel, amd64 9front variant
 
--- a/os/port/chan.c
+++ b/os/port/chan.c
@@ -5,6 +5,8 @@
 #include	"fns.h"
 #include	"../port/error.h"
 
+#define DBG if(0)print
+
 enum
 {
 	PATHSLOP	= 20,
@@ -54,30 +56,35 @@
 	return p[0]=='.' && p[1]=='.' && p[2]=='\0';
 }
 
+/*
+ * sticking with inferno's definition of Ref
+ * as it keeps the incref() and decref() simple
+ * and also puts the proc on the fast path by the
+ * scheduler's priorities (PriLock)
+ */
 int
 incref(Ref *r)
 {
-	long old, new;
+	int x;
 
-	do {
-		old = r->ref;
-		new = old+1;
-	} while(!cmpswap(&r->ref, old, new));
-	return new;
+	lock(&r->l);
+	x = ++r->ref;
+	unlock(&r->l);
+	return x;
 }
 
 int
 decref(Ref *r)
 {
-	long old, new;
+	int x;
 
-	do {
-		old = r->ref;
-		if(old <= 0)
-			panic("decref pc=%#p", getcallerpc(&r));
-		new = old-1;
-	} while(!cmpswap(&r->ref, old, new));
-	return new;
+	lock(&r->l);
+	x = --r->ref;
+	unlock(&r->l);
+	if(x < 0)
+		panic("decref, pc=0x%zux", getcallerpc(&r));
+
+	return x;
 }
 
 /*
@@ -159,7 +166,7 @@
 
 /*
  * closeproc() kproc is used by 9front not inferno
- * TODO not sure if closeproc() is needed for 9ferno
+ * used to close clunked chans
  */
 static void closeproc(void*);
 
@@ -185,6 +192,12 @@
 		devtab[i]->shutdown();
 }
 
+void
+dumpchan(char *s, Chan *c)	/* debug aid: print label s, then c's path as given by chanpath() */
+{
+	print("%s chanpath %s\n", s, chanpath(c));
+}
+
 Chan*
 newchan(void)
 {
@@ -192,18 +205,19 @@
 
 	lock(&chanalloc);
 	c = chanalloc.free;
-	if(c != 0)
+	if(c != nil){
 		chanalloc.free = c->next;
-	unlock(&chanalloc);
-
-	if(c == nil) {
+		c->next = nil;
+	} else {
+		unlock(&chanalloc);
 		c = smalloc(sizeof(Chan));
 		lock(&chanalloc);
-		c->fid = ++chanalloc.fid;
 		c->link = chanalloc.list;
 		chanalloc.list = c;
-		unlock(&chanalloc);
 	}
+	if(c->fid == 0)
+		c->fid = ++chanalloc.fid;
+	unlock(&chanalloc);
 
 	/* if you get an error before associating with a dev,
 	   close calls rootclose, a nop */
@@ -212,18 +226,23 @@
 	c->ref = 1;
 	c->dev = 0;
 	c->offset = 0;
+	c->devoffset = 0;
 	c->iounit = 0;
-	c->umh = 0;
+	c->umh = nil;
+	c->umc = nil;
 	c->uri = 0;
 	c->dri = 0;
-	c->aux = 0;
-	c->mchan = 0;
-	c->mcp = 0;
-	c->mux = 0;
-	c->mqid.path = 0;
-	c->mqid.vers = 0;
-	c->mqid.type = 0;
+	c->dirrock = nil;
+	c->nrock = 0;
+	c->mrock = 0;
+	c->ismtpt = 0;
+	c->mcp = nil;
+	c->mux = nil;
+	c->aux = nil;
+	c->mchan = nil;
+	memset(&c->mqid, 0, sizeof(c->mqid));
 	c->path = nil;
+	
 	return c;
 }
 
@@ -288,9 +307,13 @@
 
 	if(p == nil || decref(p))
 		return;
+	DBG("pathclose path %s mlen %d malen %d\n", p->s, p->mlen, p->malen);
 	for(i=0; i<p->mlen; i++)
-		if(p->mtpt[i] != nil)
+		if(p->mtpt[i] != nil){
+			DBG("pathclose i %d p->mtpt[i] path %s p->mtpt[i]->ref %d\n",
+				i, chanpath(p->mtpt[i]), p->mtpt[i]->ref);
 			cclose(p->mtpt[i]);
+		}
 	free(p->mtpt);
 	free(p->s);
 	free(p);
@@ -515,7 +538,8 @@
 	if(c == nil)
 		return;
 	if(c->ref < 1 || c->flag&CFREE)
-		panic("cclose %#p", getcallerpc(&c));
+		panic("cclose %#p c->path %s c->ref %d c->flag 0x%ux",
+				getcallerpc(&c), chanpath(c), c->ref, c->flag);
 
 	if(decref(c))
 		return;
@@ -529,7 +553,7 @@
 	}
 
 	if(!waserror()){
-		devtab[c->type]->close(c);
+		devtab[c->type]->close(c); 
 		poperror();
 	}
 	chanfree(c);
--- a/os/port/devmnt.c
+++ b/os/port/devmnt.c
@@ -25,71 +25,63 @@
 	Fcall	request;	/* Outgoing file system protocol message */
 	Fcall 	reply;		/* Incoming reply */
 	Mnt*	m;		/* Mount device during rpc */
-	Rendez	r;		/* Place to hang out */
-	uchar*	rpc;		/* I/O Data buffer */
-	uint		rpclen;	/* len of buffer */
-	Block	*b;		/* reply blocks */
-	char	done;		/* Rpc completed */
-	u64	stime;		/* start time for mnt statistics */
-	u32	reqlen;		/* request length for mnt statistics */
-	u32	replen;		/* reply length for mnt statistics */
+	Rendez*	z;		/* Place to hang out */
+	Block*	w;		/* copy of write rpc for cache */
+	Block*	b;		/* reply blocks */
 	Mntrpc*	flushed;	/* message this one flushes */
+	char	done;		/* Rpc completed */
 };
 
 enum
 {
-	TAGSHIFT = 5,			/* ulong has to be 32 bits */
+	TAGSHIFT = 5,
 	TAGMASK = (1<<TAGSHIFT)-1,
 	NMASK = (64*1024)>>TAGSHIFT,
 };
 
-struct Mntalloc
+static struct Mntalloc
 {
 	Lock;
 	Mnt*	list;		/* Mount devices in use */
 	Mnt*	mntfree;	/* Free list */
 	Mntrpc*	rpcfree;
-	int	nrpcfree;
-	int	nrpcused;
-	u32	id;
-	u32	tagmask[NMASK];
-}mntalloc;
+	ulong	nrpcfree;
+	ulong	nrpcused;
+	ulong	id;
+	u32int	tagmask[NMASK];
+} mntalloc;
 
-void	mattach(Mnt*, Chan*, char*);
-Mnt*	mntchk(Chan*);
-void	mntdirfix(uchar*, Chan*);
-Mntrpc*	mntflushalloc(Mntrpc*, ulong);
-void	mntflushfree(Mnt*, Mntrpc*);
-void	mntfree(Mntrpc*);
-void	mntgate(Mnt*);
-void	mntpntfree(Mnt*);
-void	mntqrm(Mnt*, Mntrpc*);
-Mntrpc*	mntralloc(Chan*, ulong);
-s32	mntrdwr(int, Chan*, void*, s32, s64);
-s32	mntrpcread(Mnt*, Mntrpc*);
-void	mountio(Mnt*, Mntrpc*);
-void	mountmux(Mnt*, Mntrpc*);
-void	mountrpc(Mnt*, Mntrpc*);
-int	rpcattn(void*);
-Chan*	mntchan(void);
+static Chan*	mntchan(void);
+static Mnt*	mntchk(Chan*);
+static void	mntdirfix(uchar*, Chan*);
+static Mntrpc*	mntflushalloc(Mntrpc*);
+static Mntrpc*	mntflushfree(Mnt*, Mntrpc*);
+static void	mntfree(Mntrpc*);
+static void	mntgate(Mnt*);
+static void	mntqrm(Mnt*, Mntrpc*);
+static Mntrpc*	mntralloc(Chan*);
+static long	mntrdwr(int, Chan*, void*, long, vlong);
+static int	mntrpcread(Mnt*, Mntrpc*);
+static void	mountio(Mnt*, Mntrpc*);
+static void	mountmux(Mnt*, Mntrpc*);
+static void	mountrpc(Mnt*, Mntrpc*);
+static int	rpcattn(void*);
 
 #define cachedchan(c) (((c)->flag & CCACHE) != 0 && (c)->mcp != nil)
 
 char	Esbadstat[] = "invalid directory entry received from server";
-char Enoversion[] = "version not established for mount channel";
+char	Enoversion[] = "version not established for mount channel";
 
 
-void (*mntstats)(int, Chan*, uvlong, ulong);
-
 static void
 mntreset(void)
 {
 	mntalloc.id = 1;
 	mntalloc.tagmask[0] = 1;			/* don't allow 0 as a tag */
-	mntalloc.tagmask[NMASK-1] = 0x80000000UL;	/* don't allow NOTAG */
+	mntalloc.tagmask[NMASK-1] = 0x80000000;		/* don't allow NOTAG */
 	fmtinstall('F', fcallfmt);
-/*	fmtinstall('D', dirfmt); */
-/*	fmtinstall('M', dirmodefmt);  */
+	fmtinstall('D', dirfmt);
+/* We can't install %M since eipfmt does and is used in the kernel [sape] */
 
 	cinit();
 }
@@ -104,11 +96,12 @@
 	uchar *msg;
 	Mnt *m;
 	char *v;
+	Queue *q;
 	long k, l;
 	uvlong oo;
 	char buf[128];
 
-	qlock(&c->umqlock);	/* make sure no one else does this until we've established ourselves */
+	eqlock(&c->umqlock);	/* make sure no one else does this until we've established ourselves */
 	if(waserror()){
 		qunlock(&c->umqlock);
 		nexterror();
@@ -170,7 +163,6 @@
 	unlock(c);
 
 	l = devtab[c->type]->write(c, msg, k, oo);
-
 	if(l < k){
 		lock(c);
 		c->offset -= k - l;
@@ -179,14 +171,15 @@
 	}
 
 	/* message sent; receive and decode reply */
-	k = devtab[c->type]->read(c, msg, 8192+IOHDRSZ, c->offset);
-	if(k <= 0)
-		error("EOF receiving fversion reply");
+	for(k = 0; k < BIT32SZ || (k < GBIT32(msg) && k < 8192+IOHDRSZ); k += l){
+		l = devtab[c->type]->read(c, msg+k, 8192+IOHDRSZ-k, c->offset);
+		if(l <= 0)
+			error("EOF receiving fversion reply");
+		lock(c);
+		c->offset += l;
+		unlock(c);
+	}
 
-	lock(c);
-	c->offset += k;
-	unlock(c);
-
 	l = convM2S(msg, k, &f);
 	if(l != k)
 		error("bad fversion conversion on reply");
@@ -199,36 +192,52 @@
 		error("server tries to increase msize in fversion");
 	if(f.msize<256 || f.msize>1024*1024)
 		error("nonsense value of msize in fversion");
-	if(strncmp(f.version, v, strlen(f.version)) != 0)
+	k = strlen(f.version);
+	if(strncmp(f.version, v, k) != 0)
 		error("bad 9P version returned from server");
+	if(returnlen > 0 && returnlen < k)
+		error(Eshort);
 
+	v = nil;
+	kstrdup(&v, f.version);
+	q = qopen(10*MAXRPC, 0, nil, nil);
+	if(q == nil){
+		free(v);
+		exhausted("mount queues");
+	}
+
 	/* now build Mnt associated with this connection */
 	lock(&mntalloc);
 	m = mntalloc.mntfree;
-	if(m != 0)
+	if(m != nil)
 		mntalloc.mntfree = m->list;
 	else {
+		unlock(&mntalloc);
 		m = malloc(sizeof(Mnt));
-		if(m == 0) {
-			unlock(&mntalloc);
+		if(m == nil) {
+			qfree(q);
+			free(v);
 			exhausted("mount devices");
 		}
+		lock(&mntalloc);
 	}
 	m->list = mntalloc.list;
 	mntalloc.list = m;
-	m->version = nil;
-	kstrdup(&m->version, f.version);
+	m->version = v;
 	m->id = mntalloc.id++;
-	m->q = qopen(10*MAXRPC, 0, nil, nil);
+	m->q = q;
 	m->msize = f.msize;
 	unlock(&mntalloc);
 
+	if(returnlen > 0)
+		memmove(version, f.version, k);	/* length was checked above */
+
 	poperror();	/* msg */
 	free(msg);
 
 	lock(m);
-	m->queue = 0;
-	m->rip = 0;
+	m->queue = nil;
+	m->rip = nil;
 
 	c->flag |= CMSG;
 	c->mux = m;
@@ -238,13 +247,6 @@
 	poperror();	/* c */
 	qunlock(&c->umqlock);
 
-	k = strlen(f.version);
-	if(returnlen > 0){
-		if(returnlen < k)
-			error(Eshort);
-		memmove(version, f.version, k);
-	}
-
 	return k;
 }
 
@@ -255,9 +257,8 @@
 	Mntrpc *r;
 
 	m = c->mux;
-
 	if(m == nil){
-		mntversion(c, VERSION9P, MAXRPC, 0);
+		mntversion(c, nil, 0, 0);
 		m = c->mux;
 		if(m == nil)
 			error(Enoversion);
@@ -272,8 +273,7 @@
 		nexterror();
 	}
 
-	r = mntralloc(0, m->msize);
-
+	r = mntralloc(c);
 	if(waserror()) {
 		mntfree(r);
 		nexterror();
@@ -290,6 +290,7 @@
 	incref(m->c);
 	c->mqid = c->qid;
 	c->mode = ORDWR;
+	c->iounit = m->msize-IOHDRSZ;
 
 	poperror();	/* r */
 	mntfree(r);
@@ -300,24 +301,16 @@
 
 }
 
-static Chan*
-mntattach(char *muxattach)
+Chan*
+mntattach(Chan *c, Chan *ac, char *spec, int flags)
 {
 	Mnt *m;
-	Chan *c;
 	Mntrpc *r;
-	struct bogus{
-		Chan	*chan;
-		Chan	*authchan;
-		char	*spec;
-		int	flags;
-	}bogus;
 
-	bogus = *((struct bogus *)muxattach);
-	c = bogus.chan;
+	if(ac != nil && ac->mchan != c)
+		error(Ebadusefd);
 
 	m = c->mux;
-
 	if(m == nil){
 		mntversion(c, nil, 0, 0);
 		m = c->mux;
@@ -334,21 +327,19 @@
 		nexterror();
 	}
 
-	r = mntralloc(0, m->msize);
-
+	r = mntralloc(c);
 	if(waserror()) {
 		mntfree(r);
 		nexterror();
 	}
-
 	r->request.type = Tattach;
 	r->request.fid = c->fid;
-	if(bogus.authchan == nil)
+	if(ac == nil)
 		r->request.afid = NOFID;
 	else
-		r->request.afid = bogus.authchan->fid;
+		r->request.afid = ac->fid;
 	r->request.uname = up->env->user;
-	r->request.aname = bogus.spec;
+	r->request.aname = spec;
 	mountrpc(m, r);
 
 	c->qid = r->reply.qid;
@@ -361,12 +352,19 @@
 
 	poperror();	/* c */
 
-	if(bogus.flags&MCACHE)
+	if(flags&MCACHE)
 		c->flag |= CCACHE;
 	return c;
 }
 
-Chan*
+static Chan*
+noattach(char *)	/* placed in mntdevtab where mntattach used to be; always raises Enoattach */
+{
+	error(Enoattach);
+	return nil;	/* presumably never reached (error() is expected not to return) */
+}
+
+static Chan*
 mntchan(void)
 {
 	Chan *c;
@@ -376,13 +374,13 @@
 	c->dev = mntalloc.id++;
 	unlock(&mntalloc);
 
-	if(c->mchan)
+	if(c->mchan != nil)
 		panic("mntchan non-zero %p", c->mchan);
 	return c;
 }
 
 static Walkqid*
-mntwalk(Chan *c, Chan *nc, char **name, s32 nname)
+mntwalk(Chan *c, Chan *nc, char **name, int nname)
 {
 	int i, alloc;
 	Mnt *m;
@@ -404,7 +402,7 @@
 
 	alloc = 0;
 	m = mntchk(c);
-	r = mntralloc(c, m->msize);
+	r = mntralloc(c);
 	if(nc == nil){
 		nc = devclone(c);
 		/*
@@ -412,6 +410,7 @@
 		 * Therefore set type to 0 for now; rootclose is known to be safe.
 		 */
 		nc->type = 0;
+		nc->flag |= (c->flag & CCACHE);
 		alloc = 1;
 	}
 	wq->clone = nc;
@@ -471,7 +470,7 @@
 	if(n < BIT16SZ)
 		error(Eshortstat);
 	m = mntchk(c);
-	r = mntralloc(c, m->msize);
+	r = mntralloc(c);
 	if(waserror()) {
 		mntfree(r);
 		nexterror();
@@ -481,9 +480,8 @@
 	mountrpc(m, r);
 
 	if(r->reply.nstat > n){
-		/* doesn't fit; just patch the count and return */
-		PBIT16((uchar*)dp, r->reply.nstat);
 		n = BIT16SZ;
+		PBIT16((uchar*)dp, r->reply.nstat-2);
 	}else{
 		n = r->reply.nstat;
 		memmove(dp, r->reply.stat, n);
@@ -496,13 +494,13 @@
 }
 
 static Chan*
-mntopencreate(int type, Chan *c, char *name, s32 omode, u32 perm)
+mntopencreate(int type, Chan *c, char *name, int omode, ulong perm)
 {
 	Mnt *m;
 	Mntrpc *r;
 
 	m = mntchk(c);
-	r = mntralloc(c, m->msize);
+	r = mntralloc(c);
 	if(waserror()) {
 		mntfree(r);
 		nexterror();
@@ -526,8 +524,11 @@
 	poperror();
 	mntfree(r);
 
-	if(c->flag & CCACHE)
-		copen(c);
+	if(c->flag & CCACHE){
+		if(copen(c))
+		if(type == Tcreate || (omode&OTRUNC) != 0)
+			ctrunc(c);
+	}
 
 	return c;
 }
@@ -550,13 +551,13 @@
 	Mnt *m;
 	Mntrpc *r;
 
+	cclunk(c);
 	m = mntchk(c);
-	r = mntralloc(c, m->msize);
-	if(waserror()){
+	r = mntralloc(c);
+	if(waserror()) {
 		mntfree(r);
 		nexterror();
 	}
-
 	r->request.type = t;
 	r->request.fid = c->fid;
 	mountrpc(m, r);
@@ -567,27 +568,22 @@
 void
 muxclose(Mnt *m)
 {
-	Mntrpc *q, *r;
+	Mnt *f, **l;
+	Mntrpc *r;
 
-	for(q = m->queue; q; q = r) {
-		r = q->list;
-		mntfree(q);
+	while((r = m->queue) != nil){
+		m->queue = r->list;
+		mntfree(r);
 	}
 	m->id = 0;
 	free(m->version);
 	m->version = nil;
-	mntpntfree(m);
-}
+	qfree(m->q);
+	m->q = nil;
 
-void
-mntpntfree(Mnt *m)
-{
-	Mnt *f, **l;
-	Queue *q;
-
 	lock(&mntalloc);
 	l = &mntalloc.list;
-	for(f = *l; f; f = f->list) {
+	for(f = *l; f != nil; f = f->list) {
 		if(f == m) {
 			*l = m->list;
 			break;
@@ -596,10 +592,7 @@
 	}
 	m->list = mntalloc.mntfree;
 	mntalloc.mntfree = m;
-	q = m->q;
 	unlock(&mntalloc);
-
-	qfree(q);
 }
 
 static void
@@ -614,14 +607,14 @@
 	mntclunk(c, Tremove);
 }
 
-static s32
-mntwstat(Chan *c, uchar *dp, s32 n)
+static int
+mntwstat(Chan *c, uchar *dp, int n)
 {
 	Mnt *m;
 	Mntrpc *r;
 
 	m = mntchk(c);
-	r = mntralloc(c, m->msize);
+	r = mntralloc(c);
 	if(waserror()) {
 		mntfree(r);
 		nexterror();
@@ -633,6 +626,11 @@
 	mountrpc(m, r);
 	poperror();
 	mntfree(r);
+
+	if(c->flag & CCACHE)
+	if(GBIT64(&dp[STATFIXLEN-4*BIT16SZ-BIT64SZ]) != ~0ULL)
+		ctrunc(c);
+
 	return n;
 }
 
@@ -640,32 +638,11 @@
 mntread(Chan *c, void *buf, s32 n, s64 off)
 {
 	uchar *p, *e;
-	int nc, cache, isdir, dirlen;
+	int dirlen;
 
-	isdir = 0;
-	cache = c->flag & CCACHE;
-	if(c->qid.type & QTDIR) {
-		cache = 0;
-		isdir = 1;
-	}
-
 	p = buf;
-	if(cache) {
-		nc = cread(c, buf, n, off);
-		if(nc > 0) {
-			n -= nc;
-			if(n == 0)
-				return nc;
-			p += nc;
-			off += nc;
-		}
-		n = mntrdwr(Tread, c, p, n, off);
-		cupdate(c, p, n, off);
-		return n + nc;
-	}
-
-	n = mntrdwr(Tread, c, buf, n, off);
-	if(isdir) {
+	n = mntrdwr(Tread, c, p, n, off);
+	if(c->qid.type & QTDIR) {
 		for(e = &p[n]; p+BIT16SZ < e; p += dirlen){
 			dirlen = BIT16SZ+GBIT16(p);
 			if(p+dirlen > e)
@@ -685,23 +662,69 @@
 	return mntrdwr(Twrite, c, buf, n, off);
 }
 
-s32
-mntrdwr(int type, Chan *c, void *buf, s32 n, s64 off)
+static void
+mntcache(Mntrpc *r)	/* feed the data of a completed Rread/Rwrite into the cache of r->c; no-op unless cachedchan(r->c) */
 {
+	ulong n, m;
+	vlong off;
+	Block *b;
+	Chan *c;
+
+	c = r->c;
+	if(!cachedchan(c))	/* CCACHE flag clear or no c->mcp */
+		return;
+	off = r->request.offset;
+	switch(r->reply.type){
+	case Rread:
+		m = r->reply.count;
+		if(m > r->request.count)	/* never use more than was asked for */
+			m = r->request.count;
+		for(b = r->b; m > 0 && b != nil; m -= n, b = b->next) {	/* walk the reply block list */
+			n = BLEN(b);
+			if(m < n)
+				n = m;
+			cupdate(c, b->rp, n, off);
+			off += n;
+		}
+		break;
+	case Rwrite:
+		b = r->w;	/* copy of the Twrite message saved by mountio for cached chans */
+		if(convM2S(b->rp, BLEN(b), &r->request) == 0)	/* re-decode the saved message into r->request; presumably so request.data refers to the copy */
+			panic("convM2S");
+		m = r->reply.count;
+		if(m > r->request.count)	/* count only what the request carried */
+			m = r->request.count;
+		cwrite(c, (uchar*)r->request.data, m, off);
+		break;
+	}
+}
+
+static long
+mntrdwr(int type, Chan *c, void *buf, long n, vlong off)
+{
 	Mnt *m;
  	Mntrpc *r;
 	char *uba;
-	int cache;
 	ulong cnt, nr, nreq;
 
 	m = mntchk(c);
 	uba = buf;
 	cnt = 0;
-	cache = c->flag & CCACHE;
-	if(c->qid.type & QTDIR)
-		cache = 0;
+
 	for(;;) {
-		r = mntralloc(c, m->msize);
+		nreq = n;
+		if(nreq > c->iounit)
+			nreq = c->iounit;
+
+		if(type == Tread && cachedchan(c)) {
+			nr = cread(c, (uchar*)uba, nreq, off);
+			if(nr > 0) {
+				nreq = nr;
+				goto Next;
+			}
+		}
+
+		r = mntralloc(c);
 		if(waserror()) {
 			mntfree(r);
 			nexterror();
@@ -710,34 +733,252 @@
 		r->request.fid = c->fid;
 		r->request.offset = off;
 		r->request.data = uba;
-		nr = n;
-		if(nr > m->msize-IOHDRSZ)
-			nr = m->msize-IOHDRSZ;
-		r->request.count = nr;
+		r->request.count = nreq;
 		mountrpc(m, r);
-		nreq = r->request.count;
+		mntcache(r);
 		nr = r->reply.count;
 		if(nr > nreq)
 			nr = nreq;
-
 		if(type == Tread)
 			nr = readblist(r->b, (uchar*)uba, nr, 0);
-		else if(cache)
-			cwrite(c, (uchar*)uba, nr, off);
-
-		poperror();
 		mntfree(r);
+		poperror();
+
+	Next:
 		off += nr;
 		uba += nr;
 		cnt += nr;
 		n -= nr;
-		if(nr != nreq || n == 0 || up->killed)
+		if(nr != nreq || n == 0 || up->nnote)
 			break;
 	}
 	return cnt;
 }
 
+static int
+mntprocwork(void *a)	/* tsleep condition used by mntproc: true once a function is queued in p->f */
+{
+	Mntproc *p = a;
+	return p->f != nil;
+}
+
+static void
+mntproc(void *a)	/* kproc body (started by mntdefer): run the work queued on Mntproc slot a */
+{
+	Mntproc *p = a;
+	Chan *c;
+	Mnt *m;
+
+	while(waserror())	/* after an error, re-arm the handler and carry on below */
+		;
+
+	m = p->m;
+	for(;;){
+		tsleep(p, mntprocwork, p, 500);	/* wait for work or the timeout (500, presumably ms) */
+
+		lock(m);
+		if(p->f == nil){
+			p->m = nil;	/* slot has no proc now; mntdefer starts a new one when p->m is nil */
+			unlock(m);
+			pexit("no work", 1);
+		}
+		c = p->r->c;	/* mntdefer did incref() on this chan */
+		unlock(m);
+
+		(*p->f)(p->r, p->a);
+
+		lock(m);
+		p->r = nil;
+		p->a = nil;
+		p->f = nil;	/* f == nil marks the slot free for mntdefer */
+		unlock(m);
+
+		cclose(c);	/* release the reference taken in mntdefer */
+	}
+}
+
+static int
+mntdefer(void (*f)(Mntrpc*, void*), Mntrpc *r, void *a)	/* queue f(r, a) on a free mntproc slot; returns 1 if queued, 0 if all slots are busy */
+{
+	Mntproc *p;
+	Mnt *m;
+	int i;
+
+	m = mntchk(r->c);
+	lock(m);
+	for(i = 0; i < nelem(m->defered); i++){
+		p = &m->defered[i];
+		if(p->f != nil)	/* slot already has work */
+			continue;
+
+		incref(r->c);	/* keep the chan referenced while deferred; mntproc ccloses it afterwards */
+		r->m = m;
+		p->r = r;
+		p->a = a;
+		p->f = f;
+
+		if(p->m == nil){	/* no kproc is serving this slot: start one */
+			p->m = m;
+			unlock(m);
+			kproc("mntproc", mntproc, p, 0);
+		} else {	/* a kproc is already attached: wake it from tsleep */
+			unlock(m);
+			wakeup(p);
+		}
+		return 1;
+	}
+	unlock(m);
+	return 0;
+}
+
+static void
+rahproc(Mntrpc *r, void *a)	/* deferred via mntdefer from mntrahread: run read-ahead rpc r, then signal rah */
+{
+	Mntrah *rah = a;
+
+	if(!waserror()){	/* an error from mountrpc is caught here and not propagated */
+		mountrpc(r->m, r);
+		poperror();
+	}
+	r->done = 2;	/* 2 is the value rahdone() tests for; set on success and on error alike */
+	wakeup(rah);
+}
+
+static int
+rahdone(void *v)	/* sleep condition: true once rahproc has set r->done to 2 */
+{
+	Mntrpc *r = v;
+	return r->done == 2;
+}
+
+static Mntrpc*
+rahfindrpc(Mntrah *rah, vlong off)	/* return the rpc in rah->r whose requested range contains off, or nil */
+{
+	Mntrpc *r;
+	int i, n;
+	vlong o;
+
+	for(i=0; i<nelem(rah->r); i++){
+		if((r = rah->r[i]) == nil)	/* empty slot */
+			continue;
+		n = r->request.count;
+		o = r->request.offset;
+		if(off >= o && off < o+n)	/* off lies in [offset, offset+count) of the request */
+			return r;
+	}
+	return nil;
+}
+
 void
+mntrahinit(Mntrah *rah)	/* wait for and free every rpc held in rah->r, then zero i, off and seq */
+{
+	Mntrpc *r;
+	int i;
+
+	while(waserror())	/* on error, re-arm and start the sweep below again */
+		;
+
+	for(i=0; i<nelem(rah->r); i++){
+		if((r = rah->r[i]) != nil){
+			while(!rahdone(r))	/* rahproc wakes rah after setting r->done to 2 */
+				sleep(rah, rahdone, r);
+			rah->r[i] = nil;
+			mntfree(r);
+		}
+	}
+	rah->i = 0;
+
+	rah->off = 0;
+	rah->seq = 0;
+
+	poperror();
+}
+
+long
+mntrahread(Mntrah *rah, Chan *c, uchar *buf, long len, vlong off)	/* serve a read of len bytes at off from read-ahead rpcs, scheduling more when reads are sequential; returns bytes copied into buf (0 if none) */
+{
+	Mntrpc *r, **rr;
+	vlong o, w, e;
+	long n, tot;
+
+	if(len <= 0)
+		return 0;
+	if(off != rah->off){	/* not continuing where the previous call left off */
+		rah->off = off;
+		if(rahfindrpc(rah, off) == nil)	/* and no held rpc covers off: restart the sequential count */
+			rah->seq = 0;
+	}
+	rah->off += len;
+	rah->seq += len;
+	if(rah->seq >= 2*c->iounit){	/* enough sequential bytes seen: schedule read-ahead */
+		w = (off / c->iounit) * c->iounit;	/* window start: off rounded down to a multiple of iounit */
+		e = w + rah->seq;	/* window end */
+		for(o = w; o < e; o += c->iounit){
+			if(rahfindrpc(rah, o) != nil)	/* already requested */
+				continue;
+
+			rr = &rah->r[rah->i % nelem(rah->r)];	/* slots are reused in rotation */
+			if((r = *rr) != nil){
+				if(!rahdone(r) || (r->request.offset >= w && r->request.offset < e))	/* occupant unfinished or still inside the window: stop */
+					break;
+				*rr = nil;
+				mntfree(r);
+			}
+
+			r = mntralloc(c);
+			r->request.type = Tread;
+			r->request.fid = c->fid;
+			r->request.offset = o;
+			r->request.count = c->iounit;
+			if(!mntdefer(rahproc, r, rah)){	/* every mntproc slot is busy */
+				mntfree(r);
+				break;
+			}
+			*rr = r;
+			rah->i++;
+		}
+	}
+
+	tot = 0;
+	while(len > 0 && (r = rahfindrpc(rah, off)) != nil){	/* copy out of rpcs that cover off */
+		while(!rahdone(r))
+			sleep(rah, rahdone, r);
+
+		switch(r->reply.type){	/* only Rread yields data; the other cases call error() */
+		default:
+			error(Emountrpc);
+		case Rflush:
+			error(Eintr);
+		case Rerror:
+			error(r->reply.ename);
+		case Rread:
+			break;
+		}
+		mntcache(r);
+		n = r->request.count;
+		o = r->request.offset;
+		if(r->reply.count < n)
+			n = r->reply.count;
+		n -= (off - o);	/* bytes of this reply at or beyond off */
+		if(n <= 0)
+			break;
+		if(len < n)
+			n = len;
+		n = readblist(r->b, buf, n, off - o);
+		buf += n;
+		off += n;
+		tot += n;
+		len -= n;
+	}
+	if(tot > 0){	/* take back the part counted above but not delivered (len is what remains) */
+		rah->off -= len;
+		rah->seq -= len;
+	}
+
+	return tot;
+}
+
+static void
 mountrpc(Mnt *m, Mntrpc *r)
 {
 	int t;
@@ -764,22 +1005,32 @@
 	}
 }
 
-void
+static void
 mountio(Mnt *m, Mntrpc *r)
 {
+	Block *b;
 	int n;
 
 	while(waserror()) {
 		if(m->rip == up)
 			mntgate(m);
-		if(strcmp(up->env->errstr, Eintr) != 0){
-			mntflushfree(m, r);
+		if(strcmp(up->env->errstr, Eintr) != 0 || waserror()){
+			r = mntflushfree(m, r);
+			switch(r->request.type){
+			case Tremove:
+			case Tclunk:
+				/* botch, abandon fid */ 
+				if(strcmp(up->env->errstr, Ehungup) != 0)
+					r->c->fid = 0;
+			}
 			nexterror();
 		}
-		r = mntflushalloc(r, m->msize);
+		r = mntflushalloc(r);
+		poperror();
 	}
 
 	lock(m);
+	r->z = &up->sleep;
 	r->m = m;
 	r->list = m->queue;
 	m->queue = r;
@@ -786,24 +1037,32 @@
 	unlock(m);
 
 	/* Transmit a file system rpc */
-	if(m->msize == 0)
-		panic("msize");
-	n = convS2M(&r->request, r->rpc, m->msize);
-	if(n < 0)
-		panic("bad message type in mountio");
-	if(devtab[m->c->type]->write(m->c, r->rpc, n, 0) != n)
+	n = sizeS2M(&r->request);
+	b = allocb(n);
+	if(waserror()){
+		freeb(b);
+		nexterror();
+	}
+	n = convS2M(&r->request, b->wp, n);
+	if(n <= 0 || n > m->msize) {
+		print("mountio: proc %s %lud: convS2M returned %d for tag %d fid %d T%d\n",
+			up->text, up->pid, n, r->request.tag, r->request.fid, r->request.type);
 		error(Emountrpc);
-/*	r->stime = fastticks(nil); */
-	r->reqlen = n;
+	}
+	b->wp += n;
+	if(r->request.type == Twrite && cachedchan(r->c))
+		r->w = copyblock(b, n);
+	poperror();
+	devtab[m->c->type]->bwrite(m->c, b, 0);
 
 	/* Gate readers onto the mount point one at a time */
 	for(;;) {
 		lock(m);
-		if(m->rip == 0)
+		if(m->rip == nil)
 			break;
 		unlock(m);
-		sleep(&r->r, rpcattn, r);
-		if(r->done){
+		sleep(r->z, rpcattn, r);
+		if(r->done) {
 			poperror();
 			mntflushfree(m, r);
 			return;
@@ -828,18 +1087,13 @@
 
 	while(qlen(m->q) < len){
 		b = devtab[m->c->type]->bread(m->c, m->msize, 0);
-		if(b == nil)
+		if(b == nil || qaddlist(m->q, b) == 0)
 			return -1;
-		if(blocklen(b) == 0){
-			freeblist(b);
-			return -1;
-		}
-		qaddlist(m->q, b);
 	}
 	return 0;
 }
 
-int
+static int
 mntrpcread(Mnt *m, Mntrpc *r)
 {
 	int i, t, len, hlen;
@@ -911,55 +1165,57 @@
 	return 0;
 }
 
-void
+static void
 mntgate(Mnt *m)
 {
 	Mntrpc *q;
 
 	lock(m);
-	m->rip = 0;
-	for(q = m->queue; q; q = q->list) {
+	m->rip = nil;
+	for(q = m->queue; q != nil; q = q->list) {
 		if(q->done == 0)
-		if(wakeup(&q->r))
+		if(wakeup(q->z))
 			break;
 	}
 	unlock(m);
 }
 
-void
+static void
 mountmux(Mnt *m, Mntrpc *r)
 {
 	Mntrpc **l, *q;
+	Rendez *z;
 
 	lock(m);
 	l = &m->queue;
-	for(q = *l; q; q = q->list) {
+	for(q = *l; q != nil; q = q->list) {
 		/* look for a reply to a message */
 		if(q->request.tag == r->reply.tag) {
 			*l = q->list;
-			if(q != r) {
-				/*
-				 * Completed someone else.
-				 * Trade pointers to receive buffer.
-				 */
-				q->reply = r->reply;
-				q->b = r->b;
-				r->b = nil;
+			if(q == r) {
+				q->done = 1;
+				unlock(m);
+				return;
 			}
+			/*
+			 * Completed someone else.
+			 * Trade pointers to receive buffer.
+			 */
+			q->reply = r->reply;
+			q->b = r->b;
+			r->b = nil;
+			z = q->z;
+			coherence();
 			q->done = 1;
+			wakeup(z);
 			unlock(m);
-			if(mntstats != nil)
-				(*mntstats)(q->request.type,
-					m->c, q->stime,
-					q->reqlen + r->replen);
-			if(q != r)
-				wakeup(&q->r);
 			return;
 		}
 		l = &q->list;
 	}
 	unlock(m);
-	print("unexpected reply tag %ud; type %d\n", r->reply.tag, r->reply.type);
+	print("mnt: unexpected reply from %s tag %ud; type %d\n",
+		chanpath(m->c), r->reply.tag, r->reply.type);
 }
 
 /*
@@ -966,13 +1222,12 @@
  * Create a new flush request and chain the previous
  * requests from it
  */
-Mntrpc*
-mntflushalloc(Mntrpc *r, ulong iounit)
+static Mntrpc*
+mntflushalloc(Mntrpc *r)
 {
 	Mntrpc *fr;
 
-	fr = mntralloc(0, iounit);
-
+	fr = mntralloc(r->c);
 	fr->request.type = Tflush;
 	if(r->request.type == Tflush)
 		fr->request.oldtag = r->request.oldtag;
@@ -988,23 +1243,25 @@
  *  flush and the original message from the unanswered
  *  request queue.  Mark the original message as done
  *  and if it hasn't been answered set the reply to to
- *  Rflush.
+ *  Rflush. Return the original rpc.
  */
-void
+static Mntrpc*
 mntflushfree(Mnt *m, Mntrpc *r)
 {
 	Mntrpc *fr;
 
-	while(r){
+	while(r != nil){
 		fr = r->flushed;
 		if(!r->done){
 			r->reply.type = Rflush;
 			mntqrm(m, r);
 		}
-		if(fr)
-			mntfree(r);
+		if(fr == nil)
+			break;
+		mntfree(r);
 		r = fr;
 	}
+	return r;
 }
 
 static int
@@ -1011,19 +1268,18 @@
 alloctag(void)
 {
 	int i, j;
-	ulong v;
+	u32int v;
 
 	for(i = 0; i < NMASK; i++){
 		v = mntalloc.tagmask[i];
-		if(v == ~0UL)
+		if(v == -1)
 			continue;
-		for(j = 0; j < 1<<TAGSHIFT; j++)
-			if((v & (1<<j)) == 0){
-				mntalloc.tagmask[i] |= 1<<j;
-				return (i<<TAGSHIFT) + j;
-			}
+		for(j = 0; (v & 1) != 0; j++)
+			v >>= 1;
+		mntalloc.tagmask[i] |= 1<<j;
+		return i<<TAGSHIFT | j;
 	}
-	/* panic("no devmnt tags left"); */
+	panic("no friggin tags left");
 	return NOTAG;
 }
 
@@ -1033,51 +1289,27 @@
 	mntalloc.tagmask[t>>TAGSHIFT] &= ~(1<<(t&TAGMASK));
 }
 
-Mntrpc*
-mntralloc(Chan *c, ulong msize)
+static Mntrpc*
+mntralloc(Chan *c)
 {
 	Mntrpc *new;
 
-	lock(&mntalloc);
-	new = mntalloc.rpcfree;
-	if(new == nil){
+	if(mntalloc.nrpcfree == 0) {
+	Alloc:
 		new = malloc(sizeof(Mntrpc));
-		if(new == nil) {
-			unlock(&mntalloc);
+		if(new == nil)
 			exhausted("mount rpc header");
-		}
-		/*
-		 * The header is split from the data buffer as
-		 * mountmux may swap the buffer with another header.
-		 */
-		new->rpc = mallocz(msize, 0);
-		if(new->rpc == nil){
-			free(new);
-			unlock(&mntalloc);
-			exhausted("mount rpc buffer");
-		}
-		new->rpclen = msize;
+		lock(&mntalloc);
 		new->request.tag = alloctag();
-		if(new->request.tag == NOTAG){
-			free(new);
+	} else {
+		lock(&mntalloc);
+		new = mntalloc.rpcfree;
+		if(new == nil) {
 			unlock(&mntalloc);
-			exhausted("rpc tags");
+			goto Alloc;
 		}
-	}
-	else {
 		mntalloc.rpcfree = new->list;
 		mntalloc.nrpcfree--;
-		if(new->rpclen < msize){
-			free(new->rpc);
-			new->rpc = mallocz(msize, 0);
-			if(new->rpc == nil){
-				free(new);
-				mntalloc.nrpcused--;
-				unlock(&mntalloc);
-				exhausted("mount rpc buffer");
-			}
-			new->rpclen = msize;
-		}
 	}
 	mntalloc.nrpcused++;
 	unlock(&mntalloc);
@@ -1085,30 +1317,30 @@
 	new->done = 0;
 	new->flushed = nil;
 	new->b = nil;
+	new->w = nil;
 	return new;
 }
 
-void
+static void
 mntfree(Mntrpc *r)
 {
-	if(r->b != nil)
-		freeblist(r->b);
+	freeb(r->w);
+	freeblist(r->b);
 	lock(&mntalloc);
-	if(mntalloc.nrpcfree >= 10){
-		free(r->rpc);
-		freetag(r->request.tag);
-		free(r);
-	}
-	else{
+	mntalloc.nrpcused--;
+	if(mntalloc.nrpcfree < 32) {
 		r->list = mntalloc.rpcfree;
 		mntalloc.rpcfree = r;
 		mntalloc.nrpcfree++;
+		unlock(&mntalloc);
+		return;
 	}
-	mntalloc.nrpcused--;
+	freetag(r->request.tag);
 	unlock(&mntalloc);
+	free(r);
 }
 
-void
+static void
 mntqrm(Mnt *m, Mntrpc *r)
 {
 	Mntrpc **l, *f;
@@ -1117,7 +1349,7 @@
 	r->done = 1;
 
 	l = &m->queue;
-	for(f = *l; f; f = f->list) {
+	for(f = *l; f != nil; f = f->list) {
 		if(f == r) {
 			*l = r->list;
 			break;
@@ -1127,23 +1359,21 @@
 	unlock(m);
 }
 
-Mnt*
+static Mnt*
 mntchk(Chan *c)
 {
 	Mnt *m;
 
 	/* This routine is mostly vestiges of prior lives; now it's just sanity checking */
-
 	if(c->mchan == nil)
-		panic("mntchk 1: nil mchan c %s\n", chanpath(c));
+		panic("mntchk 1: nil mchan c %s", chanpath(c));
 
 	m = c->mchan->mux;
-
 	if(m == nil)
 		print("mntchk 2: nil mux c %s c->mchan %s \n", chanpath(c), chanpath(c->mchan));
 
 	/*
-	 * Was it closed and reused (was error(Eshutdown); now, it can't happen)
+	 * Was it closed and reused (was error(Eshutdown); now, it cannot happen)
 	 */
 	if(m->id == 0 || m->id >= c->dev)
 		panic("mntchk 3: can't happen");
@@ -1156,7 +1386,7 @@
  * reflect local values.  These entries are known to be
  * the first two in the Dir encoding after the count.
  */
-void
+static void
 mntdirfix(uchar *dirbuf, Chan *c)
 {
 	uint r;
@@ -1168,13 +1398,13 @@
 	PBIT32(dirbuf, c->dev);
 }
 
-int
+static int
 rpcattn(void *v)
 {
 	Mntrpc *r;
 
 	r = v;
-	return r->done || r->m->rip == 0;
+	return r->done || r->m->rip == nil;
 }
 
 Dev mntdevtab = {
@@ -1184,7 +1414,7 @@
 	mntreset,
 	devinit,
 	devshutdown,
-	mntattach,
+	noattach,
 	mntwalk,
 	mntstat,
 	mntopen,
--- a/os/port/devprog.c
+++ b/os/port/devprog.c
@@ -487,7 +487,7 @@
 		&"r w rw"[(c->mode&3)<<1],
 		devtab[c->type]->dc, c->dev,
 		c->qid.path, w, c->qid.vers, c->qid.type,
-		c->iounit, c->offset, c->path->s);
+		c->iounit, c->offset, chanpath(c));
 	return n;
 }
 
@@ -499,7 +499,7 @@
 	int n, i, w, ww;
 
 	f = o->fgrp;	/* f is not locked because we've acquired */
-	n = readstr(0, va, count, o->pgrp->dot->path->s);
+	n = readstr(0, va, count, chanpath(o->pgrp->dot));
 	n += snprint(va+n, count-n, "\n");
 	offset = progoffset(offset, va, &n);
 	/* compute width of qid.path */
@@ -887,19 +887,19 @@
 		mntscan(mw, o->pgrp);
 		if(mw->mh == 0) {
 			mw->cddone = 1;
-			i = snprint(a, n, "cd %s\n", o->pgrp->dot->path->s);
+			i = snprint(a, n, "cd %s\n", chanpath(o->pgrp->dot));
 			poperror();
 			release();
 			return i;
 		}
 		int2flag(mw->cm->mflag, flag);
-		if(strcmp(mw->cm->to->path->s, "#M") == 0){
+		if(strcmp(chanpath(mw->cm->to), "#M") == 0){
 			i = snprint(a, n, "mount %s %s %s %s\n", flag,
-				mw->cm->to->mchan->path->s,
-				mw->mh->from->path->s, mw->cm->spec? mw->cm->spec : "");
+				chanpath(mw->cm->to->mchan),
+				chanpath(mw->mh->from), mw->cm->spec? mw->cm->spec : "");
 		}else
 			i = snprint(a, n, "bind %s %s %s\n", flag,
-				mw->cm->to->path->s, mw->mh->from->path->s);
+				chanpath(mw->cm->to), chanpath(mw->mh->from));
 		poperror();
 		release();
 		return i;
--- a/os/port/error.h
+++ b/os/port/error.h
@@ -1,6 +1,7 @@
 extern char Enoerror[];		/* no error */
 extern char Emount[];		/* inconsistent mount */
 extern char Eunmount[];		/* not mounted */
+extern char Eismtpt[];		/* is a mount point */
 extern char Eunion[];		/* not in union */
 extern char Emountrpc[];	/* mount rpc error */
 extern char Eshutdown[];	/* mounted device shut down */
--- a/os/port/inferno.c
+++ b/os/port/inferno.c
@@ -265,7 +265,7 @@
 	n = f->n;
 	if(f->buf == (Array*)H || n < 0) {
 		*f->ret = 0;
-		return;		
+		return;
 	}
 	if(n > f->buf->len)
 		n = f->buf->len;
--- a/os/port/pgrp.c
+++ b/os/port/pgrp.c
@@ -12,10 +12,60 @@
 	Whinesecs = 10,		/* frequency of out-of-resources printing */
 };
 
-/* TODO code here is different from 9front. Need to understand why. */
-
 static Ref mountid;
 
+void
+dumpmount(char *s, Mount *m)
+{
+	if(m == nil)
+		return;
+
+	print("%smountid %d spec %s",
+			s, m->mountid, m->spec);
+	dumpchan("to", m->to);
+}
+
+void
+dumpmhead(char *s, Mhead *mh)
+{
+	Mount *m;
+
+	if(mh == nil)
+		return;
+
+	dumpchan("		from ", mh->from);
+	print("			to\n");
+	m = mh->mount;
+	dumpmount("			", m);
+	print("				next\n");
+	for(m = m->next; m != nil; m = m->next) {
+		dumpmount("			", m);
+	}
+}
+
+void
+dumppgrp(char *s, Pgrp *p)
+{
+	int i;
+	Mhead *mh;
+
+	if(p == nil)
+		return;
+
+	rlock(&p->ns);
+	print("%s%p:%3ud slash %s dot %s\n		mnthash\n",
+			s, p, p->pgrpid, chanpath(p->slash), chanpath(p->dot));
+	for(i = 0; i<MNTHASH; i++){
+		print("		i %d\n", i);
+		for(mh = p->mnthash[i]; mh != nil; mh = mh->hash){
+			rlock(&mh->lock);
+			dumpmhead("		", mh);
+			runlock(&mh->lock);
+		}
+	}
+	runlock(&p->ns);
+}
+
 Pgrp*
 newpgrp(void)
 {
@@ -48,43 +98,36 @@
 void
 closepgrp(Pgrp *p)
 {
-	Mhead **h, **e, *f, *next;
+	Mhead **h, **e, *f;
+	Mount *m;
 	
-	if(p == nil || decref(p) != 0)
+	if(p == nil || decref(p))
 		return;
 
-	wlock(&p->ns);
-	p->pgrpid = -1;
-
 	e = &p->mnthash[MNTHASH];
 	for(h = p->mnthash; h < e; h++) {
-		for(f = *h; f; f = next) {
+		while((f = *h) != nil){
+			*h = f->hash;
 			wlock(&f->lock);
-			cclose(f->from);
-			mountfree(f->mount);
+			m = f->mount;
 			f->mount = nil;
-			next = f->hash;
 			wunlock(&f->lock);
+			mountfree(m);
 			putmhead(f);
 		}
 	}
-	wunlock(&p->ns);
 	cclose(p->dot);
 	cclose(p->slash);
 	free(p);
 }
 
-void
+static void
 pgrpinsert(Mount **order, Mount *m)
 {
 	Mount *f;
 
-	m->order = 0;
-	if(*order == 0) {
-		*order = m;
-		return;
-	}
-	for(f = *order; f; f = f->order) {
+	m->order = nil;
+	for(f = *order; f != nil; f = f->order) {
 		if(m->mountid < f->mountid) {
 			m->order = f;
 			*order = m;
@@ -97,6 +140,8 @@
 
 /*
  * pgrpcpy MUST preserve the mountid allocation order of the parent group
+ * Hence, it uses Mount.order to build a mountid-sorted linked list of
+ * the mounts while copying them.
  */
 void
 pgrpcpy(Pgrp *to, Pgrp *from)
@@ -105,6 +150,8 @@
 	Mount *n, *m, **link, *order;
 	Mhead *f, **l, *mh;
 
+/*	print("pgrpcpy to->pgrpid %d from->pgrpid %d\n", to->pgrpid, from->pgrpid);
+	dumppgrp("	from	\n	", from); */
 	wlock(&to->ns);
 	rlock(&from->ns);
 	order = nil;
@@ -144,6 +191,7 @@
 
 	runlock(&from->ns);
 	wunlock(&to->ns);
+/*	dumppgrp("	to	\n	", to); */
 }
 
 /* not used by 9front. why? */
@@ -224,7 +272,7 @@
 	int i;
 	Chan *c;
 
-	if(f == nil || decref(f))
+	if(f == nil || decref(f) != 0)
 		return;
 
 	/*
--- a/os/port/portdat.h
+++ b/os/port/portdat.h
@@ -20,7 +20,9 @@
 typedef struct Mntcache Mntcache;
 typedef struct Mntparam Mntparam;
 typedef struct Mount	Mount;
+typedef struct Mntrah	Mntrah;
 typedef struct Mntrpc	Mntrpc;
+typedef struct Mntproc	Mntproc;
 typedef struct Mntwalk	Mntwalk;
 typedef struct Mnt	Mnt;
 typedef struct Mhead	Mhead;
@@ -62,6 +64,12 @@
 #include "fcall.h"
 #include <pool.h>
 
+/*
+ * sticking with inferno's definition of Ref
+ * as it keeps the incref() and decref() simple
+ * and also puts the proc on the fast path by the
+ * scheduler's priorities (PriLock)
+ */
 struct Ref
 {
 	Lock	l;
@@ -116,10 +124,10 @@
 
 struct QLock
 {
-	Lock	use;			/* to access Qlock structure */
-	Proc	*head;			/* next process waiting for object */
-	Proc	*tail;			/* last process waiting for object */
-	s32	locked;			/* flag */
+	Lock	use;	/* to access Qlock structure */
+	Proc	*head;	/* next process waiting for object */
+	Proc	*tail;	/* last process waiting for object */
+	s32	locked;		/* flag */
 };
 
 struct RWlock
@@ -203,8 +211,8 @@
 {
 	Ref;
 	Lock;
-	Chan*	next;			/* allocation */
-	Chan*	link;
+	Chan	*next;		/* allocation */
+	Chan	*link;
 	s64	offset;			/* in fd */
 	s64	devoffset;		/* in underlying device; see read */
 	u16	type;
@@ -214,25 +222,25 @@
 	Qid	qid;
 	s32	fid;			/* for devmnt */
 	u32	iounit;			/* chunk size for i/o; 0==default */
-	Mhead*	umh;			/* mount point that derived Chan; used in unionread */
-	Chan*	umc;			/* channel in union; held for union read */
-	QLock	umqlock;		/* serialize unionreads */
+	Mhead	*umh;		/* mount point that derived Chan; used in unionread */
+	Chan	*umc;		/* channel in union; held for union read */
+	QLock	umqlock;	/* serialize unionreads */
 	s32	uri;			/* union read index */
 	s32	dri;			/* devdirread index */
-	uchar*	dirrock;		/* directory entry rock for translations */
+	uchar	*dirrock;	/* directory entry rock for translations */
 	int	nrock;
 	int	mrock;
 	QLock	rockqlock;
 	int	ismtpt;
-	Mntcache*mcp;			/* Mount cache pointer */
-	Mnt*	mux;			/* Mnt for clients using me for messages */
+	Mntcache	*mcp;	/* Mount cache pointer */
+	Mnt	*mux;			/* Mnt for clients using me for messages */
 	union {
-		void*	aux;
+		void	*aux;
 		u32	mid;		/* for ns in devproc */
 	};
-	Chan*	mchan;			/* channel to mounted server */
+	Chan	*mchan;		/* channel to mounted server */
 	Qid	mqid;			/* qid of root of mount point */
-	Path*	path;
+	Path	*path;
 };
 
 struct Path
@@ -239,7 +247,7 @@
 {
 	Ref;
 	char	*s;
-	Chan	**mtpt;			/* mtpt history */
+	Chan	**mtpt;		/* mtpt history */
 	int	len;			/* strlen(s) */
 	int	alen;			/* allocated length of s */
 	int	mlen;			/* number of path elements */
@@ -249,7 +257,7 @@
 struct Dev
 {
 	s32	dc;
-	char*	name;
+	char	*name;
 
 	void	(*reset)(void);
 	void	(*init)(void);
@@ -296,16 +304,23 @@
 {
 	s32		cddone;
 	u32	id;
-	Mhead*	mh;
-	Mount*	cm;
+	Mhead	*mh;
+	Mount	*cm;
 };
 
+/*
+ * Mount.order is used by pgrpcpy() to build a temporary linked
+ * list sorted by mountid, so that the mountid allocation order
+ * of the source pgrp is preserved.
+ * An alternative would be to build an array of the copied mounts
+ * and qsort() it at the end, before allocating mountids.
+ */
 struct Mount
 {
 	u32	mountid;
-	Mount*	next;
-	Mount*	order;
-	Chan*	to;			/* channel replacing channel */
+	Mount	*next;
+	Mount	*order;
+	Chan	*to;			/* channel replacing channel */
 	s32	mflag;
 	char	*spec;
 };
@@ -314,11 +329,34 @@
 {
 	Ref;
 	RWlock	lock;
-	Chan*	from;			/* channel mounted upon */
-	Mount*	mount;			/* what's mounted upon it */
-	Mhead*	hash;			/* Hash chain */
+	Chan	*from;			/* channel mounted upon */
+	Mount	*mount;			/* what's mounted upon it */
+	Mhead	*hash;			/* Hash chain */
 };
 
+struct Mntrah
+{
+	Rendez;
+
+	ulong	vers;
+
+	vlong	off;
+	vlong	seq;
+
+	uint	i;
+	Mntrpc	*r[8];
+};
+
+struct Mntproc
+{
+	Rendez;
+
+	Mnt	*m;
+	Mntrpc	*r;
+	void	*a;
+	void	(*f)(Mntrpc*, void*);
+};
+
 struct Mnt
 {
 	Lock;
@@ -326,6 +364,7 @@
 	Chan	*c;		/* Channel to file service */
 	Proc	*rip;		/* Reader in progress */
 	Mntrpc	*queue;		/* Queue of pending requests on this channel */
+	Mntproc	defered[8];	/* Worker processes for deferred RPCs (read ahead) */
 	u32	id;		/* Multiplexer id for channel check */
 	Mnt	*list;		/* Free list */
 	s32	flags;		/* cache */
@@ -367,6 +406,10 @@
 	s32	flags;
 };
 
+/*
+ * All processes in a process group share the namespace.
+ * Hence, this can also be called the namespace group.
+ */
 struct Pgrp
 {
 	Ref;				/* also used as a lock when mounting */
@@ -383,6 +426,11 @@
 	s32	pin;
 };
 
+/*
+ * Array of Chan* (Every file is a Chan* in the server).
+ * fd (file descriptor) is the file's index in that array.
+ * fdtochan(fd) => Chan*
+ */
 struct Fgrp
 {
 	Ref;
--- a/os/port/portfns.h
+++ b/os/port/portfns.h
@@ -78,11 +78,14 @@
 void		drawactive(int);
 void		drawcmap(void);
 void		dumpaproc(Proc*);
+void		dumpchan(char*, Chan*);
+void		dumppgrp(char *s, Pgrp *p);
 void		dumpstack(void);
 Fgrp*		dupfgrp(Fgrp*);
 void		egrpcpy(Egrp*, Egrp*);
 int		emptystr(char*);
 int		eqchan(Chan*, Chan*, int);
+int		eqchantdqid(Chan*, int, int, Qid, int);
 int		eqqid(Qid, Qid);
 void		eqlock(QLock*);
 void		error(char*);
@@ -95,7 +98,7 @@
 int		export(int, char*, int);
 uvlong		fastticks(uvlong*);
 uvlong		fastticks2ns(uvlong);
-void		fdclose(Fgrp*, int);
+void		fdclose(Fgrp*, int, int);
 Chan*		fdtochan(Fgrp*, int, int, int, int);
 int		findmount(Chan**, Mhead**, int, int, Qid);
 void		forceclosefgrp(void);
@@ -171,6 +174,7 @@
 uvlong		mk64fract(uvlong, uvlong);
 void		mkqid(Qid*, vlong, ulong, int);
 void		modinit(void);
+Chan*		mntattach(Chan*, Chan*, char*, int);
 Chan*		mntauth(Chan*, char*);
 int		mntversion(Chan*, char*, int, int);
 void		mountfree(Mount*);
@@ -181,6 +185,7 @@
 void		mul64fract(uvlong*, uvlong, uvlong);
 void		muxclose(Mnt*);
 Chan*		namec(char*, int, int, ulong);
+void		nameerror(char*, char*);
 Chan*		newchan(void);
 Egrp*		newegrp(void);
 Fgrp*		newfgrp(Fgrp*);
--- a/os/port/proc.c
+++ b/os/port/proc.c
@@ -896,6 +896,7 @@
 			continue;
 
 		dumpaproc(p);
+		dumppgrp("	", p->env->pgrp);
 	}
 }
 
--- a/os/port/sysfile.c
+++ b/os/port/sysfile.c
@@ -5,6 +5,8 @@
 #include	"fns.h"
 #include	"../port/error.h"
 
+#define DBG if(1)print
+
 static void
 unlockfgrp(Fgrp *f)
 {
@@ -138,10 +140,10 @@
 {
 	Chan *c;
 
-	c = 0;
+	c = nil;
 
 	lock(f);
-	if(fd<0 || f->maxfd<fd || (c = f->fd[fd])==0) {
+	if(fd<0 || f->maxfd<fd || (c = f->fd[fd])==nil) {
 		unlock(f);
 		error(Ebadfd);
 	}
@@ -209,24 +211,21 @@
 }
 
 void
-fdclose(Fgrp *f, int fd)
+fdclose(Fgrp *f, int fd, int flag)
 {
-	int i;
 	Chan *c;
 
 	lock(f);
-	c = f->fd[fd];
-	if(c == 0){
-		/* can happen for users with shared fd tables */
+	c = fd <= f->maxfd ? f->fd[fd] : nil;
+	if(c == nil || (flag != 0 && ((f->flag[fd]|c->flag)&flag) == 0)){
 		unlock(f);
 		return;
 	}
-	f->fd[fd] = 0;
-	if(fd == f->maxfd)
-		for(i=fd; --i>=0 && f->fd[i]==0; )
-			f->maxfd = i;
-	if(fd < f->minfd)
-		f->minfd = fd;
+	f->fd[fd] = nil;
+	if(fd == f->maxfd){
+		while(fd > 0 && f->fd[fd] == nil)
+			f->maxfd = --fd;
+	}
 	unlock(f);
 	cclose(c);
 }
@@ -260,7 +259,7 @@
 	 * fdclose takes care of processes racing through here.
 	 */
 	fdtochan(f, fd, -1, 0, 0);
-	fdclose(f, fd);
+	fdclose(f, fd, 0);
 	poperror();
 	return 0;
 }
@@ -448,84 +447,390 @@
 }
 
 int
-kpipe(int fd[2])
+kpipe(int ufd[2])
 {
-	Dev *d;
-	Fgrp *f;
+	static char *datastr[] = {"data", "data1"};
+	int fd[2];
 	Chan *c[2];
-	static char *names[] = {"data", "data1"};
 
-	f = up->env->fgrp;
-
-	d = devtab[devno('|', 0)];
+	ufd[0] = ufd[1] = fd[0] = fd[1] = -1;
 	c[0] = namec("#|", Atodir, 0, 0);
-	c[1] = 0;
-	fd[0] = -1;
-	fd[1] = -1;
+	c[1] = nil;
 	if(waserror()) {
-		if(c[0] != 0)
+		if(c[0] != nil)
 			cclose(c[0]);
-		if(c[1] != 0)
+		if(c[1] != nil)
 			cclose(c[1]);
-		if(fd[0] >= 0)
-			f->fd[fd[0]]=0;
-		if(fd[1] >= 0)
-			f->fd[fd[1]]=0;
 		return -1;
 	}
 	c[1] = cclone(c[0]);
-	if(walk(&c[0], &names[0], 1, 1, nil) < 0)
+	if(walk(&c[0], datastr+0, 1, 1, nil) < 0)
 		error(Egreg);
-	if(walk(&c[1], &names[1], 1, 1, nil) < 0)
+	if(walk(&c[1], datastr+1, 1, 1, nil) < 0)
 		error(Egreg);
-	c[0] = d->open(c[0], ORDWR);
-	c[1] = d->open(c[1], ORDWR);
+	c[0] = devtab[c[0]->type]->open(c[0], ORDWR);
+	c[1] = devtab[c[1]->type]->open(c[1], ORDWR);
 	if(newfd2(fd, c) < 0)
-		error(Enofd);
+		error(Enofd); 
+	ufd[0] = fd[0];
+	ufd[1] = fd[1];
 	poperror();
 	return 0;
 }
 
-int
-kfwstat(int fd, uchar *buf, int n)
+static int
+dirfixed(uchar *p, uchar *e, Dir *d)
 {
-	Chan *c;
+	int len;
 
-	if(waserror())
+	len = GBIT16(p)+BIT16SZ;
+	if(p + len > e)
 		return -1;
 
-	validstat(buf, n);
-	c = fdtochan(up->env->fgrp, fd, -1, 1, 1);
-	if(waserror()) {
+	p += BIT16SZ;	/* ignore size */
+	d->type = devno(GBIT16(p), 1);
+	p += BIT16SZ;
+	d->dev = GBIT32(p);
+	p += BIT32SZ;
+	d->qid.type = GBIT8(p);
+	p += BIT8SZ;
+	d->qid.vers = GBIT32(p);
+	p += BIT32SZ;
+	d->qid.path = GBIT64(p);
+	p += BIT64SZ;
+	d->mode = GBIT32(p);
+	p += BIT32SZ;
+	d->atime = GBIT32(p);
+	p += BIT32SZ;
+	d->mtime = GBIT32(p);
+	p += BIT32SZ;
+	d->length = GBIT64(p);
+
+	return len;
+}
+
+static char*
+dirname(uchar *p, int *n)
+{
+	p += BIT16SZ+BIT16SZ+BIT32SZ+BIT8SZ+BIT32SZ+BIT64SZ
+		+ BIT32SZ+BIT32SZ+BIT32SZ+BIT64SZ;
+	*n = GBIT16(p);
+	return (char*)p+BIT16SZ;
+}
+
+static long
+dirsetname(char *name, int len, uchar *p, long n, long maxn)
+{
+	char *oname;
+	int olen;
+	long nn;
+
+	if(n == BIT16SZ)
+		return BIT16SZ;
+
+	oname = dirname(p, &olen);
+
+	nn = n+len-olen;
+	PBIT16(p, nn-BIT16SZ);
+	if(nn > maxn)
+		return BIT16SZ;
+
+	if(len != olen)
+		memmove(oname+len, oname+olen, p+n-(uchar*)(oname+olen));
+	PBIT16((uchar*)(oname-2), len);
+	memmove(oname, name, len);
+	return nn;
+}
+
+/*
+ * Mountfix might have caused the fixed results of the directory read
+ * to overflow the buffer.  Catch the overflow in c->dirrock.
+ */
+static void
+mountrock(Chan *c, uchar *p, uchar **pe)
+{
+	uchar *e, *r;
+	int len, n;
+
+	e = *pe;
+
+	/* find last directory entry */
+	for(;;){
+		len = BIT16SZ+GBIT16(p);
+		if(p+len >= e)
+			break;
+		p += len;
+	}
+
+	/* save it away */
+	qlock(&c->rockqlock);
+	if(c->nrock+len > c->mrock){
+		n = ROUND(c->nrock+len, 1024);
+		r = smalloc(n);
+		memmove(r, c->dirrock, c->nrock);
+		free(c->dirrock);
+		c->dirrock = r;
+		c->mrock = n;
+	}
+	memmove(c->dirrock+c->nrock, p, len);
+	c->nrock += len;
+	qunlock(&c->rockqlock);
+
+	/* drop it */
+	*pe = p;
+}
+
+/*
+ * Satisfy a directory read with the results saved in c->dirrock.
+ */
+static int
+mountrockread(Chan *c, uchar *op, s32 n, s32 *nn)
+{
+	long dirlen;
+	uchar *rp, *erp, *ep, *p;
+
+	/* common case */
+	if(c->nrock == 0)
+		return 0;
+
+	/* copy out what we can */
+	qlock(&c->rockqlock);
+	rp = c->dirrock;
+	erp = rp+c->nrock;
+	p = op;
+	ep = p+n;
+	while(rp+BIT16SZ <= erp){
+		dirlen = BIT16SZ+GBIT16(rp);
+		if(p+dirlen > ep)
+			break;
+		memmove(p, rp, dirlen);
+		p += dirlen;
+		rp += dirlen;
+	}
+
+	if(p == op){
+		qunlock(&c->rockqlock);
+		return 0;
+	}
+
+	/* shift the rest */
+	if(rp != erp)
+		memmove(c->dirrock, rp, erp-rp);
+	c->nrock = erp - rp;
+
+	*nn = p - op;
+	qunlock(&c->rockqlock);
+	return 1;
+}
+
+static void
+mountrewind(Chan *c)
+{
+	c->nrock = 0;
+}
+
+/*
+ * Rewrite the results of a directory read to reflect current 
+ * name space bindings and mounts.  Specifically, replace
+ * directory entries for bind and mount points with the results
+ * of statting what is mounted there.  Except leave the old names.
+ */
+static long
+mountfix(Chan *c, uchar *op, s32 n, s32 maxn)
+{
+	char *name;
+	int nbuf, nname;
+	Chan *nc;
+	Mhead *mh;
+	Mount *m;
+	uchar *p;
+	int dirlen, rest;
+	long l;
+	uchar *buf, *e;
+	Dir d;
+
+	p = op;
+	buf = nil;
+	nbuf = 0;
+	for(e=&p[n]; p+BIT16SZ<e; p+=dirlen){
+		dirlen = dirfixed(p, e, &d);
+		if(dirlen < 0)
+			break;
+		nc = nil;
+		mh = nil;
+		if(findmount(&nc, &mh, d.type, d.dev, d.qid)){
+			/*
+			 * If it's a union directory and the original is
+			 * in the union, don't rewrite anything.
+			 */
+			rlock(&mh->lock);
+			for(m = mh->mount; m != nil; m = m->next){
+				if(eqchantdqid(m->to, d.type, d.dev, d.qid, 1)){
+					runlock(&mh->lock);
+					goto Norewrite;
+				}
+			}
+			runlock(&mh->lock);
+
+			name = dirname(p, &nname);
+			/*
+			 * Do the stat but fix the name.  If it fails, leave old entry.
+			 * BUG: If it fails because there isn't room for the entry,
+			 * what can we do?  Nothing, really.  Might as well skip it.
+			 */
+			if(buf == nil){
+				nbuf = 4096;
+				buf = smalloc(nbuf);
+			}
+			if(waserror())
+				goto Norewrite;
+			l = devtab[nc->type]->stat(nc, buf, nbuf);
+			l = dirsetname(name, nname, buf, l, nbuf);
+			if(l == BIT16SZ)
+				error("dirsetname");
+			poperror();
+
+			/*
+			 * Shift data in buffer to accommodate new entry,
+			 * possibly overflowing into rock.
+			 */
+			rest = e - (p+dirlen);
+			if(l > dirlen){
+				while(p+l+rest > op+maxn){
+					mountrock(c, p, &e);
+					if(e == p){
+						dirlen = 0;
+						goto Norewrite;
+					}
+					rest = e - (p+dirlen);
+				}
+			}
+			if(l != dirlen){
+				memmove(p+l, p+dirlen, rest);
+				dirlen = l;
+				e = p+dirlen+rest;
+			}
+
+			/*
+			 * Rewrite directory entry.
+			 */
+			memmove(p, buf, l);
+
+		    Norewrite:
+			cclose(nc);
+			putmhead(mh);
+		}
+	}
+	if(buf != nil)
+		free(buf);
+
+	if(p != e)
+		error("oops in rockfix");
+
+	return e-op;
+}
+
+static u32
+wstat(Chan *c, uchar *d, int nd)
+{
+	u32 l;
+	int namelen;
+
+	if(waserror()){
 		cclose(c);
 		nexterror();
 	}
-	n = devtab[c->type]->wstat(c, buf, n);
+	if(c->ismtpt){
+		/*
+		 * Renaming mount points is disallowed to avoid surprises
+		 * (which should be renamed? the mount point or the mounted Chan?).
+		 */
+		dirname(d, &namelen);
+		if(namelen)
+			nameerror(chanpath(c), Eismtpt);
+	}
+	l = devtab[c->type]->wstat(c, d, nd);
 	poperror();
 	cclose(c);
+	return l;
+}
 
-	poperror();
-	return n;
+int
+kfwstat(int fd, uchar *buf, int n)
+{
+	Chan *c;
+
+	if(waserror())
+		return -1;
+
+	validstat(buf, n);
+	c = fdtochan(up->env->fgrp, fd, -1, 1, 1);
+	return (wstat(c, buf, n));
 }
 
-long
-bindmount(Chan *c, char *old, int flag, char *spec)
+static int
+bindmount(int ismount, int fd, int afd, char* arg0, char* arg1, int flag, char* spec)
 {
 	int ret;
-	Chan *c1;
+	Chan *c0, *c1, *ac, *bc;
 
-	if(flag>MMASK || (flag&MORDER) == (MBEFORE|MAFTER))
+	if((flag&~MMASK) || (flag&MORDER)==(MBEFORE|MAFTER))
 		error(Ebadarg);
 
-	c1 = namec(old, Amount, 0, 0);
+	if(ismount){
+		spec = validnamedup(spec, 1);
+		if(waserror()){
+			free(spec);
+			nexterror();
+		}
+
+		if(up->env->pgrp->noattach)
+			error(Enoattach);
+
+		ac = nil;
+		bc = fdtochan(up->env->fgrp, fd, ORDWR, 0, 1);
+		if(waserror()) {
+			if(ac != nil)
+				cclose(ac);
+			cclose(bc);
+			nexterror();
+		}
+
+		if(afd >= 0)
+			ac = fdtochan(up->env->fgrp, afd, ORDWR, 0, 1);
+
+		c0 = mntattach(bc, ac, spec, flag&MCACHE);
+		poperror();	/* ac bc */
+		if(ac != nil)
+			cclose(ac);
+		cclose(bc);
+	}else{
+		spec = nil;
+		c0 = namec(arg0, Abind, 0, 0);
+	}
+
 	if(waserror()){
+		cclose(c0);
+		nexterror();
+	}
+
+	c1 = namec(arg1, Amount, 0, 0);
+	if(waserror()){
 		cclose(c1);
 		nexterror();
 	}
-	ret = cmount(c, c1, flag, spec);
 
+	ret = cmount(c0, c1, flag, spec);
+
 	poperror();
 	cclose(c1);
+	poperror();
+	cclose(c0);
+	if(ismount){
+		fdclose(up->env->fgrp, fd, 0);
+		poperror();
+		free(spec);
+	}
 	return ret;
 }
 
@@ -532,77 +837,29 @@
 int
 kbind(char *new, char *old, int flags)
 {
-	long r;
-	Chan *c0;
-
-	if(waserror())
-		return -1;
-
-	c0 = namec(new, Abind, 0, 0);
-	if(waserror()) {
-		cclose(c0);
-		nexterror();
-	}
-	r = bindmount(c0, old, flags, "");
-	poperror();
-	cclose(c0);
-
-	poperror();
-	return r;
+	return bindmount(0, -1, -1, new, old, flags, nil);
 }
 
 int
 kmount(int fd, int afd, char *old, int flags, char *spec)
 {
-	long r;
-	volatile struct { Chan *c; } c0;
-	volatile struct { Chan *c; } bc;
-	volatile struct { Chan *c; } ac;
-	Mntparam mntparam;
-
-	ac.c = nil;
-	bc.c = nil;
-	c0.c = nil;
-	if(waserror()) {
-		cclose(ac.c);
-		cclose(bc.c);
-		cclose(c0.c);
-		return -1;
-	}
-	bc.c = fdtochan(up->env->fgrp, fd, ORDWR, 0, 1);
-	if(afd >= 0)
-		ac.c = fdtochan(up->env->fgrp, afd, ORDWR, 0, 1);
-	mntparam.chan = bc.c;
-	mntparam.authchan = ac.c;
-	mntparam.spec = spec;
-	mntparam.flags = flags;
-	c0.c = devtab[devno('M', 0)]->attach((char*)&mntparam);
-
-	r = bindmount(c0.c, old, flags, spec);
-	poperror();
-	cclose(ac.c);
-	cclose(bc.c);
-	cclose(c0.c);
-
-	return r;
+	return bindmount(1, fd, afd, nil, old, flags, spec);
 }
 
 int
-kunmount(char *old, char *new)
+kunmount(char *name, char *old)
 {
-	volatile struct { Chan *c; } cmount;
-	volatile struct { Chan *c; } cmounted;
+	Chan *cmount, *cmounted;
 
-	cmount.c = nil;
-	cmounted.c = nil;
+	cmounted = nil;
+	cmount = namec(old, Amount, 0, 0);
 	if(waserror()) {
-		cclose(cmount.c);
-		cclose(cmounted.c);
-		return -1;
+		cclose(cmount);
+		if(cmounted != nil)
+			cclose(cmounted);
+		nexterror();
 	}
-
-	cmount.c = namec(new, Amount, 0, 0);
-	if(old != nil && old[0] != '\0') {
+	if(name != nil) {
 		/*
 		 * This has to be namec(..., Aopen, ...) because
 		 * if arg[0] is something like /srv/cs or /fd/0,
@@ -609,13 +866,13 @@
 		 * opening it is the only way to get at the real
 		 * Chan underneath.
 		 */
-		cmounted.c = namec(old, Aopen, OREAD, 0);
+		cmounted = namec(name, Aopen, OREAD, 0);
 	}
-
-	cunmount(cmount.c, cmounted.c);
+	cunmount(cmount, cmounted);
 	poperror();
-	cclose(cmount.c);
-	cclose(cmounted.c);
+	cclose(cmount);
+	if(cmounted != nil)
+		cclose(cmounted);
 	return 0;
 }
 
@@ -679,7 +936,7 @@
 
 		/* Advance to next element */
 		c->uri++;
-		if(c->umc) {
+		if(c->umc != nil) {
 			cclose(c->umc);
 			c->umc = nil;
 		}
@@ -695,7 +952,7 @@
 {
 	qlock(&c->umqlock);
 	c->uri = 0;
-	if(c->umc){
+	if(c->umc != nil){
 		cclose(c->umc);
 		c->umc = nil;
 	}
@@ -702,69 +959,87 @@
 	qunlock(&c->umqlock);
 }
 
-static long
-rread(int fd, void *va, long n, vlong *offp)
+static s32
+rread(int fd, void *p, s32 n, s64 *offp)
 {
-	int dir;
+	s32 nn, nnn;
 	Chan *c;
-	vlong off;
+	s64 off;
 
-	if(waserror())
-		return -1;
-
 	c = fdtochan(up->env->fgrp, fd, OREAD, 1, 1);
-	if(waserror()) {
+
+	if(waserror()){
 		cclose(c);
 		nexterror();
 	}
 
-	if(n < 0)
-		error(Etoosmall);
+	/*
+	 * The offset is passed through on directories, normally.
+	 * Sysseek complains, but pread is used by servers like exportfs,
+	 * that shouldn't need to worry about this issue.
+	 *
+	 * Notice that c->devoffset is the offset that c's dev is seeing.
+	 * The number of bytes read on this fd (c->offset) may be different
+	 * due to rewritings in mountfix.
+	 */
+	if(offp == nil)	/* use and maintain channel's offset */
+		off = c->offset;
+	else
+		off = *offp;
+	if(off < 0)
+		error(Enegoff);
 
-	dir = c->qid.type & QTDIR;
-	if(dir && c->umh)
-		n = unionread(c, va, n);
-	else{
-		if(offp == nil){
-			lock(c);	/* lock for vlong assignment */
-			off = c->offset;
-			unlock(c);
-		}else
-			off = *offp;
-		if(off < 0)
-			error(Enegoff);
-		if(off == 0){
-			if(offp == nil){
-				lock(c);
-				c->offset = 0;
-				c->dri = 0;
-				unlock(c);
-			}
-			unionrewind(c);
+	if(off == 0){	/* rewind to the beginning of the directory */
+		if(offp == nil || (c->qid.type & QTDIR)){
+			c->offset = 0;
+			c->devoffset = 0;
 		}
-		n = devtab[c->type]->read(c, va, n, off);
+		mountrewind(c);
+		unionrewind(c);
+	}
+
+	if(c->qid.type & QTDIR){
+		if(mountrockread(c, p, n, &nn)){
+			/* do nothing: mountrockread filled buffer */
+		}else if(c->umh != nil)
+			nn = unionread(c, p, n);
+		else{
+			if(off != c->offset)
+				error(Edirseek);
+			nn = devtab[c->type]->read(c, p, n, c->devoffset);
+		}
+		nnn = mountfix(c, p, nn, n);
+	}else
+		nnn = nn = devtab[c->type]->read(c, p, n, off);
+
+	if(offp == nil || (c->qid.type & QTDIR)){
 		lock(c);
-		c->offset += n;
+		c->devoffset += nn;
+		c->offset += nnn;
 		unlock(c);
 	}
 
 	poperror();
 	cclose(c);
-
-	poperror();
-	return n;
+	return nnn;
 }
 
-long
-kread(int fd, void *va, long n)
+s32
+kread(int fd, void *va, s32 n)
 {
 	return rread(fd, va, n, nil);
 }
 
-long
-kpread(int fd, void *va, long n, vlong off)
+s32
+kpread(int fd, void *va, s32 n, s64 off)
 {
-	return rread(fd, va, n, &off);
+	s64 *offp;
+
+	if(off != ~0ULL)
+		offp = &off;
+	else
+		offp = nil;
+	return rread(fd, va, n, offp);
 }
 
 int
@@ -772,11 +1047,16 @@
 {
 	Chan *c;
 
-	if(waserror())
-		return -1;
-
 	c = namec(path, Aremove, 0, 0);
-	if(waserror()) {
+	/*
+	 * Removing mount points is disallowed to avoid surprises
+	 * (which should be removed: the mount point or the mounted Chan?).
+	 */
+	if(c->ismtpt){
+		cclose(c);
+		error(Eismtpt);
+	}
+	if(waserror()){
 		c->type = 0;	/* see below */
 		cclose(c);
 		nexterror();
@@ -789,40 +1069,35 @@
 	c->type = 0;
 	poperror();
 	cclose(c);
-
-	poperror();
 	return 0;
 }
 
-vlong
-kseek(int fd, vlong off, int whence)
+s64
+kseek(int fd, s64 o, int type)
 {
-	Dir *dir;
 	Chan *c;
+	uchar buf[sizeof(Dir)+100];
+	Dir dir;
+	int n;
+	s64 off;
 
-	if(waserror())
-		return -1;
-
 	c = fdtochan(up->env->fgrp, fd, -1, 1, 1);
-	if(waserror()) {
+	if(waserror()){
 		cclose(c);
 		nexterror();
 	}
-
-	if(devtab[c->type]->dc == '|')
+	if(devtab[c->type]->dc == L'|')
 		error(Eisstream);
 
-	switch(whence) {
+	off = 0;
+	switch(type){
 	case 0:
-		if(c->qid.type & QTDIR){
-			if(off != 0)
-				error(Eisdir);
-			unionrewind(c);
-		}else if(off < 0)
+		off = o;
+		if((c->qid.type & QTDIR) && off != 0)
+			error(Eisdir);
+		if(off < 0)
 			error(Enegoff);
-		lock(c);	/* lock for vlong assignment */
 		c->offset = off;
-		unlock(c);
 		break;
 
 	case 1:
@@ -829,7 +1104,7 @@
 		if(c->qid.type & QTDIR)
 			error(Eisdir);
 		lock(c);	/* lock for read/write update */
-		off += c->offset;
+		off = o + c->offset;
 		if(off < 0){
 			unlock(c);
 			error(Enegoff);
@@ -841,23 +1116,19 @@
 	case 2:
 		if(c->qid.type & QTDIR)
 			error(Eisdir);
-		dir = chandirstat(c);
-		if(dir == nil)
+		n = devtab[c->type]->stat(c, buf, sizeof buf);
+		if(convM2D(buf, n, &dir, nil) == 0)
 			error("internal error: stat error in seek");
-		off += dir->length;
-		free(dir);
+		off = dir.length + o;
 		if(off < 0)
 			error(Enegoff);
-		lock(c);	/* lock for read/write update */
 		c->offset = off;
-		unlock(c);
 		break;
 
 	default:
 		error(Ebadarg);
-		break;
 	}
-	poperror();
+	c->uri = 0;
 	c->dri = 0;
 	cclose(c);
 	poperror();
@@ -891,10 +1162,26 @@
 		validname(buf, 0);
 }
 
+static char*
+pathlast(Path *p)
+{
+	char *s;
+
+	if(p == nil)
+		return nil;
+	if(p->len == 0)
+		return nil;
+	s = strrchr(p->s, '/');
+	if(s != nil)
+		return s+1;
+	return p->s;
+}
+
 int
 kstat(char *path, uchar *buf, int n)
 {
 	Chan *c;
+	uint r;
 
 	if(waserror())
 		return -1;
@@ -904,35 +1191,42 @@
 		cclose(c);
 		nexterror();
 	}
-	devtab[c->type]->stat(c, buf, n);
+	r = devtab[c->type]->stat(c, buf, n);
+	path = pathlast(c->path);
+	if(path != nil)
+		r = dirsetname(path, strlen(path), buf, r, n);
 	poperror();
 	cclose(c);
 
 	poperror();
-	return 0;
+	return r;
 }
 
-static long
-rwrite(int fd, void *va, long n, vlong *offp)
+static s32
+rwrite(int fd, void *buf, s32 len, s64 *offp)
 {
 	Chan *c;
-	vlong off;
-	long m;
+	s32 m, n;
+	s64 off;
 
-	if(waserror())
-		return -1;
+	n = 0;
 	c = fdtochan(up->env->fgrp, fd, OWRITE, 1, 1);
 	if(waserror()) {
+		if(offp == nil){
+			lock(c);
+			c->offset -= n;
+			unlock(c);
+		}
 		cclose(c);
 		nexterror();
 	}
+
 	if(c->qid.type & QTDIR)
 		error(Eisdir);
 
-	if(n < 0)
-		error(Etoosmall);
+	n = len;
 
-	if(offp == nil){
+	if(offp == nil){	/* use and maintain channel's offset */
 		lock(c);
 		off = c->offset;
 		c->offset += n;
@@ -940,19 +1234,10 @@
 	}else
 		off = *offp;
 
-	if(waserror()){
-		if(offp == nil){
-			lock(c);
-			c->offset -= n;
-			unlock(c);
-		}
-		nexterror();
-	}
 	if(off < 0)
 		error(Enegoff);
-	m = devtab[c->type]->write(c, va, n, off);
-	poperror();
 
+	m = devtab[c->type]->write(c, buf, n, off);
 	if(offp == nil && m < n){
 		lock(c);
 		c->offset -= n - m;
@@ -961,21 +1246,25 @@
 
 	poperror();
 	cclose(c);
-
-	poperror();
-	return n;
+	return m;
 }
 
-long
-kwrite(int fd, void *va, long n)
+s32
+kwrite(int fd, void *va, s32 n)
 {
 	return rwrite(fd, va, n, nil);
 }
 
-long
-kpwrite(int fd, void *va, long n, vlong off)
+s32
+kpwrite(int fd, void *va, s32 n, s64 off)
 {
-	return rwrite(fd, va, n, &off);
+	s64 *offp;
+
+	if(off != ~0ULL)
+		offp = &off;
+	else
+		offp = nil;
+	return rwrite(fd, va, n, offp);
 }
 
 int
@@ -982,6 +1271,8 @@
 kwstat(char *path, uchar *buf, int n)
 {
 	Chan *c;
+	long l;
+	int namelen;
 
 	if(waserror())
 		return -1;
@@ -992,12 +1283,19 @@
 		cclose(c);
 		nexterror();
 	}
-	n = devtab[c->type]->wstat(c, buf, n);
+	if(c->ismtpt){
+		/*
+		 * Renaming mount points is disallowed to avoid surprises
+		 * (which should be renamed? the mount point or the mounted Chan?).
+		 */
+		dirname(buf, &namelen);
+		if(namelen)
+			nameerror(chanpath(c), Eismtpt);
+	}
+	l = devtab[c->type]->wstat(c, buf, n);
 	poperror();
 	cclose(c);
-
-	poperror();
-	return n;
+	return l;
 }
 
 enum