shithub: 9ferno

Download patch

ref: c11957a7915d355d80ff7e6a21721ba05441ca29
parent: d8b894afc0ff3c81fc750826ac0016250e4f81e4
author: 9ferno <gophone2015@gmail.com>
date: Sat Aug 21 01:39:52 EDT 2021

replacing ip with 9front ip

diff: cannot open b/os/ip.original//null: file does not exist: 'b/os/ip.original//null'
--- a/os/ip/arp.c
+++ b/os/ip/arp.c
@@ -47,7 +47,8 @@
 
 #define haship(s) ((s)[IPaddrlen-1]%NHASH)
 
-extern int 	ReTransTimer = RETRANS_TIMER;
+int 	ReTransTimer = RETRANS_TIMER;
+
 static void 	rxmitproc(void *v);
 
 void
@@ -57,145 +58,121 @@
 	f->arp->f = f;
 	f->arp->rxmt = nil;
 	f->arp->dropf = f->arp->dropl = nil;
-	kproc("rxmitproc", rxmitproc, f->arp, 0);
+	kproc("rxmitproc", rxmitproc, f->arp);
 }
 
-/*
- *  create a new arp entry for an ip address.
- */
-static Arpent*
-newarp6(Arp *arp, uchar *ip, Ipifc *ifc, int addrxt)
+static void
+freeblistchain(Block *bp)
 {
-	uint t;
-	Block *next, *xp;
-	Arpent *a, *e, *f, **l;
-	Medium *m = ifc->m;
-	int empty;
+	Block *next;
 
-	/* find oldest entry */
-	e = &arp->cache[NCACHE];
-	a = arp->cache;
-	t = a->utime;
-	for(f = a; f < e; f++){
-		if(f->utime < t){
-			t = f->utime;
-			a = f;
-		}
+	while(bp != nil){
+		next = bp->list;
+		freeblist(bp);
+		bp = next;
 	}
+}
 
-	/* dump waiting packets */
-	xp = a->hold;
-	a->hold = nil;
+/* take out of re-transmit chain */
+static Arpent**
+rxmtunchain(Arp *arp, Arpent *a)
+{
+	Arpent **l;
 
-	if(isv4(a->ip)){
-		while(xp){
-			next = xp->list;
-			freeblist(xp);
-			xp = next;
+	for(l = &arp->rxmt; *l != nil; l = &((*l)->nextrxt)){
+		if(*l == a){
+			*l = a->nextrxt;
+			break;
 		}
 	}
-	else {	// queue icmp unreachable for rxmitproc later on, w/o arp lock
-		if(xp){
-			if(arp->dropl == nil) 
-				arp->dropf = xp;
-			else
-				arp->dropl->list = xp;
+	a->nextrxt = nil;
+	return l;
+}
 
-			for(next = xp->list; next; next = next->list)
-				xp = next;
-			arp->dropl = xp;
-			wakeup(&arp->rxmtq);
-		}
-	}
+static void
+cleanarpent(Arp *arp, Arpent *a)
+{
+	Arpent **l;
+	Block *bp;
 
 	/* take out of current chain */
-	l = &arp->hash[haship(a->ip)];
-	for(f = *l; f; f = f->hash){
-		if(f == a){
+	for(l = &arp->hash[haship(a->ip)]; *l != nil; l = &((*l)->hash)){
+		if(*l == a){
 			*l = a->hash;
 			break;
 		}
-		l = &f->hash;
 	}
+	a->hash = nil;
 
-	/* insert into new chain */
-	l = &arp->hash[haship(ip)];
-	a->hash = *l;
-	*l = a;
+	/* dump waiting packets */
+	bp = a->hold;
+	a->hold = nil;
+	if(isv4(a->ip))
+		freeblistchain(bp);
+	else {
+		rxmtunchain(arp, a);
 
-	memmove(a->ip, ip, sizeof(a->ip));
-	a->utime = NOW;
-	a->ctime = 0;
-	a->type = m;
+		/* queue icmp unreachable for rxmitproc later on, w/o arp lock */
+		if(bp != nil){
+			if(arp->dropf == nil)
+				arp->dropf = bp;
+			else
+				arp->dropl->list = bp;
+			arp->dropl = a->last;
 
-	a->rtime = NOW + ReTransTimer;
-	a->rxtsrem = MAX_MULTICAST_SOLICIT;
-	a->ifc = ifc;
-	a->ifcid = ifc->ifcid;
-
-	/* put to the end of re-transmit chain; addrxt is 0 when isv4(a->ip) */
-	if(!ipismulticast(a->ip) && addrxt){
-		l = &arp->rxmt;
-		empty = (*l==nil);
-
-		for(f = *l; f; f = f->nextrxt){
-			if(f == a){
-				*l = a->nextrxt;
-				break;
-			}
-			l = &f->nextrxt;
+			if(bp == arp->dropf)
+				wakeup(&arp->rxmtq);
 		}
-		for(f = *l; f; f = f->nextrxt){
-			l = &f->nextrxt;
-		}
-		*l = a;
-		if(empty) 
-			wakeup(&arp->rxmtq);
 	}
+	a->last = nil;
 
-	a->nextrxt = nil;
+	a->ifc = nil;
+	a->ifcid = 0;
 
-	return a;
-}
+	a->state = 0;
+	a->rxtsrem = 0;
 
-/* called with arp qlocked */
+	a->utime = 0;
+	a->ctime = 0;
 
-void
-cleanarpent(Arp *arp, Arpent *a)
+	memset(a->ip, 0, sizeof(a->ip));
+	memset(a->mac, 0, sizeof(a->mac));
+}
+
+/*
+ *  create a new arp entry for an ip address on ifc.
+ */
+static Arpent*
+newarpent(Arp *arp, uchar *ip, Ipifc *ifc)
 {
-	Arpent *f, **l;
+	Arpent *a, *e, *f, **l;
+	ulong t;
 
-	a->utime = 0;
-	a->ctime = 0;
-	a->type = 0;
-	a->state = 0;
-	
-	/* take out of current chain */
-	l = &arp->hash[haship(a->ip)];
-	for(f = *l; f; f = f->hash){
-		if(f == a){
-			*l = a->hash;
-			break;
+	/* find oldest entry */
+	e = &arp->cache[NCACHE];
+	a = arp->cache;
+	t = a->utime;
+	for(f = a; f < e; f++){
+		if(f->utime < t){
+			t = f->utime;
+			a = f;
 		}
-		l = &f->hash;
 	}
+	cleanarpent(arp, a);
 
-	/* take out of re-transmit chain */
-	l = &arp->rxmt;
-	for(f = *l; f; f = f->nextrxt){
-		if(f == a){
-			*l = a->nextrxt;
-			break;
-		}
-		l = &f->nextrxt;
-	}
-	a->nextrxt = nil;
-	a->hash = nil;
-	a->hold = nil;
-	a->last = nil;
-	a->ifc = nil;
+	ipmove(a->ip, ip);
+	a->ifc = ifc;
+	a->ifcid = ifc->ifcid;
+
+	/* insert into new chain */
+	l = &arp->hash[haship(ip)];
+	a->hash = *l;
+	*l = a;
+
+	return a;
 }
 
+
 /*
  *  fill in the media address if we have it.  Otherwise return an
  *  Arpent that represents the state of the address resolution FSM
@@ -207,7 +184,6 @@
 {
 	int hash;
 	Arpent *a;
-	Medium *type = ifc->m;
 	uchar v6ip[IPaddrlen];
 
 	if(version == V4){
@@ -217,30 +193,28 @@
 
 	qlock(arp);
 	hash = haship(ip);
-	for(a = arp->hash[hash]; a; a = a->hash){
-		if(memcmp(ip, a->ip, sizeof(a->ip)) == 0)
-		if(type == a->type)
+	for(a = arp->hash[hash]; a != nil; a = a->hash){
+		if(a->ifc == ifc && a->ifcid == ifc->ifcid && ipcmp(ip, a->ip) == 0)
 			break;
 	}
-
 	if(a == nil){
-		a = newarp6(arp, ip, ifc, (version != V4));
+		a = newarpent(arp, ip, ifc);
 		a->state = AWAIT;
 	}
 	a->utime = NOW;
 	if(a->state == AWAIT){
 		if(bp != nil){
-			if(a->hold)
-				a->last->list = bp;
-			else
+			bp->list = nil; 
+			if(a->hold == nil)
 				a->hold = bp;
+			else
+				a->last->list = bp;
 			a->last = bp;
-			bp->list = nil; 
 		}
 		return a;		/* return with arp qlocked */
 	}
 
-	memmove(mac, a->mac, a->type->maclen);
+	memmove(mac, a->mac, ifc->m->maclen);
 
 	/* remove old entries */
 	if(NOW - a->ctime > 15*60*1000)
@@ -269,132 +243,82 @@
 arpresolve(Arp *arp, Arpent *a, Medium *type, uchar *mac)
 {
 	Block *bp;
-	Arpent *f, **l;
 
-	if(!isv4(a->ip)){
-		l = &arp->rxmt;
-		for(f = *l; f; f = f->nextrxt){
-			if(f == a){
-				*l = a->nextrxt;
-				break;
-			}
-			l = &f->nextrxt;
-		}
-	}
-
 	memmove(a->mac, mac, type->maclen);
-	a->type = type;
+	if(a->state == AWAIT && !isv4(a->ip)){
+		rxmtunchain(arp, a);
+		a->rxtsrem = 0;
+	}
 	a->state = AOK;
-	a->utime = NOW;
+	a->ctime = a->utime = NOW;
 	bp = a->hold;
-	a->hold = nil;
+	a->hold = a->last = nil;
 	qunlock(arp);
 
 	return bp;
 }
 
-void
-arpenter(Fs *fs, int version, uchar *ip, uchar *mac, int n, int refresh)
+int
+arpenter(Fs *fs, int version, uchar *ip, uchar *mac, int n, uchar *ia, Ipifc *ifc, int refresh)
 {
-	Arp *arp;
-	Route *r;
-	Arpent *a, *f, **l;
-	Ipifc *ifc;
-	Medium *type;
-	Block *bp, *next;
 	uchar v6ip[IPaddrlen];
+	Block *bp, *next;
+	Arpent *a;
+	Route *r;
+	Arp *arp;
 
-	arp = fs->arp;
+	if(ifc->m == nil || ifc->m->maclen != n || ifc->m->maclen == 0)
+		return -1;
 
-	if(n != 6){
-//		print("arp: len = %d\n", n);
-		return;
-	}
-
 	switch(version){
 	case V4:
-		r = v4lookup(fs, ip, nil);
+		r = v4lookup(fs, ip, ia, nil);
 		v4tov6(v6ip, ip);
 		ip = v6ip;
 		break;
 	case V6:
-		r = v6lookup(fs, ip, nil);
+		r = v6lookup(fs, ip, ia, nil);
 		break;
 	default:
 		panic("arpenter: version %d", version);
-		return;	/* to supress warnings */
+		return -1;	/* to suppress warnings */
 	}
 
-	if(r == nil){
-//		print("arp: no route for entry\n");
-		return;
-	}
+	if(r == nil || r->ifc != ifc || (r->type & (Rbcast|Rmulti)) != 0)
+		return -1;
 
-	ifc = r->ifc;
-	type = ifc->m;
-
+	arp = fs->arp;
 	qlock(arp);
-	for(a = arp->hash[haship(ip)]; a; a = a->hash){
-		if(a->type != type || (a->state != AWAIT && a->state != AOK))
+	for(a = arp->hash[haship(ip)]; a != nil; a = a->hash){
+		if(a->ifc != ifc || a->ifcid != ifc->ifcid)
 			continue;
-
 		if(ipcmp(a->ip, ip) == 0){
-			a->state = AOK;
-			memmove(a->mac, mac, type->maclen);
-
-			if(version == V6){
-				/* take out of re-transmit chain */
-				l = &arp->rxmt;
-				for(f = *l; f; f = f->nextrxt){
-					if(f == a){
-						*l = a->nextrxt;
-						break;
-					}
-					l = &f->nextrxt;
-				}
-			}
-
-			a->ifc = ifc;
-			a->ifcid = ifc->ifcid;
-			bp = a->hold;
-			a->hold = nil;
 			if(version == V4)
 				ip += IPv4off;
-			a->utime = NOW;
-			a->ctime = a->utime;
-			qunlock(arp);
-
-			while(bp){
+			bp = arpresolve(arp, a, ifc->m, mac);	/* unlocks arp */
+			for(; bp != nil; bp = next){
 				next = bp->list;
-				if(ifc != nil){
-					if(waserror()){
-						runlock(ifc);
-						nexterror();
-					}
-					rlock(ifc);
-					if(ifc->m != nil)
-						ifc->m->bwrite(ifc, bp, version, ip);
-					else
-						freeb(bp);
-					runlock(ifc);
-					poperror();
-				} else
-					freeb(bp);
-				bp = next;
+				bp->list = nil;
+				if(waserror()){
+					freeblistchain(next);
+					break;
+				}
+				ipifcoput(ifc, bp, version, ip);
+				poperror();
 			}
-			return;
+			return 1;
 		}
 	}
 
 	if(refresh == 0){
-		a = newarp6(arp, ip, ifc, 0);
+		a = newarpent(arp, ip, ifc);
 		a->state = AOK;
-		a->type = type;
-		a->ctime = NOW;
-		memmove(a->mac, mac, type->maclen);
+		a->ctime = a->utime = NOW;
+		memmove(a->mac, mac, n);
 	}
-
 	qunlock(arp);
+
+	return refresh == 0;
 }
 
 int
@@ -401,13 +325,12 @@
 arpwrite(Fs *fs, char *s, int len)
 {
 	int n;
-	Route *r;
 	Arp *arp;
-	Block *bp;
-	Arpent *a, *fl, **l;
+	Arpent *a, *x;
 	Medium *m;
-	char *f[4], buf[256];
-	uchar ip[IPaddrlen], mac[MAClen];
+	Ipifc *ifc;
+	char *f[5], buf[256];
+	uchar ip[IPaddrlen], ia[IPaddrlen], mac[MAClen];
 
 	arp = fs->arp;
 
@@ -420,7 +343,7 @@
 	if(len > 0 && buf[len-1] == '\n')
 		buf[len-1] = 0;
 
-	n = getfields(buf, f, 4, 1, " ");
+	n = getfields(buf, f, nelem(f), 1, " ");
 	if(strcmp(f[0], "flush") == 0){
 		qlock(arp);
 		for(a = arp->cache; a < &arp->cache[NCACHE]; a++){
@@ -427,19 +350,20 @@
 			memset(a->ip, 0, sizeof(a->ip));
 			memset(a->mac, 0, sizeof(a->mac));
 			a->hash = nil;
+			a->nextrxt = nil;
+			a->ifc = nil;
+			a->ifcid = 0;
 			a->state = 0;
+			a->rxtsrem = 0;
+			a->ctime = 0;
 			a->utime = 0;
-			while(a->hold != nil){
-				bp = a->hold->list;
-				freeblist(a->hold);
-				a->hold = bp;
-			}
+			freeblistchain(a->hold);
+			a->hold = a->last = nil;
 		}
 		memset(arp->hash, 0, sizeof(arp->hash));
-// clear all pkts on these lists (rxmt, dropf/l)
+		freeblistchain(arp->dropf);
+		arp->dropf = arp->dropl = nil;
 		arp->rxmt = nil;
-		arp->dropf = nil;
-		arp->dropl = nil;
 		qunlock(arp);
 	} else if(strcmp(f[0], "add") == 0){
 		switch(n){
@@ -446,64 +370,53 @@
 		default:
 			error(Ebadarg);
 		case 3:
-			parseip(ip, f[1]);
-			if(isv4(ip))
-				r = v4lookup(fs, ip+IPv4off, nil);
-			else
-				r = v6lookup(fs, ip, nil);
-			if(r == nil)
-				error("Destination unreachable");
-			m = r->ifc->m;
-			n = parsemac(mac, f[2], m->maclen);
+			if(parseip(ip, f[1]) == -1)
+				error(Ebadip);
+			if((n = parsemac(mac, f[2], sizeof(mac))) <= 0)
+				error(Ebadarp);
+			findlocalip(fs, ia, ip);
 			break;
 		case 4:
 			m = ipfindmedium(f[1]);
-			if(m == nil)
+			if(m == nil || m->maclen == 0)
 				error(Ebadarp);
-			parseip(ip, f[2]);
-			n = parsemac(mac, f[3], m->maclen);
+			if(parseip(ip, f[2]) == -1)
+				error(Ebadip);
+			if((n = parsemac(mac, f[3], sizeof(mac))) != m->maclen)
+				error(Ebadarp);
+			findlocalip(fs, ia, ip);
 			break;
+		case 5:
+			m = ipfindmedium(f[1]);
+			if(m == nil || m->maclen == 0)
+				error(Ebadarp);
+			if(parseip(ip, f[2]) == -1)
+				error(Ebadip);
+			if((n = parsemac(mac, f[3], sizeof(mac))) != m->maclen)
+				error(Ebadarp);
+			if(parseip(ia, f[4]) == -1)
+				error(Ebadip);
+			break;
 		}
-
-		if(m->ares == nil)
-			error(Ebadarp);
-
-		m->ares(fs, V6, ip, mac, n, 0);
+		if((ifc = findipifc(fs, ia, ia, Runi)) == nil)
+			error("no interface");
+		rlock(ifc);
+		if(!ipv6local(ifc, ia, 0, ip) || arpenter(fs, V6, ip, mac, n, ia, ifc, 0) < 0){
+			runlock(ifc);
+			error("destination unreachable");
+		}
+		runlock(ifc);
 	} else if(strcmp(f[0], "del") == 0){
-		if(n != 2)
+		if (n != 2)
 			error(Ebadarg);
-
-		parseip(ip, f[1]);
+		if (parseip(ip, f[1]) == -1)
+			error(Ebadip);
 		qlock(arp);
-
-		l = &arp->hash[haship(ip)];
-		for(a = *l; a; a = a->hash){
-			if(memcmp(ip, a->ip, sizeof(a->ip)) == 0){
-				*l = a->hash;
-				break;
-			}
-			l = &a->hash;
+		for(a = arp->hash[haship(ip)]; a != nil; a = x){
+			x = a->hash;
+			if(ipcmp(ip, a->ip) == 0)
+				cleanarpent(arp, a);
 		}
-	
-		if(a){
-			/* take out of re-transmit chain */
-			l = &arp->rxmt;
-			for(fl = *l; fl; fl = fl->nextrxt){
-				if(fl == a){
-					*l = a->nextrxt;
-					break;
-				}
-				l = &fl->nextrxt;
-			}
-
-			a->nextrxt = nil;
-			a->hash = nil;
-			a->hold = nil;
-			a->last = nil;
-			a->ifc = nil;
-			memset(a->ip, 0, sizeof(a->ip));
-			memset(a->mac, 0, sizeof(a->mac));
-		}
 		qunlock(arp);
 	} else
 		error(Ebadarp);
@@ -511,13 +424,6 @@
 	return len;
 }
 
-enum
-{
-	Alinelen=	90,
-};
-
-char *aformat = "%-6.6s %-8.8s %-40.40I %-32.32s\n";
-
 static void
 convmac(char *p, uchar *mac, int n)
 {
@@ -526,136 +432,136 @@
 }
 
 int
-arpread(Arp *arp, char *p, ulong offset, int len)
+arpread(Arp *arp, char *s, ulong offset, int len)
 {
+	char mac[2*MAClen+1], *state, *mname, *p;
+	uchar ip[IPaddrlen], ia[IPaddrlen];
+	Ipifc *ifc;
 	Arpent *a;
-	int n;
-	char mac[2*MAClen+1];
+	long n, o;
 
-	if(offset % Alinelen)
-		return 0;
-
-	offset = offset/Alinelen;
-	len = len/Alinelen;
-
-	n = 0;
+	p = s;
+	o = -offset;
 	for(a = arp->cache; len > 0 && a < &arp->cache[NCACHE]; a++){
-		if(a->state == 0)
+		if(a->state == 0 || (ifc = a->ifc) == nil)
 			continue;
-		if(offset > 0){
-			offset--;
+
+		rlock(ifc);
+		qlock(arp);
+		state = arpstate[a->state];
+		ipmove(ip, a->ip);
+		if(ifc->m == nil || a->ifcid != ifc->ifcid || !ipv6local(ifc, ia, 0, ip)){
+			qunlock(arp);
+			runlock(ifc);
 			continue;
 		}
-		len--;
-		qlock(arp);
-		convmac(mac, a->mac, a->type->maclen);
-		n += sprint(p+n, aformat, a->type->name, arpstate[a->state], a->ip, mac);
+		mname = ifc->m->name;
+		convmac(mac, a->mac, ifc->m->maclen);
 		qunlock(arp);
+		runlock(ifc);
+
+		n = snprint(up->genbuf, sizeof up->genbuf,
+			"%-6.6s %-4.4s %-40.40I %-16.16s %I\n",
+			mname, state, ip, mac, ia);
+		o += n;
+		if(o <= 0)
+			continue;
+		if(n > len)
+			break;
+		memmove(p, up->genbuf, n);
+		len -= n;
+		p += n;
 	}
 
-	return n;
+	return p - s;
 }
 
-extern int
-rxmitsols(Arp *arp)
+void
+ndpsendsol(Fs *f, Ipifc *ifc, Arpent *a)
 {
-	uint sflag;
-	Block *next, *xp;
-	Arpent *a, *b, **l;
-	Fs *f;
-	uchar ipsrc[IPaddrlen];
-	Ipifc *ifc = nil;
-	long nrxt;
+	uchar targ[IPaddrlen], src[IPaddrlen];
+	Arpent **l;
 
-	qlock(arp);
-	f = arp->f;
+	a->ctime = NOW;
+	if(a->rxtsrem == 0)
+		a->rxtsrem = MAX_MULTICAST_SOLICIT;
+	else
+		a->rxtsrem--;
 
-	a = arp->rxmt;
-	if(a==nil){
-		nrxt = 0;
-		goto dodrops; 		//return nrxt;
-	}
-	nrxt = a->rtime - NOW;
-	if(nrxt > 3*ReTransTimer/4) 
-		goto dodrops; 		//return nrxt;
+	/* put on end of re-transmit chain */
+	for(l = rxmtunchain(f->arp, a); *l != nil; l = &(*l)->nextrxt)
+		;
+	*l = a;
 
-	for(; a; a = a->nextrxt){
-		ifc = a->ifc;
-		assert(ifc != nil);
-		if((a->rxtsrem <= 0) || !(canrlock(ifc)) || (a->ifcid != ifc->ifcid)){
-			xp = a->hold;
-			a->hold = nil;
+	if(l == &f->arp->rxmt)
+		wakeup(&f->arp->rxmtq);
 
-			if(xp){
-				if(arp->dropl == nil) 
-					arp->dropf = xp;
-				else
-					arp->dropl->list = xp;
-			}
+	/* try to use source address of original packet */
+	ipmove(targ, a->ip);
+	if(a->last != nil){
+		ipmove(src, ((Ip6hdr*)a->last->rp)->src);
+		arprelease(f->arp, a);
 
-			cleanarpent(arp, a);
-		}
-		else
-			break;
+		if(iplocalonifc(ifc, src) != nil || ipproxyifc(f, ifc, src))
+			goto send;
+	} else {
+		arprelease(f->arp, a);
 	}
-	if(a == nil)
-		goto dodrops;
+	if(!ipv6local(ifc, src, 0, targ))
+		return;
+send:
+	if(!waserror()){
+		icmpns(f, src, SRC_UNI, targ, TARG_MULTI, ifc->mac);
+		poperror();
+	}
+}
 
+static void
+rxmitsols(Arp *arp)
+{
+	Block *next, *bp;
+	Arpent *a;
+	Ipifc *ifc;
+	Route *r;
 
-	qunlock(arp);	/* for icmpns */
-	if((sflag = ipv6anylocal(ifc, ipsrc)) != SRC_UNSPEC) 
-		icmpns(f, ipsrc, sflag, a->ip, TARG_MULTI, ifc->mac); 
-
-	runlock(ifc);
-	qlock(arp);	
-
-	/* put to the end of re-transmit chain */
-	l = &arp->rxmt;
-	for(b = *l; b; b = b->nextrxt){
-		if(b == a){
-			*l = a->nextrxt;
-			break;
+	qlock(arp);
+	while((a = arp->rxmt) != nil && NOW - a->ctime > 3*ReTransTimer/4){
+		if(a->rxtsrem > 0 && (ifc = a->ifc) != nil && canrlock(ifc)){
+			if(a->ifcid == ifc->ifcid){
+				ndpsendsol(arp->f, ifc, a);	/* unlocks arp */
+				runlock(ifc);
+				qlock(arp);
+				continue;
+			}
+			runlock(ifc);
 		}
-		l = &b->nextrxt;
+		cleanarpent(arp, a);
 	}
-	for(b = *l; b; b = b->nextrxt){
-		l = &b->nextrxt;
-	}
-	*l = a;
-	a->rxtsrem--;
-	a->nextrxt = nil;
-	a->rtime = NOW + ReTransTimer;
-
-	a = arp->rxmt;
-	if(a==nil)
-		nrxt = 0;
-	else 
-		nrxt = a->rtime - NOW;
-
-dodrops:
-	xp = arp->dropf;
-	arp->dropf = nil;
-	arp->dropl = nil;
+	bp = arp->dropf;
+	arp->dropf = arp->dropl = nil;
 	qunlock(arp);
 
-	for(; xp; xp = next){
-		next = xp->list;
-		icmphostunr(f, ifc, xp, icmp6_adr_unreach, 1);
+	for(; bp != nil; bp = next){
+		next = bp->list;
+		bp->list = nil;
+		r = v6lookup(arp->f, ((Ip6hdr*)bp->rp)->src, ((Ip6hdr*)bp->rp)->dst, nil);
+		if(r != nil && (ifc = r->ifc) != nil && canrlock(ifc)){
+			if(!waserror()){
+				icmphostunr6(arp->f, ifc, bp, Icmp6_adr_unreach, (r->type & Runi) != 0);
+				poperror();
+			}
+			runlock(ifc);
+		}
+		freeblist(bp);
 	}
-
-	return nrxt;
-
 }
 
 static int
 rxready(void *v)
 {
-	Arp *arp = (Arp *) v;
-	int x;
+	Arp *arp = (Arp *)v;
 
-	x = ((arp->rxmt != nil) || (arp->dropf != nil));
-
-	return x;
+	return arp->rxmt != nil || arp->dropf != nil;
 }
 
 static void
@@ -662,20 +568,15 @@
 rxmitproc(void *v)
 {
 	Arp *arp = v;
-	long wakeupat;
 
 	arp->rxmitp = up;
-	//print("arp rxmitproc started\n");
 	if(waserror()){
-		arp->rxmitp = 0;
+		arp->rxmitp = nil;
 		pexit("hangup", 1);
 	}
 	for(;;){
-		wakeupat = rxmitsols(arp);
-		if(wakeupat == 0) 
-			sleep(&arp->rxmtq, rxready, v); 
-		else if(wakeupat > ReTransTimer/4) 
-			tsleep(&arp->rxmtq, return0, 0, wakeupat); 
+		sleep(&arp->rxmtq, rxready, v);
+		rxmitsols(arp);
+		tsleep(&arp->rxmtq, return0, nil, ReTransTimer/4);
 	}
 }
-
--- a/os/ip/bootp.c
+++ /dev/null
@@ -1,231 +1,0 @@
-#include "u.h"
-#include "../port/lib.h"
-#include "mem.h"
-#include "dat.h"
-#include "fns.h"
-#include "../port/error.h"
-#include "kernel.h"
-#include "ip.h"
-
-static	ulong	fsip;
-static	ulong	auip;
-static	ulong	gwip;
-static	ulong	ipmask;
-static	ulong	ipaddr;
-
-enum
-{
-	Bootrequest = 1,
-	Bootreply   = 2,
-};
-
-typedef struct Bootp
-{
-	/* udp.c oldheader */
-	uchar	raddr[IPaddrlen];
-	uchar	laddr[IPaddrlen];
-	uchar	rport[2];
-	uchar	lport[2];
-	/* bootp itself */
-	uchar	op;		/* opcode */
-	uchar	htype;		/* hardware type */
-	uchar	hlen;		/* hardware address len */
-	uchar	hops;		/* hops */
-	uchar	xid[4];		/* a random number */
-	uchar	secs[2];	/* elapsed snce client started booting */
-	uchar	pad[2];
-	uchar	ciaddr[4];	/* client IP address (client tells server) */
-	uchar	yiaddr[4];	/* client IP address (server tells client) */
-	uchar	siaddr[4];	/* server IP address */
-	uchar	giaddr[4];	/* gateway IP address */
-	uchar	chaddr[16];	/* client hardware address */
-	uchar	sname[64];	/* server host name (optional) */
-	uchar	file[128];	/* boot file name */
-	uchar	vend[128];	/* vendor-specific goo */
-} Bootp;
-
-/*
- * bootp returns:
- *
- * "fsip d.d.d.d
- * auip d.d.d.d
- * gwip d.d.d.d
- * ipmask d.d.d.d
- * ipaddr d.d.d.d"
- *
- * where d.d.d.d is the IP address in dotted decimal notation, and each
- * address is followed by a newline.
- */
-
-static	Bootp	req;
-static	Proc*	rcvprocp;
-static	int	recv;
-static	int	done;
-static	Rendez	bootpr;
-static	char	rcvbuf[512+2*IPaddrlen+2*2];
-
-static void
-rcvbootp(void *a)
-{
-	int n, fd;
-	Bootp *rp;
-	char *field[4];
-	uchar ip[IPaddrlen];
-
-	if(waserror())
-		pexit("", 0);
-	rcvprocp = up;	/* store for postnote below */
-	fd = (int)a;	/* at compilation: warning: ../ip/bootp.c:78 conversion of pointer to shorter integer */
-	while(done == 0) {
-		n = kread(fd, rcvbuf, sizeof(rcvbuf));
-		if(n <= 0)
-			break;
-		rp = (Bootp*)rcvbuf;
-		/* currently ignore udp's header */
-		if(memcmp(req.chaddr, rp->chaddr, 6) == 0
-		&& rp->htype == 1 && rp->hlen == 6
-		&& getfields((char*)rp->vend+4, field, 4, 1, " ") == 4
-		&& strncmp((char*)rp->vend, "p9  ", 4) == 0){
-			if(ipaddr == 0)
-				ipaddr = nhgetl(rp->yiaddr);
-			if(ipmask == 0)
-				ipmask = parseip(ip, field[0]);
-			if(fsip == 0)
-				fsip = parseip(ip, field[1]);
-			if(auip == 0)
-				auip = parseip(ip, field[2]);
-			if(gwip == 0)
-				gwip = parseip(ip, field[3]);
-			break;
-		}
-	}
-	poperror();
-	rcvprocp = nil;
-
-	recv = 1;
-	wakeup(&bootpr);
-	pexit("", 0);
-}
-
-static char*
-rbootp(Ipifc *ifc)
-{
-	int cfd, dfd, tries, n;
-	char ia[5+3*24], im[16], *av[3];
-	uchar nipaddr[4], ngwip[4], nipmask[4];
-	char dir[Maxpath];
-
-	av[1] = "0.0.0.0";
-	av[2] = "0.0.0.0";
-	ipifcadd(ifc, av, 3, 0, nil);
-
-	cfd = kannounce("udp!*!68", dir);
-	if(cfd < 0)
-		return "bootp announce failed";
-	strcat(dir, "/data");
-	if(kwrite(cfd, "headers", 7) < 0){
-		kclose(cfd);
-		return "bootp ctl headers failed";
-	}
-	kwrite(cfd, "oldheaders", 10);
-	dfd = kopen(dir, ORDWR);
-	if(dfd < 0){
-		kclose(cfd);
-		return "bootp open data failed";
-	}
-	kclose(cfd);
-	
-
-	/* create request */
-	memset(&req, 0, sizeof(req));
-	ipmove(req.raddr, IPv4bcast);
-	hnputs(req.rport, 67);
-	req.op = Bootrequest;
-	req.htype = 1;			/* ethernet (all we know) */
-	req.hlen = 6;			/* ethernet (all we know) */
-
-	/* Hardware MAC address */
-	memmove(req.chaddr, ifc->mac, 6);
-	/* Fill in the local IP address if we know it */
-	ipv4local(ifc, req.ciaddr);
-	memset(req.file, 0, sizeof(req.file));
-	strcpy((char*)req.vend, "p9  ");
-
-	done = 0;
-	recv = 0;
-
-	kproc("rcvbootp", rcvbootp, (void*)dfd, KPDUPFDG);
-
-	/*
-	 * broadcast bootp's till we get a reply,
-	 * or fixed number of tries
-	 */
-	tries = 0;
-	while(recv == 0) {
-		if(kwrite(dfd, &req, sizeof(req)) < 0)
-			print("bootp: write: %s\n", commonerror());
-
-		tsleep(&bootpr, return0, 0, 1000);
-		if(++tries > 10) {
-			print("bootp: timed out\n");
-			break;
-		}
-	}
-	kclose(dfd);
-	done = 1;
-	if(rcvprocp != nil){
-		postnote(rcvprocp, 1, "timeout", 0);
-		rcvprocp = nil;
-	}
-
-	av[1] = "0.0.0.0";
-	av[2] = "0.0.0.0";
-	ipifcrem(ifc, av, 3);
-
-	hnputl(nipaddr, ipaddr);
-	sprint(ia, "%V", nipaddr);
-	hnputl(nipmask, ipmask);
-	sprint(im, "%V", nipmask);
-	av[1] = ia;
-	av[2] = im;
-	ipifcadd(ifc, av, 3, 0, nil);
-
-	if(gwip != 0) {
-		hnputl(ngwip, gwip);
-		n = snprint(ia, sizeof(ia), "add 0.0.0.0 0.0.0.0 %V", ngwip);
-		routewrite(ifc->conv->p->f, nil, ia, n);
-	}
-	return nil;
-}
-
-static int
-rbootpread(char *bp, ulong offset, int len)
-{
-	int n;
-	char *buf;
-	uchar a[4];
-
-	buf = smalloc(READSTR);
-	if(waserror()){
-		free(buf);
-		nexterror();
-	}
-	hnputl(a, fsip);
-	n = snprint(buf, READSTR, "fsip %15V\n", a);
-	hnputl(a, auip);
-	n += snprint(buf + n, READSTR-n, "auip %15V\n", a);
-	hnputl(a, gwip);
-	n += snprint(buf + n, READSTR-n, "gwip %15V\n", a);
-	hnputl(a, ipmask);
-	n += snprint(buf + n, READSTR-n, "ipmask %15V\n", a);
-	hnputl(a, ipaddr);
-	snprint(buf + n, READSTR-n, "ipaddr %15V\n", a);
-
-	len = readstr(offset, bp, len, buf);
-	poperror();
-	free(buf);
-	return len;
-}
-
-char*	(*bootp)(Ipifc*) = rbootp;
-int	(*bootpread)(char*, ulong, int) = rbootpread;
--- a/os/ip/compress.c
+++ /dev/null
@@ -1,520 +1,0 @@
-#include	"u.h"
-#include	"../port/lib.h"
-#include	"mem.h"
-#include	"dat.h"
-#include	"fns.h"
-#include	"../port/error.h"
-
-#include	"ip.h"
-#include	"ppp.h"
-
-typedef struct Iphdr	Iphdr;
-typedef struct Tcphdr	Tcphdr;
-typedef struct Ilhdr	Ilhdr;
-typedef struct Hdr	Hdr;
-typedef struct Tcpc	Tcpc;
-
-struct Iphdr
-{
-	uchar	vihl;		/* Version and header length */
-	uchar	tos;		/* Type of service */
-	uchar	length[2];	/* packet length */
-	uchar	id[2];		/* Identification */
-	uchar	frag[2];	/* Fragment information */
-	uchar	ttl;		/* Time to live */
-	uchar	proto;		/* Protocol */
-	uchar	cksum[2];	/* Header checksum */
-	ulong	src;		/* Ip source (byte ordering unimportant) */
-	ulong	dst;		/* Ip destination (byte ordering unimportant) */
-};
-
-struct Tcphdr
-{
-	ulong	ports;		/* defined as a ulong to make comparisons easier */
-	uchar	seq[4];
-	uchar	ack[4];
-	uchar	flag[2];
-	uchar	win[2];
-	uchar	cksum[2];
-	uchar	urg[2];
-};
-
-struct Ilhdr
-{
-	uchar	sum[2];	/* Checksum including header */
-	uchar	len[2];	/* Packet length */
-	uchar	type;		/* Packet type */
-	uchar	spec;		/* Special */
-	uchar	src[2];	/* Src port */
-	uchar	dst[2];	/* Dst port */
-	uchar	id[4];	/* Sequence id */
-	uchar	ack[4];	/* Acked sequence */
-};
-
-enum
-{
-	URG		= 0x20,		/* Data marked urgent */
-	ACK		= 0x10,		/* Aknowledge is valid */
-	PSH		= 0x08,		/* Whole data pipe is pushed */
-	RST		= 0x04,		/* Reset connection */
-	SYN		= 0x02,		/* Pkt. is synchronise */
-	FIN		= 0x01,		/* Start close down */
-
-	IP_DF		= 0x4000,	/* Don't fragment */
-
-	IP_TCPPROTO	= 6,
-	IP_ILPROTO	= 40,
-	IL_IPHDR	= 20,
-};
-
-struct Hdr
-{
-	uchar	buf[128];
-	Iphdr	*ip;
-	Tcphdr	*tcp;
-	int	len;
-};
-
-struct Tcpc
-{
-	uchar	lastrecv;
-	uchar	lastxmit;
-	uchar	basexmit;
-	uchar	err;
-	uchar	compressid;
-	Hdr	t[MAX_STATES];
-	Hdr	r[MAX_STATES];
-};
-
-enum
-{	/* flag bits for what changed in a packet */
-	NEW_U=(1<<0),	/* tcp only */
-	NEW_W=(1<<1),	/* tcp only */
-	NEW_A=(1<<2),	/* il tcp */
-	NEW_S=(1<<3),	/* tcp only */
-	NEW_P=(1<<4),	/* tcp only */
-	NEW_I=(1<<5),	/* il tcp */
-	NEW_C=(1<<6),	/* il tcp */
-	NEW_T=(1<<7),	/* il only */
-	TCP_PUSH_BIT	= 0x10,
-};
-
-/* reserved, special-case values of above for tcp */
-#define SPECIAL_I (NEW_S|NEW_W|NEW_U)		/* echoed interactive traffic */
-#define SPECIAL_D (NEW_S|NEW_A|NEW_W|NEW_U)	/* unidirectional data */
-#define SPECIALS_MASK (NEW_S|NEW_A|NEW_W|NEW_U)
-
-int
-encode(void *p, ulong n)
-{
-	uchar	*cp;
-
-	cp = p;
-	if(n >= 256 || n == 0) {
-		*cp++ = 0;
-		cp[0] = n >> 8;
-		cp[1] = n;
-		return 3;
-	} else 
-		*cp = n;
-	return 1;
-}
-
-#define DECODEL(f) { \
-	if (*cp == 0) {\
-		hnputl(f, nhgetl(f) + ((cp[1] << 8) | cp[2])); \
-		cp += 3; \
-	} else { \
-		hnputl(f, nhgetl(f) + (ulong)*cp++); \
-	} \
-}
-#define DECODES(f) { \
-	if (*cp == 0) {\
-		hnputs(f, nhgets(f) + ((cp[1] << 8) | cp[2])); \
-		cp += 3; \
-	} else { \
-		hnputs(f, nhgets(f) + (ulong)*cp++); \
-	} \
-}
-
-ushort
-tcpcompress(Tcpc *comp, Block *b, Fs *)
-{
-	Iphdr	*ip;		/* current packet */
-	Tcphdr	*tcp;		/* current pkt */
-	ulong 	iplen, tcplen, hlen;	/* header length in bytes */
-	ulong 	deltaS, deltaA;	/* general purpose temporaries */
-	ulong 	changes;	/* change mask */
-	uchar	new_seq[16];	/* changes from last to current */
-	uchar	*cp;
-	Hdr	*h;		/* last packet */
-	int 	i, j;
-
-	/*
-	 * Bail if this is not a compressible TCP/IP packet
-	 */
-	ip = (Iphdr*)b->rp;
-	iplen = (ip->vihl & 0xf) << 2;
-	tcp = (Tcphdr*)(b->rp + iplen);
-	tcplen = (tcp->flag[0] & 0xf0) >> 2;
-	hlen = iplen + tcplen;
-	if((tcp->flag[1] & (SYN|FIN|RST|ACK)) != ACK)
-		return Pip;	/* connection control */
-
-	/*
-	 * Packet is compressible, look for a connection
-	 */
-	changes = 0;
-	cp = new_seq;
-	j = comp->lastxmit;
-	h = &comp->t[j];
-	if(ip->src != h->ip->src || ip->dst != h->ip->dst
-	|| tcp->ports != h->tcp->ports) {
-		for(i = 0; i < MAX_STATES; ++i) {
-			j = (comp->basexmit + i) % MAX_STATES;
-			h = &comp->t[j];
-			if(ip->src == h->ip->src && ip->dst == h->ip->dst
-			&& tcp->ports == h->tcp->ports)
-				goto found;
-		}
-
-		/* no connection, reuse the oldest */
-		if(i == MAX_STATES) {
-			j = comp->basexmit;
-			j = (j + MAX_STATES - 1) % MAX_STATES;
-			comp->basexmit = j;
-			h = &comp->t[j];
-			goto raise;
-		}
-	}
-found:
-
-	/*
-	 * Make sure that only what we expect to change changed. 
-	 */
-	if(ip->vihl  != h->ip->vihl || ip->tos   != h->ip->tos ||
-	   ip->ttl   != h->ip->ttl  || ip->proto != h->ip->proto)
-		goto raise;	/* headers changed */
-	if(iplen != sizeof(Iphdr) && memcmp(ip+1, h->ip+1, iplen - sizeof(Iphdr)))
-		goto raise;	/* ip options changed */
-	if(tcplen != sizeof(Tcphdr) && memcmp(tcp+1, h->tcp+1, tcplen - sizeof(Tcphdr)))
-		goto raise;	/* tcp options changed */
-
-	if(tcp->flag[1] & URG) {
-		cp += encode(cp, nhgets(tcp->urg));
-		changes |= NEW_U;
-	} else if(memcmp(tcp->urg, h->tcp->urg, sizeof(tcp->urg)) != 0)
-		goto raise;
-	if(deltaS = nhgets(tcp->win) - nhgets(h->tcp->win)) {
-		cp += encode(cp, deltaS);
-		changes |= NEW_W;
-	}
-	if(deltaA = nhgetl(tcp->ack) - nhgetl(h->tcp->ack)) {
-		if(deltaA > 0xffff)
-			goto raise;
-		cp += encode(cp, deltaA);
-		changes |= NEW_A;
-	}
-	if(deltaS = nhgetl(tcp->seq) - nhgetl(h->tcp->seq)) {
-		if (deltaS > 0xffff)
-			goto raise;
-		cp += encode(cp, deltaS);
-		changes |= NEW_S;
-	}
-
-	/*
-	 * Look for the special-case encodings.
-	 */
-	switch(changes) {
-	case 0:
-		/*
-		 * Nothing changed. If this packet contains data and the last
-		 * one didn't, this is probably a data packet following an
-		 * ack (normal on an interactive connection) and we send it
-		 * compressed. Otherwise it's probably a retransmit,
-		 * retransmitted ack or window probe.  Send it uncompressed
-		 * in case the other side missed the compressed version.
-		 */
-		if(nhgets(ip->length) == nhgets(h->ip->length) ||
-		   nhgets(h->ip->length) != hlen)
-			goto raise;
-		break;
-	case SPECIAL_I:
-	case SPECIAL_D:
-		/*
-		 * Actual changes match one of our special case encodings --
-		 * send packet uncompressed.
-		 */
-		goto raise;
-	case NEW_S | NEW_A:
-		if (deltaS == deltaA &&
-			deltaS == nhgets(h->ip->length) - hlen) {
-			/* special case for echoed terminal traffic */
-			changes = SPECIAL_I;
-			cp = new_seq;
-		}
-		break;
-	case NEW_S:
-		if (deltaS == nhgets(h->ip->length) - hlen) {
-			/* special case for data xfer */
-			changes = SPECIAL_D;
-			cp = new_seq;
-		}
-		break;
-	}
-	deltaS = nhgets(ip->id) - nhgets(h->ip->id);
-	if(deltaS != 1) {
-		cp += encode(cp, deltaS);
-		changes |= NEW_I;
-	}
-	if (tcp->flag[1] & PSH)
-		changes |= TCP_PUSH_BIT;
-	/*
-	 * Grab the cksum before we overwrite it below. Then update our
-	 * state with this packet's header.
-	 */
-	deltaA = nhgets(tcp->cksum);
-	memmove(h->buf, b->rp, hlen);
-	h->len = hlen;
-	h->tcp = (Tcphdr*)(h->buf + iplen);
-
-	/*
-	 * We want to use the original packet as our compressed packet. (cp -
-	 * new_seq) is the number of bytes we need for compressed sequence
-	 * numbers. In addition we need one byte for the change mask, one
-	 * for the connection id and two for the tcp checksum. So, (cp -
-	 * new_seq) + 4 bytes of header are needed. hlen is how many bytes
-	 * of the original packet to toss so subtract the two to get the new
-	 * packet size. The temporaries are gross -egs.
-	 */
-	deltaS = cp - new_seq;
-	cp = b->rp;
-	if(comp->lastxmit != j || comp->compressid == 0) {
-		comp->lastxmit = j;
-		hlen -= deltaS + 4;
-		cp += hlen;
-		*cp++ = (changes | NEW_C);
-		*cp++ = j;
-	} else {
-		hlen -= deltaS + 3;
-		cp += hlen;
-		*cp++ = changes;
-	}
-	b->rp += hlen;
-	hnputs(cp, deltaA);
-	cp += 2;
-	memmove(cp, new_seq, deltaS);
-	return Pvjctcp;
-
-raise:
-	/*
-	 * Update connection state & send uncompressed packet
-	 */
-	memmove(h->buf, b->rp, hlen);
-	h->tcp = (Tcphdr*)(h->buf + iplen);
-	h->len = hlen;
-	h->ip->proto = j;
-	comp->lastxmit = j;
-	return Pvjutcp;
-}
-
-Block*
-tcpuncompress(Tcpc *comp, Block *b, ushort type, Fs *f)
-{
-	uchar	*cp, changes;
-	int	i;
-	int	iplen, len;
-	Iphdr	*ip;
-	Tcphdr	*tcp;
-	Hdr	*h;
-
-	if(type == Pvjutcp) {
-		/*
-		 *  Locate the saved state for this connection. If the state
-		 *  index is legal, clear the 'discard' flag.
-		 */
-		ip = (Iphdr*)b->rp;
-		if(ip->proto >= MAX_STATES)
-			goto raise;
-		iplen = (ip->vihl & 0xf) << 2;
-		tcp = (Tcphdr*)(b->rp + iplen);
-		comp->lastrecv = ip->proto;
-		len = iplen + ((tcp->flag[0] & 0xf0) >> 2);
-		comp->err = 0;
-netlog(f, Logcompress, "uncompressed %d\n", comp->lastrecv);
-		/*
-		 * Restore the IP protocol field then save a copy of this
-		 * packet header. The checksum is zeroed in the copy so we
-		 * don't have to zero it each time we process a compressed
-		 * packet.
-		 */
-		ip->proto = IP_TCPPROTO;
-		h = &comp->r[comp->lastrecv];
-		memmove(h->buf, b->rp, len);
-		h->tcp = (Tcphdr*)(h->buf + iplen);
-		h->len = len;
-		h->ip->cksum[0] = h->ip->cksum[1] = 0;
-		return b;
-	}
-
-	cp = b->rp;
-	changes = *cp++;
-	if(changes & NEW_C) {
-		/*
-		 * Make sure the state index is in range, then grab the
-		 * state. If we have a good state index, clear the 'discard'
-		 * flag.
-		 */
-		if(*cp >= MAX_STATES)
-			goto raise;
-		comp->err = 0;
-		comp->lastrecv = *cp++;
-netlog(f, Logcompress, "newc %d\n", comp->lastrecv);
-	} else {
-		/*
-		 * This packet has no state index. If we've had a
-		 * line error since the last time we got an explicit state
-		 * index, we have to toss the packet.
-		 */
-		if(comp->err != 0){
-			freeblist(b);
-			return nil;
-		}
-netlog(f, Logcompress, "oldc %d\n", comp->lastrecv);
-	}
-
-	/*
-	 * Find the state then fill in the TCP checksum and PUSH bit.
-	 */
-	h = &comp->r[comp->lastrecv];
-	ip = h->ip;
-	tcp = h->tcp;
-	len = h->len;
-	memmove(tcp->cksum, cp, sizeof tcp->cksum);
-	cp += 2;
-	if(changes & TCP_PUSH_BIT)
-		tcp->flag[1] |= PSH;
-	else
-		tcp->flag[1] &= ~PSH;
-	/*
-	 * Fix up the state's ack, seq, urg and win fields based on the
-	 * changemask.
-	 */
-	switch (changes & SPECIALS_MASK) {
-	case SPECIAL_I:
-		i = nhgets(ip->length) - len;
-		hnputl(tcp->ack, nhgetl(tcp->ack) + i);
-		hnputl(tcp->seq, nhgetl(tcp->seq) + i);
-		break;
-
-	case SPECIAL_D:
-		hnputl(tcp->seq, nhgetl(tcp->seq) + nhgets(ip->length) - len);
-		break;
-
-	default:
-		if(changes & NEW_U) {
-			tcp->flag[1] |= URG;
-			if(*cp == 0){
-				hnputs(tcp->urg, nhgets(cp+1));
-				cp += 3;
-			}else
-				hnputs(tcp->urg, *cp++);
-		} else
-			tcp->flag[1] &= ~URG;
-		if(changes & NEW_W)
-			DECODES(tcp->win)
-		if(changes & NEW_A)
-			DECODEL(tcp->ack)
-		if(changes & NEW_S)
-			DECODEL(tcp->seq)
-		break;
-	}
-
-	/* Update the IP ID */
-	if(changes & NEW_I)
-		DECODES(ip->id)
-	else
-		hnputs(ip->id, nhgets(ip->id) + 1);
-
-	/*
-	 *  At this point, cp points to the first byte of data in the packet.
-	 *  Back up cp by the TCP/IP header length to make room for the
-	 *  reconstructed header.
-	 *  We assume the packet we were handed has enough space to prepend
-	 *  up to 128 bytes of header.
-	 */
-	b->rp = cp;
-	if(b->rp - b->base < len){
-		b = padblock(b, len);
-		b = pullupblock(b, blocklen(b));
-	} else
-		b->rp -= len;
-	hnputs(ip->length, BLEN(b));
-	memmove(b->rp, ip, len);
-	
-	/* recompute the ip header checksum */
-	ip = (Iphdr*)b->rp;
-	hnputs(ip->cksum, ipcsum(b->rp));
-	return b;
-
-raise:
-	netlog(f, Logcompress, "Bad Packet!\n");
-	comp->err = 1;
-	freeblist(b);
-	return nil;
-}
-
-Tcpc*
-compress_init(Tcpc *c)
-{
-	int i;
-	Hdr *h;
-
-	if(c == nil){
-		c = malloc(sizeof(Tcpc));
-		if(c == nil)
-			return nil;
-	}
-	memset(c, 0, sizeof(*c));
-	for(i = 0; i < MAX_STATES; i++){
-		h = &c->t[i];
-		h->ip = (Iphdr*)h->buf;
-		h->tcp = (Tcphdr*)(h->buf + 10);
-		h->len = 20;
-		h = &c->r[i];
-		h->ip = (Iphdr*)h->buf;
-		h->tcp = (Tcphdr*)(h->buf + 10);
-		h->len = 20;
-	}
-
-	return c;
-}
-
-ushort
-compress(Tcpc *tcp, Block *b, Fs *f)
-{
-	Iphdr		*ip;
-
-	/*
-	 * Bail if this is not a compressible IP packet
-	 */
-	ip = (Iphdr*)b->rp;
-	if((nhgets(ip->frag) & 0x3fff) != 0)
-		return Pip;
-
-	switch(ip->proto) {
-	case IP_TCPPROTO:
-		return tcpcompress(tcp, b, f);
-	default:
-		return Pip;
-	}
-}
-
-int
-compress_negotiate(Tcpc *tcp, uchar *data)
-{
-	if(data[0] != MAX_STATES - 1)
-		return -1;
-	tcp->compressid = data[1];
-	return 0;
-}
--- a/os/ip/devip.c
+++ b/os/ip/devip.c
@@ -14,7 +14,6 @@
 	Qbootp,
 	Qndb,
 	Qiproute,
-	Qiprouter,
 	Qipselftab,
 	Qlog,
 
@@ -43,11 +42,11 @@
 	Maskproto=	(1<<Logproto)-1,
 	Shiftproto=	Logtype + Logconv,
 
-	Nfs=		32,
+	Nfs=		128,
 };
-#define TYPE(x) 	( ((u32)(x).path) & Masktype )
-#define CONV(x) 	( (((u32)(x).path) >> Shiftconv) & Maskconv )
-#define PROTO(x) 	( (((u32)(x).path) >> Shiftproto) & Maskproto )
+#define TYPE(x) 	( ((ulong)(x).path) & Masktype )
+#define CONV(x) 	( (((ulong)(x).path) >> Shiftconv) & Maskconv )
+#define PROTO(x) 	( (((ulong)(x).path) >> Shiftproto) & Maskproto )
 #define QID(p, c, y) 	( ((p)<<(Shiftproto)) | ((c)<<Shiftconv) | (y) )
 
 static char network[] = "network";
@@ -58,8 +57,7 @@
 
 extern	void nullmediumlink(void);
 extern	void pktmediumlink(void);
-static	long ndbwrite(Fs*, char*, ulong, int);
-extern void    closeconv(Conv*);
+	long ndbwrite(Fs *f, char *a, ulong off, int n);
 
 static int
 ip3gen(Chan *c, int i, Dir *dp)
@@ -121,7 +119,7 @@
 		mkqid(&q, QID(PROTO(c->qid), 0, Qstats), 0, QTFILE);
 		devdir(c, q, "stats", 0, network, 0444, dp);
 		return 1;
-	}	
+	}
 	return -1;
 }
 
@@ -144,11 +142,10 @@
 		return -1;
 	case Qarp:
 		p = "arp";
+		prot = 0664;
 		break;
 	case Qbootp:
 		p = "bootp";
-		if(bootp == nil)
-			return 0;
 		break;
 	case Qndb:
 		p = "ndb";
@@ -157,14 +154,12 @@
 		break;
 	case Qiproute:
 		p = "iproute";
+		prot = 0664;
 		break;
 	case Qipselftab:
 		p = "ipselftab";
 		prot = 0444;
 		break;
-	case Qiprouter:
-		p = "iprouter";
-		break;
 	case Qlog:
 		p = "log";
 		break;
@@ -188,7 +183,7 @@
 	case Qtopdir:
 		if(s == DEVDOTDOT){
 			mkqid(&q, QID(0, 0, Qtopdir), 0, QTDIR);
-			sprint(up->genbuf, "#I%ud", c->dev);
+			snprint(up->genbuf, sizeof up->genbuf, "#I%lud", c->dev);
 			devdir(c, q, up->genbuf, 0, network, 0555, dp);
 			return 1;
 		}
@@ -206,19 +201,18 @@
 	case Qndb:
 	case Qlog:
 	case Qiproute:
-	case Qiprouter:
 	case Qipselftab:
 		return ip1gen(c, TYPE(c->qid), dp);
 	case Qprotodir:
 		if(s == DEVDOTDOT){
 			mkqid(&q, QID(0, 0, Qtopdir), 0, QTDIR);
-			sprint(up->genbuf, "#I%ud", c->dev);
+			snprint(up->genbuf, sizeof up->genbuf, "#I%lud", c->dev);
 			devdir(c, q, up->genbuf, 0, network, 0555, dp);
 			return 1;
 		}
 		if(s < f->p[PROTO(c->qid)]->ac) {
 			cv = f->p[PROTO(c->qid)]->conv[s];
-			sprint(up->genbuf, "%d", s);
+			snprint(up->genbuf, sizeof up->genbuf, "%d", s);
 			mkqid(&q, QID(PROTO(c->qid), s, Qconvdir), 0, QTDIR);
 			devdir(c, q, up->genbuf, 0, cv->owner, 0555, dp);
 			return 1;
@@ -262,45 +256,14 @@
 	fmtinstall('M', eipfmt);
 }
 
-static Fs*
-ipgetfs(int dev)
-{
-	extern void (*ipprotoinit[])(Fs*);
-	Fs *f;
-	int i;
-
-	if(dev >= Nfs)
-		return nil;
-
-	qlock(&fslock);
-	if(ipfs[dev] == nil){
-		f = smalloc(sizeof(Fs));
-		ip_init(f);
-		arpinit(f);
-		netloginit(f);
-		for(i = 0; ipprotoinit[i]; i++)
-			ipprotoinit[i](f);
-		f->dev = dev;
-		ipfs[dev] = f;
-	}
-	qunlock(&fslock);
-
-	return ipfs[dev];
-}
-
 IPaux*
 newipaux(char *owner, char *tag)
 {
 	IPaux *a;
-	int n;
 
 	a = smalloc(sizeof(*a));
 	kstrdup(&a->owner, owner);
-	memset(a->tag, ' ', sizeof(a->tag));
-	n = strlen(tag);
-	if(n > sizeof(a->tag))
-		n = sizeof(a->tag);
-	memmove(a->tag, tag, n);
+	strncpy(a->tag, tag, sizeof(a->tag));
 	return a;
 }
 
@@ -310,13 +273,29 @@
 ipattach(char* spec)
 {
 	Chan *c;
-	int dev;
+	ulong dev;
 
-	dev = atoi(spec);
+	dev = strtoul(spec, nil, 10);
 	if(dev >= Nfs)
-		error("bad specification");
+		error(Enodev);
 
-	ipgetfs(dev);
+	qlock(&fslock);
+	if(ipfs[dev] == nil){
+		extern void (*ipprotoinit[])(Fs*);
+		Fs *f;
+		int i;
+
+		f = smalloc(sizeof(Fs));
+		ip_init(f);
+		arpinit(f);
+		netloginit(f);
+		for(i = 0; ipprotoinit[i]; i++)
+			ipprotoinit[i](f);
+		f->dev = dev;
+		ipfs[dev] = f;
+	}
+	qunlock(&fslock);
+
 	c = devattach('I', spec);
 	mkqid(&c->qid, QID(0, 0, Qtopdir), 0, QTDIR);
 	c->dev = dev;
@@ -327,7 +306,7 @@
 }
 
 static Walkqid*
-ipwalk(Chan* c, Chan *nc, char **name, s32 nname)
+ipwalk(Chan* c, Chan *nc, char **name, int nname)
 {
 	IPaux *a = c->aux;
 	Walkqid* w;
@@ -338,8 +317,9 @@
 	return w;
 }
 
-static s32
-ipstat(Chan* c, uchar* db, s32 n)
+
+static int
+ipstat(Chan* c, uchar* db, int n)
 {
 	return devstat(c, db, n, nil, 0, ipgen);
 }
@@ -360,7 +340,7 @@
 };
 
 static Chan*
-ipopen(Chan* c, u32 omode)
+ipopen(Chan* c, int omode)
 {
 	Conv *cv, *nc;
 	Proto *p;
@@ -375,7 +355,7 @@
 	default:
 		break;
 	case Qndb:
-		if(omode & (OWRITE|OTRUNC) && !iseve())
+		if((omode & (OWRITE|OTRUNC)) != 0 && !iseve())
 			error(Eperm);
 		if((omode & (OWRITE|OTRUNC)) == (OWRITE|OTRUNC))
 			f->ndb[0] = 0;
@@ -383,10 +363,10 @@
 	case Qlog:
 		netlogopen(f);
 		break;
-	case Qiprouter:
-		iprouteropen(f);
-		break;
 	case Qiproute:
+	case Qarp:
+		if(omode != OREAD && !iseve())
+			error(Eperm);
 		break;
 	case Qtopdir:
 	case Qprotodir:
@@ -412,13 +392,8 @@
 	case Qclone:
 		p = f->p[PROTO(c->qid)];
 		qlock(p);
-		if(waserror()){
-			qunlock(p);
-			nexterror();
-		}
 		cv = Fsprotoclone(p, ATTACHER(c));
 		qunlock(p);
-		poperror();
 		if(cv == nil) {
 			error(Enodev);
 			break;
@@ -437,15 +412,12 @@
 			qunlock(p);
 			nexterror();
 		}
-		if((perm & (cv->perm>>6)) != perm) {
-			if(strcmp(ATTACHER(c), cv->owner) != 0)
-				error(Eperm);
-		 	if((perm & cv->perm) != perm)
-				error(Eperm); 
+		if(strcmp(ATTACHER(c), cv->owner) == 0)
+			perm <<= 6;
+		if((perm & cv->perm) != perm && !iseve())
+			error(Eperm);
 
-		}
-		cv->inuse++;
-		if(cv->inuse == 1){
+		if(++cv->inuse == 1){
 			kstrdup(&cv->owner, ATTACHER(c));
 			cv->perm = 0660;
 		}
@@ -455,24 +427,26 @@
 		break;
 	case Qlisten:
 		cv = f->p[PROTO(c->qid)]->conv[CONV(c->qid)];
-		if((perm & (cv->perm>>6)) != perm) {
-			if(strcmp(ATTACHER(c), cv->owner) != 0)
-				error(Eperm);
-		 	if((perm & cv->perm) != perm)
-				error(Eperm); 
-
+		qlock(cv);
+		if(waserror()){
+			qunlock(cv);
+			nexterror();
 		}
+		if(strcmp(ATTACHER(c), cv->owner) == 0)
+			perm <<= 6;
+		if((perm & cv->perm) != perm && !iseve())
+			error(Eperm);
 
 		if(cv->state != Announced)
 			error("not announced");
 
+		cv->inuse++;
+		qunlock(cv);
+		poperror();
 		if(waserror()){
 			closeconv(cv);
 			nexterror();
 		}
-		qlock(cv);
-		cv->inuse++;
-		qunlock(cv);
 
 		nc = nil;
 		while(nc == nil) {
@@ -494,7 +468,6 @@
 			if(nc != nil){
 				cv->incall = nc->next;
 				mkqid(&c->qid, QID(PROTO(c->qid), nc->x, Qctl), 0, QTFILE);
-				kstrdup(&cv->owner, ATTACHER(c));
 			}
 			qunlock(cv);
 
@@ -511,13 +484,25 @@
 	return c;
 }
 
-static s32
-ipwstat(Chan *c, uchar *dp, s32 n)
+static Chan*
+ipcreate(Chan*, char*, int, ulong)
 {
-	Dir *d;
+	error(Eperm);
+	return 0;
+}
+
+static void
+ipremove(Chan*)
+{
+	error(Eperm);
+}
+
+static int
+ipwstat(Chan *c, uchar *dp, int n)
+{
+	Dir *dir;
 	Conv *cv;
 	Fs *f;
-	Proto *p;
 
 	f = ipfs[c->dev];
 	switch(TYPE(c->qid)) {
@@ -529,28 +514,40 @@
 		break;
 	}
 
-	d = smalloc(sizeof(*d)+n);
+	dir = smalloc(sizeof(Dir)+n);
 	if(waserror()){
-		free(d);
+		free(dir);
 		nexterror();
 	}
-	n = convM2D(dp, n, d, (char*)&d[1]);
+	n = convM2D(dp, n, &dir[0], (char*)&dir[1]);
 	if(n == 0)
 		error(Eshortstat);
-	p = f->p[PROTO(c->qid)];
-	cv = p->conv[CONV(c->qid)];
-	if(!iseve() && strcmp(ATTACHER(c), cv->owner) != 0)
+
+	cv = f->p[PROTO(c->qid)]->conv[CONV(c->qid)];
+	qlock(cv);
+	if(waserror()){
+		qunlock(cv);
+		nexterror();
+	}
+	if(strcmp(ATTACHER(c), cv->owner) != 0 && !iseve())
 		error(Eperm);
-	if(!emptystr(d->uid))
-		kstrdup(&cv->owner, d->uid);
-	if(d->mode != ~0UL)
-		cv->perm = d->mode & 0777;
+	if(!emptystr(dir->uid)){
+		if(strcmp(dir->uid, commonuser()) != 0 && !iseve())
+			error(Eperm);
+		kstrdup(&cv->owner, dir->uid);
+	}
+	if(dir->mode != ~0UL)
+		cv->perm = dir->mode & 0666;
+	qunlock(cv);
 	poperror();
-	free(d);
+
+	free(dir);
+	poperror();
+
 	return n;
 }
 
-extern void
+void
 closeconv(Conv *cv)
 {
 	Conv *nc;
@@ -564,7 +561,7 @@
 	}
 
 	/* close all incoming calls since no listen will ever happen */
-	for(nc = cv->incall; nc; nc = cv->incall){
+	for(nc = cv->incall; nc != nil; nc = cv->incall){
 		cv->incall = nc->next;
 		closeconv(nc);
 	}
@@ -576,9 +573,9 @@
 	while((mp = cv->multi) != nil)
 		ipifcremmulti(cv, mp->ma, mp->ia);
 
-	cv->r = nil;
-	cv->rgen = 0;
-	cv->p->close(cv);
+	if(cv->p->close != nil)
+		(*cv->p->close)(cv);
+
 	cv->state = Idle;
 	qunlock(cv);
 }
@@ -596,10 +593,6 @@
 		if(c->flag & COPEN)
 			netlogclose(f);
 		break;
-	case Qiprouter:
-		if(c->flag & COPEN)
-			iprouterclose(f);
-		break;
 	case Qdata:
 	case Qctl:
 	case Qerr:
@@ -620,13 +613,13 @@
 	Statelen=	32*1024,
 };
 
-static s32
-ipread(Chan *ch, void *a, s32 n, s64 off)
+static long
+ipread(Chan *ch, void *a, long n, vlong off)
 {
 	Conv *c;
 	Proto *x;
 	char *buf, *p;
-	s32 rv;
+	long rv;
 	Fs *f;
 	ulong offset = off;
 
@@ -648,21 +641,22 @@
 		return readstr(offset, a, n, f->ndb);
 	case Qiproute:
 		return routeread(f, a, offset, n);
-	case Qiprouter:
-		return iprouterread(f, a, n);
 	case Qipselftab:
 		return ipselftabread(f, a, offset, n);
 	case Qlog:
 		return netlogread(f, a, offset, n);
 	case Qctl:
-		sprint(up->genbuf, "%ud", CONV(ch->qid));
-		return readstr(offset, p, n, up->genbuf);
+		buf = smalloc(16);
+		snprint(buf, 16, "%lud", CONV(ch->qid));
+		rv = readstr(offset, p, n, buf);
+		free(buf);
+		return rv;
 	case Qremote:
 		buf = smalloc(Statelen);
 		x = f->p[PROTO(ch->qid)];
 		c = x->conv[CONV(ch->qid)];
 		if(x->remote == nil) {
-			sprint(buf, "%I!%d\n", c->raddr, c->rport);
+			snprint(buf, Statelen, "%I!%d\n", c->raddr, c->rport);
 		} else {
 			(*x->remote)(c, buf, Statelen-2);
 		}
@@ -674,7 +668,7 @@
 		x = f->p[PROTO(ch->qid)];
 		c = x->conv[CONV(ch->qid)];
 		if(x->local == nil) {
-			sprint(buf, "%I!%d\n", c->laddr, c->lport);
+			snprint(buf, Statelen, "%I!%d\n", c->laddr, c->lport);
 		} else {
 			(*x->local)(c, buf, Statelen-2);
 		}
@@ -711,7 +705,7 @@
 }
 
 static Block*
-ipbread(Chan* ch, s32 n, u32 offset)
+ipbread(Chan* ch, long n, ulong offset)
 {
 	Conv *c;
 	Proto *x;
@@ -740,7 +734,7 @@
 /*
  *  set a local port making sure the quad of raddr,rport,laddr,lport is unique
  */
-static char*
+char*
 setluniqueport(Conv* c, int lport)
 {
 	Proto *p;
@@ -771,51 +765,63 @@
 }
 
 /*
+ * is lport in use by anyone?
+ */
+static int
+lportinuse(Proto *p, ushort lport)
+{
+	int x;
+
+	for(x = 0; x < p->nc && p->conv[x]; x++)
+		if(p->conv[x]->lport == lport)
+			return 1;
+	return 0;
+}
+
+/*
  *  pick a local port and set it
  */
-extern void
+char *
 setlport(Conv* c)
 {
 	Proto *p;
-	ushort *pp;
-	int x, found;
+	int i, port;
 
 	p = c->p;
-	if(c->restricted)
-		pp = &p->nextrport;
-	else
-		pp = &p->nextport;
 	qlock(p);
-	for(;;(*pp)++){
+	if(c->restricted){
+		/* Restricted ports cycle between 600 and 1024. */
+		for(i=0; i<1024-600; i++){
+			if(p->nextrport >= 1024 || p->nextrport < 600)
+				p->nextrport = 600;
+			port = p->nextrport++;
+			if(!lportinuse(p, port))
+				goto chosen;
+		}
+	}else{
 		/*
-		 * Fsproto initialises p->nextport to 0 and the restricted
-		 * ports (p->nextrport) to 600.
-		 * Restricted ports must lie between 600 and 1024.
-		 * For the initial condition or if the unrestricted port number
-		 * has wrapped round, select a random port between 5000 and 1<<15
-		 * to start at.
+		 * Unrestricted ports are chosen randomly
+		 * between 2^15 and 2^16.  There are at most
+		 * 4*Nchan = 4096 ports in use at any given time,
+		 * so even in the worst case, a random probe has a
+		 * 1 - 4096/2^15 = 87% chance of success.
+		 * If 64 successive probes fail, there is a bug somewhere
+		 * (or a once in 10^58 event has happened, but that's
+		 * less likely than a venti collision).
 		 */
-		if(c->restricted){
-			if(*pp >= 1024)
-				*pp = 600;
+		for(i=0; i<64; i++){
+			port = (1<<15) + nrand(1<<15);
+			if(!lportinuse(p, port))
+				goto chosen;
 		}
-		else while(*pp < 5000)
-			*pp = nrand(1<<15);
-
-		found = 0;
-		for(x = 0; x < p->nc; x++){
-			if(p->conv[x] == nil)
-				break;
-			if(p->conv[x]->lport == *pp){
-				found = 1;
-				break;
-			}
-		}
-		if(found == 0)
-			break;
 	}
-	c->lport = (*pp)++;
 	qunlock(p);
+	return "no ports available";
+
+chosen:
+	c->lport = port;
+	qunlock(p);
+	return nil;
 }
 
 /*
@@ -822,7 +828,7 @@
  *  set a local address and port from a string of the form
  *	[address!]port[!r]
  */
-static char*
+char*
 setladdrport(Conv* c, char* str, int announcing)
 {
 	char *p;
@@ -830,8 +836,6 @@
 	ushort lport;
 	uchar addr[IPaddrlen];
 
-	rv = nil;
-
 	/*
 	 *  ignore restricted part if it exists.  it's
 	 *  meaningless on local ports.
@@ -854,8 +858,9 @@
 		if(strcmp(str, "*") == 0)
 			ipmove(c->laddr, IPnoaddr);
 		else {
-			parseip(addr, str);
-			if(ipforme(c->p->f, addr))
+			if(parseip(addr, str) == -1)
+				return Ebadip;
+			if(ipforme(c->p->f, addr) != 0 || ipismulticast(addr))
 				ipmove(c->laddr, addr);
 			else
 				return "not a local IP address";
@@ -869,9 +874,13 @@
 		return setluniqueport(c, 0);
 	}
 
-	lport = atoi(p);
+	str = p;
+	lport = strtol(str, &p, 10);
+	if(p <= str || strchr("!", *p) == nil)
+		return "bad numeric port";
+
 	if(lport <= 0)
-		setlport(c);
+		rv = setlport(c);
 	else
 		rv = setluniqueport(c, lport);
 	return rv;
@@ -886,13 +895,17 @@
 	if(p == nil)
 		return "malformed address";
 	*p++ = 0;
-	parseip(c->raddr, str);
-	c->rport = atoi(p);
-	p = strchr(p, '!');
-	if(p){
-		if(strstr(p, "!r") != nil)
-			c->restricted = 1;
-	}
+	if(parseip(c->raddr, str) == -1)
+		return Ebadip;
+
+	str = p;
+	c->rport = strtol(str, &p, 10);
+	if(p <= str || strchr("!", *p) == nil)
+		return "bad numeric port";
+
+	if(strstr(p, "!r") != nil)
+		c->restricted = 1;
+
 	return nil;
 }
 
@@ -912,7 +925,9 @@
 		if(p != nil)
 			return p;
 		setladdr(c);
-		setlport(c);
+		p = setlport(c);
+		if (p != nil)
+			return p;
 		break;
 	case 3:
 		p = setraddrport(c, argv[1]);
@@ -923,12 +938,7 @@
 			return p;
 	}
 
-	if((memcmp(c->raddr, v4prefix, IPv4off) == 0 &&
-		memcmp(c->laddr, v4prefix, IPv4off) == 0)
-		|| ipcmp(c->raddr, IPnoaddr) == 0)
-		c->ipversion = V4;
-	else
-		c->ipversion = V6;
+	c->ipversion = convipvers(c);
 
 	return nil;
 }
@@ -978,10 +988,11 @@
 	c->rport = 0;
 	switch(argc){
 	default:
-		return "bad args to announce";
+		break;
 	case 2:
 		return setladdrport(c, argv[1], 1);
 	}
+	return "bad args to announce";
 }
 
 /*
@@ -1028,10 +1039,11 @@
 {
 	switch(argc){
 	default:
-		return "bad args to bind";
+		break;
 	case 2:
 		return setladdrport(c, argv[1], 0);
 	}
+	return "bad args to bind";
 }
 
 static void
@@ -1042,7 +1054,7 @@
 	if(x->bind == nil)
 		p = Fsstdbind(c, cb->f, cb->nf);
 	else
-		p = x->bind(c, cb->f, cb->nf);
+		p = (*x->bind)(c, cb->f, cb->nf);
 	if(p != nil)
 		error(p);
 }
@@ -1065,8 +1077,8 @@
 		c->ttl = atoi(cb->f[1]);
 }
 
-static s32
-ipwrite(Chan* ch, void *v, s32 n, s64 off)
+static long
+ipwrite(Chan* ch, void *v, long n, vlong off)
 {
 	Conv *c;
 	Proto *x;
@@ -1075,6 +1087,7 @@
 	uchar ia[IPaddrlen], ma[IPaddrlen];
 	Fs *f;
 	char *a;
+	ulong offset = off;
 
 	a = v;
 	f = ipfs[ch->dev];
@@ -1099,7 +1112,8 @@
 		netlogctl(f, a, n);
 		return n;
 	case Qndb:
-		return ndbwrite(f, a, off, n);
+		return ndbwrite(f, a, offset, n);
+		break;
 	case Qctl:
 		x = f->p[PROTO(ch->qid)];
 		c = x->conv[CONV(ch->qid)];
@@ -1131,13 +1145,15 @@
 			if(cb->nf == 2){
 				if(!ipismulticast(c->raddr))
 					error("addmulti for a non multicast address");
-				parseip(ia, cb->f[1]);
+				if (parseip(ia, cb->f[1]) == -1)
+					error(Ebadip);
 				ipifcaddmulti(c, c->raddr, ia);
 			} else {
-				parseip(ma, cb->f[2]);
+				if (parseip(ia, cb->f[1]) == -1 ||
+				    parseip(ma, cb->f[2]) == -1)
+					error(Ebadip);
 				if(!ipismulticast(ma))
 					error("addmulti for a non multicast address");
-				parseip(ia, cb->f[1]);
 				ipifcaddmulti(c, ma, ia);
 			}
 		} else if(strcmp(cb->f[0], "remmulti") == 0){
@@ -1145,10 +1161,11 @@
 				error("remmulti needs interface address");
 			if(!ipismulticast(c->raddr))
 				error("remmulti for a non multicast address");
-			parseip(ia, cb->f[1]);
+			if (parseip(ia, cb->f[1]) == -1)
+				error(Ebadip);
 			ipifcremmulti(c, c->raddr, ia);
 		} else if(x->ctl != nil) {
-			p = x->ctl(c, cb->f, cb->nf);
+			p = (*x->ctl)(c, cb->f, cb->nf);
 			if(p != nil)
 				error(p);
 		} else
@@ -1160,13 +1177,12 @@
 	return n;
 }
 
-static s32
-ipbwrite(Chan* ch, Block* bp, u32 offset)
+static long
+ipbwrite(Chan* ch, Block* bp, ulong offset)
 {
 	Conv *c;
 	Proto *x;
 	Fs *f;
-	int n;
 
 	switch(TYPE(ch->qid)){
 	case Qdata:
@@ -1177,11 +1193,7 @@
 		if(c->wq == nil)
 			error(Eperm);
 
-		if(bp->next)
-			bp = concatblock(bp);
-		n = BLEN(bp);
-		qbwrite(c->wq, bp);
-		return n;
+		return qbwrite(c->wq, bp);
 	default:
 		return devbwrite(ch, bp, offset);
 	}
@@ -1198,13 +1210,13 @@
 	ipwalk,
 	ipstat,
 	ipopen,
-	devcreate,
+	ipcreate,
 	ipclose,
 	ipread,
 	ipbread,
 	ipwrite,
 	ipbwrite,
-	devremove,
+	ipremove,
 	ipwstat,
 };
 
@@ -1224,12 +1236,15 @@
 
 	p->qid.type = QTDIR;
 	p->qid.path = QID(f->np, 0, Qprotodir);
+	if(p->nc > Maskconv+1){
+		print("Fsproto: %s nc %d > %d\n", p->name, p->nc, Maskconv+1);
+		p->nc = Maskconv+1;
+	}
 	p->conv = malloc(sizeof(Conv*)*(p->nc+1));
 	if(p->conv == nil)
 		panic("Fsproto");
 
 	p->x = f->np;
-	p->nextport = 0;
 	p->nextrport = 600;
 	f->p[f->np++] = p;
 
@@ -1262,21 +1277,33 @@
 		if(c == nil){
 			c = malloc(sizeof(Conv));
 			if(c == nil)
-				error(Enomem);
-			qlock(c);
+				return nil;
+			if(waserror()){
+				qfree(c->rq);
+				qfree(c->wq);
+				qfree(c->eq);
+				qfree(c->sq);
+				free(c->ptcl);
+				free(c);
+				return nil;
+			}
 			c->p = p;
 			c->x = pp - p->conv;
 			if(p->ptclsize != 0){
 				c->ptcl = malloc(p->ptclsize);
-				if(c->ptcl == nil) {
-					free(c);
+				if(c->ptcl == nil)
 					error(Enomem);
-				}
 			}
-			*pp = c;
-			p->ac++;
 			c->eq = qopen(1024, Qmsg, 0, 0);
+			if(c->eq == nil)
+				error(Enomem);
 			(*p->create)(c);
+			if(c->rq == nil || c->wq == nil)
+				error(Enomem);
+			poperror();
+			qlock(c);
+			*pp = c;
+			p->ac++;
 			break;
 		}
 		if(canqlock(c)){
@@ -1291,8 +1318,11 @@
 		}
 	}
 	if(pp >= ep) {
-		if(p->gc != nil && (*p->gc)(p))
-			goto retry;
+		if(p->gc != nil){
+			print("Fsprotoclone: garbage collecting %s Convs\n", p->name);
+			if((*p->gc)(p))
+				goto retry;
+		}
 		return nil;
 	}
 
@@ -1307,8 +1337,9 @@
 	c->lport = 0;
 	c->rport = 0;
 	c->restricted = 0;
+	c->ignoreadvice = 0;
 	c->ttl = MAXTTL;
-	c->tos = DFLTTOS;
+	c->tos = 0;
 	qreopen(c->rq);
 	qreopen(c->wq);
 	qreopen(c->eq);
@@ -1321,7 +1352,7 @@
 Fsconnected(Conv* c, char* msg)
 {
 	if(msg != nil && *msg != '\0')
-		kstrcpy(c->cerr, msg, sizeof(c->cerr));
+		strncpy(c->cerr, msg, ERRMAX-1);
 
 	switch(c->state){
 
@@ -1368,12 +1399,19 @@
 	for(l = &c->incall; *l; l = &(*l)->next)
 		i++;
 	if(i >= Maxincall) {
+		static int beenhere;
+
 		qunlock(c);
+		if (!beenhere) {
+			beenhere = 1;
+			print("Fsnewcall: incall queue full (%d) on port %d\n",
+				i, c->lport);
+		}
 		return nil;
 	}
 
 	/* find a free conversation */
-	nc = Fsprotoclone(c->p, network);
+	nc = Fsprotoclone(c->p, c->owner);
 	if(nc == nil) {
 		qunlock(c);
 		return nil;
@@ -1394,12 +1432,12 @@
 	return nc;
 }
 
-static long
+long
 ndbwrite(Fs *f, char *a, ulong off, int n)
 {
 	if(off > strlen(f->ndb))
 		error(Eio);
-	if(off+n >= sizeof(f->ndb)-1)
+	if(off+n >= sizeof(f->ndb))
 		error(Eio);
 	memmove(f->ndb+off, a, n);
 	f->ndb[off+n] = 0;
@@ -1411,7 +1449,7 @@
 ulong
 scalednconv(void)
 {
-	if(conf.npage*BY2PG >= 128*MB)
+	if(cpuserver && conf.npage*BY2PG >= 128*MB)
 		return Nchans*4;
 	return Nchans;
 }
--- a/os/ip/dhcp.c
+++ /dev/null
@@ -1,447 +1,0 @@
-#include "u.h"
-#include "../port/lib.h"
-#include "mem.h"
-#include "dat.h"
-#include "fns.h"
-#include "../port/error.h"
-#include "kernel.h"
-#include "ip.h"
-#include "ppp.h"
-
-Ipaddr pppdns[2];
-
-static	ulong	fsip;
-static	ulong	auip;
-static	ulong	gwip;
-static	ulong	ipmask;
-static	ulong	ipaddr;
-static	ulong	dns1ip;
-static	ulong	dns2ip;
-
-int		dhcpmsgtype;
-int		debug=0;
-enum
-{
-	Bootrequest = 1,
-	Bootreply   = 2,
-};
-
-typedef struct Bootp
-{
-	/* udp.c oldheader */
-	uchar	raddr[IPaddrlen];
-	uchar	laddr[IPaddrlen];
-	uchar	rport[2];
-	uchar	lport[2];
-	/* bootp itself */
-	uchar	op;			/* opcode */
-	uchar	htype;		/* hardware type */
-	uchar	hlen;			/* hardware address len */
-	uchar	hops;		/* hops */
-	uchar	xid[4];		/* a random number */
-	uchar	secs[2];		/* elapsed snce client started booting */
-	uchar	flags[2];		/* flags */
-	uchar	ciaddr[4];		/* client IP address (client tells server) */
-	uchar	yiaddr[4];		/* client IP address (server tells client) */
-	uchar	siaddr[4];		/* server IP address */
-	uchar	giaddr[4];		/* gateway IP address */
-	uchar	chaddr[16];	/* client hardware address */
-	uchar	sname[64];	/* server host name (optional) */
-	uchar	file[128];		/* boot file name */
-	uchar	vend[128];	/* vendor-specific goo 340 */
-} Bootp;
-
-static	Bootp	req;
-static	Proc*	rcvprocp;
-static	int	recv;
-static	int	done;
-static	Rendez	bootpr;
-static	char	rcvbuf[512+2*IPaddrlen+2*2];	  /* 576 */
-static	uchar sid[4];
-static	ulong iplease;
-
-/*
- * bootp returns:
- *
- * "fsip d.d.d.d
- * auip d.d.d.d
- * gwip d.d.d.d
- * ipmask d.d.d.d
- * ipaddr d.d.d.d
- * dns1ip	d.d.d.d
- * dns2ip	d.d.d.d
- *
- * where d.d.d.d is the IP address in dotted decimal notation, and each
- * address is followed by a newline.
-	Last change:  SUN  13 Sep 2001    4:36 pm
- */
-
-/*
- * Parse the vendor specific fields according to RFC 1084.
- * We are overloading the "cookie server" to be the Inferno 
- * authentication server and the "resource location server"
- * to be the Inferno file server.
- *
- * If the vendor specific field is formatted properly, it
- * will being with the four bytes 99.130.83.99 and end with
- * an 0xFF byte.
- */
-static int
-parsevend(uchar* pvend)
-{	
-	uchar *vend=pvend;
-	int dhcpmsg=0;
-	/* The field must start with 99.130.83.99 to be compliant */
-	if ((vend[0] != 99) || (vend[1] != 130) || (vend[2] != 83) || (vend[3] != 99)){
-		print("bad bootp vendor field: %.2x%.2x%.2x%.2x", vend[0], vend[1], vend[2], vend[3]);
-		return -1;
-	}
-
-	/* Skip over the magic cookie */
-	vend += 4;
-
-	while ((vend[0] != 0) && (vend[0] != 0xFF)) {
-		int i;
-//	
-		if(debug){
-			print(">>>Opt[%d] [%d]", vend[0], vend[1]);
-			for(i=0; i<vend[1]; i++)
-				print(" %2.2x", vend[i+2]);
-			print("\n");
-		}
-//
-		switch (vend[0]) {
-		case 1:	/* Subnet mask field */
-			/* There must be only one subnet mask */
-			if (vend[1] == 4)
-				ipmask = (vend[2]<<24)|(vend[3]<<16)| (vend[4]<<8)| vend[5];
-			else{ 
-				return -1;
-			}
-			break;
-
-		case 3:	/* Gateway/router field */
-			/* We are only concerned with first address */
-			if (vend[1] >0 && vend[1]%4==0)
-				gwip = (vend[2]<<24)|(vend[3]<<16)|(vend[4]<<8)|vend[5];
-			else 
-				return -1;
-			break;
-		case 6:	/* domain name server */
-			if(vend[1]>0 && vend[1] %4==0){
-				dns1ip=(vend[2]<<24)|(vend[3]<<16)|(vend[4]<<8)|vend[5];
-				if(vend[1]>4)
-					dns2ip=(vend[6]<<24)|(vend[7]<<16)|(vend[8]<<8)|vend[9];
-			}else
-				return -1;
-			break;
-
-		case 8:	/* "Cookie server" (auth server) field */
-			/* We are only concerned with first address */
-			if (vend[1] > 0 && vend[1]%4==0)
-				auip = (vend[2]<<24)|(vend[3]<<16)|(vend[4]<<8)|vend[5];
-			else
-				return -1;
-			break;
-
-		case 11:	/* "Resource loc server" (file server) field */
-			/* We are only concerned with first address */
-			if (vend[1] > 0 && vend[1]%4==0)
-				fsip = (vend[2]<<24)| (vend[3]<<16)| (vend[4]<<8)| vend[5];
-			else
-				return -1;
-			break;
-		case 51:	/* ip lease time */
-			if(vend[1]==4){
-				iplease=(vend[2]<<24)|(vend[3]<<16)|(vend[4]<<8)|vend[5];
-			}else
-				return -1;
-			break;
-		case 53:	/* DHCP message type */
-			if(vend[1]==1)
-				dhcpmsg=vend[2];
-			else
-				return -1;
-			break;
-		case 54:	/* server identifier */
-			if(vend[1]==4){
-				memmove(sid, vend+2, 4);
-			}else
-				return -1;
-			break;
-
-		default:	/* Everything else stops us */
-			break;
-		}
-
-		/* Skip over the field */
-		vend += vend[1] + 2;
-	}
-	if(debug)
-		print(">>>Opt[%d] [%d]\n", vend[0], vend[1]);
-	return dhcpmsg;
-}
-
-static void
-dispvend(uchar* pvend)
-{	
-	uchar *vend=pvend;
-
-	//print("<<<Magic : %2.2x%2.2x%2.2x%2.2x\n", vend[0], vend[1], vend[2], vend[3]);
-	
-	vend += 4;		/* Skip over the magic cookie */
-	while ((vend[0] != 0) && (vend[0] != 0xFF)) {
-	//	int i;
-	  //	print("<<<Opt[%d] [%d]", vend[0], vend[1]);
-		//for(i=0; i<vend[1]; i++)
-		//	print(" %2.2x", vend[i+2]);
-		//print("\n");
-	
-		vend += vend[1] + 2;
-	}
-	//print("<<<Opt[ %2.2x] [%2.2x]\n", vend[0], vend[1]);
-}
-
-static void
-rcvbootp(void *a)
-{
-	int n, fd, dhcp;
-	Bootp *rp;
-
-	if(waserror())
-		pexit("", 0);
-	rcvprocp = up;	/* store for postnote below */
-	fd = (int)a;
-	while(done == 0) {
-		if(debug)
-			print("rcvbootp:looping\n");
-
-		n = kread(fd, rcvbuf, sizeof(rcvbuf));
-		if(n <= 0)
-			break;
-		rp = (Bootp*)rcvbuf;
-		if (memcmp(req.chaddr, rp->chaddr, 6) == 0 && rp->htype == 1 && rp->hlen == 6) {
-			ipaddr = (rp->yiaddr[0]<<24)| (rp->yiaddr[1]<<16)| (rp->yiaddr[2]<<8)| rp->yiaddr[3];
-			if(debug)
-				print("ipaddr = %2.2x %2.2x %2.2x %2.2x \n", rp->yiaddr[0], rp->yiaddr[1], rp->yiaddr[2], rp->yiaddr[3]);
-			//memmove(req.siaddr, rp->siaddr, 4);	/* siaddr */
-			dhcp = parsevend(rp->vend);
-	
-			if(dhcpmsgtype < dhcp){
-				dhcpmsgtype=dhcp;
-				recv = 1;
-				wakeup(&bootpr);
-				if(dhcp==0 || dhcp ==5 || dhcp == 6 )
-					break;
-			}
-		}
-	}
-	poperror();
-	rcvprocp = nil;
-
-	if(debug)
-		print("rcvbootp exit\n");
-	pexit("", 0);
-}
-
-static char*
-rbootp(Ipifc *ifc)
-{
-	int cfd, dfd, tries, n;
-	char ia[5+3*16], im[16], *av[3];
-	uchar nipaddr[4], ngwip[4], nipmask[4];
-	char dir[Maxpath];
-	static uchar vend_rfc1048[] = { 99, 130, 83, 99 };
-	uchar *vend;
-
-	/*
-	 * broadcast bootp's till we get a reply,
-	 * or fixed number of tries
-	 */
-	if(debug)
-	    print("dhcp: bootp() called\n");
-	tries = 0;
-	av[1] = "0.0.0.0";
-	av[2] = "0.0.0.0";
-	ipifcadd(ifc, av, 3, 0, nil);
-
-	cfd = kannounce("udp!*!68", dir);
-	if(cfd < 0)
-		return "dhcp announce failed";
-	strcat(dir, "/data");
-	if(kwrite(cfd, "headers", 7) < 0){
-		kclose(cfd);
-		return "dhcp ctl headers failed";
-	}
-	kwrite(cfd, "oldheaders", 10);
-	dfd = kopen(dir, ORDWR);
-	if(dfd < 0){
-		kclose(cfd);
-		return "dhcp open data failed";
-	}
-	kclose(cfd);
-	
-	while(tries<1){
-		tries++;
-		memset(sid, 0, 4);
-		iplease=0;
-		dhcpmsgtype=-2;
-/* DHCPDISCOVER*/
-		done = 0;
-		recv = 0;
-		kproc("rcvbootp", rcvbootp, (void*)dfd, KPDUPFDG);
-		/* Prepare DHCPDISCOVER */	
-		memset(&req, 0, sizeof(req));
-		ipmove(req.raddr, IPv4bcast);
-		hnputs(req.rport, 67);
-		req.op = Bootrequest;
-		req.htype = 1;			/* ethernet (all we know) */
-		req.hlen = 6;			/* ethernet (all we know) */
-		
-		memmove(req.chaddr, ifc->mac, 6);	/* Hardware MAC address */
-		//ipv4local(ifc, req.ciaddr);				/* Fill in the local IP address if we know it */
-		memset(req.file, 0, sizeof(req.file));
-		vend=req.vend;
-		memmove(vend, vend_rfc1048, 4); vend+=4;
-		*vend++=53; *vend++=1;*vend++=1;		/* dhcp msg type==3, dhcprequest */
-		
-		*vend++=61;*vend++=7;*vend++=1;
-		memmove(vend, ifc->mac, 6);vend+=6;
-		*vend=0xff;
-
-		if(debug)
-			dispvend(req.vend); 
-		for(n=0;n<4;n++){
-			if(kwrite(dfd, &req, sizeof(req))<0)	/* SEND DHCPDISCOVER */
-				print("DHCPDISCOVER: %r");
-		
-			tsleep(&bootpr, return0, 0, 1000);	/* wait DHCPOFFER */
-			if(debug)
-				print("[DHCP] DISCOVER: msgtype = %d\n", dhcpmsgtype);
-
-			if(dhcpmsgtype==2)		/* DHCPOFFER */
-				break;
-			else if(dhcpmsgtype==0)	/* bootp */
-				return nil;
-			else if(dhcpmsgtype== -2)	/* time out */
-				continue;
-			else
-				break;
-			
-		}
-		if(dhcpmsgtype!=2)
-			continue;
-
-/* DHCPREQUEST */	
-		memset(req.vend, 0, sizeof(req.vend));
-		vend=req.vend;
-		memmove(vend, vend_rfc1048, 4);vend+=4;	
-
-		*vend++=53; *vend++=1;*vend++=3;		/* dhcp msg type==3, dhcprequest */
-
-		*vend++=50;	*vend++=4;				/* requested ip address */
-		*vend++=(ipaddr >> 24)&0xff;
-		*vend++=(ipaddr >> 16)&0xff;
-		*vend++=(ipaddr >> 8) & 0xff;
-		*vend++=ipaddr & 0xff;
-
-		*vend++=51;*vend++=4;					/* lease time */
-		*vend++=(iplease>>24)&0xff; *vend++=(iplease>>16)&0xff; *vend++=(iplease>>8)&0xff; *vend++=iplease&0xff;
-
-		*vend++=54; *vend++=4;					/* server identifier */
-		memmove(vend, sid, 4);	vend+=4;
-	
-		*vend++=61;*vend++=07;*vend++=01;		/* client identifier */
-		memmove(vend, ifc->mac, 6);vend+=6;
-		*vend=0xff;
-		if(debug) 
-			dispvend(req.vend); 
-		if(kwrite(dfd, &req, sizeof(req))<0){
-			print("DHCPREQUEST: %r");
-			continue;
-		}
-		tsleep(&bootpr, return0, 0, 2000);
-		if(dhcpmsgtype==5)		/* wait for DHCPACK */
-			break;
-		else
-			continue;
-		/* CHECK ARP */
-		/* DHCPDECLINE */
-	}
-	kclose(dfd);
-	done = 1;
-	if(rcvprocp != nil){
-		postnote(rcvprocp, 1, "timeout", 0);
-		rcvprocp = nil;
-	}
-
-	av[1] = "0.0.0.0";
-	av[2] = "0.0.0.0";
-	ipifcrem(ifc, av, 3);
-
-	hnputl(nipaddr, ipaddr);
-	sprint(ia, "%V", nipaddr);
-	hnputl(nipmask, ipmask);
-	sprint(im, "%V", nipmask);
-	av[1] = ia;
-	av[2] = im;
-	ipifcadd(ifc, av, 3, 0, nil);
-
-	if(gwip != 0) {
-		hnputl(ngwip, gwip);
-		n = sprint(ia, "add 0.0.0.0 0.0.0.0 %V", ngwip);
-		routewrite(ifc->conv->p->f, nil, ia, n);
-	}
-	return nil;
-}
-
-static int
-rbootpread(char *bp, ulong offset, int len)
-{
-	int n, i;
-	char *buf;
-	uchar a[4];
-
-	if(debug)
-		print("dhcp: bootpread() \n");
-	buf = smalloc(READSTR);
-	if(waserror()){
-		free(buf);
-		nexterror();
-	}
-
-	hnputl(a, fsip);
-	n = snprint(buf, READSTR, "fsip %15V\n", a);
-	hnputl(a, auip);
-	n += snprint(buf + n, READSTR-n, "auip %15V\n", a);
-	hnputl(a, gwip);
-	n += snprint(buf + n, READSTR-n, "gwip %15V\n", a);
-	hnputl(a, ipmask);
-	n += snprint(buf + n, READSTR-n, "ipmask %15V\n", a);
-	hnputl(a, ipaddr);
-	n += snprint(buf + n, READSTR-n, "ipaddr %15V\n", a);
-	n += snprint(buf+n, READSTR-n, "expired %lud\n", iplease);
-
-	n += snprint(buf + n, READSTR-n, "dns");
-	if(dns2ip){
-		hnputl(a, dns2ip);
-		n+=snprint(buf + n, READSTR-n, " %15V", a);
-	}
-	if(dns1ip){
-		hnputl(a, dns1ip);
-		n += snprint(buf + n, READSTR-n, " %15V", a);
-	}
-
-	for(i=0; i<2; i++)
-		if(ipcmp(pppdns[i], IPnoaddr) != 0 && ipcmp(pppdns[i], v4prefix) != 0)
-			n += snprint(buf + n, READSTR-n, " %15I", pppdns[i]);
-
-	snprint(buf + n, READSTR-n, "\n");
-	len = readstr(offset, bp, len, buf);
-	poperror();
-	free(buf);
-	return len;
-}
-
-char*	(*bootp)(Ipifc*) = rbootp;
-int	(*bootpread)(char*, ulong, int) = rbootpread;
--- a/os/ip/eipconvtest.c
+++ /dev/null
@@ -1,152 +1,0 @@
-#include <u.h>
-#include <libc.h>
-
-enum
-{
-	Isprefix= 16,
-};
-
-uchar prefixvals[256] =
-{
-[0x00] 0 | Isprefix,
-[0x80] 1 | Isprefix,
-[0xC0] 2 | Isprefix,
-[0xE0] 3 | Isprefix,
-[0xF0] 4 | Isprefix,
-[0xF8] 5 | Isprefix,
-[0xFC] 6 | Isprefix,
-[0xFE] 7 | Isprefix,
-[0xFF] 8 | Isprefix,
-};
-
-uchar v4prefix[16] = {
-	0, 0, 0, 0,
-	0, 0, 0, 0,
-	0, 0, 0xff, 0xff,
-	0, 0, 0, 0
-};
-
-void
-hnputl(void *p, ulong v)
-{
-	uchar *a;
-
-	a = p;
-	a[0] = v>>24;
-	a[1] = v>>16;
-	a[2] = v>>8;
-	a[3] = v;
-}
-
-int
-eipconv(va_list *arg, Fconv *f)
-{
-	char buf[8*5];
-	static char *efmt = "%.2lux%.2lux%.2lux%.2lux%.2lux%.2lux";
-	static char *ifmt = "%d.%d.%d.%d";
-	uchar *p, ip[16];
-	ulong *lp;
-	ushort s;
-	int i, j, n, eln, eli;
-
-	switch(f->chr) {
-	case 'E':		/* Ethernet address */
-		p = va_arg(*arg, uchar*);
-		sprint(buf, efmt, p[0], p[1], p[2], p[3], p[4], p[5]);
-		break;
-	case 'I':		/* Ip address */
-		p = va_arg(*arg, uchar*);
-common:
-		if(memcmp(p, v4prefix, 12) == 0)
-			sprint(buf, ifmt, p[12], p[13], p[14], p[15]);
-		else {
-			/* find longest elision */
-			eln = eli = -1;
-			for(i = 0; i < 16; i += 2){
-				for(j = i; j < 16; j += 2)
-					if(p[j] != 0 || p[j+1] != 0)
-						break;
-				if(j > i && j - i > eln){
-					eli = i;
-					eln = j - i;
-				}
-			}
-
-			/* print with possible elision */
-			n = 0;
-			for(i = 0; i < 16; i += 2){
-				if(i == eli){
-					n += sprint(buf+n, "::");
-					i += eln;
-					if(i >= 16)
-						break;
-				} else if(i != 0)
-					n += sprint(buf+n, ":");
-				s = (p[i]<<8) + p[i+1];
-				n += sprint(buf+n, "%ux", s);
-			}
-		}
-		break;
-	case 'i':		/* v6 address as 4 longs */
-		lp = va_arg(*arg, ulong*);
-		for(i = 0; i < 4; i++)
-			hnputl(ip+4*i, *lp++);
-		p = ip;
-		goto common;
-	case 'V':		/* v4 ip address */
-		p = va_arg(*arg, uchar*);
-		sprint(buf, ifmt, p[0], p[1], p[2], p[3]);
-		break;
-	case 'M':		/* ip mask */
-		p = va_arg(*arg, uchar*);
-
-		/* look for a prefix mask */
-		for(i = 0; i < 16; i++)
-			if(p[i] != 0xff)
-				break;
-		if(i < 16){
-			if((prefixvals[p[i]] & Isprefix) == 0)
-				goto common;
-			for(j = i+1; j < 16; j++)
-				if(p[j] != 0)
-					goto common;
-			n = 8*i + (prefixvals[p[i]] & ~Isprefix);
-		} else
-			n = 8*16;
-
-		/* got one, use /xx format */
-		sprint(buf, "/%d", n);
-		break;
-	default:
-		strcpy(buf, "(eipconv)");
-	}
-	strconv(buf, f);
-	return sizeof(uchar*);
-}
-
-uchar testvec[11][16] =
-{
- { 0,0,0,0, 0,0,0,0, 0,0,0xff,0xff, 1,3,4,5, },
- { 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, },
- { 0xff,0xff,0x80,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, },
- { 0xff,0xff,0xff,0xc0, 0,0,0,0, 0,0,0,0, 0,0,0,0, },
- { 0xff,0xff,0xff,0xff, 0xe0,0,0,0, 0,0,0,0, 0,0,0,0, },
- { 0xff,0xff,0xff,0xff, 0xff,0xf0,0,0, 0,0,0,0, 0,0,0,0, },
- { 0xff,0xff,0xff,0xff, 0xff,0xff,0xf8,0, 0,0,0,0, 0,0,0,0, },
- { 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, },
- { 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, },
- { 0,0,0,0, 0,0x11,0,0, 0,0,0,0, 0,0,0,0, },
- { 0,0,0,0x11, 0,0,0,0, 0,0,0,0, 0,0,0,0x12, },
-};
-
-void
-main(void)
-{
-	int i;
-
-	fmtinstall('I', eipconv);
-	fmtinstall('M', eipconv);
-	for(i = 0; i < 11; i++)
-		print("%I\n%M\n", testvec[i], testvec[i]);
-	exits(0);
-}
--- a/os/ip/esp.c
+++ b/os/ip/esp.c
@@ -1,3 +1,11 @@
+/*
+ * Encapsulating Security Payload for IPsec for IPv4, rfc1827.
+ * extended to IPv6.
+ * rfc2104 defines hmac computation.
+ *	currently only implements tunnel mode.
+ * TODO: verify aes algorithms;
+ *	transport mode (host-to-host)
+ */
 #include	"u.h"
 #include	"../port/lib.h"
 #include	"mem.h"
@@ -6,47 +14,79 @@
 #include	"../port/error.h"
 
 #include	"ip.h"
+#include	"ipv6.h"
+#include	<libsec.h>
 
-#include	"libsec.h"
+#define BITS2BYTES(bi) (((bi) + BI2BY - 1) / BI2BY)
+#define BYTES2BITS(by)  ((by) * BI2BY)
 
+typedef struct Algorithm Algorithm;
+typedef struct Esp4hdr Esp4hdr;
+typedef struct Esp6hdr Esp6hdr;
+typedef struct Espcb Espcb;
 typedef struct Esphdr Esphdr;
+typedef struct Esppriv Esppriv;
 typedef struct Esptail Esptail;
 typedef struct Userhdr Userhdr;
-typedef struct Esppriv Esppriv;
-typedef struct Espcb Espcb;
-typedef struct Algorithm Algorithm;
-typedef struct Esprc4 Esprc4;
 
-#define DPRINT if(0)print
+enum {
+	Encrypt,
+	Decrypt,
 
-enum
-{
-	IP_ESPPROTO	= 50,
-	EsphdrSize	= 28,	// includes IP header
-	IphdrSize	= 20,	// options have been striped
-	EsptailSize	= 2,	// does not include pad or auth data
-	UserhdrSize	= 4,	// user visable header size - if enabled
+	IP_ESPPROTO	= 50,	/* IP v4 and v6 protocol number */
+	Esp4hdrlen	= IP4HDR + 8,
+	Esp6hdrlen	= IP6HDR + 8,
+
+	Esptaillen	= 2,	/* does not include pad or auth data */
+	Userhdrlen	= 4,	/* user-visible header size - if enabled */
+
+	Desblk	 = BITS2BYTES(64),
+	Des3keysz = BITS2BYTES(192),
+
+	Aesblk	 = BITS2BYTES(128),
+	Aeskeysz = BITS2BYTES(128),
 };
 
 struct Esphdr
 {
-	/* ip header */
+	uchar	espspi[4];	/* Security parameter index */
+	uchar	espseq[4];	/* Sequence number */
+	uchar	payload[];
+};
+
+/*
+ * tunnel-mode (network-to-network, etc.) layout is:
+ * new IP hdrs | ESP hdr |
+ *	 enc { orig IP hdrs | TCP/UDP hdr | user data | ESP trailer } | ESP ICV
+ *
+ * transport-mode (host-to-host) layout would be:
+ *	orig IP hdrs | ESP hdr |
+ *			enc { TCP/UDP hdr | user data | ESP trailer } | ESP ICV
+ */
+struct Esp4hdr
+{
+	/* ipv4 header */
 	uchar	vihl;		/* Version and header length */
 	uchar	tos;		/* Type of service */
 	uchar	length[2];	/* packet length */
 	uchar	id[2];		/* Identification */
 	uchar	frag[2];	/* Fragment information */
-	uchar	Unused;	
+	uchar	Unused;
 	uchar	espproto;	/* Protocol */
 	uchar	espplen[2];	/* Header plus data length */
 	uchar	espsrc[4];	/* Ip source */
 	uchar	espdst[4];	/* Ip destination */
 
-	/* esp header */
-	uchar	espspi[4];	/* Security parameter index */
-	uchar	espseq[4];	/* Sequence number */
+	Esphdr;
 };
 
+/* tunnel-mode layout */
+struct Esp6hdr
+{
+	IPV6HDR;
+	Esphdr;
+};
+
 struct Esptail
 {
 	uchar	pad;
@@ -53,16 +93,28 @@
 	uchar	nexthdr;
 };
 
+/* IP-version-dependent data */
+typedef struct Versdep Versdep;
+struct Versdep
+{
+	ulong	version;
+	ulong	iphdrlen;
+	ulong	hdrlen;		/* iphdrlen + esp hdr len */
+	ulong	spi;
+	uchar	laddr[IPaddrlen];
+	uchar	raddr[IPaddrlen];
+};
+
 /* header as seen by the user */
 struct Userhdr
 {
-	uchar	nexthdr;	// next protocol
+	uchar	nexthdr;	/* next protocol */
 	uchar	unused[3];
 };
 
 struct Esppriv
 {
-	ulong	in;
+	uvlong	in;
 	ulong	inerrors;
 };
 
@@ -72,77 +124,68 @@
 struct Espcb
 {
 	int	incoming;
-	int	header;		// user user level header
+	int	header;		/* user-level header */
 	ulong	spi;
-	ulong	seq;		// last seq sent
-	ulong	window;		// for replay attacks
+	ulong	seq;		/* last seq sent */
+	ulong	window;		/* for replay attacks */
+
 	char	*espalg;
-	void	*espstate;	// other state for esp
-	int	espivlen;	// in bytes
+	void	*espstate;	/* other state for esp */
+	int	espivlen;	/* in bytes */
 	int	espblklen;
 	int	(*cipher)(Espcb*, uchar *buf, int len);
+
 	char	*ahalg;
-	void	*ahstate;	// other state for esp
-	int	ahlen;		// auth data length in bytes
+	void	*ahstate;	/* other state for ah (authentication) */
+	int	ahlen;		/* auth data length in bytes */
 	int	ahblklen;
 	int	(*auth)(Espcb*, uchar *buf, int len, uchar *hash);
+	DigestState *ds;
 };
 
 struct Algorithm
 {
 	char 	*name;
-	int	keylen;		// in bits
-	void	(*init)(Espcb*, char* name, uchar *key, int keylen);
+	int	keylen;		/* in bits */
+	void	(*init)(Espcb*, char* name, uchar *key, unsigned keylen);
 };
 
-
-enum {
-	RC4forward	= 10*1024*1024,	// maximum skip forward
-	RC4back = 100*1024,		// maximum look back
-};
-
-struct Esprc4
-{
-	ulong cseq;	// current byte sequence number
-	RC4state current;
-
-	int ovalid;	// old is valid
-	ulong lgseq; // last good sequence
-	ulong oseq;	// old byte sequence number
-	RC4state old;
-};
-
 static	Conv* convlookup(Proto *esp, ulong spi);
 static	char *setalg(Espcb *ecb, char **f, int n, Algorithm *alg);
-static	void nullespinit(Espcb*, char*, uchar *key, int keylen);
-static	void nullahinit(Espcb*, char*, uchar *key, int keylen);
-static	void shaahinit(Espcb*, char*, uchar *key, int keylen);
-static	void md5ahinit(Espcb*, char*, uchar *key, int keylen);
-static	void desespinit(Espcb *ecb, char *name, uchar *k, int n);
-static	void rc4espinit(Espcb *ecb, char *name, uchar *k, int n);
 static	void espkick(void *x);
 
+static	void nullespinit(Espcb*, char*, uchar *key, unsigned keylen);
+static	void des3espinit(Espcb*, char*, uchar *key, unsigned keylen);
+static	void aescbcespinit(Espcb*, char*, uchar *key, unsigned keylen);
+static	void aesctrespinit(Espcb*, char*, uchar *key, unsigned keylen);
+static	void desespinit(Espcb *ecb, char *name, uchar *k, unsigned n);
+
+static	void nullahinit(Espcb*, char*, uchar *key, unsigned keylen);
+static	void shaahinit(Espcb*, char*, uchar *key, unsigned keylen);
+static	void md5ahinit(Espcb*, char*, uchar *key, unsigned keylen);
+
 static Algorithm espalg[] =
 {
-	"null",			0,	nullespinit,
-	"des_56_cbc",		64,	desespinit,
-	"rc4_128",		128,	rc4espinit,
-	nil,			0,	nil,
+	"null",		0,	nullespinit,
+	"des3_cbc",	192,	des3espinit,	/* new rfc2451, des-ede3 */
+	"aes_128_cbc",	128,	aescbcespinit,	/* new rfc3602 */
+	"aes_ctr",	128,	aesctrespinit,	/* new rfc3686 */
+	"des_56_cbc",	64,	desespinit,	/* rfc2405, deprecated */
+	nil,		0,	nil,
 };
 
 static Algorithm ahalg[] =
 {
-	"null",			0,	nullahinit,
-	"hmac_sha1_96",		128,	shaahinit,
-	"hmac_md5_96",		128,	md5ahinit,
-	nil,			0,	nil,
+	"null",		0,	nullahinit,
+	"hmac_sha1_96",	128,	shaahinit,	/* rfc2404 */
+	"hmac_md5_96",	128,	md5ahinit,	/* rfc2403 */
+	nil,		0,	nil,
 };
 
 static char*
 espconnect(Conv *c, char **argv, int argc)
 {
-	char *p, *pp;
-	char *e = nil;
+	char *p, *pp, *e = nil;
 	ulong spi;
 	Espcb *ecb = (Espcb*)c->ptcl;
 
@@ -157,7 +200,10 @@
 			break;
 		}
 		*p++ = 0;
-		parseip(c->raddr, argv[1]);
+		if (parseip(c->raddr, argv[1]) == -1) {
+			e = Ebadip;
+			break;
+		}
 		findlocalip(c->p->f, c->laddr, c->raddr);
 		ecb->incoming = 0;
 		ecb->seq = 0;
@@ -215,26 +261,86 @@
 	ipmove(c->raddr, IPnoaddr);
 
 	ecb = (Espcb*)c->ptcl;
-	free(ecb->espstate);
-	free(ecb->ahstate);
+	secfree(ecb->espstate);
+	secfree(ecb->ahstate);
 	memset(ecb, 0, sizeof(Espcb));
 }
 
+static int
+pktipvers(Fs *f, Block **bpp)
+{
+	if (*bpp == nil || BLEN(*bpp) == 0) {
+		/* get enough to identify the IP version */
+		*bpp = pullupblock(*bpp, IP4HDR);
+		if(*bpp == nil) {
+			netlog(f, Logesp, "esp: short packet\n");
+			return 0;
+		}
+	}
+	return (((Esp4hdr*)(*bpp)->rp)->vihl & 0xf0) == IP_VER4? V4: V6;
+}
+
 static void
+getverslens(int version, Versdep *vp)
+{
+	vp->version = version;
+	switch(vp->version) {
+	case V4:
+		vp->iphdrlen = IP4HDR;
+		vp->hdrlen   = Esp4hdrlen;
+		break;
+	case V6:
+		vp->iphdrlen = IP6HDR;
+		vp->hdrlen   = Esp6hdrlen;
+		break;
+	default:
+		panic("esp: getverslens version %d wrong", version);
+	}
+}
+
+static void
+getpktspiaddrs(uchar *pkt, Versdep *vp)
+{
+	Esp4hdr *eh4;
+	Esp6hdr *eh6;
+
+	switch(vp->version) {
+	case V4:
+		eh4 = (Esp4hdr*)pkt;
+		v4tov6(vp->raddr, eh4->espsrc);
+		v4tov6(vp->laddr, eh4->espdst);
+		vp->spi = nhgetl(eh4->espspi);
+		break;
+	case V6:
+		eh6 = (Esp6hdr*)pkt;
+		ipmove(vp->raddr, eh6->src);
+		ipmove(vp->laddr, eh6->dst);
+		vp->spi = nhgetl(eh6->espspi);
+		break;
+	default:
+		panic("esp: getpktspiaddrs vp->version %ld wrong", vp->version);
+	}
+}
+
+/*
+ * encapsulate next IP packet on x's write queue in IP/ESP packet
+ * and initiate output of the result.
+ */
+static void
 espkick(void *x)
 {
+	int nexthdr, payload, pad, align;
+	uchar *auth;
+	Block *bp;
 	Conv *c = x;
-	Esphdr *eh;
+	Esp4hdr *eh4;
+	Esp6hdr *eh6;
+	Espcb *ecb;
 	Esptail *et;
 	Userhdr *uh;
-	Espcb *ecb;
-	Block *bp;
-	int nexthdr;
-	int payload;
-	int pad;
-	int align;
-	uchar *auth;
+	Versdep vers;
 
+	getverslens(convipvers(c), &vers);
 	bp = qget(c->wq);
 	if(bp == nil)
 		return;
@@ -244,7 +350,7 @@
 
 	if(ecb->header) {
 		/* make sure the message has a User header */
-		bp = pullupblock(bp, UserhdrSize);
+		bp = pullupblock(bp, Userhdrlen);
 		if(bp == nil) {
 			qunlock(c);
 			return;
@@ -251,15 +357,16 @@
 		}
 		uh = (Userhdr*)bp->rp;
 		nexthdr = uh->nexthdr;
-		bp->rp += UserhdrSize;
+		bp->rp += Userhdrlen;
 	} else {
-		nexthdr = 0;  // what should this be?
+		nexthdr = 0;	/* what should this be? */
 	}
 
 	payload = BLEN(bp) + ecb->espivlen;
 
 	/* Make space to fit ip header */
-	bp = padblock(bp, EsphdrSize + ecb->espivlen);
+	bp = padblock(bp, vers.hdrlen + ecb->espivlen);
+	getpktspiaddrs(bp->rp, &vers);
 
 	align = 4;
 	if(ecb->espblklen > align)
@@ -266,7 +373,7 @@
 		align = ecb->espblklen;
 	if(align % ecb->ahblklen != 0)
 		panic("espkick: ahblklen is important after all");
-	pad = (align-1) - (payload + EsptailSize-1)%align;
+	pad = (align-1) - (payload + Esptaillen-1)%align;
 
 	/*
 	 * Make space for tail
@@ -273,70 +380,88 @@
 	 * this is done by calling padblock with a negative size
 	 * Padblock does not change bp->wp!
 	 */
-	bp = padblock(bp, -(pad+EsptailSize+ecb->ahlen));
-	bp->wp += pad+EsptailSize+ecb->ahlen;
+	bp = padblock(bp, -(pad+Esptaillen+ecb->ahlen));
+	bp->wp += pad+Esptaillen+ecb->ahlen;
 
-	eh = (Esphdr *)(bp->rp);
-	et = (Esptail*)(bp->rp + EsphdrSize + payload + pad);
+	et = (Esptail*)(bp->rp + vers.hdrlen + payload + pad);
 
-	// fill in tail
+	/* fill in tail */
 	et->pad = pad;
 	et->nexthdr = nexthdr;
 
-	ecb->cipher(ecb, bp->rp+EsphdrSize, payload+pad+EsptailSize);
-	auth = bp->rp + EsphdrSize + payload + pad + EsptailSize;
+	/* encrypt the payload */
+	ecb->cipher(ecb, bp->rp + vers.hdrlen, payload + pad + Esptaillen);
+	auth = bp->rp + vers.hdrlen + payload + pad + Esptaillen;
 
-	// fill in head
-	eh->vihl = IP_VER4;
-	hnputl(eh->espspi, ecb->spi);
-	hnputl(eh->espseq, ++ecb->seq);
-	v6tov4(eh->espsrc, c->laddr);
-	v6tov4(eh->espdst, c->raddr);
-	eh->espproto = IP_ESPPROTO;
-	eh->frag[0] = 0;
-	eh->frag[1] = 0;
+	/* fill in head; construct a new IP header and an ESP header */
+	if (vers.version == V4) {
+		eh4 = (Esp4hdr *)bp->rp;
+		eh4->vihl = IP_VER4;
+		v6tov4(eh4->espsrc, c->laddr);
+		v6tov4(eh4->espdst, c->raddr);
+		eh4->espproto = IP_ESPPROTO;
+		eh4->frag[0] = 0;
+		eh4->frag[1] = 0;
 
-	ecb->auth(ecb, bp->rp+IphdrSize, (EsphdrSize-IphdrSize)+payload+pad+EsptailSize, auth);
+		hnputl(eh4->espspi, ecb->spi);
+		hnputl(eh4->espseq, ++ecb->seq);
+	} else {
+		eh6 = (Esp6hdr *)bp->rp;
+		eh6->vcf[0] = IP_VER6;
+		ipmove(eh6->src, c->laddr);
+		ipmove(eh6->dst, c->raddr);
+		eh6->proto = IP_ESPPROTO;
 
+		hnputl(eh6->espspi, ecb->spi);
+		hnputl(eh6->espseq, ++ecb->seq);
+	}
+
+	/* compute secure hash */
+	ecb->auth(ecb, bp->rp + vers.iphdrlen, (vers.hdrlen - vers.iphdrlen) +
+		payload + pad + Esptaillen, auth);
+
 	qunlock(c);
-	//print("esp: pass down: %uld\n", BLEN(bp));
-	ipoput4(c->p->f, bp, 0, c->ttl, c->tos, c);
+	/* print("esp: pass down: %uld\n", BLEN(bp)); */
+	if (vers.version == V4)
+		ipoput4(c->p->f, bp, 0, c->ttl, c->tos, c);
+	else
+		ipoput6(c->p->f, bp, 0, c->ttl, c->tos, c);
 }
 
+/*
+ * decapsulate IP packet from IP/ESP packet in bp and
+ * pass the result up the spi's Conv's read queue.
+ */
 void
 espiput(Proto *esp, Ipifc*, Block *bp)
 {
-	Esphdr *eh;
-	Esptail *et;
-	Userhdr *uh;
+	int payload, nexthdr;
+	uchar *auth, *espspi;
 	Conv *c;
 	Espcb *ecb;
-	uchar raddr[IPaddrlen], laddr[IPaddrlen];
+	Esptail *et;
 	Fs *f;
-	uchar *auth;
-	ulong spi;
-	int payload, nexthdr;
+	Userhdr *uh;
+	Versdep vers;
 
 	f = esp->f;
 
-	bp = pullupblock(bp, EsphdrSize+EsptailSize);
+	getverslens(pktipvers(f, &bp), &vers);
+
+	bp = pullupblock(bp, vers.hdrlen + Esptaillen);
 	if(bp == nil) {
 		netlog(f, Logesp, "esp: short packet\n");
 		return;
 	}
+	getpktspiaddrs(bp->rp, &vers);
 
-	eh = (Esphdr*)(bp->rp);
-	spi = nhgetl(eh->espspi);
-	v4tov6(raddr, eh->espsrc);
-	v4tov6(laddr, eh->espdst);
-
 	qlock(esp);
 	/* Look for a conversation structure for this port */
-	c = convlookup(esp, spi);
+	c = convlookup(esp, vers.spi);
 	if(c == nil) {
 		qunlock(esp);
-		netlog(f, Logesp, "esp: no conv %I -> %I!%d\n", raddr,
-			laddr, spi);
+		netlog(f, Logesp, "esp: no conv %I -> %I!%lud\n", vers.raddr,
+			vers.laddr, vers.spi);
 		icmpnoconv(f, bp);
 		freeblist(bp);
 		return;
@@ -346,76 +471,83 @@
 	qunlock(esp);
 
 	ecb = c->ptcl;
-	// too hard to do decryption/authentication on block lists
-	if(bp->next)
+	/* too hard to do decryption/authentication on block lists */
+	if(bp->next != nil)
 		bp = concatblock(bp);
 
-	if(BLEN(bp) < EsphdrSize + ecb->espivlen + EsptailSize + ecb->ahlen) {
+	if(BLEN(bp) < vers.hdrlen + ecb->espivlen + Esptaillen + ecb->ahlen) {
 		qunlock(c);
-		netlog(f, Logesp, "esp: short block %I -> %I!%d\n", raddr,
-			laddr, spi);
+		netlog(f, Logesp, "esp: short block %I -> %I!%lud\n", vers.raddr,
+			vers.laddr, vers.spi);
 		freeb(bp);
 		return;
 	}
 
-	eh = (Esphdr*)(bp->rp);
 	auth = bp->wp - ecb->ahlen;
-	if(!ecb->auth(ecb, eh->espspi, auth-eh->espspi, auth)) {
+	espspi = vers.version == V4?	((Esp4hdr*)bp->rp)->espspi:
+					((Esp6hdr*)bp->rp)->espspi;
+
+	/* compute secure hash and authenticate */
+	if(!ecb->auth(ecb, espspi, auth - espspi, auth)) {
 		qunlock(c);
-print("esp: bad auth %I -> %I!%ld\n", raddr, laddr, spi);
-		netlog(f, Logesp, "esp: bad auth %I -> %I!%d\n", raddr,
-			laddr, spi);
+print("esp: bad auth %I -> %I!%ld\n", vers.raddr, vers.laddr, vers.spi);
+		netlog(f, Logesp, "esp: bad auth %I -> %I!%lud\n", vers.raddr,
+			vers.laddr, vers.spi);
 		freeb(bp);
 		return;
 	}
 
-	payload = BLEN(bp)-EsphdrSize-ecb->ahlen;
-	if(payload<=0 || payload%4 != 0 || payload%ecb->espblklen!=0) {
+	payload = BLEN(bp) - vers.hdrlen - ecb->ahlen;
+	if(payload <= 0 || payload % 4 != 0 || payload % ecb->espblklen != 0) {
 		qunlock(c);
-		netlog(f, Logesp, "esp: bad length %I -> %I!%d payload=%d BLEN=%d\n", raddr,
-			laddr, spi, payload, BLEN(bp));
+		netlog(f, Logesp, "esp: bad length %I -> %I!%lud payload=%d BLEN=%zd\n",
+			vers.raddr, vers.laddr, vers.spi, payload, BLEN(bp));
 		freeb(bp);
 		return;
 	}
-	if(!ecb->cipher(ecb, bp->rp+EsphdrSize, payload)) {
+
+	/* decrypt payload */
+	if(!ecb->cipher(ecb, bp->rp + vers.hdrlen, payload)) {
 		qunlock(c);
-print("esp: cipher failed %I -> %I!%ld: %r\n", raddr, laddr, spi);
-		netlog(f, Logesp, "esp: cipher failed %I -> %I!%d: %r\n", raddr,
-			laddr, spi);
+print("esp: cipher failed %I -> %I!%ld: %s\n", vers.raddr, vers.laddr, vers.spi, up->errstr);
+		netlog(f, Logesp, "esp: cipher failed %I -> %I!%lud: %s\n",
+			vers.raddr, vers.laddr, vers.spi, up->errstr);
 		freeb(bp);
 		return;
 	}
 
-	payload -= EsptailSize;
-	et = (Esptail*)(bp->rp + EsphdrSize + payload);
+	payload -= Esptaillen;
+	et = (Esptail*)(bp->rp + vers.hdrlen + payload);
 	payload -= et->pad + ecb->espivlen;
 	nexthdr = et->nexthdr;
 	if(payload <= 0) {
 		qunlock(c);
-		netlog(f, Logesp, "esp: short packet after decrypt %I -> %I!%d\n", raddr,
-			laddr, spi);
+		netlog(f, Logesp, "esp: short packet after decrypt %I -> %I!%lud\n",
+			vers.raddr, vers.laddr, vers.spi);
 		freeb(bp);
 		return;
 	}
 
-	// trim packet
-	bp->rp += EsphdrSize + ecb->espivlen;
+	/* trim packet */
+	bp->rp += vers.hdrlen + ecb->espivlen; /* toss original IP & ESP hdrs */
 	bp->wp = bp->rp + payload;
 	if(ecb->header) {
-		// assume UserhdrSize < EsphdrSize
-		bp->rp -= UserhdrSize;
+		/* assume Userhdrlen < Esp4hdrlen < Esp6hdrlen */
+		bp->rp -= Userhdrlen;
 		uh = (Userhdr*)bp->rp;
-		memset(uh, 0, UserhdrSize);
+		memset(uh, 0, Userhdrlen);
 		uh->nexthdr = nexthdr;
 	}
 
+	/* ingress filtering here? */
+
 	if(qfull(c->rq)){
-		netlog(f, Logesp, "esp: qfull %I -> %I.%uld\n", raddr,
-			laddr, spi);
+		netlog(f, Logesp, "esp: qfull %I -> %I.%uld\n", vers.raddr,
+			vers.laddr, vers.spi);
 		freeblist(bp);
 	}else {
-//print("esp: pass up: %uld\n", BLEN(bp));
-		qpass(c->rq, bp);
+//		print("esp: pass up: %uld\n", BLEN(bp));
+		qpass(c->rq, bp);	/* pass packet up the read queue */
 	}
 
 	qunlock(c);
@@ -440,19 +572,19 @@
 	return e;
 }
 
+/* called from icmp(v6) for unreachable hosts, time exceeded, etc. */
 void
 espadvise(Proto *esp, Block *bp, char *msg)
 {
-	Esphdr *h;
 	Conv *c;
-	ulong spi;
+	Versdep vers;
 
-	h = (Esphdr*)(bp->rp);
+	getverslens(pktipvers(esp->f, &bp), &vers);
+	getpktspiaddrs(bp->rp, &vers);
 
-	spi = nhgets(h->espspi);
 	qlock(esp);
-	c = convlookup(esp, spi);
-	if(c != nil) {
+	c = convlookup(esp, vers.spi);
+	if(c != nil && !c->ignoreadvice) {
 		qhangup(c->rq, msg);
 		qhangup(c->wq, msg);
 	}
@@ -466,7 +598,7 @@
 	Esppriv *upriv;
 
 	upriv = esp->priv;
-	return snprint(buf, len, "%lud %lud\n",
+	return snprint(buf, len, "%llud %lud\n",
 		upriv->in,
 		upriv->inerrors);
 }
@@ -520,10 +652,10 @@
 setalg(Espcb *ecb, char **f, int n, Algorithm *alg)
 {
 	uchar *key;
-	int i, nbyte, nchar;
-	int c;
+	int c, nbyte, nchar;
+	uint i;
 
-	if(n < 2)
+	if(n < 2 || n > 3)
 		return "bad format";
 	for(; alg->name; alg++)
 		if(strcmp(f[1], alg->name) == 0)
@@ -531,10 +663,14 @@
 	if(alg->name == nil)
 		return "unknown algorithm";
 
-	if(n != 3)
-		return "bad format";
 	nbyte = (alg->keylen + 7) >> 3;
-	nchar = strlen(f[2]);
+	if (n == 2)
+		nchar = 0;
+	else
+		nchar = strlen(f[2]);
+	if(nchar != 2 * nbyte)			/* TODO: maybe < is ok */
+		return "key not required length";
+	/* convert hex digits from ascii, in place */
 	for(i=0; i<nchar; i++) {
 		c = f[2][i];
 		if(c >= '0' && c <= '9')
@@ -544,21 +680,27 @@
 		else if(c >= 'A' && c <= 'F')
 			f[2][i] -= 'A'-10;
 		else
-			return "bad character in key";
+			return "non-hex character in key";
 	}
-	key = smalloc(nbyte);
-	for(i=0; i<nchar && i*2<nbyte; i++) {
+	/* collapse hex digits into complete bytes in reverse order in key */
+	key = secalloc(nbyte);
+	for(i = 0; i < nchar && i/2 < nbyte; i++) {
 		c = f[2][nchar-i-1];
 		if(i&1)
 			c <<= 4;
-		key[i>>1] |= c;
+		key[i/2] |= c;
 	}
-
+	memset(f[2], 0, nchar);
 	alg->init(ecb, alg->name, key, alg->keylen);
-	free(key);
+	secfree(key);
 	return nil;
 }
 
+
+/*
+ * null encryption
+ */
+
 static int
 nullcipher(Espcb*, uchar*, int)
 {
@@ -566,7 +708,7 @@
 }
 
 static void
-nullespinit(Espcb *ecb, char *name, uchar*, int)
+nullespinit(Espcb *ecb, char *name, uchar*, unsigned)
 {
 	ecb->espalg = name;
 	ecb->espblklen = 1;
@@ -581,7 +723,7 @@
 }
 
 static void
-nullahinit(Espcb *ecb, char *name, uchar*, int)
+nullahinit(Espcb *ecb, char *name, uchar*, unsigned)
 {
 	ecb->ahalg = name;
 	ecb->ahblklen = 1;
@@ -589,26 +731,28 @@
 	ecb->auth = nullauth;
 }
 
-void
+
+/*
+ * sha1
+ */
+
+static void
 seanq_hmac_sha1(uchar hash[SHA1dlen], uchar *t, long tlen, uchar *key, long klen)
 {
-	uchar ipad[65], opad[65];
 	int i;
+	uchar ipad[Hmacblksz+1], opad[Hmacblksz+1], innerhash[SHA1dlen];
 	DigestState *digest;
-	uchar innerhash[SHA1dlen];
 
-	for(i=0; i<64; i++){
-		ipad[i] = 0x36;
-		opad[i] = 0x5c;
-	}
-	ipad[64] = opad[64] = 0;
-	for(i=0; i<klen; i++){
+	memset(ipad, 0x36, Hmacblksz);
+	memset(opad, 0x5c, Hmacblksz);
+	ipad[Hmacblksz] = opad[Hmacblksz] = 0;
+	for(i = 0; i < klen; i++){
 		ipad[i] ^= key[i];
 		opad[i] ^= key[i];
 	}
-	digest = sha1(ipad, 64, nil, nil);
+	digest = sha1(ipad, Hmacblksz, nil, nil);
 	sha1(t, tlen, innerhash, digest);
-	digest = sha1(opad, 64, nil, nil);
+	digest = sha1(opad, Hmacblksz, nil, nil);
 	sha1(innerhash, SHA1dlen, hash, digest);
 }
 
@@ -615,11 +759,11 @@
 static int
 shaauth(Espcb *ecb, uchar *t, int tlen, uchar *auth)
 {
-	uchar hash[SHA1dlen];
 	int r;
+	uchar hash[SHA1dlen];
 
 	memset(hash, 0, SHA1dlen);
-	seanq_hmac_sha1(hash, t, tlen, (uchar*)ecb->ahstate, 16);
+	seanq_hmac_sha1(hash, t, tlen, (uchar*)ecb->ahstate, BITS2BYTES(128));
 	r = memcmp(auth, hash, ecb->ahlen) == 0;
 	memmove(auth, hash, ecb->ahlen);
 	return r;
@@ -626,40 +770,162 @@
 }
 
 static void
-shaahinit(Espcb *ecb, char *name, uchar *key, int klen)
+shaahinit(Espcb *ecb, char *name, uchar *key, unsigned klen)
 {
 	if(klen != 128)
 		panic("shaahinit: bad keylen");
-	klen >>= 8;	// convert to bytes
+	klen /= BI2BY;
 
 	ecb->ahalg = name;
 	ecb->ahblklen = 1;
-	ecb->ahlen = 12;
+	ecb->ahlen = BITS2BYTES(96);
 	ecb->auth = shaauth;
-	ecb->ahstate = smalloc(klen);
+	ecb->ahstate = secalloc(klen);
 	memmove(ecb->ahstate, key, klen);
 }
 
-void
+
+/*
+ * aes
+ */
+static int
+aescbccipher(Espcb *ecb, uchar *p, int n)	/* 128-bit blocks */
+{
+	uchar tmp[AESbsize], q[AESbsize];
+	uchar *pp, *tp, *ip, *eip, *ep;
+	AESstate *ds = ecb->espstate;
+
+	ep = p + n;
+	if(ecb->incoming) {
+		memmove(ds->ivec, p, AESbsize);
+		p += AESbsize;
+		while(p < ep){
+			memmove(tmp, p, AESbsize);
+			aes_decrypt(ds->dkey, ds->rounds, p, q);
+			memmove(p, q, AESbsize);
+			tp = tmp;
+			ip = ds->ivec;
+			for(eip = ip + AESbsize; ip < eip; ){
+				*p++ ^= *ip;
+				*ip++ = *tp++;
+			}
+		}
+	} else {
+		memmove(p, ds->ivec, AESbsize);
+		for(p += AESbsize; p < ep; p += AESbsize){
+			pp = p;
+			ip = ds->ivec;
+			for(eip = ip + AESbsize; ip < eip; )
+				*pp++ ^= *ip++;
+			aes_encrypt(ds->ekey, ds->rounds, p, q);
+			memmove(ds->ivec, q, AESbsize);
+			memmove(p, q, AESbsize);
+		}
+	}
+	return 1;
+}
+
+static void
+aescbcespinit(Espcb *ecb, char *name, uchar *k, unsigned n)
+{
+	uchar key[Aeskeysz], ivec[Aeskeysz];
+
+	n = BITS2BYTES(n);
+	if(n > Aeskeysz)
+		n = Aeskeysz;
+	memset(key, 0, sizeof(key));
+	memmove(key, k, n);
+	prng(ivec, Aeskeysz);
+	ecb->espalg = name;
+	ecb->espblklen = Aesblk;
+	ecb->espivlen = Aesblk;
+	ecb->cipher = aescbccipher;
+	ecb->espstate = secalloc(sizeof(AESstate));
+	setupAESstate(ecb->espstate, key, n /* keybytes */, ivec);
+	memset(ivec, 0, sizeof(ivec));
+	memset(key, 0, sizeof(key));
+}
+
+static int
+aesctrcipher(Espcb *ecb, uchar *p, int n)	/* 128-bit blocks; XXX body is CBC chaining, same as aescbccipher, not counter mode */
+{
+	uchar tmp[AESbsize], q[AESbsize];
+	uchar *pp, *tp, *ip, *eip, *ep;
+	AESstate *ds = ecb->espstate;
+
+	ep = p + n;
+	if(ecb->incoming) {
+		memmove(ds->ivec, p, AESbsize);
+		p += AESbsize;
+		while(p < ep){
+			memmove(tmp, p, AESbsize);
+			aes_decrypt(ds->dkey, ds->rounds, p, q);
+			memmove(p, q, AESbsize);
+			tp = tmp;
+			ip = ds->ivec;
+			for(eip = ip + AESbsize; ip < eip; ){
+				*p++ ^= *ip;
+				*ip++ = *tp++;
+			}
+		}
+	} else {
+		memmove(p, ds->ivec, AESbsize);
+		for(p += AESbsize; p < ep; p += AESbsize){
+			pp = p;
+			ip = ds->ivec;
+			for(eip = ip + AESbsize; ip < eip; )
+				*pp++ ^= *ip++;
+			aes_encrypt(ds->ekey, ds->rounds, p, q);
+			memmove(ds->ivec, q, AESbsize);
+			memmove(p, q, AESbsize);
+		}
+	}
+	return 1;
+}
+
+static void
+aesctrespinit(Espcb *ecb, char *name, uchar *k, unsigned n)
+{
+	uchar key[Aesblk], ivec[Aesblk];
+
+	n = BITS2BYTES(n);
+	if(n > Aeskeysz)
+		n = Aeskeysz;
+	memset(key, 0, sizeof(key));
+	memmove(key, k, n);
+	prng(ivec, Aesblk);
+	ecb->espalg = name;
+	ecb->espblklen = Aesblk;
+	ecb->espivlen = Aesblk;
+	ecb->cipher = aesctrcipher;
+	ecb->espstate = secalloc(sizeof(AESstate));
+	setupAESstate(ecb->espstate, key, n /* keybytes */, ivec);
+	memset(ivec, 0, sizeof(ivec));
+	memset(key, 0, sizeof(key));
+}
+
+
+/*
+ * md5
+ */
+
+static void
 seanq_hmac_md5(uchar hash[MD5dlen], uchar *t, long tlen, uchar *key, long klen)
 {
-	uchar ipad[65], opad[65];
 	int i;
+	uchar ipad[Hmacblksz+1], opad[Hmacblksz+1], innerhash[MD5dlen];
 	DigestState *digest;
-	uchar innerhash[MD5dlen];
 
-	for(i=0; i<64; i++){
-		ipad[i] = 0x36;
-		opad[i] = 0x5c;
-	}
-	ipad[64] = opad[64] = 0;
-	for(i=0; i<klen; i++){
+	memset(ipad, 0x36, Hmacblksz);
+	memset(opad, 0x5c, Hmacblksz);
+	ipad[Hmacblksz] = opad[Hmacblksz] = 0;
+	for(i = 0; i < klen; i++){
 		ipad[i] ^= key[i];
 		opad[i] ^= key[i];
 	}
-	digest = md5(ipad, 64, nil, nil);
+	digest = md5(ipad, Hmacblksz, nil, nil);
 	md5(t, tlen, innerhash, digest);
-	digest = md5(opad, 64, nil, nil);
+	digest = md5(opad, Hmacblksz, nil, nil);
 	md5(innerhash, MD5dlen, hash, digest);
 }
 
@@ -670,7 +936,7 @@
 	int r;
 
 	memset(hash, 0, MD5dlen);
-	seanq_hmac_md5(hash, t, tlen, (uchar*)ecb->ahstate, 16);
+	seanq_hmac_md5(hash, t, tlen, (uchar*)ecb->ahstate, BITS2BYTES(128));
 	r = memcmp(auth, hash, ecb->ahlen) == 0;
 	memmove(auth, hash, ecb->ahlen);
 	return r;
@@ -677,168 +943,102 @@
 }
 
 static void
-md5ahinit(Espcb *ecb, char *name, uchar *key, int klen)
+md5ahinit(Espcb *ecb, char *name, uchar *key, unsigned klen)
 {
 	if(klen != 128)
 		panic("md5ahinit: bad keylen");
-	klen >>= 3;	// convert to bytes
-
-
+	klen = BITS2BYTES(klen);
 	ecb->ahalg = name;
 	ecb->ahblklen = 1;
-	ecb->ahlen = 12;
+	ecb->ahlen = BITS2BYTES(96);
 	ecb->auth = md5auth;
-	ecb->ahstate = smalloc(klen);
+	ecb->ahstate = secalloc(klen);
 	memmove(ecb->ahstate, key, klen);
 }
 
+
+/*
+ * des, single and triple
+ */
+
 static int
 descipher(Espcb *ecb, uchar *p, int n)
 {
-	uchar tmp[8];
-	uchar *pp, *tp, *ip, *eip, *ep;
 	DESstate *ds = ecb->espstate;
 
-	ep = p + n;
 	if(ecb->incoming) {
-		memmove(ds->ivec, p, 8);
-		p += 8;
-		while(p < ep){
-			memmove(tmp, p, 8);
-			block_cipher(ds->expanded, p, 1);
-			tp = tmp;
-			ip = ds->ivec;
-			for(eip = ip+8; ip < eip; ){
-				*p++ ^= *ip;
-				*ip++ = *tp++;
-			}
-		}
+		memmove(ds->ivec, p, Desblk);
+		desCBCdecrypt(p + Desblk, n - Desblk, ds);
 	} else {
-		memmove(p, ds->ivec, 8);
-		for(p += 8; p < ep; p += 8){
-			pp = p;
-			ip = ds->ivec;
-			for(eip = ip+8; ip < eip; )
-				*pp++ ^= *ip++;
-			block_cipher(ds->expanded, p, 0);
-			memmove(ds->ivec, p, 8);
-		}
+		memmove(p, ds->ivec, Desblk);
+		desCBCencrypt(p + Desblk, n - Desblk, ds);
 	}
 	return 1;
 }
-	
+
+static int
+des3cipher(Espcb *ecb, uchar *p, int n)
+{
+	DES3state *ds = ecb->espstate;
+
+	if(ecb->incoming) {
+		memmove(ds->ivec, p, Desblk);
+		des3CBCdecrypt(p + Desblk, n - Desblk, ds);
+	} else {
+		memmove(p, ds->ivec, Desblk);
+		des3CBCencrypt(p + Desblk, n - Desblk, ds);
+	}
+	return 1;
+}
+
 static void
-desespinit(Espcb *ecb, char *name, uchar *k, int n)
+desespinit(Espcb *ecb, char *name, uchar *k, unsigned n)
 {
-	uchar key[8];
-	uchar ivec[8];
-	int i;
-	
-	// bits to bytes
-	n = (n+7)>>3;
-	if(n > 8)
-		n = 8;
+	uchar key[Desblk], ivec[Desblk];
+
+	n = BITS2BYTES(n);
+	if(n > Desblk)
+		n = Desblk;
 	memset(key, 0, sizeof(key));
 	memmove(key, k, n);
-	for(i=0; i<8; i++)
-		ivec[i] = nrand(256);
+	prng(ivec, Desblk);
 	ecb->espalg = name;
-	ecb->espblklen = 8;
-	ecb->espivlen = 8;
+	ecb->espblklen = Desblk;
+	ecb->espivlen = Desblk;
+
 	ecb->cipher = descipher;
-	ecb->espstate = smalloc(sizeof(DESstate));
+	ecb->espstate = secalloc(sizeof(DESstate));
 	setupDESstate(ecb->espstate, key, ivec);
+	memset(ivec, 0, sizeof(ivec));
+	memset(key, 0, sizeof(key));
 }
 
-static int
-rc4cipher(Espcb *ecb, uchar *p, int n)
+static void
+des3espinit(Espcb *ecb, char *name, uchar *k, unsigned n)
 {
-	Esprc4 *esprc4;
-	RC4state tmpstate;
-	ulong seq;
-	long d, dd;
+	uchar key[3][Desblk], ivec[Desblk];
 
-	if(n < 4)
-		return 0;
+	n = BITS2BYTES(n);
+	if(n > Des3keysz)
+		n = Des3keysz;
+	memset(key, 0, sizeof(key));
+	memmove(key, k, n);
+	prng(ivec, Desblk);
+	ecb->espalg = name;
+	ecb->espblklen = Desblk;
+	ecb->espivlen = Desblk;
 
-	esprc4 = ecb->espstate;
-	if(ecb->incoming) {
-		seq = nhgetl(p);
-		p += 4;
-		n -= 4;
-		d = seq-esprc4->cseq;
-		if(d == 0) {
-			rc4(&esprc4->current, p, n);
-			esprc4->cseq += n;
-			if(esprc4->ovalid) {
-				dd = esprc4->cseq - esprc4->lgseq;
-				if(dd > RC4back)
-					esprc4->ovalid = 0;
-			}
-		} else if(d > 0) {
-print("missing packet: %uld %ld\n", seq, d);
-			// this link is hosed
-			if(d > RC4forward) {
-				strcpy(up->errstr, "rc4cipher: skipped too much");
-				return 0;
-			}
-			esprc4->lgseq = seq;
-			if(!esprc4->ovalid) {
-				esprc4->ovalid = 1;
-				esprc4->oseq = esprc4->cseq;
-				memmove(&esprc4->old, &esprc4->current, sizeof(RC4state));
-			}
-			rc4skip(&esprc4->current, d);
-			rc4(&esprc4->current, p, n);
-			esprc4->cseq = seq+n;
-		} else {
-print("reordered packet: %uld %ld\n", seq, d);
-			dd = seq - esprc4->oseq;
-			if(!esprc4->ovalid || -d > RC4back || dd < 0) {
-				strcpy(up->errstr, "rc4cipher: too far back");
-				return 0;
-			}
-			memmove(&tmpstate, &esprc4->old, sizeof(RC4state));
-			rc4skip(&tmpstate, dd);
-			rc4(&tmpstate, p, n);
-			return 1;
-		}
-
-		// move old state up
-		if(esprc4->ovalid) {
-			dd = esprc4->cseq - RC4back - esprc4->oseq;
-			if(dd > 0) {
-				rc4skip(&esprc4->old, dd);
-				esprc4->oseq += dd;
-			}
-		}
-	} else {
-		hnputl(p, esprc4->cseq);
-		p += 4;
-		n -= 4;
-		rc4(&esprc4->current, p, n);
-		esprc4->cseq += n;
-	}
-	return 1;
+	ecb->cipher = des3cipher;
+	ecb->espstate = secalloc(sizeof(DES3state));
+	setupDES3state(ecb->espstate, key, ivec);
+	memset(ivec, 0, sizeof(ivec));
+	memset(key, 0, sizeof(key));
 }
 
-static void
-rc4espinit(Espcb *ecb, char *name, uchar *k, int n)
-{	
-	Esprc4 *esprc4;
 
-	// bits to bytes
-	n = (n+7)>>3;
-	esprc4 = smalloc(sizeof(Esprc4));
-	memset(esprc4, 0, sizeof(Esprc4));
-	setupRC4state(&esprc4->current, k, n);
-	ecb->espalg = name;
-	ecb->espblklen = 4;
-	ecb->espivlen = 4;
-	ecb->cipher = rc4cipher;
-	ecb->espstate = esprc4;
-}
-	
+/*
+ * interfacing to devip
+ */
 void
 espinit(Fs *fs)
 {
--- a/os/ip/ethermedium.c
+++ b/os/ip/ethermedium.c
@@ -5,9 +5,9 @@
 #include "fns.h"
 #include "../port/error.h"
 
+#include "../port/netif.h"
 #include "ip.h"
 #include "ipv6.h"
-#include "kernel.h"
 
 typedef struct Etherhdr Etherhdr;
 struct Etherhdr
@@ -18,10 +18,10 @@
 };
 
 static uchar ipbroadcast[IPaddrlen] = {
-	0xff,0xff,0xff,0xff,  
-	0xff,0xff,0xff,0xff,  
-	0xff,0xff,0xff,0xff,  
 	0xff,0xff,0xff,0xff,
+	0xff,0xff,0xff,0xff,
+	0xff,0xff,0xff,0xff,
+	0xff,0xff,0xff,0xff,
 };
 
 static uchar etherbroadcast[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
@@ -33,12 +33,12 @@
 static void	etherbwrite(Ipifc *ifc, Block *bp, int version, uchar *ip);
 static void	etheraddmulti(Ipifc *ifc, uchar *a, uchar *ia);
 static void	etherremmulti(Ipifc *ifc, uchar *a, uchar *ia);
+static void	etherareg(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *ip);
 static Block*	multicastarp(Fs *f, Arpent *a, Medium*, uchar *mac);
 static void	sendarp(Ipifc *ifc, Arpent *a);
-static void	sendgarp(Ipifc *ifc, uchar*);
+static void	sendndp(Ipifc *ifc, Arpent *a);
 static int	multicastea(uchar *ea, uchar *ip);
 static void	recvarpproc(void*);
-static void	resolveaddr6(Ipifc *ifc, Arpent *a);
 static void	etherpref2addr(uchar *pref, uchar *ea);
 
 Medium ethermedium =
@@ -53,8 +53,7 @@
 .bwrite=	etherbwrite,
 .addmulti=	etheraddmulti,
 .remmulti=	etherremmulti,
-.ares=		arpenter,
-.areg=		sendgarp,
+.areg=		etherareg,
 .pref2addr=	etherpref2addr,
 };
 
@@ -70,8 +69,7 @@
 .bwrite=	etherbwrite,
 .addmulti=	etheraddmulti,
 .remmulti=	etherremmulti,
-.ares=		arpenter,
-.areg=		sendgarp,
+.areg=		etherareg,
 .pref2addr=	etherpref2addr,
 };
 
@@ -94,9 +92,6 @@
  */
 enum
 {
-	ETARP		= 0x0806,
-	ETIP4		= 0x0800,
-	ETIP6		= 0x86DD,
 	ARPREQUEST	= 1,
 	ARPREPLY	= 2,
 };
@@ -127,128 +122,92 @@
 static void
 etherbind(Ipifc *ifc, int argc, char **argv)
 {
-	Chan *mchan4, *cchan4, *achan, *mchan6, *cchan6;
-	char addr[Maxpath];	//char addr[2*KNAMELEN];
-	char dir[Maxpath];	//char dir[2*KNAMELEN];
-	char *buf;
-	int fd, cfd, n;
-	char *ptr;
+	char addr[Maxpath], dir[Maxpath];
 	Etherrock *er;
+	Chan *c;
+	int n;
 
 	if(argc < 2)
 		error(Ebadarg);
 
-	mchan4 = cchan4 = achan = mchan6 = cchan6 = nil;
-	buf = nil;
+	/*
+	 *  get mac address
+	 */
+	snprint(addr, sizeof(addr), "%s/addr", argv[2]);
+	c = namec(addr, Aopen, OREAD, 0);
 	if(waserror()){
-		if(mchan4 != nil)
-			cclose(mchan4);
-		if(cchan4 != nil)
-			cclose(cchan4);
-		if(achan != nil)
-			cclose(achan);
-		if(mchan6 != nil)
-			cclose(mchan6);
-		if(cchan6 != nil)
-			cclose(cchan6);
-		if(buf != nil)
-			free(buf);
-		nexterror(); 
+		cclose(c);
+		nexterror();
 	}
+	n = devtab[c->type]->read(c, addr, sizeof(addr)-1, 0);
+	if(n < 0)
+		error(Eio);
+	addr[n] = 0;
+	if(parsemac(ifc->mac, addr, sizeof(ifc->mac)) != 6)
+		error("could not find mac address");
+	cclose(c);
+	poperror();
 
+	er = smalloc(sizeof(*er));
+	er->read4p = er->read6p = er->arpp = (void*)-1;
+	er->mchan4 = er->cchan4 = er->mchan6 = er->cchan6 = er->achan = nil;
+	er->f = ifc->conv->p->f;
+
+	if(waserror()){
+		if(er->mchan4 != nil)
+			cclose(er->mchan4);
+		if(er->cchan4 != nil)
+			cclose(er->cchan4);
+		if(er->mchan6 != nil)
+			cclose(er->mchan6);
+		if(er->cchan6 != nil)
+			cclose(er->cchan6);
+		if(er->achan != nil)
+			cclose(er->achan);
+		free(er);
+		nexterror();
+	}
+
 	/*
-	 *  open ip converstation
+	 *  open ipv4 conversation
 	 *
 	 *  the dial will fail if the type is already open on
 	 *  this device.
 	 */
-	snprint(addr, sizeof(addr), "%s!0x800", argv[2]);
-	fd = kdial(addr, nil, dir, &cfd);
-	if(fd < 0)
-		errorf("dial 0x800 failed: %s", up->env->errstr);
-	mchan4 = commonfdtochan(fd, ORDWR, 0, 1);
-	cchan4 = commonfdtochan(cfd, ORDWR, 0, 1);
-	kclose(fd);
-	kclose(cfd);
+	snprint(addr, sizeof(addr), "%s!0x800", argv[2]);	/* ETIP4 */
+	er->mchan4 = chandial(addr, nil, dir, &er->cchan4);
 
 	/*
 	 *  make it non-blocking
 	 */
-	devtab[cchan4->type]->write(cchan4, nbmsg, strlen(nbmsg), 0);
+	devtab[er->cchan4->type]->write(er->cchan4, nbmsg, strlen(nbmsg), 0);
 
 	/*
-	 *  get mac address and speed
-	 */
-	snprint(addr, sizeof(addr), "%s/stats", dir);
-	fd = kopen(addr, OREAD);
-	if(fd < 0)
-		errorf("can't open ether stats: %s", up->env->errstr);
-
-	buf = smalloc(512);
-	n = kread(fd, buf, 511);
-	kclose(fd);
-	if(n <= 0)
-		error(Eio);
-	buf[n] = 0;
-
-	ptr = strstr(buf, "addr: ");
-	if(!ptr)
-		error(Eio);
-	ptr += 6;
-	parsemac(ifc->mac, ptr, 6);
-
-	ptr = strstr(buf, "mbps: ");
-	if(ptr){
-		ptr += 6;
-		ifc->mbps = atoi(ptr);
-	} else
-		ifc->mbps = 100;
-
-	/*
- 	 *  open arp conversation
-	 */
-	snprint(addr, sizeof(addr), "%s!0x806", argv[2]);
-	fd = kdial(addr, nil, nil, nil);
-	if(fd < 0)
-		errorf("dial 0x806 failed: %s", up->env->errstr);
-	achan = commonfdtochan(fd, ORDWR, 0, 1);
-	kclose(fd);
-
-	/*
-	 *  open ip conversation
+	 *  open ipv6 conversation
 	 *
 	 *  the dial will fail if the type is already open on
 	 *  this device.
 	 */
-	snprint(addr, sizeof(addr), "%s!0x86DD", argv[2]);
-	fd = kdial(addr, nil, dir, &cfd);
-	if(fd < 0)
-		errorf("dial 0x86DD failed: %s", up->env->errstr);
-	mchan6 = commonfdtochan(fd, ORDWR, 0, 1);
-	cchan6 = commonfdtochan(cfd, ORDWR, 0, 1);
-	kclose(fd);
-	kclose(cfd);
+	snprint(addr, sizeof(addr), "%s!0x86DD", argv[2]);	/* ETIP6 */
+	er->mchan6 = chandial(addr, nil, dir, &er->cchan6);
 
 	/*
 	 *  make it non-blocking
 	 */
-	devtab[cchan6->type]->write(cchan6, nbmsg, strlen(nbmsg), 0);
+	devtab[er->cchan6->type]->write(er->cchan6, nbmsg, strlen(nbmsg), 0);
 
-	er = smalloc(sizeof(*er));
-	er->mchan4 = mchan4;
-	er->cchan4 = cchan4;
-	er->achan = achan;
-	er->mchan6 = mchan6;
-	er->cchan6 = cchan6;
-	er->f = ifc->conv->p->f;
-	ifc->arg = er;
-
-	free(buf);
+	/*
+ 	 *  open arp conversation
+	 */
+	snprint(addr, sizeof(addr), "%s!0x806", argv[2]);	/* ETARP */
+	er->achan = chandial(addr, nil, nil, nil);
 	poperror();
 
-	kproc("etherread4", etherread4, ifc, 0);
-	kproc("recvarpproc", recvarpproc, ifc, 0);
-	kproc("etherread6", etherread6, ifc, 0);
+	ifc->arg = er;
+
+	kproc("etherread4", etherread4, ifc);
+	kproc("etherread6", etherread6, ifc);
+	kproc("recvarpproc", recvarpproc, ifc);
 }
 
 /*
@@ -259,21 +218,35 @@
 {
 	Etherrock *er = ifc->arg;
 
-	if(er->read4p)
+	while(waserror())
+		;
+
+	/* wait for readers to start */
+	while(er->arpp == (void*)-1 || er->read4p == (void*)-1 || er->read6p == (void*)-1)
+		tsleep(&up->sleep, return0, 0, 300);
+
+	if(er->read4p != nil)
 		postnote(er->read4p, 1, "unbind", 0);
-	if(er->read6p)
+	if(er->read6p != nil)
 		postnote(er->read6p, 1, "unbind", 0);
-	if(er->arpp)
+	if(er->arpp != nil)
 		postnote(er->arpp, 1, "unbind", 0);
 
+	poperror();
+
+	wunlock(ifc);
+	while(waserror())
+		;
+
 	/* wait for readers to die */
-	while(er->arpp != 0 || er->read4p != 0 || er->read6p != 0)
+	while(er->arpp != nil || er->read4p != nil || er->read6p != nil)
 		tsleep(&up->sleep, return0, 0, 300);
 
+	poperror();
+	wlock(ifc);
+
 	if(er->mchan4 != nil)
 		cclose(er->mchan4);
-	if(er->achan != nil)
-		cclose(er->achan);
 	if(er->cchan4 != nil)
 		cclose(er->cchan4);
 	if(er->mchan6 != nil)
@@ -280,6 +253,8 @@
 		cclose(er->mchan6);
 	if(er->cchan6 != nil)
 		cclose(er->cchan6);
+	if(er->achan != nil)
+		cclose(er->achan);
 
 	free(er);
 }
@@ -297,16 +272,16 @@
 
 	/* get mac address of destination */
 	a = arpget(er->f->arp, bp, version, ifc, ip, mac);
-	if(a){
+	if(a != nil){
 		/* check for broadcast or multicast */
 		bp = multicastarp(er->f, a, ifc->m, mac);
-		if(bp==nil){
+		if(bp == nil){
 			switch(version){
 			case V4:
 				sendarp(ifc, a);
 				break;
-			case V6: 
-				resolveaddr6(ifc, a);
+			case V6:
+				sendndp(ifc, a);
 				break;
 			default:
 				panic("etherbwrite: version %d", version);
@@ -317,8 +292,6 @@
 
 	/* make it a single block with space for the ether header */
 	bp = padblock(bp, ifc->m->hsize);
-	if(bp->next)
-		bp = concatblock(bp);
 	if(BLEN(bp) < ifc->mintu)
 		bp = adjustblock(bp, ifc->mintu);
 	eh = (Etherhdr*)bp->rp;
@@ -358,29 +331,28 @@
 	ifc = a;
 	er = ifc->arg;
 	er->read4p = up;	/* hide identity under a rock for unbind */
-	if(waserror()){
-		er->read4p = 0;
-		pexit("hangup", 1);
-	}
+	if(!waserror())
 	for(;;){
 		bp = devtab[er->mchan4->type]->bread(er->mchan4, ifc->maxtu, 0);
-		if(!canrlock(ifc)){
-			freeb(bp);
-			continue;
-		}
+		if(bp == nil)
+			break;
+		rlock(ifc);
 		if(waserror()){
 			runlock(ifc);
 			nexterror();
 		}
 		ifc->in++;
-		bp->rp += ifc->m->hsize;
-		if(ifc->lifc == nil)
+		if(ifc->lifc == nil || BLEN(bp) <= ifc->m->hsize)
 			freeb(bp);
-		else
+		else {
+			bp->rp += ifc->m->hsize;
 			ipiput4(er->f, ifc, bp);
+		}
 		runlock(ifc);
 		poperror();
 	}
+	er->read4p = nil;
+	pexit("hangup", 1);
 }
 
 
@@ -397,29 +369,28 @@
 	ifc = a;
 	er = ifc->arg;
 	er->read6p = up;	/* hide identity under a rock for unbind */
-	if(waserror()){
-		er->read6p = 0;
-		pexit("hangup", 1);
-	}
+	if(!waserror())
 	for(;;){
 		bp = devtab[er->mchan6->type]->bread(er->mchan6, ifc->maxtu, 0);
-		if(!canrlock(ifc)){
-			freeb(bp);
-			continue;
-		}
+		if(bp == nil)
+			break;
+		rlock(ifc);
 		if(waserror()){
 			runlock(ifc);
 			nexterror();
 		}
 		ifc->in++;
-		bp->rp += ifc->m->hsize;
-		if(ifc->lifc == nil)
+		if(ifc->lifc == nil || BLEN(bp) <= ifc->m->hsize)
 			freeb(bp);
-		else
+		else {
+			bp->rp += ifc->m->hsize;
 			ipiput6(er->f, ifc, bp);
+		}
 		runlock(ifc);
 		poperror();
 	}
+	er->read6p = nil;
+	pexit("hangup", 1);
 }
 
 static void
@@ -477,6 +448,7 @@
 	Block *bp;
 	Etherarp *e;
 	Etherrock *er = ifc->arg;
+	uchar targ[IPv4addrlen], src[IPv4addrlen];
 
 	/* don't do anything if it's been less than a second since the last */
 	if(NOW - a->ctime < 1000){
@@ -484,6 +456,9 @@
 		return;
 	}
 
+	/* try to keep it around for a second more */
+	a->ctime = NOW;
+
 	/* remove all but the last message */
 	while((bp = a->hold) != nil){
 		if(bp == a->last)
@@ -492,18 +467,20 @@
 		freeblist(bp);
 	}
 
-	/* try to keep it around for a second more */
-	a->ctime = NOW;
+	memmove(targ, a->ip+IPv4off, IPv4addrlen);
 	arprelease(er->f->arp, a);
 
+	if(!ipv4local(ifc, src, 0, targ))
+		return;
+
 	n = sizeof(Etherarp);
-	if(n < a->type->mintu)
-		n = a->type->mintu;
+	if(n < ifc->m->mintu)
+		n = ifc->m->mintu;
 	bp = allocb(n);
 	memset(bp->rp, 0, n);
 	e = (Etherarp*)bp->rp;
-	memmove(e->tpa, a->ip+IPv4off, sizeof(e->tpa));
-	ipv4local(ifc, e->spa);
+	memmove(e->tpa, targ, sizeof(e->tpa));
+	memmove(e->spa, src, sizeof(e->spa));
 	memmove(e->sha, ifc->mac, sizeof(e->sha));
 	memset(e->d, 0xff, sizeof(e->d));		/* ethernet broadcast */
 	memmove(e->s, ifc->mac, sizeof(e->s));
@@ -516,18 +493,14 @@
 	hnputs(e->op, ARPREQUEST);
 	bp->wp += n;
 
-	n = devtab[er->achan->type]->bwrite(er->achan, bp, 0);
-	if(n < 0)
-		print("arp: send: %r\n");
+	devtab[er->achan->type]->bwrite(er->achan, bp, 0);
 }
 
 static void
-resolveaddr6(Ipifc *ifc, Arpent *a)
+sendndp(Ipifc *ifc, Arpent *a)
 {
-	int sflag;
 	Block *bp;
 	Etherrock *er = ifc->arg;
-	uchar ipsrc[IPaddrlen];
 
 	/* don't do anything if it's been less than a second since the last */
 	if(NOW - a->ctime < ReTransTimer){
@@ -543,19 +516,7 @@
 		freeblist(bp);
 	}
 
-	/* try to keep it around for a second more */
-	a->ctime = NOW;
-	a->rtime = NOW + ReTransTimer;
-	if(a->rxtsrem <= 0) {
-		arprelease(er->f->arp, a);
-		return;
-	}
-
-	a->rxtsrem--;
-	arprelease(er->f->arp, a);
-
-	if(sflag = ipv6anylocal(ifc, ipsrc)) 
-		icmpns(er->f, ipsrc, sflag, a->ip, TARG_MULTI, ifc->mac);
+	ndpsendsol(er->f, ifc, a);	/* unlocks arp */
 }
 
 /*
@@ -569,10 +530,6 @@
 	Etherarp *e;
 	Etherrock *er = ifc->arg;
 
-	/* don't arp for our initial non address */
-	if(ipcmp(ip, IPnoaddr) == 0)
-		return;
-
 	n = sizeof(Etherarp);
 	if(n < ifc->m->mintu)
 		n = ifc->m->mintu;
@@ -593,15 +550,13 @@
 	hnputs(e->op, ARPREQUEST);
 	bp->wp += n;
 
-	n = devtab[er->achan->type]->bwrite(er->achan, bp, 0);
-	if(n < 0)
-		print("garp: send: %r\n");
+	devtab[er->achan->type]->bwrite(er->achan, bp, 0);
 }
 
 static void
 recvarp(Ipifc *ifc)
 {
-	int n;
+	int n, forme;
 	Block *ebp, *rbp;
 	Etherarp *e, *r;
 	uchar ip[IPaddrlen];
@@ -609,11 +564,11 @@
 	Etherrock *er = ifc->arg;
 
 	ebp = devtab[er->achan->type]->bread(er->achan, ifc->maxtu, 0);
-	if(ebp == nil) {
-		print("arp: rcv: %r\n");
+	if(ebp == nil)
 		return;
-	}
 
+	rlock(ifc);
+
 	e = (Etherarp*)ebp->rp;
 	switch(nhgets(e->op)) {
 	default:
@@ -620,9 +575,13 @@
 		break;
 
 	case ARPREPLY:
+		/* make sure not to enter a multicast/broadcast address */
+		if(e->sha[0] & 1)
+			break;
+
 		/* check for machine using my ip address */
 		v4tov6(ip, e->spa);
-		if(iplocalonifc(ifc, ip) || ipproxyifc(er->f, ifc, ip)){
+		if(iplocalonifc(ifc, ip) != nil || ipproxyifc(er->f, ifc, ip)){
 			if(memcmp(e->sha, ifc->mac, sizeof(e->sha)) != 0){
 				print("arprep: 0x%E/0x%E also has ip addr %V\n",
 					e->s, e->sha, e->spa);
@@ -630,46 +589,47 @@
 			}
 		}
 
-		/* make sure we're not entering broadcast addresses */
-		if(ipcmp(ip, ipbroadcast) == 0 ||
-			!memcmp(e->sha, etherbroadcast, sizeof(e->sha))){
-			print("arprep: 0x%E/0x%E cannot register broadcast address %I\n",
-				e->s, e->sha, e->spa);
-			break;
-		}
-
-		arpenter(er->f, V4, e->spa, e->sha, sizeof(e->sha), 0);
+		/* refresh what we know about sender */
+		arpenter(er->f, V4, e->spa, e->sha, sizeof(e->sha), e->tpa, ifc, 1);
 		break;
 
 	case ARPREQUEST:
+		/* don't reply to multicast/broadcast addresses */
+		if(e->sha[0] & 1)
+			break;
+
 		/* don't answer arps till we know who we are */
-		if(ifc->lifc == 0)
+		if(ifc->lifc == nil)
 			break;
 
 		/* check for machine using my ip or ether address */
 		v4tov6(ip, e->spa);
-		if(iplocalonifc(ifc, ip) || ipproxyifc(er->f, ifc, ip)){
+		if(iplocalonifc(ifc, ip) != nil || ipproxyifc(er->f, ifc, ip)){
 			if(memcmp(e->sha, ifc->mac, sizeof(e->sha)) != 0){
-				if (memcmp(eprinted, e->spa, sizeof(e->spa))){
+				if(memcmp(eprinted, e->spa, sizeof(e->spa)) != 0){
 					/* print only once */
-					print("arpreq: 0x%E also has ip addr %V\n", e->sha, e->spa);
+					print("arpreq: 0x%E also has ip addr %V\n",
+						e->sha, e->spa);
 					memmove(eprinted, e->spa, sizeof(e->spa));
 				}
+				break;
 			}
 		} else {
 			if(memcmp(e->sha, ifc->mac, sizeof(e->sha)) == 0){
-				print("arpreq: %V also has ether addr %E\n", e->spa, e->sha);
+				print("arpreq: %V also has ether addr %E\n",
+					e->spa, e->sha);
 				break;
 			}
 		}
 
-		/* refresh what we know about sender */
-		arpenter(er->f, V4, e->spa, e->sha, sizeof(e->sha), 1);
-
-		/* answer only requests for our address or systems we're proxying for */
+		/*
+		 * when the request is for our address or a system we're proxying for,
+		 * enter the sender's address into the arp table and reply; otherwise
+		 * just refresh the sender's address.
+		 */
 		v4tov6(ip, e->tpa);
-		if(!iplocalonifc(ifc, ip))
-		if(!ipproxyifc(er->f, ifc, ip))
+		forme = iplocalonifc(ifc, ip) != nil || ipproxyifc(er->f, ifc, ip);
+		if(arpenter(er->f, V4, e->spa, e->sha, sizeof(e->sha), e->tpa, ifc, !forme) < 0 || !forme)
 			break;
 
 		n = sizeof(Etherarp);
@@ -692,10 +652,14 @@
 		memmove(r->s, ifc->mac, sizeof(r->s));
 		rbp->wp += n;
 
-		n = devtab[er->achan->type]->bwrite(er->achan, rbp, 0);
-		if(n < 0)
-			print("arp: write: %r\n");
+		runlock(ifc);
+		freeb(ebp);
+
+		devtab[er->achan->type]->bwrite(er->achan, rbp, 0);
+		return;
 	}
+
+	runlock(ifc);
 	freeb(ebp);
 }
 
@@ -707,7 +671,7 @@
 
 	er->arpp = up;
 	if(waserror()){
-		er->arpp = 0;
+		er->arpp = nil;
 		pexit("hangup", 1);
 	}
 	for(;;)
@@ -749,14 +713,9 @@
 multicastarp(Fs *f, Arpent *a, Medium *medium, uchar *mac)
 {
 	/* is it broadcast? */
-	switch(ipforme(f, a->ip)){
-	case Runi:
-		return nil;
-	case Rbcast:
-		memset(mac, 0xff, 6);
+	if(ipforme(f, a->ip) == Rbcast){
+		memset(mac, 0xff, medium->maclen);
 		return arpresolve(f->arp, a, medium, mac);
-	default:
-		break;
 	}
 
 	/* if multicast, fill in mac */
@@ -778,11 +737,11 @@
 }
 
 
-static void 
+static void
 etherpref2addr(uchar *pref, uchar *ea)
 {
-	pref[8]  = ea[0] | 0x2;
-	pref[9]  = ea[1];
+	pref[8] = ea[0] ^ 0x2;
+	pref[9] = ea[1];
 	pref[10] = ea[2];
 	pref[11] = 0xFF;
 	pref[12] = 0xFE;
@@ -789,4 +748,41 @@
 	pref[13] = ea[3];
 	pref[14] = ea[4];
 	pref[15] = ea[5];
+}
+
+static void
+etherareg(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *ip)
+{
+	static char tdad[] = "dad6";
+	uchar a[IPaddrlen];
+
+	if(ipcmp(ip, IPnoaddr) == 0 || ipcmp(ip, v4prefix) == 0)
+		return;
+
+	if(isv4(ip)){
+		sendgarp(ifc, ip);
+		return;
+	}
+
+	if((lifc->type&Rv4) != 0)
+		return;
+
+	if(!lifc->tentative){
+		icmpna(f, lifc->local, v6allnodesL, ip, ifc->mac, 1<<5);
+		return;
+	}
+
+	if(ipcmp(lifc->local, ip) != 0)
+		return;
+
+	/* temporarily add route for duplicate address detection */
+	ipv62smcast(a, ip);
+	addroute(f, a, IPallbits, v6Unspecified, IPallbits, ip, Rmulti, ifc, tdad);
+	if(waserror()){
+		remroute(f, a, IPallbits, v6Unspecified, IPallbits, ip, Rmulti, ifc, tdad);
+		nexterror();
+	}
+	icmpns(f, 0, SRC_UNSPEC, ip, TARG_MULTI, ifc->mac);
+	poperror();
+	remroute(f, a, IPallbits, v6Unspecified, IPallbits, ip, Rmulti, ifc, tdad);
 }
--- a/os/ip/gre.c
+++ b/os/ip/gre.c
@@ -1,3 +1,6 @@
+/*
+ * Generic Routing Encapsulation over IPv4, rfc1702
+ */
 #include "u.h"
 #include "../port/lib.h"
 #include "mem.h"
@@ -7,10 +10,7 @@
 
 #include "ip.h"
 
-#define DPRINT if(0)print
-
-enum
-{
+enum {
 	GRE_IPONLY	= 12,		/* size of ip header */
 	GRE_IPPLUSGRE	= 12,		/* minimum size of GRE header */
 	IP_GREPROTO	= 47,
@@ -18,10 +18,33 @@
 	GRErxms		= 200,
 	GREtickms	= 100,
 	GREmaxxmit	= 10,
+
+	K		= 1024,
+	GREqlen		= 256 * K,
+
+	GRE_cksum	= 0x8000,
+	GRE_routing	= 0x4000,
+	GRE_key		= 0x2000,
+	GRE_seq		= 0x1000,
+
+	Nring		= 1 << 10,	/* power of two, please */
+	Ringmask	= Nring - 1,
+
+	GREctlraw	= 0,
+	GREctlcooked,
+	GREctlretunnel,
+	GREctlreport,
+	GREctldlsuspend,
+	GREctlulsuspend,
+	GREctldlresume,
+	GREctlulresume,
+	GREctlforward,
+	GREctlulkey,
+	Ncmds,
 };
 
-typedef struct GREhdr
-{
+typedef struct GREhdr GREhdr;
+struct GREhdr{
 	/* ip header */
 	uchar	vihl;		/* Version and header length */
 	uchar	tos;		/* Type of service */
@@ -28,7 +51,7 @@
 	uchar	len[2];		/* packet length (including headers) */
 	uchar	id[2];		/* Identification */
 	uchar	frag[2];	/* Fragment information */
-	uchar	Unused;	
+	uchar	ttl;
 	uchar	proto;		/* Protocol */
 	uchar	cksum[2];	/* checksum */
 	uchar	src[4];		/* Ip source */
@@ -37,21 +60,115 @@
 	/* gre header */
 	uchar	flags[2];
 	uchar	eproto[2];	/* encapsulation protocol */
-} GREhdr;
+};
 
 typedef struct GREpriv GREpriv;
-struct GREpriv
-{
-	int		raw;			/* Raw GRE mode */
-
+struct GREpriv{
 	/* non-MIB stats */
-	ulong		csumerr;		/* checksum errors */
-	ulong		lenerr;			/* short packet */
+	uvlong	lenerr;			/* short packet */
 };
 
+typedef struct Bring	Bring;
+struct Bring{
+	Block	*ring[Nring];
+	long	produced;
+	long	consumed;
+};
+
+typedef struct GREconv	GREconv;
+struct GREconv{
+	int	raw;
+
+	/* Retunnelling information.  v4 only */
+	uchar	north[4];			/* HA */
+	uchar	south[4];			/* Base station */
+	uchar	hoa[4];				/* Home address */
+	uchar	coa[4];				/* Careof address */
+	ulong	seq;				/* Current sequence # */
+	int	dlsusp;				/* Downlink suspended? */
+	int	ulsusp;				/* Uplink suspended? */
+	ulong	ulkey;				/* GRE key */
+
+	QLock	lock;				/* Lock for rings */
+	Bring	dlpending;			/* Ring of pending packets */
+	Bring	dlbuffered;			/* Received while suspended */
+	Bring	ulbuffered;			/* Received while suspended */
+};
+
+typedef struct Metablock Metablock;
+struct Metablock{
+	uchar	*rp;
+	ulong	seq;
+};
+
+static char *grectlcooked(Conv *, int, char **);
+static char *grectldlresume(Conv *, int, char **);
+static char *grectldlsuspend(Conv *, int, char **);
+static char *grectlforward(Conv *, int, char **);
+static char *grectlraw(Conv *, int, char **);
+static char *grectlreport(Conv *, int, char **);
+static char *grectlretunnel(Conv *, int, char **);
+static char *grectlulkey(Conv *, int, char **);
+static char *grectlulresume(Conv *, int, char **);
+static char *grectlulsuspend(Conv *, int, char **);
+
+static struct{
+	char	*cmd;
+	int	argc;
+	char	*(*f)(Conv *, int, char **);
+} grectls[Ncmds] = {
+[GREctlraw]	=	{	"raw",		1,	grectlraw,	},
+[GREctlcooked]	=	{	"cooked",	1,	grectlcooked,	},
+[GREctlretunnel]=	{	"retunnel",	5,	grectlretunnel,	},
+[GREctlreport]	=	{	"report",	2,	grectlreport,	},
+[GREctldlsuspend]=	{	"dlsuspend",	1,	grectldlsuspend,},
+[GREctlulsuspend]=	{	"ulsuspend",	1,	grectlulsuspend,},
+[GREctldlresume]=	{	"dlresume",	1,	grectldlresume,	},
+[GREctlulresume]=	{	"ulresume",	1,	grectlulresume,	},
+[GREctlforward]	=	{	"forward",	2,	grectlforward,	},
+[GREctlulkey]	=	{	"ulkey",	2,	grectlulkey,	},
+};
+
+static uchar nulladdr[4];
+static char *sessend = "session end";
+
 static void grekick(void *x, Block *bp);
+static char *gresetup(Conv *, char *, char *, char *);
 
-static char*
+uvlong grepdin, grepdout, grebdin, grebdout;
+uvlong grepuin, grepuout, grebuin, grebuout;
+
+static Block *
+getring(Bring *r)
+{
+	Block *bp;
+
+	if(r->consumed == r->produced)
+		return nil;
+
+	bp = r->ring[r->consumed & Ringmask];
+	r->ring[r->consumed & Ringmask] = nil;
+	r->consumed++;
+	return bp;
+}
+
+static void
+addring(Bring *r, Block *bp)
+{
+	Block *tbp;
+
+	if(r->produced - r->consumed > Ringmask){
+		/* Full! */
+		tbp = r->ring[r->produced & Ringmask];
+		assert(tbp);
+		freeb(tbp);
+		r->consumed++;
+	}
+	r->ring[r->produced & Ringmask] = bp;
+	r->produced++;
+}
+
+static char *
 greconnect(Conv *c, char **argv, int argc)
 {
 	Proto *p;
@@ -91,7 +208,7 @@
 static void
 grecreate(Conv *c)
 {
-	c->rq = qopen(64*1024, Qmsg, 0, c);
+	c->rq = qopen(GREqlen, Qmsg, 0, c);
 	c->wq = qbypass(grekick, c);
 }
 
@@ -98,44 +215,88 @@
 static int
 grestate(Conv *c, char *state, int n)
 {
-	USED(c);
-	return snprint(state, n, "%s", "Datagram");
+	GREconv *grec;
+	char *ep, *p;
+
+	grec = c->ptcl;
+	p    = state;
+	ep   = p + n;
+	p    = seprint(p, ep, "%s%s%s%shoa %V north %V south %V seq %ulx "
+	 "pending %uld  %uld buffered dl %uld %uld ul %uld %uld ulkey %.8ulx\n",
+			c->inuse? "Open ": "Closed ",
+			grec->raw? "raw ": "",
+			grec->dlsusp? "DL suspended ": "",
+			grec->ulsusp? "UL suspended ": "",
+			grec->hoa, grec->north, grec->south, grec->seq,
+			grec->dlpending.consumed, grec->dlpending.produced,
+			grec->dlbuffered.consumed, grec->dlbuffered.produced,
+			grec->ulbuffered.consumed, grec->ulbuffered.produced,
+			grec->ulkey);
+	return p - state;
 }
 
 static char*
 greannounce(Conv*, char**, int)
 {
-	return "pktifc does not support announce";
+	return "gre does not support announce";
 }
 
 static void
 greclose(Conv *c)
 {
-	qclose(c->rq);
-	qclose(c->wq);
-	qclose(c->eq);
+	GREconv *grec;
+	Block *bp;
+
+	grec = c->ptcl;
+
+	/* Make sure we don't forward any more packets */
+	memset(grec->hoa, 0, sizeof grec->hoa);
+	memset(grec->north, 0, sizeof grec->north);
+	memset(grec->south, 0, sizeof grec->south);
+
+	qlock(&grec->lock);
+	while((bp = getring(&grec->dlpending)) != nil)
+		freeb(bp);
+
+	while((bp = getring(&grec->dlbuffered)) != nil)
+		freeb(bp);
+
+	while((bp = getring(&grec->ulbuffered)) != nil)
+		freeb(bp);
+
+	grec->dlpending.produced = grec->dlpending.consumed = 0;
+	grec->dlbuffered.produced = grec->dlbuffered.consumed = 0;
+	grec->ulbuffered.produced = grec->ulbuffered.consumed = 0;
+	qunlock(&grec->lock);
+
+	grec->raw = 0;
+	grec->seq = 0;
+	grec->dlsusp = grec->ulsusp = 1;
+
+	qhangup(c->rq, sessend);
+	qhangup(c->wq, sessend);
+	qhangup(c->eq, sessend);
 	ipmove(c->laddr, IPnoaddr);
 	ipmove(c->raddr, IPnoaddr);
-	c->lport = 0;
-	c->rport = 0;
+	c->lport = c->rport = 0;
 }
 
-int drop;
-
 static void
 grekick(void *x, Block *bp)
 {
-	Conv *c = x;
-	GREhdr *ghp;
+	Conv *c;
+	GREconv *grec;
+	GREhdr *gre;
 	uchar laddr[IPaddrlen], raddr[IPaddrlen];
 
 	if(bp == nil)
 		return;
 
+	c    = x;
+	grec = c->ptcl;
+
 	/* Make space to fit ip header (gre header already there) */
 	bp = padblock(bp, GRE_IPONLY);
-	if(bp == nil)
-		return;
 
 	/* make sure the message has a GRE header */
 	bp = pullupblock(bp, GRE_IPONLY+GRE_IPPLUSGRE);
@@ -142,90 +303,344 @@
 	if(bp == nil)
 		return;
 
-	ghp = (GREhdr *)(bp->rp);
-	ghp->vihl = IP_VER4;
+	gre = (GREhdr *)bp->rp;
+	gre->vihl = IP_VER4;
 
-	if(!((GREpriv*)c->p->priv)->raw){
-		v4tov6(raddr, ghp->dst);
+	if(grec->raw == 0){
+		v4tov6(raddr, gre->dst);
 		if(ipcmp(raddr, v4prefix) == 0)
-			memmove(ghp->dst, c->raddr + IPv4off, IPv4addrlen);
-		v4tov6(laddr, ghp->src);
+			memmove(gre->dst, c->raddr + IPv4off, IPv4addrlen);
+		v4tov6(laddr, gre->src);
 		if(ipcmp(laddr, v4prefix) == 0){
 			if(ipcmp(c->laddr, IPnoaddr) == 0)
-				findlocalip(c->p->f, c->laddr, raddr); /* pick interface closest to dest */
-			memmove(ghp->src, c->laddr + IPv4off, IPv4addrlen);
+				/* pick interface closest to dest */
+				findlocalip(c->p->f, c->laddr, raddr);
+			memmove(gre->src, c->laddr + IPv4off, sizeof gre->src);
 		}
-		hnputs(ghp->eproto, c->rport);
+		hnputs(gre->eproto, c->rport);
 	}
 
-	ghp->proto = IP_GREPROTO;
-	ghp->frag[0] = 0;
-	ghp->frag[1] = 0;
+	gre->proto = IP_GREPROTO;
+	gre->frag[0] = gre->frag[1] = 0;
 
+	grepdout++;
+	grebdout += BLEN(bp);
 	ipoput4(c->p->f, bp, 0, c->ttl, c->tos, nil);
 }
 
 static void
-greiput(Proto *gre, Ipifc*, Block *bp)
+gredownlink(Conv *c, Block *bp)
 {
-	int len;
-	GREhdr *ghp;
-	Conv *c, **p;
-	ushort eproto;
+	Metablock *m;
+	GREconv *grec;
+	GREhdr *gre;
+	int hdrlen, suspended, extra;
+	ushort flags;
+	ulong seq;
+
+	gre = (GREhdr *)bp->rp;
+	if(gre->ttl == 1){
+		freeb(bp);
+		return;
+	}
+
+	/*
+	 * We've received a packet with a GRE header and we need to
+	 * re-adjust the packet header to strip all unwanted parts
+	 * but leave room for only a sequence number.
+	 */
+	grec   = c->ptcl;
+	flags  = nhgets(gre->flags);
+	hdrlen = 0;
+	if(flags & GRE_cksum)
+		hdrlen += 2;
+	if(flags & GRE_routing){
+		print("%V routing info present.  Discarding packet", gre->src);
+		freeb(bp);
+		return;
+	}
+	if(flags & (GRE_cksum|GRE_routing))
+		hdrlen += 2;			/* Offset field */
+	if(flags & GRE_key)
+		hdrlen += 4;
+	if(flags & GRE_seq)
+		hdrlen += 4;
+
+	/*
+	 * The outgoing packet only has the sequence number set.  Make room
+	 * for the sequence number.
+	 */
+	if(hdrlen != sizeof(ulong)){
+		extra = hdrlen - sizeof(ulong);
+		if(extra < 0 && bp->rp - bp->base < -extra){
+			print("gredownlink: cannot add sequence number\n");
+			freeb(bp);
+			return;
+		}
+		memmove(bp->rp + extra, bp->rp, sizeof(GREhdr));
+		bp->rp += extra;
+		assert(BLEN(bp) >= sizeof(GREhdr) + sizeof(ulong));
+		gre = (GREhdr *)bp->rp;
+	}
+	seq = grec->seq++;
+	hnputs(gre->flags, GRE_seq);
+	hnputl(bp->rp + sizeof(GREhdr), seq);
+
+	/*
+	 * Keep rp and seq at the base.  ipoput4 consumes rp for
+	 * refragmentation.
+	 */
+	assert(bp->rp - bp->base >= sizeof(Metablock));
+	m = (Metablock *)bp->base;
+	m->rp  = bp->rp;
+	m->seq = seq;
+
+	/*
+	 * Here we make a decision what we're doing with the packet.  We're
+	 * doing this w/o holding a lock which means that later on in the
+	 * process we may discover we've done the wrong thing.  I don't want
+	 * to call ipoput with the lock held.
+	 */
+restart:
+	suspended = grec->dlsusp;
+	if(suspended){
+		if(!canqlock(&grec->lock)){
+			/*
+			 * just give up.  too bad, we lose a packet.  this
+			 * is just too hard and my brain already hurts.
+			 */
+			freeb(bp);
+			return;
+		}
+
+		if(!grec->dlsusp){
+			/*
+			 * suspend race.  We thought we were suspended, but
+			 * we really weren't.
+			 */
+			qunlock(&grec->lock);
+			goto restart;
+		}
+
+		/* still suspended: park the packet on the dlbuffered ring */
+		addring(&grec->dlbuffered, bp);
+		qunlock(&grec->lock);
+		return;
+	}
+
+	/*
+	 * When we get here, we're not suspended.  Proceed to send the
+	 * packet.
+	 */
+	memmove(gre->src, grec->coa, sizeof gre->dst);
+	memmove(gre->dst, grec->south, sizeof gre->dst);
+
+	ipoput4(c->p->f, copyblock(bp, BLEN(bp)), 0, gre->ttl - 1, gre->tos, nil);
+	grepdout++;
+	grebdout += BLEN(bp);
+
+	/*
+	 * Now make sure we didn't do the wrong thing.
+	 */
+	if(!canqlock(&grec->lock)){
+		freeb(bp);		/* The packet just goes away */
+		return;
+	}
+
+	/* We did the right thing */
+	addring(&grec->dlpending, bp);
+	qunlock(&grec->lock);
+}
+
+static void
+greuplink(Conv *c, Block *bp)
+{
+	GREconv *grec;
+	GREhdr *gre;
+	ushort flags;
+
+	gre = (GREhdr *)bp->rp;
+	if(gre->ttl == 1)
+		return;
+
+	grec = c->ptcl;
+	memmove(gre->src, grec->coa, sizeof gre->src);
+	memmove(gre->dst, grec->north, sizeof gre->dst);
+
+	/*
+	 * Add a key, if needed.
+	 */
+	if(grec->ulkey){
+		flags = nhgets(gre->flags);
+		if(flags & (GRE_cksum|GRE_routing)){
+			print("%V routing info present.  Discarding packet\n",
+				gre->src);
+			freeb(bp);
+			return;
+		}
+
+		if((flags & GRE_key) == 0){
+			/* Make room for the key */
+			if(bp->rp - bp->base < sizeof(ulong)){
+				print("%V can't add key\n", gre->src);
+				freeb(bp);
+				return;
+			}
+
+			bp->rp -= 4;
+			memmove(bp->rp, bp->rp + 4, sizeof(GREhdr));
+
+			gre = (GREhdr *)bp->rp;
+			hnputs(gre->flags, flags | GRE_key);
+		}
+
+		/* Add the key */
+		hnputl(bp->rp + sizeof(GREhdr), grec->ulkey);
+	}
+
+	if(!canqlock(&grec->lock)){
+		freeb(bp);
+		return;
+	}
+
+	if(grec->ulsusp)
+		addring(&grec->ulbuffered, bp);
+	else{
+		ipoput4(c->p->f, bp, 0, gre->ttl - 1, gre->tos, nil);
+		grepuout++;
+		grebuout += BLEN(bp);
+	}
+	qunlock(&grec->lock);
+}
+
+static void
+greiput(Proto *proto, Ipifc *, Block *bp)
+{
+	int len, hdrlen;
+	ushort eproto, flags;
 	uchar raddr[IPaddrlen];
+	Conv *c, **p;
+	GREconv *grec;
+	GREhdr *gre;
 	GREpriv *gpriv;
+	Ip4hdr *ip;
 
-	gpriv = gre->priv;
-	ghp = (GREhdr*)(bp->rp);
+	/*
+	 * We don't want to deal with block lists.  Ever.  The problem is
+	 * that when the block is forwarded, devether.c puts the block into
+	 * a queue that also uses ->next.  Just do not use ->next here!
+	 */
+	if(bp->next != nil)
+		bp = pullupblock(bp, blocklen(bp));
 
-	v4tov6(raddr, ghp->src);
-	eproto = nhgets(ghp->eproto);
-	qlock(gre);
+	gre = (GREhdr *)bp->rp;
+	if(BLEN(bp) < sizeof(GREhdr) || gre->proto != IP_GREPROTO){
+		freeb(bp);
+		return;
+	}
 
-	/* Look for a conversation structure for this port and address */
-	c = nil;
-	for(p = gre->conv; *p; p++) {
+	v4tov6(raddr, gre->src);
+	eproto = nhgets(gre->eproto);
+	flags  = nhgets(gre->flags);
+	hdrlen = sizeof(GREhdr);
+
+	if(flags & GRE_cksum)
+		hdrlen += 2;
+	if(flags & GRE_routing){
+		print("%I routing info present.  Discarding packet\n", raddr);
+		freeb(bp);
+		return;
+	}
+	if(flags & (GRE_cksum|GRE_routing))
+		hdrlen += 2;			/* Offset field */
+	if(flags & GRE_key)
+		hdrlen += 4;
+	if(flags & GRE_seq)
+		hdrlen += 4;
+
+	qlock(proto);
+
+	if(eproto != 0x880B && BLEN(bp) - hdrlen >= sizeof(Ip4hdr)){
+		ip = (Ip4hdr *)(bp->rp + hdrlen);
+
+		/*
+		 * Look for a conversation structure for this port and address, or
+		 * match the retunnel part, or match on the raw flag.
+		 */
+		for(p = proto->conv; *p; p++) {
+			c = *p;
+
+			if(c->inuse == 0)
+				continue;
+
+			/*
+			 * Do not stop this session - blocking here
+			 * implies that etherread is blocked.
+			 */
+			grec = c->ptcl;
+			if(memcmp(ip->dst, grec->hoa, sizeof ip->dst) == 0){
+				grepdin++;
+				grebdin += BLEN(bp);
+				gredownlink(c, bp);
+				qunlock(proto);
+				return;
+			}
+
+			if(memcmp(ip->src, grec->hoa, sizeof ip->src) == 0){
+				grepuin++;
+				grebuin += BLEN(bp);
+				greuplink(c, bp);
+				qunlock(proto);
+				return;
+			}
+		}
+	}
+
+
+	/*
+	 * when we get here, none of the forwarding tunnels matched.  now
+	 * try to match on raw and conversational sessions.
+	 */
+	for(c = nil, p = proto->conv; *p; p++) {
 		c = *p;
+
 		if(c->inuse == 0)
 			continue;
-		if(c->rport == eproto && 
-			(gpriv->raw || ipcmp(c->raddr, raddr) == 0))
+
+		/*
+		 * Do not stop this session - blocking here
+		 * implies that etherread is blocked.
+		 */
+		grec = c->ptcl;
+		if(c->rport == eproto &&
+		    (grec->raw || ipcmp(c->raddr, raddr) == 0))
 			break;
 	}
 
-	if(*p == nil) {
-		qunlock(gre);
-		freeblist(bp);
+	qunlock(proto);
+
+	if(*p == nil){
+		freeb(bp);
 		return;
 	}
 
-	qunlock(gre);
-
 	/*
 	 * Trim the packet down to data size
 	 */
-	len = nhgets(ghp->len) - GRE_IPONLY;
+	len = nhgets(gre->len) - GRE_IPONLY;
 	if(len < GRE_IPPLUSGRE){
-		freeblist(bp);
+		freeb(bp);
 		return;
 	}
+
 	bp = trimblock(bp, GRE_IPONLY, len);
 	if(bp == nil){
+		gpriv = proto->priv;
 		gpriv->lenerr++;
 		return;
 	}
 
-	/*
-	 *  Can't delimit packet so pull it all into one block.
-	 */
-	if(qlen(c->rq) > 64*1024)
-		freeblist(bp);
-	else{
-		bp = concatblock(bp);
-		if(bp == 0)
-			panic("greiput");
-		qpass(c->rq, bp);
-	}
+	qpass(c->rq, bp);
 }
 
 int
@@ -234,29 +649,258 @@
 	GREpriv *gpriv;
 
 	gpriv = gre->priv;
+	return snprint(buf, len,
+		"gre: %llud %llud %llud %llud %llud %llud %llud %llud, lenerrs %llud\n",
+		grepdin, grepdout, grepuin, grepuout,
+		grebdin, grebdout, grebuin, grebuout, gpriv->lenerr);
+}
 
-	return snprint(buf, len, "gre: len %lud\n", gpriv->lenerr);
+static char *
+grectlraw(Conv *c, int, char **)
+{
+	GREconv *grec;
+
+	grec = c->ptcl;
+	grec->raw = 1;
+	return nil;
 }
 
-char*
-grectl(Conv *c, char **f, int n)
+static char *
+grectlcooked(Conv *c, int, char **)
 {
-	GREpriv *gpriv;
+	GREconv *grec;
 
-	gpriv = c->p->priv;
-	if(n == 1){
-		if(strcmp(f[0], "raw") == 0){
-			gpriv->raw = 1;
-			return nil;
-		}
-		else if(strcmp(f[0], "cooked") == 0){
-			gpriv->raw = 0;
-			return nil;
-		}
+	grec = c->ptcl;
+	grec->raw = 0;
+	return nil;
+}
+
+static char *
+grectlretunnel(Conv *c, int, char **argv)
+{
+	GREconv *grec;
+	uchar ipaddr[4];
+
+	grec = c->ptcl;
+	if(memcmp(grec->hoa, nulladdr, sizeof grec->hoa))
+		return "tunnel already set up";
+
+	v4parseip(ipaddr, argv[1]);
+	if(memcmp(ipaddr, nulladdr, sizeof ipaddr) == 0)
+		return "bad hoa";
+	memmove(grec->hoa, ipaddr, sizeof grec->hoa);
+	v4parseip(ipaddr, argv[2]);
+	memmove(grec->north, ipaddr, sizeof grec->north);
+	v4parseip(ipaddr, argv[3]);
+	memmove(grec->south, ipaddr, sizeof grec->south);
+	v4parseip(ipaddr, argv[4]);
+	memmove(grec->coa, ipaddr, sizeof grec->coa);
+	grec->ulsusp = 1;
+	grec->dlsusp = 0;
+
+	return nil;
+}
+
+static char *
+grectlreport(Conv *c, int, char **argv)
+{
+	ulong seq;
+	Block *bp;
+	Bring *r;
+	GREconv *grec;
+	Metablock *m;
+
+	grec = c->ptcl;
+	seq  = strtoul(argv[1], nil, 0);
+
+	qlock(&grec->lock);
+	r = &grec->dlpending;
+	while(r->produced - r->consumed > 0){
+		bp = r->ring[r->consumed & Ringmask];
+
+		assert(bp && bp->rp - bp->base >= sizeof(Metablock));
+		m = (Metablock *)bp->base;
+		if((long)(seq - m->seq) <= 0)
+			break;
+
+		r->ring[r->consumed & Ringmask] = nil;
+		r->consumed++;
+
+		freeb(bp);
 	}
-	return "unknown control request";
+	qunlock(&grec->lock);
+	return nil;
 }
 
+static char *
+grectldlsuspend(Conv *c, int, char **)
+{
+	GREconv *grec;
+
+	grec = c->ptcl;
+	if(grec->dlsusp)
+		return "already suspended";
+
+	grec->dlsusp = 1;
+	return nil;
+}
+
+static char *
+grectlulsuspend(Conv *c, int, char **)
+{
+	GREconv *grec;
+
+	grec = c->ptcl;
+	if(grec->ulsusp)
+		return "already suspended";
+
+	grec->ulsusp = 1;
+	return nil;
+}
+
+static char *
+grectldlresume(Conv *c, int, char **)
+{
+	GREconv *grec;
+	GREhdr *gre;
+	Block *bp;
+
+	grec = c->ptcl;
+
+	qlock(&grec->lock);
+	if(!grec->dlsusp){
+		qunlock(&grec->lock);
+		return "not suspended";
+	}
+
+	while((bp = getring(&grec->dlbuffered)) != nil){
+		gre = (GREhdr *)bp->rp;
+		qunlock(&grec->lock);
+
+		ipoput4(c->p->f, copyblock(bp, BLEN(bp)), 0, gre->ttl - 1, gre->tos, nil);
+
+		qlock(&grec->lock);
+		addring(&grec->dlpending, bp);
+	}
+	grec->dlsusp = 0;
+	qunlock(&grec->lock);
+	return nil;
+}
+
+static char *
+grectlulresume(Conv *c, int, char **)
+{
+	GREconv *grec;
+	GREhdr *gre;
+	Block *bp;
+
+	grec = c->ptcl;
+
+	qlock(&grec->lock);
+	while((bp = getring(&grec->ulbuffered)) != nil){
+		gre = (GREhdr *)bp->rp;
+
+		qunlock(&grec->lock);
+		ipoput4(c->p->f, bp, 0, gre->ttl - 1, gre->tos, nil);
+		qlock(&grec->lock);
+	}
+	grec->ulsusp = 0;
+	qunlock(&grec->lock);
+	return nil;
+}
+
+static char *
+grectlforward(Conv *c, int, char **argv)
+{
+	Block *bp;
+	GREconv *grec;
+	GREhdr *gre;
+	Metablock *m;
+
+	grec = c->ptcl;
+
+	v4parseip(grec->south, argv[1]);
+	memmove(grec->north, grec->south, sizeof grec->north);
+
+	qlock(&grec->lock);
+	if(!grec->dlsusp){
+		qunlock(&grec->lock);
+		return "not suspended";
+	}
+	grec->dlsusp = 0;
+	grec->ulsusp = 0;
+
+	while((bp = getring(&grec->dlpending)) != nil){
+
+		assert(bp->rp - bp->base >= sizeof(Metablock));
+		m = (Metablock *)bp->base;
+		assert(m->rp >= bp->base && m->rp < bp->lim);
+
+		bp->rp = m->rp;
+
+		gre = (GREhdr *)bp->rp;
+		memmove(gre->src, grec->coa, sizeof gre->dst);
+		memmove(gre->dst, grec->south, sizeof gre->dst);
+
+		qunlock(&grec->lock);
+		ipoput4(c->p->f, bp, 0, gre->ttl - 1, gre->tos, nil);
+		qlock(&grec->lock);
+	}
+
+	while((bp = getring(&grec->dlbuffered)) != nil){
+		gre = (GREhdr *)bp->rp;
+		memmove(gre->src, grec->coa, sizeof gre->dst);
+		memmove(gre->dst, grec->south, sizeof gre->dst);
+
+		qunlock(&grec->lock);
+		ipoput4(c->p->f, bp, 0, gre->ttl - 1, gre->tos, nil);
+		qlock(&grec->lock);
+	}
+
+	while((bp = getring(&grec->ulbuffered)) != nil){
+		gre = (GREhdr *)bp->rp;
+
+		memmove(gre->src, grec->coa, sizeof gre->dst);
+		memmove(gre->dst, grec->south, sizeof gre->dst);
+
+		qunlock(&grec->lock);
+		ipoput4(c->p->f, bp, 0, gre->ttl - 1, gre->tos, nil);
+		qlock(&grec->lock);
+	}
+	qunlock(&grec->lock);
+	return nil;
+}
+
+static char *
+grectlulkey(Conv *c, int, char **argv)
+{
+	GREconv *grec;
+
+	grec = c->ptcl;
+	grec->ulkey = strtoul(argv[1], nil, 0);
+	return nil;
+}
+
+char *
+grectl(Conv *c, char **f, int n)
+{
+	int i;
+
+	if(n < 1)
+		return "too few arguments";
+
+	for(i = 0; i < Ncmds; i++)
+		if(strcmp(f[0], grectls[i].cmd) == 0)
+			break;
+
+	if(i == Ncmds)
+		return "no such command";
+	if(grectls[i].argc != 0 && grectls[i].argc != n)
+		return "incorrect number of arguments";
+
+	return grectls[i].f(c, n, f);
+}
+
 void
 greinit(Fs *fs)
 {
@@ -276,7 +920,7 @@
 	gre->stats = grestats;
 	gre->ipproto = IP_GREPROTO;
 	gre->nc = 64;
-	gre->ptclsize = 0;
+	gre->ptclsize = sizeof(GREconv);
 
 	Fsproto(fs, gre);
 }
--- a/os/ip/icmp.c
+++ b/os/ip/icmp.c
@@ -44,11 +44,6 @@
 	Maxtype		= 18,
 };
 
-enum
-{
-	MinAdvise	= 24,	/* minimum needed for us to advise another protocol */ 
-};
-
 char *icmpnames[Maxtype+1] =
 {
 [EchoReply]		"EchoReply",
@@ -70,6 +65,8 @@
 	IP_ICMPPROTO	= 1,
 	ICMP_IPSIZE	= 20,
 	ICMP_HDRSIZE	= 8,
+
+	MinAdvise	= ICMP_IPSIZE+4,	/* minimum needed for us to advise another protocol */ 
 };
 
 enum
@@ -113,7 +110,7 @@
 	c->wq = qbypass(icmpkick, c);
 }
 
-extern char*
+char*
 icmpconnect(Conv *c, char **argv, int argc)
 {
 	char *e;
@@ -126,11 +123,11 @@
 	return nil;
 }
 
-extern int
+int
 icmpstate(Conv *c, char *state, int n)
 {
 	USED(c);
-	return snprint(state, n, "%s qin %d qout %d",
+	return snprint(state, n, "%s qin %d qout %d\n",
 		"Datagram",
 		c->rq ? qlen(c->rq) : 0,
 		c->wq ? qlen(c->wq) : 0
@@ -137,7 +134,7 @@
 	);
 }
 
-extern char*
+char*
 icmpannounce(Conv *c, char **argv, int argc)
 {
 	char *e;
@@ -150,7 +147,7 @@
 	return nil;
 }
 
-extern void
+void
 icmpclose(Conv *c)
 {
 	qclose(c->rq);
@@ -169,8 +166,7 @@
 
 	if(bp == nil)
 		return;
-
-	if(blocklen(bp) < ICMP_IPSIZE + ICMP_HDRSIZE){
+	if(BLEN(bp) < ICMP_IPSIZE + ICMP_HDRSIZE){
 		freeblist(bp);
 		return;
 	}
@@ -190,21 +186,50 @@
 	ipoput4(c->p->f, bp, 0, c->ttl, c->tos, nil);
 }
 
-extern void
-icmpttlexceeded(Fs *f, uchar *ia, Block *bp)
+static int
+ip4reply(Fs *f, uchar ip4[4])
 {
+	uchar addr[IPaddrlen];
+	int i;
+
+	v4tov6(addr, ip4);
+	if(ipismulticast(addr))
+		return 0;
+	i = ipforme(f, addr);
+	return i == 0 || i == Runi;
+}
+
+static int
+ip4me(Fs *f, uchar ip4[4])
+{
+	uchar addr[IPaddrlen];
+
+	v4tov6(addr, ip4);
+	if(ipismulticast(addr))
+		return 0;
+	return ipforme(f, addr) == Runi;
+}
+
+void
+icmpttlexceeded(Fs *f, Ipifc *ifc, Block *bp)
+{
 	Block	*nbp;
 	Icmp	*p, *np;
+	uchar	ia[IPv4addrlen];
 
 	p = (Icmp *)bp->rp;
+	if(!ip4reply(f, p->src) || !ipv4local(ifc, ia, 0, p->src))
+		return;
 
-	netlog(f, Logicmp, "sending icmpttlexceeded -> %V\n", p->src);
+	netlog(f, Logicmp, "sending icmpttlexceeded %V -> src %V dst %V\n",
+		ia, p->src, p->dst);
+
 	nbp = allocb(ICMP_IPSIZE + ICMP_HDRSIZE + ICMP_IPSIZE + 8);
 	nbp->wp += ICMP_IPSIZE + ICMP_HDRSIZE + ICMP_IPSIZE + 8;
 	np = (Icmp *)nbp->rp;
 	np->vihl = IP_VER4;
+	memmove(np->src, ia, sizeof(np->src));
 	memmove(np->dst, p->src, sizeof(np->dst));
-	v6tov4(np->src, ia);
 	memmove(np->data, bp->rp, ICMP_IPSIZE + 8);
 	np->type = TimeExceed;
 	np->code = 0;
@@ -214,7 +239,6 @@
 	memset(np->cksum, 0, sizeof(np->cksum));
 	hnputs(np->cksum, ptclcsum(nbp, ICMP_IPSIZE, blocklen(nbp) - ICMP_IPSIZE));
 	ipoput4(f, nbp, 0, MAXTTL, DFLTTOS, nil);
-
 }
 
 static void
@@ -222,20 +246,10 @@
 {
 	Block	*nbp;
 	Icmp	*p, *np;
-	int	i;
-	uchar	addr[IPaddrlen];
 
 	p = (Icmp *)bp->rp;
-
-	/* only do this for unicast sources and destinations */
-	v4tov6(addr, p->dst);
-	i = ipforme(f, addr);
-	if((i&Runi) == 0)
+	if(!ip4me(f, p->dst) || !ip4reply(f, p->src))
 		return;
-	v4tov6(addr, p->src);
-	i = ipforme(f, addr);
-	if(i != 0 && (i&Runi) == 0)
-		return;
 
 	netlog(f, Logicmp, "sending icmpnoconv -> %V\n", p->src);
 	nbp = allocb(ICMP_IPSIZE + ICMP_HDRSIZE + ICMP_IPSIZE + 8);
@@ -255,13 +269,13 @@
 	ipoput4(f, nbp, 0, MAXTTL, DFLTTOS, nil);
 }
 
-extern void
+void
 icmpnoconv(Fs *f, Block *bp)
 {
 	icmpunreachable(f, bp, 3, 0);
 }
 
-extern void
+void
 icmpcantfrag(Fs *f, Block *bp, int mtu)
 {
 	icmpunreachable(f, bp, 4, mtu);
@@ -270,35 +284,34 @@
 static void
 goticmpkt(Proto *icmp, Block *bp)
 {
+	ushort	recid;
+	uchar	dst[IPaddrlen], src[IPaddrlen];
 	Conv	**c, *s;
 	Icmp	*p;
-	uchar	dst[IPaddrlen];
-	ushort	recid;
 
 	p = (Icmp *) bp->rp;
-	v4tov6(dst, p->src);
+	v4tov6(dst, p->dst);
+	v4tov6(src, p->src);
 	recid = nhgets(p->icmpid);
 
-	for(c = icmp->conv; *c; c++) {
-		s = *c;
+	for(c = icmp->conv; (s = *c) != nil; c++){
 		if(s->lport == recid)
-		if(ipcmp(s->raddr, dst) == 0){
-			bp = concatblock(bp);
-			if(bp != nil)
-				qpass(s->rq, bp);
-			return;
-		}
+		if(ipcmp(s->laddr, dst) == 0 || ipcmp(s->raddr, src) == 0)
+			qpass(s->rq, copyblock(bp, blocklen(bp)));
 	}
 	freeblist(bp);
 }
 
 static Block *
-mkechoreply(Block *bp)
+mkechoreply(Block *bp, Fs *f)
 {
 	Icmp	*q;
 	uchar	ip[4];
 
 	q = (Icmp *)bp->rp;
+	if(!ip4me(f, q->dst) || !ip4reply(f, q->src))
+		return nil;
+
 	q->vihl = IP_VER4;
 	memmove(ip, q->src, sizeof(q->dst));
 	memmove(q->src, q->dst, sizeof(q->src));
@@ -318,12 +331,22 @@
 [3]	"port unreachable",
 [4]	"fragmentation needed and DF set",
 [5]	"source route failed",
+[6]	"destination network unknown",
+[7]	"destination host unknown",
+[8]	"source host isolated",
+[9]	"network administratively prohibited",
+[10]	"host administratively prohibited",
+[11]	"network unreachable for tos",
+[12]	"host unreachable for tos",
+[13]	"communication administratively prohibited",
+[14]	"host precedence violation",
+[15]	"precedence cutoff in effect",
 };
 
 static void
 icmpiput(Proto *icmp, Ipifc*, Block *bp)
 {
-	int	n, iplen;
+	int	n;
 	Icmp	*p;
 	Block	*r;
 	Proto	*pr;
@@ -332,12 +355,10 @@
 	Icmppriv *ipriv;
 
 	ipriv = icmp->priv;
-	
 	ipriv->stats[InMsgs]++;
 
-	p = (Icmp *)bp->rp;
-	netlog(icmp->f, Logicmp, "icmpiput %d %d\n", p->type, p->code);
-	n = blocklen(bp);
+	bp = concatblock(bp);
+	n = BLEN(bp);
 	if(n < ICMP_IPSIZE+ICMP_HDRSIZE){
 		ipriv->stats[InErrors]++;
 		ipriv->stats[HlenErrs]++;
@@ -344,69 +365,57 @@
 		netlog(icmp->f, Logicmp, "icmp hlen %d\n", n);
 		goto raise;
 	}
-	iplen = nhgets(p->length);
-	if(iplen > n || (iplen % 1)){
-		ipriv->stats[LenErrs]++;
+	if(ptclcsum(bp, ICMP_IPSIZE, n - ICMP_IPSIZE)){
 		ipriv->stats[InErrors]++;
-		netlog(icmp->f, Logicmp, "icmp length %d\n", iplen);
-		goto raise;
-	}
-	if(ptclcsum(bp, ICMP_IPSIZE, iplen - ICMP_IPSIZE)){
-		ipriv->stats[InErrors]++;
 		ipriv->stats[CsumErrs]++;
 		netlog(icmp->f, Logicmp, "icmp checksum error\n");
 		goto raise;
 	}
+	p = (Icmp *)bp->rp;
+	netlog(icmp->f, Logicmp, "icmpiput %s (%d) %d\n",
+		(p->type < nelem(icmpnames)? icmpnames[p->type]: ""),
+		p->type, p->code);
 	if(p->type <= Maxtype)
 		ipriv->in[p->type]++;
 
 	switch(p->type) {
 	case EchoRequest:
-		if (iplen < n)
-			bp = trimblock(bp, 0, iplen);
-		r = mkechoreply(bp);
+		r = mkechoreply(bp, icmp->f);
+		if(r == nil)
+			goto raise;
 		ipriv->out[EchoReply]++;
 		ipoput4(icmp->f, r, 0, MAXTTL, DFLTTOS, nil);
 		break;
 	case Unreachable:
-		if(p->code > 5)
-			msg = unreachcode[1];
-		else
+		if(p->code >= nelem(unreachcode)) {
+			snprint(m2, sizeof m2, "unreachable %V -> %V code %d",
+				p->src, p->dst, p->code);
+			msg = m2;
+		} else
 			msg = unreachcode[p->code];
 
+	Advise:
 		bp->rp += ICMP_IPSIZE+ICMP_HDRSIZE;
-		if(blocklen(bp) < MinAdvise){
+		if(BLEN(bp) < MinAdvise){
 			ipriv->stats[LenErrs]++;
 			goto raise;
 		}
 		p = (Icmp *)bp->rp;
-		pr = Fsrcvpcolx(icmp->f, p->proto);
-		if(pr != nil && pr->advise != nil) {
-			(*pr->advise)(pr, bp, msg);
-			return;
+		if((nhgets(p->frag) & IP_FO) == 0){
+			pr = Fsrcvpcolx(icmp->f, p->proto);
+			if(pr != nil && pr->advise != nil) {
+				(*pr->advise)(pr, bp, msg);
+				return;
+			}
 		}
-
 		bp->rp -= ICMP_IPSIZE+ICMP_HDRSIZE;
 		goticmpkt(icmp, bp);
 		break;
 	case TimeExceed:
 		if(p->code == 0){
-			sprint(m2, "ttl exceeded at %V", p->src);
-
-			bp->rp += ICMP_IPSIZE+ICMP_HDRSIZE;
-			if(blocklen(bp) < MinAdvise){
-				ipriv->stats[LenErrs]++;
-				goto raise;
-			}
-			p = (Icmp *)bp->rp;
-			pr = Fsrcvpcolx(icmp->f, p->proto);
-			if(pr != nil && pr->advise != nil) {
-				(*pr->advise)(pr, bp, m2);
-				return;
-			}
-			bp->rp -= ICMP_IPSIZE+ICMP_HDRSIZE;
+			snprint(msg = m2, sizeof m2, "ttl exceeded at %V", p->src);
+			goto Advise;
 		}
-
 		goticmpkt(icmp, bp);
 		break;
 	default:
@@ -419,22 +428,25 @@
 	freeblist(bp);
 }
 
-void
+static void
 icmpadvise(Proto *icmp, Block *bp, char *msg)
 {
+	ushort	recid;
+	uchar	dst[IPaddrlen], src[IPaddrlen];
 	Conv	**c, *s;
 	Icmp	*p;
-	uchar	dst[IPaddrlen];
-	ushort	recid;
 
 	p = (Icmp *) bp->rp;
 	v4tov6(dst, p->dst);
+	v4tov6(src, p->src);
 	recid = nhgets(p->icmpid);
 
-	for(c = icmp->conv; *c; c++) {
-		s = *c;
+	for(c = icmp->conv; (s = *c) != nil; c++){
 		if(s->lport == recid)
+		if(ipcmp(s->laddr, src) == 0)
 		if(ipcmp(s->raddr, dst) == 0){
+			if(s->ignoreadvice)
+				break;
 			qhangup(s->rq, msg);
 			qhangup(s->wq, msg);
 			break;
@@ -443,7 +455,7 @@
 	freeblist(bp);
 }
 
-int
+static int
 icmpstats(Proto *icmp, char *buf, int len)
 {
 	Icmppriv *priv;
@@ -456,7 +468,7 @@
 	for(i = 0; i < Nstats; i++)
 		p = seprint(p, e, "%s: %lud\n", statnames[i], priv->stats[i]);
 	for(i = 0; i <= Maxtype; i++){
-		if(icmpnames[i])
+		if(icmpnames[i] != nil)
 			p = seprint(p, e, "%s: %lud %lud\n", icmpnames[i], priv->in[i], priv->out[i]);
 		else
 			p = seprint(p, e, "%d: %lud %lud\n", i, priv->in[i], priv->out[i]);
@@ -463,13 +475,7 @@
 	}
 	return p - buf;
 }
-
-int
-icmpgc(Proto *icmp)
-{
-	return natgc(icmp->ipproto);
-}
-
+	
 void
 icmpinit(Fs *fs)
 {
@@ -487,7 +493,7 @@
 	icmp->stats = icmpstats;
 	icmp->ctl = nil;
 	icmp->advise = icmpadvise;
-	icmp->gc = icmpgc;
+	icmp->gc = nil;
 	icmp->ipproto = IP_ICMPPROTO;
 	icmp->nc = 128;
 	icmp->ptclsize = 0;
--- a/os/ip/icmp6.c
+++ b/os/ip/icmp6.c
@@ -1,3 +1,6 @@
+/*
+ * Internet Control Message Protocol for IPv6
+ */
 #include "u.h"
 #include "../port/lib.h"
 #include "mem.h"
@@ -7,41 +10,36 @@
 #include "ip.h"
 #include "ipv6.h"
 
-typedef struct ICMPpkt ICMPpkt;
-typedef struct IPICMP IPICMP;
-typedef struct Ndpkt Ndpkt;
-typedef struct NdiscC NdiscC;
+enum
+{
+	InMsgs6,
+	InErrors6,
+	OutMsgs6,
+	CsumErrs6,
+	LenErrs6,
+	HlenErrs6,
+	HoplimErrs6,
+	IcmpCodeErrs6,
+	TargetErrs6,
+	OptlenErrs6,
+	AddrmxpErrs6,
+	RouterAddrErrs6,
 
-struct ICMPpkt {
-	uchar	type;
-	uchar	code;
-	uchar	cksum[2];
-	uchar	icmpid[2];
-	uchar	seq[2];
+	Nstats6,
 };
 
-struct IPICMP {
-	Ip6hdr;
-	ICMPpkt;
+enum {
+	ICMP_USEAD6	= 40,
 };
 
-struct NdiscC
-{
-	IPICMP;
-	uchar target[IPaddrlen];
+enum {
+	Oflag	= 1<<5,
+	Sflag	= 1<<6,
+	Rflag	= 1<<7,
 };
 
-struct Ndpkt
-{
-	NdiscC;
-	uchar otype;
-	uchar olen;	// length in units of 8 octets(incl type, code),
-				// 1 for IEEE 802 addresses
-	uchar lnaddr[6];	// link-layer address
-};
-
-enum {	
-	// ICMPv6 types
+enum {
+	/* ICMPv6 types */
 	EchoReply	= 0,
 	UnreachableV6	= 1,
 	PacketTooBigV6	= 2,
@@ -69,6 +67,65 @@
 	Maxtype6	= 137,
 };
 
+enum {
+	MinAdvise	= IP6HDR+4,	/* minimum needed for us to advise another protocol */ 
+};
+
+/* on-the-wire packet formats */
+typedef struct IPICMP IPICMP;
+typedef struct Ndpkt Ndpkt;
+typedef struct NdiscC NdiscC;
+
+/* the common header is a field macro, not a nested struct, to avoid possible struct padding */
+#define ICMPHDR \
+	IPV6HDR; \
+	uchar	type; \
+	uchar	code; \
+	uchar	cksum[2]; \
+	uchar	icmpid[2]; \
+	uchar	seq[2]
+
+struct IPICMP {
+	ICMPHDR;
+	uchar	payload[];
+};
+
+#define IPICMPSZ offsetof(IPICMP, payload[0])
+
+struct NdiscC {
+	ICMPHDR;
+	uchar	target[IPaddrlen];
+	uchar	payload[];
+};
+
+#define NDISCSZ offsetof(NdiscC, payload[0])
+
+struct Ndpkt {
+	ICMPHDR;
+	uchar	target[IPaddrlen];
+	uchar	otype;
+	uchar	olen;		/* length in units of 8 octets (incl. type and length fields),
+				 * 1 for IEEE 802 addresses */
+	uchar	lnaddr[6];	/* link-layer address */
+	uchar	payload[];
+};
+
+#define NDPKTSZ offsetof(Ndpkt, payload[0])
+
+typedef struct Icmppriv6
+{
+	ulong	stats[Nstats6];
+
+	/* message counts */
+	ulong	in[Maxtype6+1];
+	ulong	out[Maxtype6+1];
+} Icmppriv6;
+
+typedef struct Icmpcb6
+{
+	uchar	headers;
+} Icmpcb6;
+
 char *icmpnames6[Maxtype6+1] =
 {
 [EchoReply]		"EchoReply",
@@ -95,24 +152,6 @@
 [RedirectV6]		"RedirectV6",
 };
 
-enum
-{
-	InMsgs6,
-	InErrors6,
-	OutMsgs6,
-	CsumErrs6,
-	LenErrs6,
-	HlenErrs6,
-	HoplimErrs6,
-	IcmpCodeErrs6,
-	TargetErrs6,
-	OptlenErrs6,
-	AddrmxpErrs6,
-	RouterAddrErrs6,
-
-	Nstats6,
-};
-
 static char *statnames6[Nstats6] =
 {
 [InMsgs6]	"InMsgs",
@@ -129,49 +168,18 @@
 [RouterAddrErrs6]	"RouterAddrErrs",
 };
 
-typedef struct Icmppriv6
-{
-	ulong	stats[Nstats6];
-
-	/* message counts */
-	ulong	in[Maxtype6+1];
-	ulong	out[Maxtype6+1];
-} Icmppriv6;
-
-typedef struct Icmpcb6 
-{
-	QLock;
-	uchar headers;
-} Icmpcb6;
-
 static char *unreachcode[] =
 {
-[icmp6_no_route]	"no route to destination",
-[icmp6_ad_prohib]	"comm with destination administratively prohibited",
-[icmp6_unassigned]	"icmp unreachable: unassigned error code (2)",
-[icmp6_adr_unreach]	"address unreachable",
-[icmp6_port_unreach]	"port unreachable",
-[icmp6_unkn_code]	"icmp unreachable: unknown code",
+[Icmp6_no_route]	"no route to destination",
+[Icmp6_ad_prohib]	"comm with destination administratively prohibited",
+[Icmp6_out_src_scope]	"beyond scope of source address",
+[Icmp6_adr_unreach]	"address unreachable",
+[Icmp6_port_unreach]	"port unreachable",
+[Icmp6_gress_src_fail]	"source address failed ingress/egress policy",
+[Icmp6_rej_route]	"reject route to destination",
+[Icmp6_unknown]		"icmp unreachable: unknown code",
 };
 
-enum {
-	ICMP_USEAD6	= 40,
-};
-
-enum {
-	Oflag	= 1<<5,
-	Sflag	= 1<<6,
-	Rflag	= 1<<7,
-};
-
-enum {
-	slladd	= 1,
-	tlladd	= 2,
-	prfinfo	= 3,
-	redhdr	= 4,
-	mtuopt	= 5,
-};
-
 static void icmpkick6(void *x, Block *bp);
 
 static void
@@ -185,13 +193,14 @@
 set_cksum(Block *bp)
 {
 	IPICMP *p = (IPICMP *)(bp->rp);
+	int n = blocklen(bp);
 
-	hnputl(p->vcf, 0);  // borrow IP header as pseudoheader
-	hnputs(p->ploadlen, blocklen(bp)-IPV6HDR_LEN);
+	hnputl(p->vcf, 0);  	/* borrow IP header as pseudoheader */
+	hnputs(p->ploadlen, n - IP6HDR);
 	p->proto = 0;
-	p->ttl = ICMPv6;	// ttl gets set later
+	p->ttl = ICMPv6;	/* ttl gets set later */
 	hnputs(p->cksum, 0);
-	hnputs(p->cksum, ptclcsum(bp, 0, blocklen(bp)));
+	hnputs(p->cksum, ptclcsum(bp, 0, n));
 	p->proto = ICMPv6;
 }
 
@@ -198,7 +207,8 @@
 static Block *
 newIPICMP(int packetlen)
 {
-	Block	*nbp;
+	Block *nbp;
+
 	nbp = allocb(packetlen);
 	nbp->wp += packetlen;
 	memset(nbp->rp, 0, packetlen);
@@ -205,20 +215,22 @@
 	return nbp;
 }
 
-void
+static void
 icmpadvise6(Proto *icmp, Block *bp, char *msg)
 {
-	Conv	**c, *s;
-	IPICMP	*p;
-	ushort	recid;
+	ushort recid;
+	Conv **c, *s;
+	IPICMP *p;
 
-	p = (IPICMP *) bp->rp;
+	p = (IPICMP *)bp->rp;
 	recid = nhgets(p->icmpid);
 
-	for(c = icmp->conv; *c; c++) {
-		s = *c;
+	for(c = icmp->conv; (s = *c) != nil; c++){
 		if(s->lport == recid)
+		if(ipcmp(s->laddr, p->src) == 0)
 		if(ipcmp(s->raddr, p->dst) == 0){
+			if(s->ignoreadvice)
+				break;
 			qhangup(s->rq, msg);
 			qhangup(s->wq, msg);
 			break;
@@ -230,9 +242,9 @@
 static void
 icmpkick6(void *x, Block *bp)
 {
+	uchar laddr[IPaddrlen], raddr[IPaddrlen];
 	Conv *c = x;
 	IPICMP *p;
-	uchar laddr[IPaddrlen], raddr[IPaddrlen];
 	Icmppriv6 *ipriv = c->p->priv;
 	Icmpcb6 *icb = (Icmpcb6*)c->ptcl;
 
@@ -249,10 +261,10 @@
 		bp->rp += IPaddrlen;
 		ipmove(raddr, bp->rp);
 		bp->rp += IPaddrlen;
-		bp = padblock(bp, sizeof(Ip6hdr));
+		bp = padblock(bp, IP6HDR);
 	}
 
-	if(blocklen(bp) < sizeof(IPICMP)){
+	if(BLEN(bp) < IPICMPSZ){
 		freeblist(bp);
 		return;
 	}
@@ -268,23 +280,20 @@
 
 	set_cksum(bp);
 	p->vcf[0] = 0x06 << 4;
-	if(p->type <= Maxtype6)	
+	if(p->type <= Maxtype6)
 		ipriv->out[p->type]++;
 	ipoput6(c->p->f, bp, 0, c->ttl, c->tos, nil);
 }
 
-char*
+static char*
 icmpctl6(Conv *c, char **argv, int argc)
 {
 	Icmpcb6 *icb;
 
 	icb = (Icmpcb6*) c->ptcl;
-
-	if(argc==1) {
-		if(strcmp(argv[0], "headers")==0) {
-			icb->headers = 6;
-			return nil;
-		}
+	if(argc==1 && strcmp(argv[0], "headers")==0) {
+		icb->headers = 6;
+		return nil;
 	}
 	return "unknown control request";
 }
@@ -292,41 +301,39 @@
 static void
 goticmpkt6(Proto *icmp, Block *bp, int muxkey)
 {
-	Conv	**c, *s;
-	IPICMP	*p = (IPICMP *)bp->rp;
-	ushort	recid; 
-	uchar 	*addr;
+	ushort recid;
+	uchar *addr;
+	Conv **c, *s;
+	IPICMP *p = (IPICMP *)bp->rp;
 
 	if(muxkey == 0) {
 		recid = nhgets(p->icmpid);
 		addr = p->src;
-	}
-	else {
+	} else {
 		recid = muxkey;
 		addr = p->dst;
 	}
-
-	for(c = icmp->conv; *c; c++){
-		s = *c;
-		if(s->lport == recid && ipcmp(s->raddr, addr) == 0){
-			bp = concatblock(bp);
-			if(bp != nil)
-				qpass(s->rq, bp);
-			return;
-		}
+	for(c = icmp->conv; (s = *c) != nil; c++){
+		if(s->lport == recid)
+		if(ipcmp(s->laddr, p->dst) == 0 || ipcmp(s->raddr, addr) == 0)
+			qpass(s->rq, copyblock(bp, blocklen(bp)));
 	}
-
 	freeblist(bp);
 }
 
 static Block *
-mkechoreply6(Block *bp)
+mkechoreply6(Block *bp, Ipifc *ifc)
 {
+	uchar addr[IPaddrlen];
 	IPICMP *p = (IPICMP *)(bp->rp);
-	uchar	addr[IPaddrlen];
 
+	if(isv6mcast(p->src))
+		return nil;
 	ipmove(addr, p->src);
-	ipmove(p->src, p->dst);
+	if(!isv6mcast(p->dst))
+		ipmove(p->src, p->dst);
+	else if (!ipv6local(ifc, p->src, 0, addr))
+		return nil;
 	ipmove(p->dst, addr);
 	p->type = EchoReplyV6;
 	set_cksum(bp);
@@ -335,49 +342,43 @@
 
 /*
  * sends out an ICMPv6 neighbor solicitation
- * 	suni == SRC_UNSPEC or SRC_UNI, 
+ * 	suni == SRC_UNSPEC or SRC_UNI,
  *	tuni == TARG_MULTI => multicast for address resolution,
  * 	and tuni == TARG_UNI => neighbor reachability.
  */
-
-extern void
+void
 icmpns(Fs *f, uchar* src, int suni, uchar* targ, int tuni, uchar* mac)
 {
-	Block	*nbp;
+	Block *nbp;
 	Ndpkt *np;
 	Proto *icmp = f->t2p[ICMPv6];
 	Icmppriv6 *ipriv = icmp->priv;
 
-
-	nbp = newIPICMP(sizeof(Ndpkt));
+	nbp = newIPICMP(NDPKTSZ);
 	np = (Ndpkt*) nbp->rp;
 
+	if(suni == SRC_UNSPEC)
+		ipmove(np->src, v6Unspecified);
+	else
+		ipmove(np->src, src);
 
-	if(suni == SRC_UNSPEC) 
-		memmove(np->src, v6Unspecified, IPaddrlen);
-	else 
-		memmove(np->src, src, IPaddrlen);
-
 	if(tuni == TARG_UNI)
-		memmove(np->dst, targ, IPaddrlen);
+		ipmove(np->dst, targ);
 	else
 		ipv62smcast(np->dst, targ);
 
 	np->type = NbrSolicit;
 	np->code = 0;
-	memmove(np->target, targ, IPaddrlen);
+	ipmove(np->target, targ);
 	if(suni != SRC_UNSPEC) {
-		np->otype = SRC_LLADDRESS;
-		np->olen = 1;	/* 1+1+6 = 8 = 1 8-octet */
+		np->otype = SRC_LLADDR;
+		np->olen = 1;		/* 1+1+6 = 8 = 1 8-octet */
 		memmove(np->lnaddr, mac, sizeof(np->lnaddr));
-	}
-	else {
-		int r = sizeof(Ndpkt)-sizeof(NdiscC);
-		nbp->wp -= r;
-	}
+	} else
+		nbp->wp -= NDPKTSZ - NDISCSZ;
 
 	set_cksum(nbp);
-	np = (Ndpkt*) nbp->rp;
+	np = (Ndpkt*)nbp->rp;
 	np->ttl = HOP_LIMIT;
 	np->vcf[0] = 0x06 << 4;
 	ipriv->out[NbrSolicit]++;
@@ -388,122 +389,101 @@
 /*
  * sends out an ICMPv6 neighbor advertisement. pktflags == RSO flags.
  */
-extern void
+void
 icmpna(Fs *f, uchar* src, uchar* dst, uchar* targ, uchar* mac, uchar flags)
 {
-	Block	*nbp;
+	Block *nbp;
 	Ndpkt *np;
 	Proto *icmp = f->t2p[ICMPv6];
 	Icmppriv6 *ipriv = icmp->priv;
 
-	nbp = newIPICMP(sizeof(Ndpkt));
-	np = (Ndpkt*) nbp->rp;
+	nbp = newIPICMP(NDPKTSZ);
+	np = (Ndpkt*)nbp->rp;
 
-	memmove(np->src, src, IPaddrlen);
-	memmove(np->dst, dst, IPaddrlen);
+	ipmove(np->src, src);
+	ipmove(np->dst, dst);
 
 	np->type = NbrAdvert;
 	np->code = 0;
 	np->icmpid[0] = flags;
-	memmove(np->target, targ, IPaddrlen);
+	ipmove(np->target, targ);
 
-	np->otype = TARGET_LLADDRESS;
-	np->olen = 1;	
+	np->otype = TARGET_LLADDR;
+	np->olen = 1;
 	memmove(np->lnaddr, mac, sizeof(np->lnaddr));
 
 	set_cksum(nbp);
-	np = (Ndpkt*) nbp->rp;
+	np = (Ndpkt*)nbp->rp;
 	np->ttl = HOP_LIMIT;
 	np->vcf[0] = 0x06 << 4;
 	ipriv->out[NbrAdvert]++;
-	netlog(f, Logicmp, "sending neighbor advertisement %I\n", src);
+	netlog(f, Logicmp, "sending neighbor advertisement %I\n", targ);
 	ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil);
 }
 
-extern void
-icmphostunr(Fs *f, Ipifc *ifc, Block *bp, int code, int free)
+void
+icmphostunr6(Fs *f, Ipifc *ifc, Block *bp, int code, int tome)
 {
+	int osz = BLEN(bp);
+	int sz = MIN(IPICMPSZ + osz, v6MINTU);
 	Block *nbp;
 	IPICMP *np;
-	Ip6hdr	*p;
-	int osz = BLEN(bp);
-	int sz = MIN(sizeof(IPICMP) + osz, v6MINTU);
-	Proto	*icmp = f->t2p[ICMPv6];
+	Ip6hdr *p;
+	Proto *icmp = f->t2p[ICMPv6];
 	Icmppriv6 *ipriv = icmp->priv;
+	uchar ia[IPaddrlen];
 
-	p = (Ip6hdr *) bp->rp;
+	p = (Ip6hdr *)bp->rp;
+	if(isv6mcast(p->dst) || isv6mcast(p->src) || !ipv6local(ifc, ia, 0, p->src))
+		return;
 
-	if(isv6mcast(p->src)) 
-		goto clean;
+	netlog(f, Logicmp, "send icmphostunr %I -> src %I dst %I\n",
+		ia, p->src, p->dst);
 
 	nbp = newIPICMP(sz);
-	np = (IPICMP *) nbp->rp;
-
-	rlock(ifc);
-	if(ipv6anylocal(ifc, np->src)) {
-		netlog(f, Logicmp, "send icmphostunr -> s%I d%I\n", p->src, p->dst);
-	}
-	else {
-		netlog(f, Logicmp, "icmphostunr fail -> s%I d%I\n", p->src, p->dst);
-		freeblist(nbp);
-		if(free) 
-			goto clean;
-		else
-			return;
-	}
-
-	memmove(np->dst, p->src, IPaddrlen);
+	np = (IPICMP *)nbp->rp;
+	ipmove(np->src, ia);
+	ipmove(np->dst, p->src);
 	np->type = UnreachableV6;
 	np->code = code;
-	memmove(nbp->rp + sizeof(IPICMP), bp->rp, sz - sizeof(IPICMP));
+	memmove(nbp->rp + IPICMPSZ, bp->rp, sz - IPICMPSZ);
 	set_cksum(nbp);
 	np->ttl = HOP_LIMIT;
 	np->vcf[0] = 0x06 << 4;
 	ipriv->out[UnreachableV6]++;
 
-	if(free)
+	if(tome)
 		ipiput6(f, ifc, nbp);
-	else {
+	else 
 		ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil);
-		return;
-	}
-
-clean:
-	runlock(ifc);
-	freeblist(bp);
 }
 
-extern void
+void
 icmpttlexceeded6(Fs *f, Ipifc *ifc, Block *bp)
 {
+	int osz = BLEN(bp);
+	int sz = MIN(IPICMPSZ + osz, v6MINTU);
 	Block *nbp;
 	IPICMP *np;
-	Ip6hdr	*p;
-	int osz = BLEN(bp);
-	int sz = MIN(sizeof(IPICMP) + osz, v6MINTU);
-	Proto	*icmp = f->t2p[ICMPv6];
+	Ip6hdr *p;
+	Proto *icmp = f->t2p[ICMPv6];
 	Icmppriv6 *ipriv = icmp->priv;
+	uchar ia[IPaddrlen];
 
-	p = (Ip6hdr *) bp->rp;
-
-	if(isv6mcast(p->src)) 
+	p = (Ip6hdr *)bp->rp;
+	if(isv6mcast(p->dst) || isv6mcast(p->src) || !ipv6local(ifc, ia, 0, p->src))
 		return;
 
+	netlog(f, Logicmp, "send icmpttlexceeded6 %I -> src %I dst %I\n",
+		ia, p->src, p->dst);
+
 	nbp = newIPICMP(sz);
 	np = (IPICMP *) nbp->rp;
-
-	if(ipv6anylocal(ifc, np->src)) {
-		netlog(f, Logicmp, "send icmpttlexceeded6 -> s%I d%I\n", p->src, p->dst);
-	}
-	else {
-		netlog(f, Logicmp, "icmpttlexceeded6 fail -> s%I d%I\n", p->src, p->dst);
-		return;
-	}
-
-	memmove(np->dst, p->src, IPaddrlen);
+	ipmove(np->src, ia);
+	ipmove(np->dst, p->src);
 	np->type = TimeExceedV6;
 	np->code = 0;
-	memmove(nbp->rp + sizeof(IPICMP), bp->rp, sz - sizeof(IPICMP));
+	memmove(nbp->rp + IPICMPSZ, bp->rp, sz - IPICMPSZ);
 	set_cksum(nbp);
 	np->ttl = HOP_LIMIT;
 	np->vcf[0] = 0x06 << 4;
@@ -511,38 +491,33 @@
 	ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil);
 }
 
-extern void
+void
 icmppkttoobig6(Fs *f, Ipifc *ifc, Block *bp)
 {
+	int osz = BLEN(bp);
+	int sz = MIN(IPICMPSZ + osz, v6MINTU);
 	Block *nbp;
 	IPICMP *np;
-	Ip6hdr	*p;
-	int osz = BLEN(bp);
-	int sz = MIN(sizeof(IPICMP) + osz, v6MINTU);
-	Proto	*icmp = f->t2p[ICMPv6];
+	Ip6hdr *p;
+	Proto *icmp = f->t2p[ICMPv6];
 	Icmppriv6 *ipriv = icmp->priv;
+	uchar ia[IPaddrlen];
 
-	p = (Ip6hdr *) bp->rp;
-
-	if(isv6mcast(p->src)) 
+	p = (Ip6hdr *)bp->rp;
+	if(isv6mcast(p->dst) || isv6mcast(p->src) || !ipv6local(ifc, ia, 0, p->src))
 		return;
 
-	nbp = newIPICMP(sz);
-	np = (IPICMP *) nbp->rp;
+	netlog(f, Logicmp, "send icmppkttoobig6 %I -> src %I dst %I\n",
+		ia, p->src, p->dst);
 
-	if(ipv6anylocal(ifc, np->src)) {
-		netlog(f, Logicmp, "send icmppkttoobig6 -> s%I d%I\n", p->src, p->dst);
-	}
-	else {
-		netlog(f, Logicmp, "icmppkttoobig6 fail -> s%I d%I\n", p->src, p->dst);
-		return;
-	}
-
-	memmove(np->dst, p->src, IPaddrlen);
+	nbp = newIPICMP(sz);
+	np = (IPICMP *)nbp->rp;
+	ipmove(np->src, ia);
+	ipmove(np->dst, p->src);
 	np->type = PacketTooBigV6;
 	np->code = 0;
 	hnputl(np->icmpid, ifc->maxtu - ifc->m->hsize);
-	memmove(nbp->rp + sizeof(IPICMP), bp->rp, sz - sizeof(IPICMP));
+	memmove(nbp->rp + IPICMPSZ, bp->rp, sz - IPICMPSZ);
 	set_cksum(nbp);
 	np->ttl = HOP_LIMIT;
 	np->vcf[0] = 0x06 << 4;
@@ -554,31 +529,23 @@
  * RFC 2461, pages 39-40, pages 57-58.
  */
 static int
-valid(Proto *icmp, Ipifc *ifc, Block *bp, Icmppriv6 *ipriv) {
-	int 	sz, osz, unsp, n, ttl, iplen;
-	int 	pktsz = BLEN(bp);
-	uchar	*packet = bp->rp;
-	IPICMP	*p = (IPICMP *) packet;
-	Ndpkt	*np;
+valid(Proto *icmp, Ipifc *, Block *bp, Icmppriv6 *ipriv)
+{
+	int sz, osz, unsp, ttl;
+	int pktsz = BLEN(bp);
+	uchar *packet = bp->rp;
+	IPICMP *p = (IPICMP *) packet;
+	Ndpkt *np;
 
-	USED(ifc);
-	n = blocklen(bp);
-	if(n < sizeof(IPICMP)) {
+	if(pktsz < IPICMPSZ) {
 		ipriv->stats[HlenErrs6]++;
-		netlog(icmp->f, Logicmp, "icmp hlen %d\n", n);
+		netlog(icmp->f, Logicmp, "icmp hlen %d\n", pktsz);
 		goto err;
 	}
 
-	iplen = nhgets(p->ploadlen);
-	if(iplen > n-IPV6HDR_LEN || (iplen % 1)) {
-		ipriv->stats[LenErrs6]++;
-		netlog(icmp->f, Logicmp, "icmp length %d\n", iplen);
-		goto err;
-	}
-
-	// Rather than construct explicit pseudoheader, overwrite IPv6 header
+	/* Rather than construct explicit pseudoheader, overwrite IPv6 header */
 	if(p->proto != ICMPv6) {
-		// This code assumes no extension headers!!!
+		/* This code assumes no extension headers!!! */
 		netlog(icmp->f, Logicmp, "icmp error: extension header\n");
 		goto err;
 	}
@@ -586,7 +553,7 @@
 	ttl = p->ttl;
 	p->ttl = p->proto;
 	p->proto = 0;
-	if(ptclcsum(bp, 0, iplen + IPV6HDR_LEN)) {
+	if(ptclcsum(bp, 0, pktsz)) {
 		ipriv->stats[CsumErrs6]++;
 		netlog(icmp->f, Logicmp, "icmp checksum error\n");
 		goto err;
@@ -595,19 +562,16 @@
 	p->ttl = ttl;
 
 	/* additional tests for some pkt types */
-	if( (p->type == NbrSolicit) ||
-		(p->type == NbrAdvert) ||
-		(p->type == RouterAdvert) ||
-		(p->type == RouterSolicit) ||
-		(p->type == RedirectV6) ) {
-
+	if (p->type == NbrSolicit   || p->type == NbrAdvert ||
+	    p->type == RouterAdvert || p->type == RouterSolicit ||
+	    p->type == RedirectV6) {
 		if(p->ttl != HOP_LIMIT) {
-			ipriv->stats[HoplimErrs6]++; 
-			goto err; 
+			ipriv->stats[HoplimErrs6]++;
+			goto err;
 		}
 		if(p->code != 0) {
-			ipriv->stats[IcmpCodeErrs6]++; 
-			goto err; 
+			ipriv->stats[IcmpCodeErrs6]++;
+			goto err;
 		}
 
 		switch (p->type) {
@@ -615,82 +579,78 @@
 		case NbrAdvert:
 			np = (Ndpkt*) p;
 			if(isv6mcast(np->target)) {
-				ipriv->stats[TargetErrs6]++; 
-				goto err; 
+				ipriv->stats[TargetErrs6]++;
+				goto err;
 			}
-			if(optexsts(np) && (np->olen == 0)) {
-				ipriv->stats[OptlenErrs6]++; 
-				goto err; 
+			if(optexsts(np) && np->olen == 0) {
+				ipriv->stats[OptlenErrs6]++;
+				goto err;
 			}
-		
-			if(p->type == NbrSolicit) {
-				if(ipcmp(np->src, v6Unspecified) == 0) { 
-					if(!issmcast(np->dst) || optexsts(np))  {
-						ipriv->stats[AddrmxpErrs6]++; 
-						goto err;
-					}
+
+			if (p->type == NbrSolicit &&
+			    ipcmp(np->src, v6Unspecified) == 0)
+				if(!issmcast(np->dst) || optexsts(np)) {
+					ipriv->stats[AddrmxpErrs6]++;
+					goto err;
 				}
-			}
-		
-			if(p->type == NbrAdvert) {
-				if((isv6mcast(np->dst))&&(nhgets(np->icmpid) & Sflag)){
-					ipriv->stats[AddrmxpErrs6]++; 
-					goto err; 
+
+			if(p->type == NbrAdvert)
+				if(isv6mcast(np->dst) &&
+				    (nhgets(np->icmpid) & Sflag)){
+					ipriv->stats[AddrmxpErrs6]++;
+					goto err;
 				}
-			}
 			break;
-	
+
 		case RouterAdvert:
-			if(pktsz - sizeof(Ip6hdr) < 16) {
-				ipriv->stats[HlenErrs6]++; 
-				goto err; 
+			if(pktsz - IP6HDR < 16) {
+				ipriv->stats[HlenErrs6]++;
+				goto err;
 			}
 			if(!islinklocal(p->src)) {
-				ipriv->stats[RouterAddrErrs6]++; 
-				goto err; 
+				ipriv->stats[RouterAddrErrs6]++;
+				goto err;
 			}
-			sz = sizeof(IPICMP) + 8;
-			while ((sz+1) < pktsz) {
-				osz = *(packet+sz+1);
+			sz = IPICMPSZ + 8;
+			while (sz+8 <= pktsz) {
+				osz = packet[sz+1];
 				if(osz <= 0) {
-					ipriv->stats[OptlenErrs6]++; 
-					goto err; 
-				}	
+					ipriv->stats[OptlenErrs6]++;
+					goto err;
+				}
 				sz += 8*osz;
 			}
 			break;
-	
+
 		case RouterSolicit:
-			if(pktsz - sizeof(Ip6hdr) < 8) {
-				ipriv->stats[HlenErrs6]++; 
-				goto err; 
+			if(pktsz - IP6HDR < 8) {
+				ipriv->stats[HlenErrs6]++;
+				goto err;
 			}
 			unsp = (ipcmp(p->src, v6Unspecified) == 0);
-			sz = sizeof(IPICMP) + 8;
-			while ((sz+1) < pktsz) {
-				osz = *(packet+sz+1);
-				if((osz <= 0) ||
-					(unsp && (*(packet+sz) == slladd)) ) {
-					ipriv->stats[OptlenErrs6]++; 
-					goto err; 
+			sz = IPICMPSZ + 8;
+			while (sz+8 <= pktsz) {
+				osz = packet[sz+1];
+				if(osz <= 0 ||
+				    (unsp && packet[sz] == SRC_LLADDR)) {
+					ipriv->stats[OptlenErrs6]++;
+					goto err;
 				}
 				sz += 8*osz;
 			}
 			break;
-	
+
 		case RedirectV6:
-			//to be filled in
+			/* to be filled in */
 			break;
-	
+
 		default:
 			goto err;
 		}
 	}
-
 	return 1;
-
 err:
-	ipriv->stats[InErrors6]++; 
+	ipriv->stats[InErrors6]++;
 	return 0;
 }
 
@@ -700,169 +660,162 @@
 	Iplifc *lifc;
 	int t;
 
-	rlock(ifc);
-	if(ipproxyifc(f, ifc, target)) {
-		runlock(ifc);
-		return t_uniproxy;
-	}
-
-	for(lifc = ifc->lifc; lifc; lifc = lifc->next) {
-		if(ipcmp(lifc->local, target) == 0) {
-			t = (lifc->tentative) ? t_unitent : t_unirany; 
-			runlock(ifc);
-			return t;
-		}
-	}
-
-	runlock(ifc);
-	return 0;
+	if((lifc = iplocalonifc(ifc, target)) != nil)
+		t = lifc->tentative? Tunitent: Tunirany;
+	else if(ipproxyifc(f, ifc, target))
+		t = Tuniproxy;
+	else
+		t = 0;
+	return t;
 }
 
 static void
-icmpiput6(Proto *icmp, Ipifc *ipifc, Block *bp)
+icmpiput6(Proto *icmp, Ipifc *ifc, Block *bp)
 {
-	uchar	*packet = bp->rp;
-	IPICMP	*p = (IPICMP *)packet;
-	Icmppriv6 *ipriv = icmp->priv;
-	Block	*r;
-	Proto	*pr;
-	char	*msg, m2[128];
-	Ndpkt* np;
+	char *msg, m2[128];
 	uchar pktflags;
-	uchar lsrc[IPaddrlen];
-	int refresh = 1;
+	uchar ia[IPaddrlen];
+	Block *r;
+	IPICMP *p;
+	Icmppriv6 *ipriv = icmp->priv;
 	Iplifc *lifc;
+	Ndpkt* np;
+	Proto *pr;
 
-	if(!valid(icmp, ipifc, bp, ipriv)) 
-		goto raise;
+	bp = concatblock(bp);
+	p = (IPICMP*)bp->rp;
 
-	if(p->type <= Maxtype6)
-		ipriv->in[p->type]++;
-	else
+	if(!valid(icmp, ifc, bp, ipriv) || p->type > Maxtype6)
 		goto raise;
 
+	ipriv->in[p->type]++;
+
 	switch(p->type) {
 	case EchoRequestV6:
-		r = mkechoreply6(bp);
+		r = mkechoreply6(bp, ifc);
+		if(r == nil)
+			goto raise;
 		ipriv->out[EchoReply]++;
 		ipoput6(icmp->f, r, 0, MAXTTL, DFLTTOS, nil);
 		break;
 
 	case UnreachableV6:
-		if(p->code > 4)
-			msg = unreachcode[icmp6_unkn_code];
+		if(p->code >= nelem(unreachcode))
+			msg = unreachcode[Icmp6_unknown];
 		else
 			msg = unreachcode[p->code];
-
-		bp->rp += sizeof(IPICMP);
-		if(blocklen(bp) < 8){
+	Advise:
+		bp->rp += IPICMPSZ;
+		if(BLEN(bp) < MinAdvise){
 			ipriv->stats[LenErrs6]++;
 			goto raise;
 		}
 		p = (IPICMP *)bp->rp;
-		pr = Fsrcvpcolx(icmp->f, p->proto);
-		if(pr != nil && pr->advise != nil) {
-			(*pr->advise)(pr, bp, msg);
-			return;
-		}
 
-		bp->rp -= sizeof(IPICMP);
-		goticmpkt6(icmp, bp, 0);
-		break;
-
-	case TimeExceedV6:
-		if(p->code == 0){
-			sprint(m2, "ttl exceeded at %I", p->src);
-
-			bp->rp += sizeof(IPICMP);
-			if(blocklen(bp) < 8){
-				ipriv->stats[LenErrs6]++;
-				goto raise;
+		/* get rid of fragment header if this is the first fragment */
+		if(p->proto == FH && BLEN(bp) >= MinAdvise+IP6FHDR && MinAdvise > IP6HDR){
+			Fraghdr6 *fh = (Fraghdr6*)(bp->rp + IP6HDR);
+			if((nhgets(fh->offsetRM) & ~7) == 0){	/* first fragment */
+				p->proto = fh->nexthdr;
+				/* copy down payload over fragment header */
+				bp->rp += IP6HDR;
+				bp->wp -= IP6FHDR;
+				memmove(bp->rp, bp->rp+IP6FHDR, BLEN(bp));
+				hnputs(p->ploadlen, BLEN(bp));
+				bp->rp -= IP6HDR;
 			}
-			p = (IPICMP *)bp->rp;
+		}
+		if(p->proto != FH){
 			pr = Fsrcvpcolx(icmp->f, p->proto);
 			if(pr != nil && pr->advise != nil) {
-				(*pr->advise)(pr, bp, m2);
+				(*pr->advise)(pr, bp, msg);
 				return;
 			}
-			bp->rp -= sizeof(IPICMP);
 		}
+		bp->rp -= IPICMPSZ;
+		goticmpkt6(icmp, bp, 0);
+		break;
 
+	case TimeExceedV6:
+		if(p->code == 0){
+			snprint(msg = m2, sizeof m2, "ttl exceeded at %I", p->src);
+			goto Advise;
+		}
+		if(p->code == 1){
+			snprint(msg = m2, sizeof m2, "frag time exceeded at %I", p->src);
+			goto Advise;
+		}
 		goticmpkt6(icmp, bp, 0);
 		break;
 
+	case PacketTooBigV6:
+		snprint(msg = m2, sizeof(m2), "packet too big for %lud mtu at %I",
+			(ulong)nhgetl(p->icmpid), p->src);
+		goto Advise;
+
 	case RouterAdvert:
 	case RouterSolicit:
-		/* using lsrc as a temp, munge hdr for goticmp6 
-		memmove(lsrc, p->src, IPaddrlen);
-		memmove(p->src, p->dst, IPaddrlen);
-		memmove(p->dst, lsrc, IPaddrlen); */
-
 		goticmpkt6(icmp, bp, p->type);
 		break;
 
 	case NbrSolicit:
-		np = (Ndpkt*) p;
+		np = (Ndpkt*)p;
 		pktflags = 0;
-		switch (targettype(icmp->f, ipifc, np->target)) {
-		case t_unirany:
+		if(ifc->sendra6)
+			pktflags |= Rflag;
+		switch (targettype(icmp->f, ifc, np->target)) {
+		case Tunirany:
 			pktflags |= Oflag;
 			/* fall through */
 
-		case t_uniproxy: 
-			if(ipcmp(np->src, v6Unspecified) != 0) {
-				arpenter(icmp->f, V6, np->src, np->lnaddr, 8*np->olen-2, 0);
+		case Tuniproxy:
+			if(ipv6local(ifc, ia, 0, np->src)) {
+				if(arpenter(icmp->f, V6, np->src, np->lnaddr, 8*np->olen-2, ia, ifc, 0) < 0)
+					break;
 				pktflags |= Sflag;
-			}
-			if(ipv6local(ipifc, lsrc)) {
-				icmpna(icmp->f, lsrc, 
-				   (ipcmp(np->src, v6Unspecified)==0)?v6allnodesL:np->src,
-				   np->target, ipifc->mac, pktflags); 
-			}
-			else
-				freeblist(bp);
+			} else
+				ipmove(ia, np->target);
+			icmpna(icmp->f, ia, (pktflags & Sflag)? np->src: v6allnodesL,
+				np->target, ifc->mac, pktflags);
 			break;
-
-		case t_unitent:
-			/* not clear what needs to be done. send up
-			 * an icmp mesg saying don't use this address? */
-
-		default:
-			freeblist(bp);
+		case Tunitent:
+			/*
+			 * not clear what needs to be done. send up
+			 * an icmp mesg saying don't use this address?
+			 */
+			break;
 		}
-
+		freeblist(bp);
 		break;
 
 	case NbrAdvert:
-		np = (Ndpkt*) p;
+		np = (Ndpkt*)p;
 
-		/* if the target address matches one of the local interface 
-		 * address and the local interface address has tentative bit set, 
-		 * then insert into ARP table. this is so the duplication address 
-		 * detection part of ipconfig can discover duplication through 
-		 * the arp table
+		/*
+		 * if the target address matches one of the local interface
+		 * addresses and the local interface address has tentative bit
+		 * set, insert into ARP table. this is so the duplicate address
+		 * detection part of ipconfig can discover duplication through
+		 * the arp table.
 		 */
-		lifc = iplocalonifc(ipifc, np->target);
-		if(lifc && lifc->tentative)
-			refresh = 0;
-		arpenter(icmp->f, V6, np->target, np->lnaddr, 8*np->olen-2, refresh);
+		lifc = iplocalonifc(ifc, np->target);
+		if(lifc != nil && lifc->tentative)
+			arpenter(icmp->f, V6, np->target, np->lnaddr, 8*np->olen-2, np->target, ifc, 0);
+		else if(ipv6local(ifc, ia, 0, np->target))
+			arpenter(icmp->f, V6, np->target, np->lnaddr, 8*np->olen-2, ia, ifc, 1);
 		freeblist(bp);
 		break;
 
-	case PacketTooBigV6:
-
 	default:
 		goticmpkt6(icmp, bp, 0);
 		break;
 	}
 	return;
-
 raise:
 	freeblist(bp);
-
 }
 
-int
+static int
 icmpstats6(Proto *icmp6, char *buf, int len)
 {
 	Icmppriv6 *priv;
@@ -874,23 +827,28 @@
 	e = p+len;
 	for(i = 0; i < Nstats6; i++)
 		p = seprint(p, e, "%s: %lud\n", statnames6[i], priv->stats[i]);
-	for(i = 0; i <= Maxtype6; i++){
+	for(i = 0; i <= Maxtype6; i++)
 		if(icmpnames6[i])
-			p = seprint(p, e, "%s: %lud %lud\n", icmpnames6[i], priv->in[i], priv->out[i]);
-/*		else
-			p = seprint(p, e, "%d: %lud %lud\n", i, priv->in[i], priv->out[i]);
-*/
-	}
+			p = seprint(p, e, "%s: %lud %lud\n", icmpnames6[i],
+				priv->in[i], priv->out[i]);
 	return p - buf;
 }
 
 
-// need to import from icmp.c
+/* import from icmp.c */
 extern int	icmpstate(Conv *c, char *state, int n);
 extern char*	icmpannounce(Conv *c, char **argv, int argc);
 extern char*	icmpconnect(Conv *c, char **argv, int argc);
 extern void	icmpclose(Conv *c);
 
+static void
+icmpclose6(Conv *c)
+{
+	Icmpcb6 *icb = (Icmpcb6*)c->ptcl;
+	icb->headers = 0;
+	icmpclose(c);
+}
+
 void
 icmp6init(Fs *fs)
 {
@@ -902,7 +860,7 @@
 	icmp6->announce = icmpannounce;
 	icmp6->state = icmpstate;
 	icmp6->create = icmpcreate6;
-	icmp6->close = icmpclose;
+	icmp6->close = icmpclose6;
 	icmp6->rcv = icmpiput6;
 	icmp6->stats = icmpstats6;
 	icmp6->ctl = icmpctl6;
@@ -914,4 +872,3 @@
 
 	Fsproto(fs, icmp6);
 }
-
--- a/os/ip/igmp.c
+++ b/os/ip/igmp.c
@@ -1,3 +1,7 @@
+/*
+ * igmp - internet group management protocol
+ * unfinished.
+ */
 #include "u.h"
 #include "../port/lib.h"
 #include "mem.h"
@@ -40,8 +44,12 @@
 	uchar	unused;
 	uchar	igmpcksum[2];		/* checksum of igmp portion */
 	uchar	group[IPaddrlen];	/* multicast group */
+
+	uchar	payload[];
 };
 
+#define IGMPPKTSZ offsetof(IGMPpkt, payload[0])
+
 /*
  *  lists for group reports
  */
@@ -49,7 +57,7 @@
 struct IGMPrep
 {
 	IGMPrep		*next;
-	Media		*m;
+	Medium		*m;
 	int		ticks;
 	Multicast	*multi;
 };
@@ -76,19 +84,17 @@
 } stats;
 
 void
-igmpsendreport(Media *m, uchar *addr)
+igmpsendreport(Medium *m, uchar *addr)
 {
 	IGMPpkt *p;
 	Block *bp;
 
 	bp = allocb(sizeof(IGMPpkt));
-	if(bp == nil)
-		return;
 	p = (IGMPpkt*)bp->wp;
 	p->vihl = IP_VER4;
-	bp->wp += sizeof(IGMPpkt);
-	memset(bp->rp, 0, sizeof(IGMPpkt));
-	hnputl(p->src, Mediagetaddr(m));
+	bp->wp += IGMPPKTSZ;
+	memset(bp->rp, 0, IGMPPKTSZ);
+	hnputl(p->src, Mediumgetaddr(m));
 	hnputl(p->dst, Ipallsys);
 	p->vertype = (1<<4) | IGMPreport;
 	p->proto = IP_IGMPPROTO;
@@ -166,7 +172,7 @@
 }
 
 void
-igmpiput(Media *m, Ipifc *, Block *bp)
+igmpiput(Medium *m, Ipifc *, Block *bp)
 {
 	int n;
 	IGMPpkt *ghp;
@@ -206,7 +212,7 @@
 		if(rp != nil)
 			break;	/* already reporting */
 
-		mp = Mediacopymulti(m);
+		mp = Mediumcopymulti(m);
 		if(mp == nil)
 			break;
 
@@ -285,7 +291,7 @@
 	igmp.ptclsize = 0;
 
 	igmpreportfn = igmpsendreport;
-	kproc("igmpproc", igmpproc, 0, 0);
+	kproc("igmpproc", igmpproc, 0);
 
 	Fsproto(fs, &igmp);
 }
--- a/os/ip/ihbootp.c
+++ /dev/null
@@ -1,323 +1,0 @@
-#include "u.h"
-#include "../port/lib.h"
-#include "mem.h"
-#include "dat.h"
-#include "fns.h"
-#include "../port/error.h"
-#include "kernel.h"
-#include "ip.h"
-
-static	ulong	fsip;
-static	ulong	auip;
-static	ulong	gwip;
-static	ulong	ipmask;
-static	ulong	ipaddr;
-static	ulong	dnsip;
-
-enum
-{
-	Bootrequest = 1,
-	Bootreply   = 2,
-};
-
-typedef struct Bootp
-{
-	/* udp.c oldheader */
-	uchar	raddr[IPaddrlen];
-	uchar	laddr[IPaddrlen];
-	uchar	rport[2];
-	uchar	lport[2];
-	/* bootp itself */
-	uchar	op;		/* opcode */
-	uchar	htype;		/* hardware type */
-	uchar	hlen;		/* hardware address len */
-	uchar	hops;		/* hops */
-	uchar	xid[4];		/* a random number */
-	uchar	secs[2];	/* elapsed snce client started booting */
-	uchar	pad[2];
-	uchar	ciaddr[4];	/* client IP address (client tells server) */
-	uchar	yiaddr[4];	/* client IP address (server tells client) */
-	uchar	siaddr[4];	/* server IP address */
-	uchar	giaddr[4];	/* gateway IP address */
-	uchar	chaddr[16];	/* client hardware address */
-	uchar	sname[64];	/* server host name (optional) */
-	uchar	file[128];	/* boot file name */
-	uchar	vend[128];	/* vendor-specific goo */
-} Bootp;
-
-/*
- * bootp returns:
- *
- * "fsip d.d.d.d
- * auip d.d.d.d
- * gwip d.d.d.d
- * ipmask d.d.d.d
- * ipaddr d.d.d.d
- * dnsip d.d.d.d"
- *
- * where d.d.d.d is the IP address in dotted decimal notation, and each
- * address is followed by a newline.
- */
-
-static	Bootp	req;
-static	Proc*	rcvprocp;
-static	int	recv;
-static	int	done;
-static	Rendez	bootpr;
-static	char	rcvbuf[512];
-static	int	bootpdebug;
-
-/*
- * Parse the vendor specific fields according to RFC 1084.
- * We are overloading the "cookie server" to be the Inferno 
- * authentication server and the "resource location server"
- * to be the Inferno file server.
- *
- * If the vendor specific field is formatted properly, it
- * will begin with the four bytes 99.130.83.99 and end with
- * an 0xFF byte.
- */
-static void
-parsevend(uchar* vend)
-{
-	/* The field must start with 99.130.83.99 to be compliant */
-	if ((vend[0] != 99) || (vend[1] != 130) ||
-	    (vend[2] != 83) || (vend[3] != 99)){
-		if(bootpdebug)
-			print("bad bootp vendor field: %.2x%.2x%.2x%.2x", vend[0], vend[1], vend[2], vend[3]);
-		return;
-	}
-
-	/* Skip over the magic cookie */
-	vend += 4;
-
-	while ((vend[0] != 0) && (vend[0] != 0xFF)) {
-		if(bootpdebug){
-			int i;
-			print("vend %d [%d]", vend[0], vend[1]);
-			for(i=0; i<vend[1]; i++)
-				print(" %2.2x", vend[i]);
-			print("\n");
-		}
-		switch (vend[0]) {
-		case 1:	/* Subnet mask field */
-			/* There must be only one subnet mask */
-			if (vend[1] != 4)
-				return;
-
-			ipmask = (vend[2]<<24)|
-				 (vend[3]<<16)|
-				 (vend[4]<<8)|
-				  vend[5];
-			break;
-
-		case 3:	/* Gateway/router field */
-			/* We are only concerned with first address */
-			if (vend[1] < 4)
-				break;
-
-			gwip =	(vend[2]<<24)|
-				(vend[3]<<16)|
-				(vend[4]<<8)|
-				 vend[5];
-			break;
-
-		case 6:	/* DNS server */
-			/* We are only concerned with first address */
-			if (vend[1] < 4)
-				break;
-
-			dnsip =	(vend[2]<<24)|
-				(vend[3]<<16)|
-				(vend[4]<<8)|
-				 vend[5];
-			break;
-
-		case 8:	/* "Cookie server" (auth server) field */
-			/* We are only concerned with first address */
-			if (vend[1] < 4)
-				break;
-
-			auip =	(vend[2]<<24)|
-				(vend[3]<<16)|
-				(vend[4]<<8)|
-				 vend[5];
-			break;
-
-		case 11:	/* "Resource loc server" (file server) field */
-			/* We are only concerned with first address */
-			if (vend[1] < 4)
-				break;
-
-			fsip =	(vend[2]<<24)|
-				(vend[3]<<16)|
-				(vend[4]<<8)|
-				 vend[5];
-			break;
-
-		default:	/* Ignore everything else */
-			break;
-		}
-
-		/* Skip over the field */
-		vend += vend[1] + 2;
-	}
-}
-
-static void
-rcvbootp(void *a)
-{
-	int n, fd;
-	Bootp *rp;
-
-	if(waserror())
-		pexit("", 0);
-	rcvprocp = up;	/* store for postnote below */
-	fd = (int)a;
-	while(done == 0) {
-		n = kread(fd, rcvbuf, sizeof(rcvbuf));
-		if(n <= 0)
-			break;
-		rp = (Bootp*)rcvbuf;
-		if (memcmp(req.chaddr, rp->chaddr, 6) == 0 &&
-		   rp->htype == 1 && rp->hlen == 6) {
-			ipaddr = (rp->yiaddr[0]<<24)|
-				 (rp->yiaddr[1]<<16)|
-				 (rp->yiaddr[2]<<8)|
-				  rp->yiaddr[3];
-			parsevend(rp->vend);
-			break;
-		}
-	}
-	poperror();
-	rcvprocp = nil;
-
-	recv = 1;
-	wakeup(&bootpr);
-	pexit("", 0);
-}
-
-static char*
-rbootp(Ipifc *ifc)
-{
-	int cfd, dfd, tries, n;
-	char ia[5+3*16], im[16], *av[3];
-	uchar nipaddr[4], ngwip[4], nipmask[4];
-	char dir[Maxpath];
-	static uchar vend_rfc1048[] = { 99, 130, 83, 99 };
-
-	av[1] = "0.0.0.0";
-	av[2] = "0.0.0.0";
-	ipifcadd(ifc, av, 3, 0, nil);
-
-	cfd = kannounce("udp!*!68", dir);
-	if(cfd < 0)
-		return "bootp announce failed";
-	strcat(dir, "/data");
-	if(kwrite(cfd, "headers", 7) < 0){
-		kclose(cfd);
-		return "bootp ctl headers failed";
-	}
-	kwrite(cfd, "oldheaders", 10);
-	dfd = kopen(dir, ORDWR);
-	if(dfd < 0){
-		kclose(cfd);
-		return "bootp open data failed";
-	}
-	kclose(cfd);
-
-	/* create request */
-	memset(&req, 0, sizeof(req));
-	ipmove(req.raddr, IPv4bcast);
-	hnputs(req.rport, 67);
-	req.op = Bootrequest;
-	req.htype = 1;			/* ethernet (all we know) */
-	req.hlen = 6;			/* ethernet (all we know) */
-
-	/* Hardware MAC address */
-	memmove(req.chaddr, ifc->mac, 6);
-	/* Fill in the local IP address if we know it */
-	ipv4local(ifc, req.ciaddr);
-	memset(req.file, 0, sizeof(req.file));
-	memmove(req.vend, vend_rfc1048, 4);
-
-	done = 0;
-	recv = 0;
-
-	kproc("rcvbootp", rcvbootp, (void*)dfd, KPDUPFDG);
-
-	/*
-	 * broadcast bootp's till we get a reply,
-	 * or fixed number of tries
-	 */
-	tries = 0;
-	while(recv == 0) {
-		if(kwrite(dfd, &req, sizeof(req)) < 0)
-			print("bootp: write: %r");
-
-		tsleep(&bootpr, return0, 0, 1000);
-		if(++tries > 10) {
-			print("bootp: timed out\n");
-			break;
-		}
-	}
-	kclose(dfd);
-	done = 1;
-	if(rcvprocp != nil){
-		postnote(rcvprocp, 1, "timeout", 0);
-		rcvprocp = nil;
-	}
-
-	av[1] = "0.0.0.0";
-	av[2] = "0.0.0.0";
-	ipifcrem(ifc, av, 3);
-
-	hnputl(nipaddr, ipaddr);
-	sprint(ia, "%V", nipaddr);
-	hnputl(nipmask, ipmask);
-	sprint(im, "%V", nipmask);
-	av[1] = ia;
-	av[2] = im;
-	ipifcadd(ifc, av, 3, 0, nil);
-
-	if(gwip != 0) {
-		hnputl(ngwip, gwip);
-		n = sprint(ia, "add 0.0.0.0 0.0.0.0 %V", ngwip);
-		routewrite(ifc->conv->p->f, nil, ia, n);
-	}
-	return nil;
-}
-
-static int
-rbootpread(char *bp, ulong offset, int len)
-{
-	int n;
-	char *buf;
-	uchar a[4];
-
-	buf = smalloc(READSTR);
-	if(waserror()){
-		free(buf);
-		nexterror();
-	}
-	hnputl(a, fsip);
-	n = snprint(buf, READSTR, "fsip %15V\n", a);
-	hnputl(a, auip);
-	n += snprint(buf + n, READSTR-n, "auip %15V\n", a);
-	hnputl(a, gwip);
-	n += snprint(buf + n, READSTR-n, "gwip %15V\n", a);
-	hnputl(a, ipmask);
-	n += snprint(buf + n, READSTR-n, "ipmask %15V\n", a);
-	hnputl(a, ipaddr);
-	n += snprint(buf + n, READSTR-n, "ipaddr %15V\n", a);
-	hnputl(a, dnsip);
-	snprint(buf + n, READSTR-n, "dnsip %15V\n", a);
-
-	len = readstr(offset, bp, len, buf);
-	poperror();
-	free(buf);
-	return len;
-}
-
-char*	(*bootp)(Ipifc*) = rbootp;
-int	(*bootpread)(char*, ulong, int) = rbootpread;
--- a/os/ip/il.c
+++ b/os/ip/il.c
@@ -189,7 +189,7 @@
 {
 	Ipht	ht;
 
-	ulong	stats[Nstats];
+	uvlong	stats[Nstats];
 
 	ulong	csumerr;		/* checksum errors */
 	ulong	hlenerr;		/* header length error */
@@ -208,7 +208,7 @@
 
 
 void	ilrcvmsg(Conv*, Block*);
-void	ilsendctl(Conv*, Ilhdr*, int, ulong, ulong, int);
+int	ilsendctl(Conv*, Ilhdr*, int, ulong, ulong, int);
 void	ilackq(Ilcb*, Block*);
 void	ilprocess(Conv*, Ilhdr*, Block*);
 void	ilpullup(Conv*);
@@ -251,6 +251,9 @@
 	e = Fsstdconnect(c, argv, argc);
 	if(e != nil)
 		return e;
+	if(c->ipversion != V4)
+		return "only IP version 4 supported";
+		
 	return ilstart(c, IL_CONNECT, fast);
 }
 
@@ -260,7 +263,7 @@
 	Ilcb *ic;
 
 	ic = (Ilcb*)(c->ptcl);
-	return snprint(state, n, "%s qin %d qout %d del %5.5d Br %5.5d md %5.5d una %5.5lud rex %5.5d rxq %5.5d max %5.5d",
+	return snprint(state, n, "%s qin %d qout %d del %5.5d Br %5.5d md %5.5d una %5.5lud rex %5.5d rxq %5.5d max %5.5d\n",
 		ilstates[ic->state],
 		c->rq ? qlen(c->rq) : 0,
 		c->wq ? qlen(c->wq) : 0,
@@ -434,7 +437,7 @@
 	p = buf;
 	e = p+len;
 	for(i = 0; i < Nstats; i++)
-		p = seprint(p, e, "%s: %lud\n", statnames[i], priv->stats[i]);
+		p = seprint(p, e, "%s: %llud\n", statnames[i], priv->stats[i]);
 	return p - buf;
 }
 
@@ -548,6 +551,9 @@
 
 	ih = (Ilhdr *)bp->rp;
 	plen = blocklen(bp);
+	if(plen > 0 && (ih->vihl&0xF0)!=IP_VER4)
+		goto raise;	/* ignore non V4 packets */
+
 	if(plen < IL_IPSIZE+IL_HDRSIZE){
 		netlog(il->f, Logil, "il: hlenerr\n");
 		ipriv->stats[HlenErrs]++;
@@ -572,7 +578,7 @@
 		else
 			st = iltype[ih->iltype];
 		ipriv->stats[CsumErrs]++;
-		netlog(il->f, Logil, "il: cksum %ux %ux, pkt(%s id %lud ack %lud %I/%d->%d)\n",
+		netlog(il->f, Logil, "il: cksum %ux, pkt(%s id %ud ack %ud %I/%d->%d)\n",
 			csum, st, nhgetl(ih->ilid), nhgetl(ih->ilack), raddr, sp, dp);
 		goto raise;
 	}
@@ -595,7 +601,7 @@
 			else
 				st = iltype[ih->iltype];
 			ilreject(il->f, ih);		/* no channel and not sync */
-			netlog(il->f, Logil, "il: no channel, pkt(%s id %lud ack %lud %I/%ud->%ud)\n",
+			netlog(il->f, Logil, "il: no channel, pkt(%s id %ud ack %ud %I/%ud->%ud)\n",
 				st, nhgetl(ih->ilid), nhgetl(ih->ilack), raddr, sp, dp); 
 			goto raise;
 		}
@@ -829,7 +835,7 @@
 
 	c = ic->conv;
 	id = nhgetl(h->ilid);
-	netlog(c->p->f, Logil, "il: rexmit %d %ud: %d %d: %i %d/%d\n", id, ic->recvd,
+	netlog(c->p->f, Logil, "il: rexmit %lud %lud: %d %lud: %I %d/%d\n", id, ic->recvd,
 		ic->rexmit, ic->timeout,
 		c->raddr, c->lport, c->rport);
 
@@ -852,7 +858,7 @@
 	ic = (Ilcb*)s->ptcl;
 
 	USED(ic);
-	netlog(s->p->f, Logilmsg, "%11s rcv %d/%d snt %d/%d pkt(%s id %d ack %d %d->%d) ",
+	netlog(s->p->f, Logilmsg, "%11s rcv %lud/%lud snt %lud/%lud pkt(%s id %ud ack %ud %ud->%ud) ",
 		ilstates[ic->state],  ic->rstart, ic->recvd, ic->start, 
 		ic->next, iltype[h->iltype], nhgetl(h->ilid), 
 		nhgetl(h->ilack), nhgets(h->ilsrc), nhgets(h->ildst));
@@ -859,7 +865,7 @@
 
 	_ilprocess(s, h, bp);
 
-	netlog(s->p->f, Logilmsg, "%11s rcv %d snt %d\n", ilstates[ic->state], ic->recvd, ic->next);
+	netlog(s->p->f, Logilmsg, "%11s rcv %lud snt %lud\n", ilstates[ic->state], ic->recvd, ic->next);
 }
 
 void
@@ -917,17 +923,12 @@
 		bp->list = nil;
 		dlen = nhgets(oh->illen)-IL_HDRSIZE;
 		bp = trimblock(bp, IL_IPSIZE+IL_HDRSIZE, dlen);
+			
 		/*
 		 * Upper levels don't know about multiple-block
 		 * messages so copy all into one (yick).
 		 */
-		bp = concatblock(bp);
-		if(bp == 0)
-			panic("ilpullup");
-		bp = packblock(bp);
-		if(bp == 0)
-			panic("ilpullup2");
-		qpass(s->rq, bp);
+		qpass(s->rq, packblock(concatblock(bp)));
 	}
 	qunlock(&ic->outo);
 }
@@ -948,7 +949,7 @@
 	id = nhgetl(h->ilid);
 	/* Window checks */
 	if(id <= ic->recvd || id > ic->recvd+ic->window) {
-		netlog(s->p->f, Logil, "il: message outside window %ud <%ud-%ud>: %i %d/%d\n",
+		netlog(s->p->f, Logil, "il: message outside window %lud <%lud-%lud>: %I %d/%d\n",
 			id, ic->recvd, ic->recvd+ic->window, s->raddr, s->lport, s->rport);
 		freeblist(bp);
 		return;
@@ -983,7 +984,7 @@
 	qunlock(&ic->outo);
 }
 
-void
+int
 ilsendctl(Conv *ipc, Ilhdr *inih, int type, ulong id, ulong ack, int ilspec)
 {
 	Ilhdr *ih;
@@ -1034,7 +1035,7 @@
 		hnputs(ih->ilsum, ptclcsum(bp, IL_IPSIZE, IL_HDRSIZE));
 
 if(ipc==nil)
-	panic("ipc is nil caller is %.8lux", getcallerpc(&ipc));
+	panic("ipc is nil caller is %#p", getcallerpc(&ipc));
 if(ipc->p==nil)
 	panic("ipc->p is nil");
 
@@ -1042,7 +1043,7 @@
 		iltype[ih->iltype], nhgetl(ih->ilid), nhgetl(ih->ilack), 
 		nhgets(ih->ilsrc), nhgets(ih->ildst));
 
-	ipoput4(ipc->p->f, bp, 0, ttl, tos, ipc);
+	return ipoput4(ipc->p->f, bp, 0, ttl, tos, ipc);
 }
 
 void
@@ -1145,6 +1146,8 @@
 
 	il = x;
 
+	while(waserror())
+		;
 loop:
 	tsleep(&up->sleep, return0, 0, Iltickms);
 	for(s = il->conv; s && *s; s++) {
@@ -1248,7 +1251,7 @@
 		qlock(&ipriv->apl);
 		if(ipriv->ackprocstarted == 0){
 			sprint(kpname, "#I%dilack", c->p->f->dev);
-			kproc(kpname, ilackproc, c->p, 0);
+			kproc(kpname, ilackproc, c->p);
 			ipriv->ackprocstarted = 1;
 		}
 		qunlock(&ipriv->apl);
@@ -1280,7 +1283,8 @@
 	case IL_CONNECT:
 		ic->state = Ilsyncer;
 		iphtadd(&ipriv->ht, c);
-		ilsendctl(c, nil, Ilsync, ic->start, ic->recvd, 0);
+		if(ilsendctl(c, nil, Ilsync, ic->start, ic->recvd, 0) < 0)
+			ilhangup(c, "no route");
 		break;
 	}
 
@@ -1332,6 +1336,8 @@
 		if(s->lport == psource)
 		if(ipcmp(s->laddr, source) == 0)
 		if(ipcmp(s->raddr, dest) == 0){
+			if(s->ignoreadvice)
+				break;
 			qunlock(il);
 			ic = (Ilcb*)s->ptcl;
 			switch(ic->state){
@@ -1380,12 +1386,6 @@
 	}
 }
 
-int
-ilgc(Proto *il)
-{
-	return natgc(il->ipproto);
-}
-
 void
 ilinit(Fs *f)
 {
@@ -1406,7 +1406,7 @@
 	il->advise = iladvise;
 	il->stats = ilxstats;
 	il->inuse = ilinuse;
-	il->gc = ilgc;
+	il->gc = nil;
 	il->ipproto = IP_ILPROTO;
 	il->nc = scalednconv();
 	il->ptclsize = sizeof(Ilcb);
--- a/os/ip/ip.c
+++ b/os/ip/ip.c
@@ -7,94 +7,6 @@
 
 #include	"ip.h"
 
-typedef struct IP		IP;
-typedef struct Fragment4	Fragment4;
-typedef struct Fragment6	Fragment6;
-typedef struct Ipfrag		Ipfrag;
-
-enum
-{
-	IP4HDR		= 20,		/* sizeof(Ip4hdr) */
-	IP6HDR		= 40,		/* sizeof(Ip6hdr) */
-	IP_HLEN4	= 0x05,		/* Header length in words */
-	IP_DF		= 0x4000,	/* Don't fragment */
-	IP_MF		= 0x2000,	/* More fragments */
-	IP6FHDR		= 8, 		/* sizeof(Fraghdr6) */
-	IP_MAX		= 64*1024,	/* Maximum Internet packet size */
-};
-
-#define BLKIPVER(xp)	(((Ip4hdr*)((xp)->rp))->vihl&0xF0)
-
-/* MIB II counters */
-enum
-{
-	Forwarding,
-	DefaultTTL,
-	InReceives,
-	InHdrErrors,
-	InAddrErrors,
-	ForwDatagrams,
-	InUnknownProtos,
-	InDiscards,
-	InDelivers,
-	OutRequests,
-	OutDiscards,
-	OutNoRoutes,
-	ReasmTimeout,
-	ReasmReqds,
-	ReasmOKs,
-	ReasmFails,
-	FragOKs,
-	FragFails,
-	FragCreates,
-
-	Nstats,
-};
-
-struct Fragment4
-{
-	Block*	blist;
-	Fragment4*	next;
-	ulong 	src;
-	ulong 	dst;
-	ushort	id;
-	ulong 	age;
-};
-
-struct Fragment6
-{
-	Block*	blist;
-	Fragment6*	next;
-	uchar 	src[IPaddrlen];
-	uchar 	dst[IPaddrlen];
-	uint	id;
-	ulong 	age;
-};
-
-struct Ipfrag
-{
-	ushort	foff;
-	ushort	flen;
-};
-
-/* an instance of IP */
-struct IP
-{
-	ulong		stats[Nstats];
-
-	QLock		fraglock4;
-	Fragment4*	flisthead4;
-	Fragment4*	fragfree4;
-	Ref		id4;
-
-	QLock		fraglock6;
-	Fragment6*	flisthead6;
-	Fragment6*	fragfree6;
-	Ref		id6;
-
-	int		iprouting;	/* true if we route like a gateway */
-};
-
 static char *statnames[] =
 {
 [Forwarding]	"Forwarding",
@@ -118,45 +30,11 @@
 [FragCreates]	"FragCreates",
 };
 
-#define BLKIP(xp)	((Ip4hdr*)((xp)->rp))
-/*
- * This sleazy macro relies on the media header size being
- * larger than sizeof(Ipfrag). ipreassemble checks this is true
- */
-#define BKFG(xp)	((Ipfrag*)((xp)->base))
+static Block*		ip4reassemble(IP*, int, Block*);
+static void		ipfragfree4(IP*, Fragment4*);
+static Fragment4*	ipfragallo4(IP*);
 
-ushort		ipcsum(uchar*);
-Block*		ip4reassemble(IP*, int, Block*, Ip4hdr*);
-void		ipfragfree4(IP*, Fragment4*);
-Fragment4*	ipfragallo4(IP*);
-
-
-void
-ip_init_6(Fs *f)
-{
-	V6params *v6p;
-
-	v6p = smalloc(sizeof(V6params));
-	
-	v6p->rp.mflag		= 0;		// default not managed
-	v6p->rp.oflag		= 0;
-	v6p->rp.maxraint	= 600000;	// millisecs
-	v6p->rp.minraint	= 200000;
-	v6p->rp.linkmtu		= 0;		// no mtu sent
-	v6p->rp.reachtime	= 0;
-	v6p->rp.rxmitra		= 0;
-	v6p->rp.ttl		= MAXTTL;
-	v6p->rp.routerlt	= 3*(v6p->rp.maxraint);	
-
-	v6p->hp.rxmithost	= 1000;		// v6 RETRANS_TIMER
-
-	v6p->cdrouter 		= -1;
-
-	f->v6p			= v6p;
-
-}
-
-void
+static void
 initfrag(IP *ip, int size)
 {
 	Fragment4 *fq4, *eq4;
@@ -189,6 +67,7 @@
 	IP *ip;
 
 	ip = smalloc(sizeof(IP));
+	ip->stats[DefaultTTL] = MAXTTL;
 	initfrag(ip, 100);
 	f->ip = ip;
 
@@ -202,11 +81,11 @@
 	if(f->ip->iprouting==0)
 		f->ip->stats[Forwarding] = 2;
 	else
-		f->ip->stats[Forwarding] = 1;	
+		f->ip->stats[Forwarding] = 1;
 }
 
 int
-ipoput4(Fs *f, Block *bp, int gating, int ttl, int tos, Conv *c)
+ipoput4(Fs *f, Block *bp, int gating, int ttl, int tos, Routehint *rh)
 {
 	Ipifc *ifc;
 	uchar *gate;
@@ -213,66 +92,41 @@
 	ulong fragoff;
 	Block *xp, *nb;
 	Ip4hdr *eh, *feh;
-	int lid, len, seglen, chunk, dlen, blklen, offset, medialen;
-	Route *r, *sr;
+	int lid, len, seglen, chunk, hlen, dlen, blklen, offset, medialen;
+	Route *r;
 	IP *ip;
 	int rv = 0;
 
 	ip = f->ip;
-
-	/* Fill out the ip header */
-	eh = (Ip4hdr*)(bp->rp);
-
 	ip->stats[OutRequests]++;
 
-	/* Number of uchars in data and ip header to write */
+	/* Fill out the ip header */
+	eh = (Ip4hdr*)bp->rp;
+	assert(BLEN(bp) >= IP4HDR);
 	len = blocklen(bp);
-
-	if(gating){
-		chunk = nhgets(eh->length);
-		if(chunk > len){
-			ip->stats[OutDiscards]++;
-			netlog(f, Logip, "short gated packet\n");
-			goto free;
-		}
-		if(chunk < len)
-			len = chunk;
-	}
 	if(len >= IP_MAX){
 		ip->stats[OutDiscards]++;
-		netlog(f, Logip, "exceeded ip max size %V\n", eh->dst);
+		netlog(f, Logip, "%V -> %V: exceeded ip max size: %d\n", eh->src, eh->dst, len);
 		goto free;
 	}
 
-	r = v4lookup(f, eh->dst, c);
-	if(r == nil){
+	r = v4lookup(f, eh->dst, eh->src, rh);
+	if(r == nil || (ifc = r->ifc) == nil){
 		ip->stats[OutNoRoutes]++;
-		netlog(f, Logip, "no interface %V\n", eh->dst);
+		netlog(f, Logip, "%V -> %V: no interface\n", eh->src, eh->dst);
 		rv = -1;
 		goto free;
 	}
 
-	ifc = r->ifc;
-	if(r->type & (Rifc|Runi))
+	if(r->type & (Rifc|Runi|Rbcast|Rmulti))
 		gate = eh->dst;
 	else
-	if(r->type & (Rbcast|Rmulti)) {
-		gate = eh->dst;
-		sr = v4lookup(f, eh->src, nil);
-		if(sr != nil && (sr->type & Runi))
-			ifc = sr->ifc;
-	}
-	else
 		gate = r->v4.gate;
 
-	if(!gating)
-		eh->vihl = IP_VER4|IP_HLEN4;
-	eh->ttl = ttl;
-	if(!gating)
-		eh->tos = tos;
-
-	if(!canrlock(ifc))
+	if(!canrlock(ifc)){
+		ip->stats[OutDiscards]++;
 		goto free;
+	}
 	if(waserror()){
 		runlock(ifc);
 		nexterror();
@@ -280,17 +134,18 @@
 	if(ifc->m == nil)
 		goto raise;
 
-	/* Output NAT */
-	if(nato(bp, ifc, f) != 0)
-		goto raise;
+	if(!gating){
+		eh->vihl = IP_VER4|IP_HLEN4;
+		eh->tos = tos;
+	}
+	eh->ttl = ttl;
 
 	/* If we dont need to fragment just send it */
 	medialen = ifc->maxtu - ifc->m->hsize;
 	if(len <= medialen) {
-		if(!gating)
-			hnputs(eh->id, incref(&ip->id4));
 		hnputs(eh->length, len);
 		if(!gating){
+			hnputs(eh->id, incref(&ip->id4));
 			eh->frag[0] = 0;
 			eh->frag[1] = 0;
 		}
@@ -297,31 +152,31 @@
 		eh->cksum[0] = 0;
 		eh->cksum[1] = 0;
 		hnputs(eh->cksum, ipcsum(&eh->vihl));
-		ifc->m->bwrite(ifc, bp, V4, gate);
+
+		ipifcoput(ifc, bp, V4, gate);
 		runlock(ifc);
 		poperror();
 		return 0;
 	}
 
-if((eh->frag[0] & (IP_DF>>8)) && !gating) print("%V: DF set\n", eh->dst);
-
 	if(eh->frag[0] & (IP_DF>>8)){
 		ip->stats[FragFails]++;
 		ip->stats[OutDiscards]++;
 		icmpcantfrag(f, bp, medialen);
-		netlog(f, Logip, "%V: eh->frag[0] & (IP_DF>>8)\n", eh->dst);
+		netlog(f, Logip, "%V -> %V: can't fragment with DF flag set\n", eh->src, eh->dst);
 		goto raise;
 	}
 
-	seglen = (medialen - IP4HDR) & ~7;
+	hlen = (eh->vihl & 0xF)<<2;
+	seglen = (medialen - hlen) & ~7;
 	if(seglen < 8){
 		ip->stats[FragFails]++;
 		ip->stats[OutDiscards]++;
-		netlog(f, Logip, "%V seglen < 8\n", eh->dst);
+		netlog(f, Logip, "%V -> %V: can't fragment with seglen < 8\n", eh->src, eh->dst);
 		goto raise;
 	}
 
-	dlen = len - IP4HDR;
+	dlen = len - hlen;
 	xp = bp;
 	if(gating)
 		lid = nhgets(eh->id);
@@ -328,8 +183,8 @@
 	else
 		lid = incref(&ip->id4);
 
-	offset = IP4HDR;
-	while(xp != nil && offset && offset >= BLEN(xp)) {
+	offset = hlen;
+	while(offset && offset >= BLEN(xp)) {
 		offset -= BLEN(xp);
 		xp = xp->next;
 	}
@@ -341,30 +196,30 @@
 		fragoff = 0;
 	dlen += fragoff;
 	for(; fragoff < dlen; fragoff += seglen) {
-		nb = allocb(IP4HDR+seglen);
-		feh = (Ip4hdr*)(nb->rp);
+		nb = allocb(hlen+seglen);
+		feh = (Ip4hdr*)nb->rp;
 
-		memmove(nb->wp, eh, IP4HDR);
-		nb->wp += IP4HDR;
+		memmove(nb->wp, eh, hlen);
+		nb->wp += hlen;
 
 		if((fragoff + seglen) >= dlen) {
 			seglen = dlen - fragoff;
 			hnputs(feh->frag, fragoff>>3);
 		}
-		else	
+		else
 			hnputs(feh->frag, (fragoff>>3)|IP_MF);
 
-		hnputs(feh->length, seglen + IP4HDR);
+		hnputs(feh->length, seglen + hlen);
 		hnputs(feh->id, lid);
 
 		/* Copy up the data area */
 		chunk = seglen;
 		while(chunk) {
-			if(!xp) {
+			if(xp == nil) {
 				ip->stats[OutDiscards]++;
 				ip->stats[FragFails]++;
 				freeblist(nb);
-				netlog(f, Logip, "!xp: chunk %d\n", chunk);
+				netlog(f, Logip, "xp == nil: chunk %d\n", chunk);
 				goto raise;
 			}
 			blklen = chunk;
@@ -376,12 +231,13 @@
 			chunk -= blklen;
 			if(xp->rp == xp->wp)
 				xp = xp->next;
-		} 
+		}
 
 		feh->cksum[0] = 0;
 		feh->cksum[1] = 0;
 		hnputs(feh->cksum, ipcsum(&feh->vihl));
-		ifc->m->bwrite(ifc, nb, V4, gate);
+
+		ipifcoput(ifc, nb, V4, gate);
 		ip->stats[FragCreates]++;
 	}
 	ip->stats[FragOKs]++;
@@ -396,17 +252,14 @@
 void
 ipiput4(Fs *f, Ipifc *ifc, Block *bp)
 {
-	int hl;
-	int hop, tos, proto, olen;
+	int hl, len, hop, tos;
+	uchar v6dst[IPaddrlen];
+	ushort frag;
 	Ip4hdr *h;
 	Proto *p;
-	ushort frag;
-	int notforme;
-	uchar *dp, v6dst[IPaddrlen];
 	IP *ip;
-	Route *r;
 
-	if(BLKIPVER(bp) != IP_VER4) {
+	if((bp->rp[0]&0xF0) != IP_VER4) {
 		ipiput6(f, ifc, bp);
 		return;
 	}
@@ -430,58 +283,45 @@
 			return;
 	}
 
-	h = (Ip4hdr*)(bp->rp);
-
-	/* Input NAT */
-	nati(bp, ifc);
-
-	/* dump anything that whose header doesn't checksum */
+	h = (Ip4hdr*)bp->rp;
+	hl = (h->vihl & 0xF)<<2;
+	if(hl < IP4HDR || hl > BLEN(bp)) {
+		ip->stats[InHdrErrors]++;
+		netlog(f, Logip, "%V -> %V: bad ip header length: %d\n", h->src, h->dst, hl);
+		goto drop;
+	}
 	if((bp->flag & Bipck) == 0 && ipcsum(&h->vihl)) {
 		ip->stats[InHdrErrors]++;
-		netlog(f, Logip, "ip: checksum error %V\n", h->src);
-		freeblist(bp);
+		netlog(f, Logip, "%V -> %V: bad ip header checksum\n", h->src, h->dst);
+		goto drop;
+	}
+	len = nhgets(h->length);
+	if(len < hl || (bp = trimblock(bp, 0, len)) == nil){
+		ip->stats[InHdrErrors]++;
+		netlog(f, Logip, "%V -> %V: bogus packet length: %d\n", h->src, h->dst, len);
+		if(bp != nil)
+			goto drop;
 		return;
 	}
-	v4tov6(v6dst, h->dst);
-	notforme = ipforme(f, v6dst) == 0;
+	h = (Ip4hdr*)bp->rp;
 
-	/* Check header length and version */
-	if((h->vihl&0x0F) != IP_HLEN4) {
-		hl = (h->vihl&0xF)<<2;
-		if(hl < (IP_HLEN4<<2)) {
-			ip->stats[InHdrErrors]++;
-			netlog(f, Logip, "ip: %V bad hivl %ux\n", h->src, h->vihl);
-			freeblist(bp);
-			return;
-		}
-	  /* If this is not routed strip off the options */
-		if(notforme == 0) {
-			olen = nhgets(h->length);
-			dp = bp->rp + (hl - (IP_HLEN4<<2));
-			memmove(dp, h, IP_HLEN4<<2);
-			bp->rp = dp;
-			h = (Ip4hdr*)(bp->rp);
-			h->vihl = (IP_VER4|IP_HLEN4);
-			hnputs(h->length, olen-hl+(IP_HLEN4<<2));
-		}
-	}
-
 	/* route */
-	if(notforme) {
-		Conv conv;
+	v4tov6(v6dst, h->dst);
+	if(!ipforme(f, v6dst)) {
+		Route *r;
+		Routehint rh;
+		Ipifc *nifc;
 
-		if(!ip->iprouting){
-			freeb(bp);
-			return;
-		}
+		if(!ip->iprouting)
+			goto drop;
 
 		/* don't forward to source's network */
-		conv.r = nil;
-		r = v4lookup(f, h->dst, &conv);
-		if(r == nil || r->ifc == ifc){
+		rh.r = nil;
+		r = v4lookup(f, h->dst, h->src, &rh);
+		if(r == nil || (nifc = r->ifc) == nil
+		|| (nifc == ifc && !ifc->reflect)){
 			ip->stats[OutDiscards]++;
-			freeblist(bp);
-			return;
+			goto drop;
 		}
 
 		/* don't forward if packet has timed out */
@@ -488,23 +328,18 @@
 		hop = h->ttl;
 		if(hop < 1) {
 			ip->stats[InHdrErrors]++;
-			icmpttlexceeded(f, ifc->lifc->local, bp);
-			freeblist(bp);
-			return;
+			icmpttlexceeded(f, ifc, bp);
+			goto drop;
 		}
 
 		/* reassemble if the interface expects it */
-if(r->ifc == nil) panic("nil route rfc");
-		if(r->ifc->reassemble){
+		if(nifc->reassemble){
 			frag = nhgets(h->frag);
-			if(frag) {
-				h->tos = 0;
-				if(frag & IP_MF)
-					h->tos = 1;
-				bp = ip4reassemble(ip, frag, bp, h);
+			if(frag & (IP_MF|IP_FO)) {
+				bp = ip4reassemble(ip, frag, bp);
 				if(bp == nil)
 					return;
-				h = (Ip4hdr*)(bp->rp);
+				h = (Ip4hdr*)bp->rp;
 			}
 		}
 
@@ -511,27 +346,30 @@
 		ip->stats[ForwDatagrams]++;
 		tos = h->tos;
 		hop = h->ttl;
-		ipoput4(f, bp, 1, hop - 1, tos, &conv);
+		ipoput4(f, bp, 1, hop - 1, tos, &rh);
 		return;
 	}
 
+	/* If this is not routed strip off the options */
+	if(hl > IP4HDR) {
+		hl -= IP4HDR;
+		len -= hl;
+		bp->rp += hl;
+		memmove(bp->rp, h, IP4HDR);
+		h = (Ip4hdr*)bp->rp;
+		h->vihl = IP_VER4|IP_HLEN4;
+		hnputs(h->length, len);
+	}
+
 	frag = nhgets(h->frag);
-	if(frag) {
-		h->tos = 0;
-		if(frag & IP_MF)
-			h->tos = 1;
-		bp = ip4reassemble(ip, frag, bp, h);
+	if(frag & (IP_MF|IP_FO)) {
+		bp = ip4reassemble(ip, frag, bp);
 		if(bp == nil)
 			return;
-		h = (Ip4hdr*)(bp->rp);
+		h = (Ip4hdr*)bp->rp;
 	}
 
-	/* don't let any frag info go up the stack */
-	h->frag[0] = 0;
-	h->frag[1] = 0;
-
-	proto = h->proto;
-	p = Fsrcvpcol(f, proto);
+	p = Fsrcvpcol(f, h->proto);
 	if(p != nil && p->rcv != nil) {
 		ip->stats[InDelivers]++;
 		(*p->rcv)(p, ifc, bp);
@@ -539,6 +377,7 @@
 	}
 	ip->stats[InDiscards]++;
 	ip->stats[InUnknownProtos]++;
+drop:
 	freeblist(bp);
 }
 
@@ -550,45 +389,43 @@
 	int i;
 
 	ip = f->ip;
-	ip->stats[DefaultTTL] = MAXTTL;
-
 	p = buf;
 	e = p+len;
-	for(i = 0; i < Nstats; i++)
-		p = seprint(p, e, "%s: %lud\n", statnames[i], ip->stats[i]);
+	for(i = 0; i < Nipstats; i++)
+		p = seprint(p, e, "%s: %llud\n", statnames[i], ip->stats[i]);
 	return p - buf;
 }
 
-Block*
-ip4reassemble(IP *ip, int offset, Block *bp, Ip4hdr *ih)
+static Block*
+ip4reassemble(IP *ip, int offset, Block *bp)
 {
-	int fend;
+	int ovlap, fragsize, len;
+	ulong src, dst;
 	ushort id;
+	Block *bl, **l, *prev;
 	Fragment4 *f, *fnext;
-	ulong src, dst;
-	Block *bl, **l, *last, *prev;
-	int ovlap, len, fragsize, pktposn;
+	Ipfrag *fp, *fq;
+	Ip4hdr *ih;
 
+	/*
+	 *  block lists are too hard, concatblock into a single block
+	 */
+	bp = concatblock(bp);
+
+	ih = (Ip4hdr*)bp->rp;
 	src = nhgetl(ih->src);
 	dst = nhgetl(ih->dst);
 	id = nhgets(ih->id);
+	fragsize = BLEN(bp) - ((ih->vihl&0xF)<<2);
 
-	/*
-	 *  block lists are too hard, pullupblock into a single block
-	 */
-	if(bp->next){
-		bp = pullupblock(bp, blocklen(bp));
-		ih = (Ip4hdr*)(bp->rp);
-	}
-
 	qlock(&ip->fraglock4);
 
 	/*
 	 *  find a reassembly queue for this fragment
 	 */
-	for(f = ip->flisthead4; f; f = fnext){
+	for(f = ip->flisthead4; f != nil; f = fnext){
 		fnext = f->next;	/* because ipfragfree4 changes the list */
-		if(f->src == src && f->dst == dst && f->id == id)
+		if(f->id == id && f->src == src && f->dst == dst)
 			break;
 		if(f->age < NOW){
 			ip->stats[ReasmTimeout]++;
@@ -601,22 +438,23 @@
 	 *  and get rid of any fragments that might go
 	 *  with it.
 	 */
-	if(!ih->tos && (offset & ~(IP_MF|IP_DF)) == 0) {
+	if((offset & (IP_MF|IP_FO)) == 0) {
 		if(f != nil) {
-			ipfragfree4(ip, f);
 			ip->stats[ReasmFails]++;
+			ipfragfree4(ip, f);
 		}
 		qunlock(&ip->fraglock4);
 		return bp;
 	}
 
-	if(bp->base+sizeof(Ipfrag) >= bp->rp){
-		bp = padblock(bp, sizeof(Ipfrag));
-		bp->rp += sizeof(Ipfrag);
+	if(bp->base+IPFRAGSZ > bp->rp){
+		bp = padblock(bp, IPFRAGSZ);
+		bp->rp += IPFRAGSZ;
 	}
 
-	BKFG(bp)->foff = offset<<3;
-	BKFG(bp)->flen = nhgets(ih->length)-IP4HDR;
+	fp = (Ipfrag*)bp->base;
+	fp->foff = (offset & IP_FO)<<3;
+	fp->flen = fragsize;
 
 	/* First fragment allocates a reassembly queue */
 	if(f == nil) {
@@ -627,8 +465,9 @@
 
 		f->blist = bp;
 
-		qunlock(&ip->fraglock4);
 		ip->stats[ReasmReqds]++;
+		qunlock(&ip->fraglock4);
+
 		return nil;
 	}
 
@@ -638,7 +477,7 @@
 	prev = nil;
 	l = &f->blist;
 	bl = f->blist;
-	while(bl != nil && BKFG(bp)->foff > BKFG(bl)->foff) {
+	while(bl != nil && fp->foff > ((Ipfrag*)bl->base)->foff) {
 		prev = bl;
 		l = &bl->next;
 		bl = bl->next;
@@ -645,15 +484,16 @@
 	}
 
 	/* Check overlap of a previous fragment - trim away as necessary */
-	if(prev) {
-		ovlap = BKFG(prev)->foff + BKFG(prev)->flen - BKFG(bp)->foff;
+	if(prev != nil) {
+		fq = (Ipfrag*)prev->base;
+		ovlap = fq->foff + fq->flen - fp->foff;
 		if(ovlap > 0) {
-			if(ovlap >= BKFG(bp)->flen) {
-				freeblist(bp);
+			if(ovlap >= fp->flen) {
 				qunlock(&ip->fraglock4);
+				freeb(bp);
 				return nil;
 			}
-			BKFG(prev)->flen -= ovlap;
+			fq->flen -= ovlap;
 		}
 	}
 
@@ -662,26 +502,26 @@
 	*l = bp;
 
 	/* Check to see if succeeding segments overlap */
-	if(bp->next) {
+	if(bp->next != nil) {
 		l = &bp->next;
-		fend = BKFG(bp)->foff + BKFG(bp)->flen;
+		offset = fp->foff + fp->flen;
 		/* Take completely covered segments out */
-		while(*l) {
-			ovlap = fend - BKFG(*l)->foff;
+		while((bl = *l) != nil) {
+			fq = (Ipfrag*)bl->base;
+			ovlap = offset - fq->foff;
 			if(ovlap <= 0)
 				break;
-			if(ovlap < BKFG(*l)->flen) {
-				BKFG(*l)->flen -= ovlap;
-				BKFG(*l)->foff += ovlap;
-				/* move up ih hdrs */
-				memmove((*l)->rp + ovlap, (*l)->rp, IP4HDR);
-				(*l)->rp += ovlap;
+			if(ovlap < fq->flen) {
+				/* move up ip header */
+				memmove(bl->rp + ovlap, bl->rp, BLEN(bl) - fq->flen);
+				bl->rp += ovlap;
+				fq->flen -= ovlap;
+				fq->foff += ovlap;
 				break;
 			}
-			last = (*l)->next;
-			(*l)->next = nil;
-			freeblist(*l);
-			*l = last;
+			*l = bl->next;
+			bl->next = nil;
+			freeb(bl);
 		}
 	}
 
@@ -689,35 +529,50 @@
 	 *  look for a complete packet.  if we get to a fragment
 	 *  without IP_MF set, we're done.
 	 */
-	pktposn = 0;
-	for(bl = f->blist; bl; bl = bl->next) {
-		if(BKFG(bl)->foff != pktposn)
+	offset = 0;
+	for(bl = f->blist; bl != nil; bl = bl->next, offset += fp->flen) {
+		fp = (Ipfrag*)bl->base;
+		if(fp->foff != offset)
 			break;
-		if((BLKIP(bl)->frag[0]&(IP_MF>>8)) == 0) {
-			bl = f->blist;
-			len = nhgets(BLKIP(bl)->length);
-			bl->wp = bl->rp + len;
 
-			/* Pullup all the fragment headers and
-			 * return a complete packet
-			 */
-			for(bl = bl->next; bl; bl = bl->next) {
-				fragsize = BKFG(bl)->flen;
-				len += fragsize;
-				bl->rp += IP4HDR;
-				bl->wp = bl->rp + fragsize;
-			}
+		ih = (Ip4hdr*)bl->rp;
+		if(ih->frag[0]&(IP_MF>>8))
+			continue;
 
-			bl = f->blist;
-			f->blist = nil;
+		bl = f->blist;
+		len = BLEN(bl);
+
+		/*
+		 * Pullup all the fragment headers and
+		 * return a complete packet
+		 */
+		for(bl = bl->next; bl != nil && len < IP_MAX; bl = bl->next) {
+			fq = (Ipfrag*)bl->base;
+			fragsize = fq->flen;
+			bl->rp = bl->wp - fragsize;
+			len += fragsize;
+		}
+
+		if(len >= IP_MAX){
 			ipfragfree4(ip, f);
-			ih = BLKIP(bl);
-			hnputs(ih->length, len);
+			ip->stats[ReasmFails]++;
 			qunlock(&ip->fraglock4);
-			ip->stats[ReasmOKs]++;
-			return bl;		
+			return nil;
 		}
-		pktposn += BKFG(bl)->flen;
+
+		bl = f->blist;
+		f->blist = nil;
+		ipfragfree4(ip, f);
+
+		ih = (Ip4hdr*)bl->rp;
+		ih->frag[0] = 0;
+		ih->frag[1] = 0;
+		hnputs(ih->length, len);
+
+		ip->stats[ReasmOKs]++;
+		qunlock(&ip->fraglock4);
+
+		return bl;
 	}
 	qunlock(&ip->fraglock4);
 	return nil;
@@ -726,20 +581,20 @@
 /*
  * ipfragfree4 - Free a list of fragments - assume hold fraglock4
  */
-void
+static void
 ipfragfree4(IP *ip, Fragment4 *frag)
 {
 	Fragment4 *fl, **l;
 
-	if(frag->blist)
+	if(frag->blist != nil)
 		freeblist(frag->blist);
-
-	frag->src = 0;
-	frag->id = 0;
 	frag->blist = nil;
+	frag->id = 0;
+	frag->src = 0;
+	frag->dst = 0;
 
 	l = &ip->flisthead4;
-	for(fl = *l; fl; fl = fl->next) {
+	for(fl = *l; fl != nil; fl = fl->next) {
 		if(fl == frag) {
 			*l = frag->next;
 			break;
@@ -755,7 +610,7 @@
 /*
  * ipfragallo4 - allocate a reassembly queue - assume hold fraglock4
  */
-Fragment4 *
+static Fragment4*
 ipfragallo4(IP *ip)
 {
 	Fragment4 *f;
@@ -762,7 +617,7 @@
 
 	while(ip->fragfree4 == nil) {
 		/* free last entry on fraglist */
-		for(f = ip->flisthead4; f->next; f = f->next)
+		for(f = ip->flisthead4; f->next != nil; f = f->next)
 			;
 		ipfragfree4(ip, f);
 	}
--- a/os/ip/ip.h
+++ b/os/ip/ip.h
@@ -1,35 +1,33 @@
 typedef struct	Conv	Conv;
+typedef struct	Fragment4 Fragment4;
+typedef struct	Fragment6 Fragment6;
 typedef struct	Fs	Fs;
 typedef union	Hwaddr	Hwaddr;
 typedef struct	IP	IP;
 typedef struct	IPaux	IPaux;
+typedef struct	Ip4hdr	Ip4hdr;
+typedef struct	Ipfrag	Ipfrag;
 typedef struct	Ipself	Ipself;
 typedef struct	Ipselftab	Ipselftab;
 typedef struct	Iplink	Iplink;
 typedef struct	Iplifc	Iplifc;
 typedef struct	Ipmulti	Ipmulti;
-typedef struct	IProuter IProuter;
 typedef struct	Ipifc	Ipifc;
 typedef struct	Iphash	Iphash;
 typedef struct	Ipht	Ipht;
 typedef struct	Netlog	Netlog;
-typedef struct	Ifclog	Ifclog;
 typedef struct	Medium	Medium;
 typedef struct	Proto	Proto;
 typedef struct	Arpent	Arpent;
 typedef struct	Arp Arp;
 typedef struct	Route	Route;
+typedef struct	Routehint Routehint;
 
 typedef struct	Routerparams	Routerparams;
 typedef struct 	Hostparams	Hostparams;
-typedef struct 	V6router	V6router;
-typedef struct	V6params	V6params;
+typedef struct	v6params	v6params;
 
-typedef struct Ip4hdr     Ip4hdr;
-typedef struct Nat	Nat;
-
 #pragma incomplete Arp
-#pragma	incomplete Ifclog
 #pragma incomplete Ipself
 #pragma incomplete Ipselftab
 #pragma incomplete IP
@@ -39,10 +37,9 @@
 {
 	Addrlen=	64,
 	Maxproto=	20,
-	Nhash=		64,
-	Maxincall=	5,
-	Nchans=		16383,
-	MAClen=		16,		/* longest mac address */
+	Maxincall=	10,
+	Nchans=		1024,
+	MAClen=		8,		/* longest mac address */
 
 	MAXTTL=		255,
 	DFLTTOS=	0,
@@ -57,6 +54,12 @@
 	V6=		6,
 	IP_VER4= 	0x40,
 	IP_VER6=	0x60,
+	IP_HLEN4=	5,		/* v4: Header length in words */
+	IP_DF=		0x4000,		/* v4: Don't fragment */
+	IP_MF=		0x2000,		/* v4: More fragments */
+	IP_FO=		0x1fff,		/* v4: Fragment offset */
+	IP4HDR=		IP_HLEN4<<2,	/* sizeof(Ip4hdr) */
+	IP_MAX=		64*1024,	/* Max. Internet packet size, v4 & v6 */
 
 	/* 2^Lroot trees in the root table */
 	Lroot=		10,
@@ -73,6 +76,79 @@
 	Connected=	4,
 };
 
+/* MIB II counters */
+enum
+{
+	Forwarding,
+	DefaultTTL,
+	InReceives,
+	InHdrErrors,
+	InAddrErrors,
+	ForwDatagrams,
+	InUnknownProtos,
+	InDiscards,
+	InDelivers,
+	OutRequests,
+	OutDiscards,
+	OutNoRoutes,
+	ReasmTimeout,
+	ReasmReqds,
+	ReasmOKs,
+	ReasmFails,
+	FragOKs,
+	FragFails,
+	FragCreates,
+
+	Nipstats,
+};
+
+struct Fragment4
+{
+	Block*	blist;
+	Fragment4*	next;
+	ulong 	src;
+	ulong 	dst;
+	ushort	id;
+	ulong 	age;
+};
+
+struct Fragment6
+{
+	Block*	blist;
+	Fragment6*	next;
+	uchar 	src[IPaddrlen];
+	uchar 	dst[IPaddrlen];
+	uint	id;
+	ulong 	age;
+};
+
+struct Ipfrag
+{
+	ushort	foff;
+	ushort	flen;
+	uchar	payload[];
+};
+
+#define IPFRAGSZ offsetof(Ipfrag, payload[0])
+
+/* an instance of IP */
+struct IP
+{
+	uvlong		stats[Nipstats];
+
+	QLock		fraglock4;
+	Fragment4*	flisthead4;
+	Fragment4*	fragfree4;
+	Ref		id4;
+
+	QLock		fraglock6;
+	Fragment6*	flisthead6;
+	Fragment6*	fragfree6;
+	Ref		id6;
+
+	int		iprouting;	/* true if we route like a gateway */
+};
+
 /* on the wire packet header */
 struct Ip4hdr
 {
@@ -86,9 +162,14 @@
 	uchar	cksum[2];	/* Header checksum */
 	uchar	src[4];		/* IP source */
 	uchar	dst[4];		/* IP destination */
-	uchar	data[1];	/* start of data */
 };
 
+struct Routehint
+{
+	Route	*r;			/* last route used */
+	ulong	rgen;			/* routetable generation for *r */
+};
+
 /*
  *  one per conversation directory
  */
@@ -100,9 +181,9 @@
 	Proto*	p;
 
 	int	restricted;		/* remote port is restricted */
+	int	ignoreadvice;		/* don't terminate connection on icmp errors */
 	uint	ttl;			/* max time to live */
 	uint	tos;			/* type of service */
-	int	ignoreadvice;		/* don't terminate connection on icmp errors */
 
 	uchar	ipversion;
 	uchar	laddr[IPaddrlen];	/* local IP address */
@@ -139,8 +220,7 @@
 
 	void*	ptcl;			/* protocol specific stuff */
 
-	Route	*r;			/* last route used */
-	ulong	rgen;			/* routetable generation for *r */
+	Routehint;
 };
 
 struct Medium
@@ -161,18 +241,8 @@
 	/* process packets written to 'data' */
 	void	(*pktin)(Fs *f, Ipifc *ifc, Block *bp);
 
-	/* routes for router boards */
-	void	(*addroute)(Ipifc *ifc, int, uchar*, uchar*, uchar*, int);
-	void	(*remroute)(Ipifc *ifc, int, uchar*, uchar*);
-	void	(*flushroutes)(Ipifc *ifc);
-
-	/* for routing multicast groups */
-	void	(*joinmulti)(Ipifc *ifc, uchar *a, uchar *ia);
-	void	(*leavemulti)(Ipifc *ifc, uchar *a, uchar *ia);
-
 	/* address resolution */
-	void	(*ares)(Fs*, int, uchar*, uchar*, int, int);	/* resolve */
-	void	(*areg)(Ipifc*, uchar*);			/* register */
+	void	(*areg)(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *ip);
 
 	/* v6 address generation */
 	void	(*pref2addr)(uchar *pref, uchar *ea);
@@ -187,12 +257,13 @@
 	uchar	mask[IPaddrlen];
 	uchar	remote[IPaddrlen];
 	uchar	net[IPaddrlen];
+	uchar	type;		/* route type */
 	uchar	tentative;	/* =1 => v6 dup disc on, =0 => confirmed unique */
 	uchar	onlink;		/* =1 => onlink, =0 offlink. */
 	uchar	autoflag;	/* v6 autonomous flag */
-	long 	validlt;	/* v6 valid lifetime */
-	long 	preflt;		/* v6 preferred lifetime */
-	long	origint;	/* time when addr was added */
+	ulong 	validlt;	/* v6 valid lifetime */
+	ulong 	preflt;		/* v6 preferred lifetime */
+	ulong	origint;	/* time when addr was added */
 	Iplink	*link;		/* addresses linked to this lifc */
 	Iplifc	*next;
 };
@@ -203,25 +274,25 @@
 	Ipself	*self;
 	Iplifc	*lifc;
 	Iplink	*selflink;	/* next link for this local address */
-	Iplink	*lifclink;	/* next link for this ifc */
-	ulong	expire;
+	Iplink	*lifclink;	/* next link for this lifc */
 	Iplink	*next;		/* free list */
+	ulong	expire;
 	int	ref;
 };
 
-/* rfc 2461, pp.40--43. */
+/* rfc 2461, pp.40—43. */
 
 /* default values, one per stack */
 struct Routerparams {
-	int	mflag;
-	int	oflag;
-	int 	maxraint;
-	int	minraint;
-	int	linkmtu;
-	int	reachtime;
-	int	rxmitra;
-	int	ttl;
-	int	routerlt;	
+	int	mflag;		/* flag: managed address configuration */
+	int	oflag;		/* flag: other stateful configuration */
+	int 	maxraint;	/* max. router adv interval (ms) */
+	int	minraint;	/* min. router adv interval (ms) */
+	int	linkmtu;	/* mtu options */
+	int	reachtime;	/* reachable time */
+	int	rxmitra;	/* retransmit interval */
+	int	ttl;		/* cur hop count limit */
+	int	routerlt;	/* router lifetime */
 };
 
 struct Hostparams {
@@ -231,22 +302,18 @@
 struct Ipifc
 {
 	RWlock;
-	
+
 	Conv	*conv;		/* link to its conversation structure */
 	char	dev[64];	/* device we're attached to */
 	Medium	*m;		/* Media pointer */
 	int	maxtu;		/* Maximum transfer unit */
 	int	mintu;		/* Minumum tranfer unit */
-	int	mbps;		/* megabits per second */
 	void	*arg;		/* medium specific */
-	int	reassemble;	/* reassemble IP packets before forwarding */
 
-	/* these are used so that we can unbind on the fly */
-	Lock	idlock;
+	uchar	reflect;	/* allow forwarded packets to go out the same interface */
+	uchar	reassemble;	/* reassemble IP packets before forwarding to this interface */
+	
 	uchar	ifcid;		/* incremented each 'bind/unbind/add/remove' */
-	int	ref;		/* number of proc's using this ipifc */
-	Rendez	wait;		/* where unbinder waits for ref == 0 */
-	int	unbinding;
 
 	uchar	mac[MAClen];	/* MAC address */
 
@@ -255,10 +322,16 @@
 	ulong	in, out;	/* message statistics */
 	ulong	inerr, outerr;	/* ... */
 
-	uchar	sendra6;	/* == 1 => send router advs on this ifc	*/
-	uchar	recvra6;	/* == 1 => recv router advs on this ifc */
-	Routerparams rp;	/* router parameters as in RFC 2461, pp.40--43. 
+	uchar	sendra6;	/* flag: send router advs on this ifc */
+	uchar	recvra6;	/* flag: recv router advs on this ifc */
+	Routerparams rp;	/* router parameters as in RFC 2461, pp.40—43.
 					used only if node is router */
+
+	int	speed;		/* link speed in bits per second */
+	int	delay;		/* burst delay in ms */
+	int	burst;		/* burst delay in bytes */
+	int	load;		/* bytes in flight */
+	ulong	ticks;
 };
 
 /*
@@ -330,20 +403,11 @@
 	int		nc;		/* number of conversations */
 	int		ac;
 	Qid		qid;		/* qid for protocol directory */
-	ushort		nextport;
 	ushort		nextrport;
 
 	void		*priv;
 };
 
-/*
- *  Stream for sending packets to user level
- */
-struct IProuter {
-	QLock;
-	int	opens;
-	Queue	*q;
-};
 
 /*
  *  one per IP protocol stack
@@ -362,8 +426,7 @@
 	IP	*ip;
 	Ipselftab	*self;
 	Arp	*arp;
-	V6params	*v6p;
-	IProuter iprouter;
+	v6params	*v6p;
 
 	Route	*v4root[1<<Lroot];	/* v4 routing forest */
 	Route	*v6root[1<<Lroot];	/* v6 routing forest */
@@ -370,7 +433,6 @@
 	Route	*queue;			/* used as temp when reinjecting routes */
 
 	Netlog	*alog;
-	Ifclog	*ilog;
 
 	char	ndb[1024];		/* an ndb entry for this interface */
 	int	ndbvers;
@@ -377,23 +439,10 @@
 	long	ndbmtime;
 };
 
-/* one per default router known to host */
-struct V6router {
-	uchar	inuse;
-	Ipifc	*ifc;
-	int	ifcid;
-	uchar	routeraddr[IPaddrlen];
-	long	ltorigin;
-	Routerparams	rp;
-};
-
-struct V6params
+struct v6params
 {
 	Routerparams	rp;		/* v6 params, one copy per node now */
 	Hostparams	hp;
-	V6router	v6rlist[3];	/* max 3 default routers, currently */
-	int		cdrouter;	/* uses only v6rlist[cdrouter] if   */ 
-					/* cdrouter >= 0. */
 };
 
 
@@ -410,8 +459,7 @@
 char*	Fsstdbind(Conv*, char**, int);
 ulong	scalednconv(void);
 void	closeconv(Conv*);
-
-/* 
+/*
  *  logging
  */
 enum
@@ -434,7 +482,6 @@
 	Logrudpmsg=	1<<16,
 	Logesp=		1<<17,
 	Logtcpwin=	1<<18,
-	Lognat=		1<<19,
 };
 
 void	netloginit(Fs*);
@@ -449,17 +496,17 @@
 void	ifclogopen(Fs*, Chan*);
 void	ifclogclose(Fs*, Chan*);
 
+#pragma varargck argpos netlog	3
+
 /*
  *  iproute.c
  */
 typedef	struct RouteTree RouteTree;
-typedef struct Routewalk Routewalk;
 typedef struct V4route V4route;
 typedef struct V6route V6route;
 
 enum
 {
-
 	/* type bits */
 	Rv4=		(1<<0),		/* this is a version 4 route */
 	Rifc=		(1<<1),		/* this route is a directly connected interface */
@@ -468,27 +515,18 @@
 	Rbcast=		(1<<4),		/* a broadcast self address */
 	Rmulti=		(1<<5),		/* a multicast self address */
 	Rproxy=		(1<<6),		/* this route should be proxied */
+	Rsrc=		(1<<7),		/* source specific route */
 };
 
-struct Routewalk
-{
-	int	o;
-	int	h;
-	char*	p;
-	char*	e;
-	void*	state;
-	void	(*walk)(Route*, Routewalk*);
-};
-
 struct	RouteTree
 {
-	Route*	right;
-	Route*	left;
-	Route*	mid;
+	Route	*mid;
+	Route	*left;
+	Route	*right;
+	Ipifc	*ifc;
+	uchar	ifcid;		/* must match ifc->id */
 	uchar	depth;
 	uchar	type;
-	uchar	ifcid;		/* must match ifc->id */
-	Ipifc	*ifc;
 	char	tag[4];
 	int	ref;
 };
@@ -497,6 +535,10 @@
 {
 	ulong	address;
 	ulong	endaddress;
+
+	ulong	source;
+	ulong	endsource;
+
 	uchar	gate[IPv4addrlen];
 };
 
@@ -504,6 +546,10 @@
 {
 	ulong	address[IPllen];
 	ulong	endaddress[IPllen];
+
+	ulong	source[IPllen];
+	ulong	endsource[IPllen];
+
 	uchar	gate[IPaddrlen];
 };
 
@@ -516,17 +562,16 @@
 		V4route v4;
 	};
 };
-extern void	v4addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type);
-extern void	v6addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type);
-extern void	v4delroute(Fs *f, uchar *a, uchar *mask, int dolock);
-extern void	v6delroute(Fs *f, uchar *a, uchar *mask, int dolock);
-extern Route*	v4lookup(Fs *f, uchar *a, Conv *c);
-extern Route*	v6lookup(Fs *f, uchar *a, Conv *c);
+
+extern void	addroute(Fs *f, uchar *a, uchar *mask, uchar *s, uchar *smask, uchar *gate, int type, Ipifc *ifc, char *tag);
+extern void	remroute(Fs *f, uchar *a, uchar *mask, uchar *s, uchar *smask, uchar *gate, int type, Ipifc *ifc, char *tag);
+extern Route*	v4lookup(Fs *f, uchar *a, uchar *s, Routehint *h);
+extern Route*	v6lookup(Fs *f, uchar *a, uchar *s, Routehint *h);
+extern Route*	v4source(Fs *f, uchar *a, uchar *s);
+extern Route*	v6source(Fs *f, uchar *a, uchar *s);
 extern long	routeread(Fs *f, char*, ulong, int);
 extern long	routewrite(Fs *f, Chan*, char*, int);
-extern void	routetype(int, char*);
-extern void	ipwalkroutes(Fs*, Routewalk*);
-extern void	convroute(Route*, uchar*, uchar*, uchar*, char*, int*);
+extern void	routetype(int type, char p[8]);
 
 /*
  *  devip.c
@@ -543,7 +588,6 @@
 };
 
 extern IPaux*	newipaux(char*, char*);
-extern void	setlport(Conv*);
 
 /*
  *  arp.c
@@ -552,18 +596,16 @@
 {
 	uchar	ip[IPaddrlen];
 	uchar	mac[MAClen];
-	Medium	*type;			/* media type */
-	Arpent*	hash;
-	Block*	hold;
-	Block*	last;
-	uint	ctime;			/* time entry was created or refreshed */
-	uint	utime;			/* time entry was last used */
-	uchar	state;
+	Arpent	*hash;
 	Arpent	*nextrxt;		/* re-transmit chain */
-	uint	rtime;			/* time for next retransmission */
-	uchar	rxtsrem;
+	Block	*hold;
+	Block	*last;
 	Ipifc	*ifc;
 	uchar	ifcid;			/* must match ifc->id */
+	uchar	state;
+	uchar	rxtsrem;		/* re-transmissions remaining */
+	ulong	ctime;			/* time entry was created or refreshed */
+	ulong	utime;			/* time entry was last used */
 };
 
 extern void	arpinit(Fs*);
@@ -572,15 +614,17 @@
 extern Arpent*	arpget(Arp*, Block *bp, int version, Ipifc *ifc, uchar *ip, uchar *h);
 extern void	arprelease(Arp*, Arpent *a);
 extern Block*	arpresolve(Arp*, Arpent *a, Medium *type, uchar *mac);
-extern void	arpenter(Fs*, int version, uchar *ip, uchar *mac, int len, int norefresh);
+extern int	arpenter(Fs*, int version, uchar *ip, uchar *mac, int n, uchar *ia, Ipifc *ifc, int refresh);
+extern void	ndpsendsol(Fs*, Ipifc*, Arpent*);
 
 /*
  * ipaux.c
  */
 
-extern int	myetheraddr(uchar*, char*);
-extern ulong	parseip(uchar*, char*);
-extern ulong	parseipmask(uchar*, char*);
+extern int	parseether(uchar*, char*);
+extern vlong	parseip(uchar*, char*);
+extern vlong	parseipmask(uchar*, char*, int);
+extern vlong	parseipandmask(uchar*, uchar*, char*, char*);
 extern char*	v4parseip(uchar*, char*);
 extern void	maskip(uchar *from, uchar *mask, uchar *to);
 extern int	parsemac(uchar *to, char *from, int len);
@@ -589,12 +633,10 @@
 extern void	v4tov6(uchar *v6, uchar *v4);
 extern int	v6tov4(uchar *v4, uchar *v6);
 extern int	eipfmt(Fmt*);
+extern int	convipvers(Conv *c);
 
 #define	ipmove(x, y) memmove(x, y, IPaddrlen)
 #define	ipcmp(x, y) ( (x)[IPaddrlen-1] != (y)[IPaddrlen-1] || memcmp(x, y, IPaddrlen) )
- 
-#define	ip4move(x, y) memmove(x, y, IPv4addrlen)
-#define	ip4cmp(x, y) ( (x)[IPv4addrlen-1] != (y)[IPv4addrlen-1] || memcmp(x, y, IPv4addrlen) )
 
 extern uchar IPv4bcast[IPaddrlen];
 extern uchar IPv4bcastobs[IPaddrlen];
@@ -612,7 +654,6 @@
 extern Medium	ethermedium;
 extern Medium	nullmedium;
 extern Medium	pktmedium;
-extern Medium	tripmedium;
 
 /*
  *  ipifc.c
@@ -619,33 +660,24 @@
  */
 extern Medium*	ipfindmedium(char *name);
 extern void	addipmedium(Medium *med);
+extern void	ipifcoput(Ipifc *ifc, Block *bp, int version, uchar *ip);
 extern int	ipforme(Fs*, uchar *addr);
-extern int	iptentative(Fs*, uchar *addr);
-extern int	ipisbm(uchar *);
-extern int	ipismulticast(uchar *);
-extern Ipifc*	findipifc(Fs*, uchar *remote, int type);
-extern void	findprimaryip(Fs*, uchar*);
+extern int	ipismulticast(uchar *ip);
+extern Ipifc*	findipifc(Fs*, uchar *local, uchar *remote, int type);
+extern Ipifc*	findipifcstr(Fs *f, char *s);
 extern void	findlocalip(Fs*, uchar *local, uchar *remote);
-extern int	ipv4local(Ipifc *ifc, uchar *addr);
-extern int	ipv6local(Ipifc *ifc, uchar *addr);
-extern int	ipv6anylocal(Ipifc *ifc, uchar *addr);
+extern int	ipv4local(Ipifc *ifc, uchar *local, int prefixlen, uchar *remote);
+extern int	ipv6local(Ipifc *ifc, uchar *local, int prefixlen, uchar *remote);
 extern Iplifc*	iplocalonifc(Ipifc *ifc, uchar *ip);
+extern Iplifc*	ipremoteonifc(Ipifc *ifc, uchar *ip);
 extern int	ipproxyifc(Fs *f, Ipifc *ifc, uchar *ip);
-extern int	ipismulticast(uchar *ip);
-extern int	ipisbooting(void);
-extern int	ipifccheckin(Ipifc *ifc, Medium *med);
-extern void	ipifccheckout(Ipifc *ifc);
-extern int	ipifcgrab(Ipifc *ifc);
-extern void	ipifcaddroute(Fs*, int, uchar*, uchar*, uchar*, int);
-extern void	ipifcremroute(Fs*, int, uchar*, uchar*);
 extern void	ipifcremmulti(Conv *c, uchar *ma, uchar *ia);
 extern void	ipifcaddmulti(Conv *c, uchar *ma, uchar *ia);
 extern char*	ipifcrem(Ipifc *ifc, char **argv, int argc);
 extern char*	ipifcadd(Ipifc *ifc, char **argv, int argc, int tentative, Iplifc *lifcp);
 extern long	ipselftabread(Fs*, char *a, ulong offset, int n);
-extern char*	ipifcaddpref6(Ipifc *ifc, char**argv, int argc);
-extern void	ipsendra6(Fs *f, int on);
-
+extern char*	ipifcadd6(Ipifc *ifc, char**argv, int argc);
+extern char*	ipifcremove6(Ipifc *ifc, char**argv, int argc);
 /*
  *  ip.c
  */
@@ -652,37 +684,26 @@
 extern void	iprouting(Fs*, int);
 extern void	icmpnoconv(Fs*, Block*);
 extern void	icmpcantfrag(Fs*, Block*, int);
-extern void	icmpttlexceeded(Fs*, uchar*, Block*);
+extern void	icmpttlexceeded(Fs*, Ipifc*, Block*);
 extern ushort	ipcsum(uchar*);
 extern void	ipiput4(Fs*, Ipifc*, Block*);
 extern void	ipiput6(Fs*, Ipifc*, Block*);
-extern int	ipoput4(Fs*, Block*, int, int, int, Conv*);
-extern int	ipoput6(Fs*, Block*, int, int, int, Conv*);
+extern int	ipoput4(Fs*, Block*, int, int, int, Routehint*);
+extern int	ipoput6(Fs*, Block*, int, int, int, Routehint*);
 extern int	ipstats(Fs*, char*, int);
 extern ushort	ptclbsum(uchar*, int);
 extern ushort	ptclcsum(Block*, int, int);
 extern void	ip_init(Fs*);
-extern void	update_mtucache(uchar*, ulong);
-extern ulong	restrict_mtu(uchar*, ulong);
+extern void	ip_init_6(Fs*);
 
 /*
  * bootp.c
  */
-char*	(*bootp)(Ipifc*);
-int	(*bootpread)(char*, ulong, int);
+extern int	bootpread(char*, ulong, int);
 
 /*
- *  iprouter.c
- */
-void	useriprouter(Fs*, Ipifc*, Block*);
-void	iprouteropen(Fs*);
-void	iprouterclose(Fs*);
-long	iprouterread(Fs*, void*, int);
-
-/*
  *  resolving inferno/plan9 differences
  */
-Chan*		commonfdtochan(int, int, int, int);
 char*		commonuser(void);
 char*		commonerror(void);
 
@@ -695,15 +716,3 @@
  *  global to all of the stack
  */
 extern void	(*igmpreportfn)(Ipifc*, uchar*);
-
-/*
- * nat.c
- */
-extern int	nato(Block*, Ipifc*, Fs*);
-extern void	nati(Block*, Ipifc*);
-extern int	natgc(uchar);
-
-extern int	addnataddr(uchar*, uchar*, Iplifc*);
-extern int	removenataddr(uchar*, uchar*, Iplifc*);
-extern void	shownataddr(void);
-extern void flushnataddr(void);
--- a/os/ip/ipaux.c
+++ b/os/ip/ipaux.c
@@ -5,49 +5,8 @@
 #include	"fns.h"
 #include	"../port/error.h"
 #include	"ip.h"
-#include  "ipv6.h"
+#include	"ipv6.h"
 
-/*
- *  well known IP addresses
- */
-uchar IPv4bcast[IPaddrlen] = {
-	0, 0, 0, 0,
-	0, 0, 0, 0,
-	0, 0, 0xff, 0xff,
-	0xff, 0xff, 0xff, 0xff
-};
-uchar IPv4allsys[IPaddrlen] = {
-	0, 0, 0, 0,
-	0, 0, 0, 0,
-	0, 0, 0xff, 0xff,
-	0xe0, 0, 0, 0x01
-};
-uchar IPv4allrouter[IPaddrlen] = {
-	0, 0, 0, 0,
-	0, 0, 0, 0,
-	0, 0, 0xff, 0xff,
-	0xe0, 0, 0, 0x02
-};
-uchar IPallbits[IPaddrlen] = {
-	0xff, 0xff, 0xff, 0xff,
-	0xff, 0xff, 0xff, 0xff,
-	0xff, 0xff, 0xff, 0xff,
-	0xff, 0xff, 0xff, 0xff
-};
-
-uchar IPnoaddr[IPaddrlen];
-
-/*
- *  prefix of all v4 addresses
- */
-uchar v4prefix[IPaddrlen] = {
-	0, 0, 0, 0,
-	0, 0, 0, 0,
-	0, 0, 0xff, 0xff,
-	0, 0, 0, 0
-};
-
-
 char *v6hdrtypes[Maxhdrtype] =
 {
 	[HBH]		"HopbyHop",
@@ -54,7 +13,7 @@
 	[ICMP]		"ICMP",
 	[IGMP]		"IGMP",
 	[GGP]		"GGP",
-	[IPINIP]		"IP",
+	[IPINIP]	"IP",
 	[ST]		"ST",
 	[TCP]		"TCP",
 	[UDP]		"UDP",
@@ -87,6 +46,7 @@
 	0, 0, 0, 0,
 	0, 0, 0, 0x01
 };
+
 uchar v6linklocal[IPaddrlen] = {
 	0xfe, 0x80, 0, 0,
 	0, 0, 0, 0,
@@ -99,26 +59,8 @@
 	0, 0, 0, 0,
 	0, 0, 0, 0
 };
-int v6llpreflen = 8;	// link-local prefix length
-uchar v6sitelocal[IPaddrlen] = {
-	0xfe, 0xc0, 0, 0,
-	0, 0, 0, 0,
-	0, 0, 0, 0,
-	0, 0, 0, 0
-};
-uchar v6sitelocalmask[IPaddrlen] = {
-	0xff, 0xff, 0xff, 0xff,
-	0xff, 0xff, 0xff, 0xff,
-	0, 0, 0, 0,
-	0, 0, 0, 0
-};
-int v6slpreflen = 6;	// site-local prefix length
-uchar v6glunicast[IPaddrlen] = {
-	0x08, 0, 0, 0,
-	0, 0, 0, 0,
-	0, 0, 0, 0,
-	0, 0, 0, 0
-};
+int v6llpreflen = 8;	/* link-local prefix length in bytes */
+
 uchar v6multicast[IPaddrlen] = {
 	0xff, 0, 0, 0,
 	0, 0, 0, 0,
@@ -131,7 +73,8 @@
 	0, 0, 0, 0,
 	0, 0, 0, 0
 };
-int v6mcpreflen = 1;	// multicast prefix length
+int v6mcpreflen = 1;	/* multicast prefix length */
+
 uchar v6allnodesN[IPaddrlen] = {
 	0xff, 0x01, 0, 0,
 	0, 0, 0, 0,
@@ -138,6 +81,12 @@
 	0, 0, 0, 0,
 	0, 0, 0, 0x01
 };
+uchar v6allroutersN[IPaddrlen] = {
+	0xff, 0x01, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0, 0x02
+};
 uchar v6allnodesNmask[IPaddrlen] = {
 	0xff, 0xff, 0, 0,
 	0, 0, 0, 0,
@@ -144,7 +93,8 @@
 	0, 0, 0, 0,
 	0, 0, 0, 0
 };
-int v6aNpreflen = 2;	// all nodes (N) prefix
+int v6aNpreflen = 2;	/* all nodes (N) prefix */
+
 uchar v6allnodesL[IPaddrlen] = {
 	0xff, 0x02, 0, 0,
 	0, 0, 0, 0,
@@ -151,19 +101,6 @@
 	0, 0, 0, 0,
 	0, 0, 0, 0x01
 };
-uchar v6allnodesLmask[IPaddrlen] = {
-	0xff, 0xff, 0, 0,
-	0, 0, 0, 0,
-	0, 0, 0, 0,
-	0, 0, 0, 0
-};
-int v6aLpreflen = 2;	// all nodes (L) prefix
-uchar v6allroutersN[IPaddrlen] = {
-	0xff, 0x01, 0, 0,
-	0, 0, 0, 0,
-	0, 0, 0, 0,
-	0, 0, 0, 0x02
-};
 uchar v6allroutersL[IPaddrlen] = {
 	0xff, 0x02, 0, 0,
 	0, 0, 0, 0,
@@ -170,12 +107,14 @@
 	0, 0, 0, 0,
 	0, 0, 0, 0x02
 };
-uchar v6allroutersS[IPaddrlen] = {
-	0xff, 0x05, 0, 0,
+uchar v6allnodesLmask[IPaddrlen] = {
+	0xff, 0xff, 0, 0,
 	0, 0, 0, 0,
 	0, 0, 0, 0,
-	0, 0, 0, 0x02
+	0, 0, 0, 0
 };
+int v6aLpreflen = 2;	/* all nodes (L) prefix */
+
 uchar v6solicitednode[IPaddrlen] = {
 	0xff, 0x02, 0, 0,
 	0, 0, 0, 0,
@@ -190,9 +129,6 @@
 };
 int v6snpreflen = 13;
 
-
-
-
 ushort
 ptclcsum(Block *bp, int offset, int len)
 {
@@ -215,7 +151,7 @@
 	if(bp->next == nil) {
 		if(blocklen < len)
 			len = blocklen;
-		return ~ptclbsum(addr, len) & 0xffff;
+		return ptclbsum(addr, len) ^ 0xffff;
 	}
 
 	losum = 0;
@@ -247,7 +183,7 @@
 	while((csum = losum>>16) != 0)
 		losum = csum + (losum & 0xffff);
 
-	return ~losum & 0xffff;
+	return losum ^ 0xffff;
 }
 
 enum
@@ -255,306 +191,9 @@
 	Isprefix= 16,
 };
 
-static uchar prefixvals[256] =
-{
-[0x00] 0 | Isprefix,
-[0x80] 1 | Isprefix,
-[0xC0] 2 | Isprefix,
-[0xE0] 3 | Isprefix,
-[0xF0] 4 | Isprefix,
-[0xF8] 5 | Isprefix,
-[0xFC] 6 | Isprefix,
-[0xFE] 7 | Isprefix,
-[0xFF] 8 | Isprefix,
-};
-
-int
-eipfmt(Fmt *f)
-{
-	char buf[5*8];
-	static char *efmt = "%.2lux%.2lux%.2lux%.2lux%.2lux%.2lux";
-	static char *ifmt = "%d.%d.%d.%d";
-	uchar *p, ip[16];
-	ulong *lp;
-	ushort s;
-	int i, j, n, eln, eli;
-
-	switch(f->r) {
-	case 'E':		/* Ethernet address */
-		p = va_arg(f->args, uchar*);
-		return fmtprint(f, efmt, p[0], p[1], p[2], p[3], p[4], p[5]);
-
-	case 'I':		/* Ip address */
-		p = va_arg(f->args, uchar*);
-common:
-		if(memcmp(p, v4prefix, 12) == 0)
-			return fmtprint(f, ifmt, p[12], p[13], p[14], p[15]);
-
-		/* find longest elision */
-		eln = eli = -1;
-		for(i = 0; i < 16; i += 2){
-			for(j = i; j < 16; j += 2)
-				if(p[j] != 0 || p[j+1] != 0)
-					break;
-			if(j > i && j - i > eln){
-				eli = i;
-				eln = j - i;
-			}
-		}
-
-		/* print with possible elision */
-		n = 0;
-		for(i = 0; i < 16; i += 2){
-			if(i == eli){
-				n += sprint(buf+n, "::");
-				i += eln;
-				if(i >= 16)
-					break;
-			} else if(i != 0)
-				n += sprint(buf+n, ":");
-			s = (p[i]<<8) + p[i+1];
-			n += sprint(buf+n, "%ux", s);
-		}
-		return fmtstrcpy(f, buf);
-
-	case 'i':		/* v6 address as 4 longs */
-		lp = va_arg(f->args, ulong*);
-		for(i = 0; i < 4; i++)
-			hnputl(ip+4*i, *lp++);
-		p = ip;
-		goto common;
-
-	case 'V':		/* v4 ip address */
-		p = va_arg(f->args, uchar*);
-		return fmtprint(f, ifmt, p[0], p[1], p[2], p[3]);
-
-	case 'M':		/* ip mask */
-		p = va_arg(f->args, uchar*);
-
-		/* look for a prefix mask */
-		for(i = 0; i < 16; i++)
-			if(p[i] != 0xff)
-				break;
-		if(i < 16){
-			if((prefixvals[p[i]] & Isprefix) == 0)
-				goto common;
-			for(j = i+1; j < 16; j++)
-				if(p[j] != 0)
-					goto common;
-			n = 8*i + (prefixvals[p[i]] & ~Isprefix);
-		} else
-			n = 8*16;
-
-		/* got one, use /xx format */
-		return fmtprint(f, "/%d", n);
-	}
-	return fmtstrcpy(f, "(eipfmt)");
-}
-
 #define CLASS(p) ((*(uchar*)(p))>>6)
 
-extern char*
-v4parseip(uchar *to, char *from)
-{
-	int i;
-	char *p;
-
-	p = from;
-	for(i = 0; i < 4 && *p; i++){
-		to[i] = strtoul(p, &p, 0);
-		if(*p == '.')
-			p++;
-	}
-	switch(CLASS(to)){
-	case 0:	/* class A - 1 uchar net */
-	case 1:
-		if(i == 3){
-			to[3] = to[2];
-			to[2] = to[1];
-			to[1] = 0;
-		} else if(i == 2){
-			to[3] = to[1];
-			to[1] = 0;
-		}
-		break;
-	case 2:	/* class B - 2 uchar net */
-		if(i == 3){
-			to[3] = to[2];
-			to[2] = 0;
-		}
-		break;
-	}
-	return p;
-}
-
-int
-isv4(uchar *ip)
-{
-	return memcmp(ip, v4prefix, IPv4off) == 0;
-}
-
-
-/*
- *  the following routines are unrolled with no memset's to speed
- *  up the usual case
- */
 void
-v4tov6(uchar *v6, uchar *v4)
-{
-	v6[0] = 0;
-	v6[1] = 0;
-	v6[2] = 0;
-	v6[3] = 0;
-	v6[4] = 0;
-	v6[5] = 0;
-	v6[6] = 0;
-	v6[7] = 0;
-	v6[8] = 0;
-	v6[9] = 0;
-	v6[10] = 0xff;
-	v6[11] = 0xff;
-	v6[12] = v4[0];
-	v6[13] = v4[1];
-	v6[14] = v4[2];
-	v6[15] = v4[3];
-}
-
-int
-v6tov4(uchar *v4, uchar *v6)
-{
-	if(v6[0] == 0
-	&& v6[1] == 0
-	&& v6[2] == 0
-	&& v6[3] == 0
-	&& v6[4] == 0
-	&& v6[5] == 0
-	&& v6[6] == 0
-	&& v6[7] == 0
-	&& v6[8] == 0
-	&& v6[9] == 0
-	&& v6[10] == 0xff
-	&& v6[11] == 0xff)
-	{
-		v4[0] = v6[12];
-		v4[1] = v6[13];
-		v4[2] = v6[14];
-		v4[3] = v6[15];
-		return 0;
-	} else {
-		memset(v4, 0, 4);
-		return -1;
-	}
-}
-
-ulong
-parseip(uchar *to, char *from)
-{
-	int i, elipsis = 0, v4 = 1;
-	ulong x;
-	char *p, *op;
-
-	memset(to, 0, IPaddrlen);
-	p = from;
-	for(i = 0; i < 16 && *p; i+=2){
-		op = p;
-		x = strtoul(p, &p, 16);
-		if(*p == '.' || (*p == 0 && i == 0)){
-			p = v4parseip(to+i, op);
-			i += 4;
-			break;
-		} else {
-			to[i] = x>>8;
-			to[i+1] = x;
-		}
-		if(*p == ':'){
-			v4 = 0;
-			if(*++p == ':'){
-				elipsis = i+2;
-				p++;
-			}
-		}
-	}
-	if(i < 16){
-		memmove(&to[elipsis+16-i], &to[elipsis], i-elipsis);
-		memset(&to[elipsis], 0, 16-i);
-	}
-	if(v4){
-		to[10] = to[11] = 0xff;
-		return nhgetl(to+12);
-	} else
-		return 6;
-}
-
-/*
- *  hack to allow ip v4 masks to be entered in the old
- *  style
- */
-ulong
-parseipmask(uchar *to, char *from)
-{
-	ulong x;
-	int i;
-	uchar *p;
-
-	if(*from == '/'){
-		/* as a number of prefix bits */
-		i = atoi(from+1);
-		if(i < 0)
-			i = 0;
-		if(i > 128)
-			i = 128;
-		memset(to, 0, IPaddrlen);
-		for(p = to; i >= 8; i -= 8)
-			*p++ = 0xff;
-		if(i > 0)
-			*p = ~((1<<(8-i))-1);
-		x = nhgetl(to+IPv4off);
-	} else {
-		/* as a straight bit mask */
-		x = parseip(to, from);
-		if(memcmp(to, v4prefix, IPv4off) == 0)
-			memset(to, 0xff, IPv4off);
-	}
-	return x;
-}
-
-void
-maskip(uchar *from, uchar *mask, uchar *to)
-{
-	int i;
-
-	for(i = 0; i < IPaddrlen; i++)
-		to[i] = from[i] & mask[i];
-}
-
-uchar classmask[4][16] = {
-	0xff,0xff,0xff,0xff,  0xff,0xff,0xff,0xff,  0xff,0xff,0xff,0xff,  0xff,0x00,0x00,0x00,
-	0xff,0xff,0xff,0xff,  0xff,0xff,0xff,0xff,  0xff,0xff,0xff,0xff,  0xff,0x00,0x00,0x00,
-	0xff,0xff,0xff,0xff,  0xff,0xff,0xff,0xff,  0xff,0xff,0xff,0xff,  0xff,0xff,0x00,0x00,
-	0xff,0xff,0xff,0xff,  0xff,0xff,0xff,0xff,  0xff,0xff,0xff,0xff,  0xff,0xff,0xff,0x00,
-};
-
-uchar*
-defmask(uchar *ip)
-{
-	if(isv4(ip))
-		return classmask[ip[IPv4off]>>6];
-	else {
-		if(ipcmp(ip, v6loopback) == 0)
-			return IPallbits;
-		else if(memcmp(ip, v6linklocal, v6llpreflen) == 0)
-			return v6linklocalmask;
-		else if(memcmp(ip, v6sitelocal, v6slpreflen) == 0)
-			return v6sitelocalmask;
-		else if(memcmp(ip, v6solicitednode, v6snpreflen) == 0)
-			return v6solicitednodemask;
-		else if(memcmp(ip, v6multicast, v6mcpreflen) == 0)
-			return v6multicastmask;
-		return IPallbits;
-	}
-}
-
-void
 ipv62smcast(uchar *smcast, uchar *a)
 {
 	assert(IPaddrlen == 16);
@@ -599,7 +238,7 @@
 ulong
 iphash(uchar *sa, ushort sp, uchar *da, ushort dp)
 {
-	return ((sa[IPaddrlen-1]<<24) ^ (sp << 16) ^ (da[IPaddrlen-1]<<8) ^ dp ) % Nhash;
+	return ((sa[IPaddrlen-1]<<24) ^ (sp << 16) ^ (da[IPaddrlen-1]<<8) ^ dp ) % Nipht;
 }
 
 void
@@ -678,7 +317,7 @@
 			return c;
 		}
 	}
-	
+
 	/* match local address and port */
 	hv = iphash(IPnoaddr, 0, da, dp);
 	for(h = ht->tab[hv]; h != nil; h = h->next){
@@ -690,7 +329,7 @@
 			return c;
 		}
 	}
-	
+
 	/* match just port */
 	hv = iphash(IPnoaddr, 0, IPnoaddr, dp);
 	for(h = ht->tab[hv]; h != nil; h = h->next){
@@ -702,7 +341,7 @@
 			return c;
 		}
 	}
-	
+
 	/* match local address */
 	hv = iphash(IPnoaddr, 0, da, 0);
 	for(h = ht->tab[hv]; h != nil; h = h->next){
@@ -714,7 +353,7 @@
 			return c;
 		}
 	}
-	
+
 	/* look for something that matches anything */
 	hv = iphash(IPnoaddr, 0, IPnoaddr, 0);
 	for(h = ht->tab[hv]; h != nil; h = h->next){
@@ -726,4 +365,13 @@
 	}
 	unlock(ht);
 	return nil;
+}
+
+int
+convipvers(Conv *c)
+{
+	if(isv4(c->raddr) && isv4(c->laddr) || ipcmp(c->raddr, IPnoaddr) == 0)	/* both ends v4, or remote address is IPnoaddr */
+		return V4;
+	else
+		return V6;	/* any other address combination */
 }
--- a/os/ip/ipifc.c
+++ b/os/ip/ipifc.c
@@ -11,17 +11,14 @@
 #define DPRINT if(0)print
 
 enum {
-	Maxmedia = 32,
-	Nself = Maxmedia*5,
-	NHASH = (1<<6),
-	NCACHE = 256,
-	QMAX = 64*1024-1,
+	Maxmedia	= 32,
+	Nself		= Maxmedia*5,
+	NHASH		= 1<<6,
+	NCACHE		= 256,
+	QMAX		= 192*1024-1,
 };
 
-Medium *media[Maxmedia] =
-{
-	0
-};
+Medium *media[Maxmedia] = { 0 };
 
 /*
  *  cache of local addresses (addresses we answer to)
@@ -29,12 +26,10 @@
 struct Ipself
 {
 	uchar	a[IPaddrlen];
-	Ipself	*hnext;		/* next address in the hash table */
+	Ipself	*next;		/* next address in the hash table */
 	Iplink	*link;		/* binding twixt Ipself and Ipifc */
 	ulong	expire;
 	uchar	type;		/* type of address */
-	int	ref;
-	Ipself	*next;		/* free list */
 };
 
 struct Ipselftab
@@ -64,11 +59,47 @@
 
 static void	addselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a, int type);
 static void	remselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a);
-static char*	ipifcjoinmulti(Ipifc *ifc, char **argv, int argc);
-static char*	ipifcleavemulti(Ipifc *ifc, char **argv, int argc);
-static void	ipifcregisterproxy(Fs*, Ipifc*, uchar*);
-static char*	ipifcremlifc(Ipifc*, Iplifc*);
+static void	ipifcregisteraddr(Fs*, Ipifc*, Iplifc*, uchar*);
+static void	ipifcregisterproxy(Fs*, Ipifc*, uchar*, int);
+static char*	ipifcremlifc(Ipifc*, Iplifc**);
 
+static char Ebound[] = "interface already bound";
+static char Eunbound[] = "interface not bound";
+
+enum {
+	unknownv6,		/* UGH */
+	unspecifiedv6,
+	linklocalv6,
+	globalv6,
+};
+
+static int
+v6addrtype(uchar *addr)
+{
+	if(isv4(addr) || ipcmp(addr, IPnoaddr) == 0)
+		return unknownv6;	/* v4 address or IPnoaddr: not classified */
+	else if(islinklocal(addr) || ipcmp(addr, v6loopback) == 0 ||
+	    isv6mcast(addr) && (addr[1] & 0xF) <= Link_local_scop)	/* multicast: low nibble of addr[1] at most Link_local_scop */
+		return linklocalv6;	/* link-local, loopback, or narrowly scoped multicast */
+	else
+		return globalv6;	/* everything else */
+}
+
+static int
+comprefixlen(uchar *a, uchar *b, int n)
+{
+	int i, c;
+	/* return the number of leading bits a and b have in common, looking at n bytes */
+	for(i = 0; i < n; i++){
+		if((c = a[i] ^ b[i]) == 0)
+			continue;	/* whole byte matches */
+		for(i <<= 3; (c & 0x80) == 0; i++)	/* byte index -> bit count, then one more per equal high-order bit */
+			c <<= 1;
+		return i;
+	}
+	return i << 3;	/* no differing byte found: all n*8 bits match */
+}
+
 /*
  *  link in a new medium
  */
@@ -121,7 +152,7 @@
 	wlock(ifc);
 	if(ifc->m != nil){
 		wunlock(ifc);
-		return "interface already bound";	
+		return Ebound;
 	}
 	if(waserror()){
 		wunlock(ifc);
@@ -142,18 +173,14 @@
 	ifc->m = m;
 	ifc->mintu = ifc->m->mintu;
 	ifc->maxtu = ifc->m->maxtu;
+	ifc->delay = 40;
+	ifc->speed = 0;
 	if(ifc->m->unbindonclose == 0)
 		ifc->conv->inuse++;
-	ifc->rp.mflag = 0;		// default not managed
-	ifc->rp.oflag = 0;
-	ifc->rp.maxraint = 600000;	// millisecs
-	ifc->rp.minraint = 200000;
-	ifc->rp.linkmtu = 0;		// no mtu sent
-	ifc->rp.reachtime = 0;
-	ifc->rp.rxmitra = 0;
-	ifc->rp.ttl = MAXTTL;
-	ifc->rp.routerlt = 3*(ifc->rp.maxraint);
 
+	/* default router parameters */
+	ifc->rp = c->p->f->v6p->rp;
+
 	/* any ancillary structures (like routes) no longer pertain */
 	ifc->ifcid++;
 
@@ -170,29 +197,44 @@
 
 /*
  *  detach a device from an interface, close the interface
- *  called with ifc->conv closed
  */
 static char*
 ipifcunbind(Ipifc *ifc)
 {
-	char *err;
+	Medium *m;
 
-	if(waserror()){
+	wlock(ifc);
+	m = ifc->m;
+	if(m == nil){
 		wunlock(ifc);
-		nexterror();
+		return Eunbound;
 	}
-	wlock(ifc);
 
-	/* dissociate routes */
-	if(ifc->m != nil && ifc->m->unbindonclose == 0)
-		ifc->conv->inuse--;
-	ifc->ifcid++;
+	/* disassociate logical interfaces (before zeroing ifc->arg) */
+	while(ifc->lifc != nil)
+		ipifcremlifc(ifc, &ifc->lifc);
 
 	/* disassociate device */
-	if(ifc->m != nil && ifc->m->unbind)
-		(*ifc->m->unbind)(ifc);
+	if(m->unbind != nil){
+		extern Medium nullmedium;
+
+		/*
+		 * unbind() might unlock the ifc, so change the medium
+		 * to the nullmedium to prevent packets from getting
+		 * sent while the medium is shutting down.
+		 */
+		ifc->m = &nullmedium;
+
+		if(!waserror()){
+			(*m->unbind)(ifc);
+			poperror();
+		}
+	}
+
 	memset(ifc->dev, 0, sizeof(ifc->dev));
 	ifc->arg = nil;
+
+	ifc->reflect = 0;
 	ifc->reassemble = 0;
 
 	/* close queues to stop queuing of packets */
@@ -200,26 +242,22 @@
 	qclose(ifc->conv->wq);
 	qclose(ifc->conv->sq);
 
-	/* disassociate logical interfaces */
-	while(ifc->lifc){
-		err = ipifcremlifc(ifc, ifc->lifc);
-		if(err)
-			error(err);
-	}
-
+	/* dissociate routes */
+	ifc->ifcid++;
+	if(m->unbindonclose == 0)
+		ifc->conv->inuse--;
 	ifc->m = nil;
 	wunlock(ifc);
-	poperror();
+
 	return nil;
 }
 
+char sfixedformat[] = "device %s maxtu %d sendra %d recvra %d mflag %d oflag %d"
+" maxraint %d minraint %d linkmtu %d reachtime %d rxmitra %d ttl %d routerlt %d"
+" pktin %lud pktout %lud errin %lud errout %lud speed %d delay %d\n";
 
-
-char sfixedformat[] = "device %s maxtu %d sendra %d recvra %d mflag %d oflag %d maxraint %d minraint %d linkmtu %d reachtime %d rxmitra %d ttl %d routerlt %d pktin %lud pktout %lud errin %lud errout %lud\n";
-
 char slineformat[] = "	%-40I %-10M %-40I %-12lud %-12lud\n";
 
-
 static int
 ipifcstate(Conv *c, char *state, int n)
 {
@@ -228,19 +266,18 @@
 	int m;
 
 	ifc = (Ipifc*)c->ptcl;
-
 	m = snprint(state, n, sfixedformat,
 		ifc->dev, ifc->maxtu, ifc->sendra6, ifc->recvra6,
 		ifc->rp.mflag, ifc->rp.oflag, ifc->rp.maxraint,
 		ifc->rp.minraint, ifc->rp.linkmtu, ifc->rp.reachtime,
 		ifc->rp.rxmitra, ifc->rp.ttl, ifc->rp.routerlt,
-		ifc->in, ifc->out, ifc->inerr, ifc->outerr);
+		ifc->in, ifc->out, ifc->inerr, ifc->outerr,
+		ifc->speed, ifc->delay);
 
 	rlock(ifc);
-	for(lifc = ifc->lifc; lifc && n > m; lifc = lifc->next)
-		m += snprint(state+m, n - m, slineformat,
-			lifc->local, lifc->mask, lifc->remote,
-			lifc->validlt, lifc->preflt);
+	for(lifc = ifc->lifc; lifc != nil && n > m; lifc = lifc->next)
+		m += snprint(state+m, n - m, slineformat, lifc->local,
+			lifc->mask, lifc->remote, lifc->validlt, lifc->preflt);
 	if(ifc->lifc == nil)
 		m += snprint(state+m, n - m, "\n");
 	runlock(ifc);
@@ -256,13 +293,11 @@
 	int m;
 
 	ifc = (Ipifc*)c->ptcl;
-
-	m = 0;
-
 	rlock(ifc);
-	for(lifc = ifc->lifc; lifc; lifc = lifc->next){
+	m = 0;
+	for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next){
 		m += snprint(state+m, n - m, "%-40.40I ->", lifc->local);
-		for(link = lifc->link; link; link = link->lifclink)
+		for(link = lifc->link; link != nil; link = link->lifclink)
 			m += snprint(state+m, n - m, " %-40.40I", link->self->a);
 		m += snprint(state+m, n - m, "\n");
 	}
@@ -279,6 +314,59 @@
 	return ifc->m != nil;
 }
 
+static void
+ipifcadjustburst(Ipifc *ifc)
+{
+	int burst;
+	/* recompute ifc->burst from delay and speed; never less than maxtu */
+	burst = ((vlong)ifc->delay * ifc->speed) / 8000;	/* vlong cast: product is computed in 64 bits */
+	if(burst < ifc->maxtu)
+		burst = ifc->maxtu;
+	ifc->burst = burst;
+}
+
+static void
+ipifcsetdelay(Ipifc *ifc, int delay)
+{
+	if(delay < 0)	/* clamp delay to the range 0..1000 */
+		delay = 0;
+	else if(delay > 1000)
+		delay = 1000;
+	ifc->delay = delay;
+	ipifcadjustburst(ifc);	/* burst is derived from delay */
+}
+
+static void
+ipifcsetspeed(Ipifc *ifc, int speed)
+{
+	if(speed < 0)	/* negative speeds are stored as 0 */
+		speed = 0;
+	ifc->speed = speed;
+	ifc->load = 0;	/* restart load accounting */
+	ipifcadjustburst(ifc);	/* burst is derived from speed */
+}
+
+void
+ipifcoput(Ipifc *ifc, Block *bp, int version, uchar *ip)
+{
+	if(ifc->speed){	/* limit output only when speed is non-zero */
+		ulong now = MACHP(0)->ticks;
+		int dt = TK2MS(now - ifc->ticks);	/* time since the previous call; presumably milliseconds -- TODO confirm TK2MS */
+		ifc->ticks = now;
+		ifc->load -= ((vlong)dt * ifc->speed) / 8000;	/* drain load for the elapsed time (bytes, if speed is bits/s -- TODO confirm) */
+		if(ifc->load < 0 || dt < 0 || dt > 1000)
+			ifc->load = 0;	/* fully drained, or dt outside 0..1000: start over */
+		else if(ifc->load > ifc->burst){
+			freeblist(bp);	/* over the burst allowance: drop the packet */
+			return;
+		}
+	}
+	bp = concatblock(bp);
+	ifc->load += BLEN(bp);	/* count the bytes handed to the medium */
+	ifc->m->bwrite(ifc, bp, version, ip);
+}
+
+
 /*
  *  called when a process writes to an interface's 'data'
  */
@@ -294,18 +382,15 @@
 		return;
 
 	ifc = (Ipifc*)c->ptcl;
-	if(!canrlock(ifc)){
-		freeb(bp);
-		return;
-	}
+	rlock(ifc);
 	if(waserror()){
 		runlock(ifc);
 		nexterror();
 	}
-	if(ifc->m == nil || ifc->m->pktin == nil)
-		freeb(bp);
-	else
+	if(ifc->m != nil && ifc->m->pktin != nil)
 		(*ifc->m->pktin)(c->p->f, ifc, bp);
+	else
+		freeb(bp);
 	runlock(ifc);
 	poperror();
 }
@@ -319,27 +404,26 @@
 	Ipifc *ifc;
 
 	c->rq = qopen(QMAX, 0, 0, 0);
-	c->sq = qopen(2*QMAX, 0, 0, 0);
 	c->wq = qopen(QMAX, Qkick, ipifckick, c);
+	c->sq = qopen(QMAX, 0, 0, 0);
+	if(c->rq == nil || c->wq == nil || c->sq == nil)
+		error(Enomem);
 	ifc = (Ipifc*)c->ptcl;
 	ifc->conv = c;
-	ifc->unbinding = 0;
 	ifc->m = nil;
+	ifc->reflect = 0;
 	ifc->reassemble = 0;
 }
 
 /*
  *  called after last close of ipifc data or ctl
- *  called with c locked, we must unlock
  */
 static void
 ipifcclose(Conv *c)
 {
-	Ipifc *ifc;
-	Medium *m;
+	Ipifc *ifc = (Ipifc*)c->ptcl;
+	Medium *m = ifc->m;
 
-	ifc = (Ipifc*)c->ptcl;
-	m = ifc->m;
 	if(m != nil && m->unbindonclose)
 		ipifcunbind(ifc);
 }
@@ -347,19 +431,17 @@
 /*
  *  change an interface's mtu
  */
-char*
-ipifcsetmtu(Ipifc *ifc, char **argv, int argc)
+static char*
+ipifcsetmtu(Ipifc *ifc, int mtu)
 {
-	int mtu;
+	Medium *m = ifc->m;
 
-	if(argc < 2)
+	if(m == nil)
+		return Eunbound;
+	if(mtu < m->mintu || mtu > m->maxtu)
 		return Ebadarg;
-	if(ifc->m == nil)
-		return Ebadarg;
-	mtu = strtoul(argv[1], 0, 0);
-	if(mtu < ifc->m->mintu || mtu > ifc->m->maxtu)
-		return Ebadarg;
 	ifc->maxtu = mtu;
+	ipifcadjustburst(ifc);
 	return nil;
 }
 
@@ -374,13 +456,8 @@
 	Iplifc *lifc, **l;
 	int i, type, mtu;
 	Fs *f;
-	int sendnbrdisc = 0;
 
-	if(ifc->m == nil)
-		return "ipifc not yet bound to device";
-
-	f = ifc->conv->p->f;
-
+	mtu = 0;
 	type = Rifc;
 	memset(ip, 0, IPaddrlen);
 	memset(mask, 0, IPaddrlen);
@@ -392,23 +469,21 @@
 		/* fall through */
 	case 5:
 		mtu = strtoul(argv[4], 0, 0);
-		if(mtu >= ifc->m->mintu && mtu <= ifc->m->maxtu)
-			ifc->maxtu = mtu;
 		/* fall through */
 	case 4:
-		parseip(ip, argv[1]);
-		parseipmask(mask, argv[2]);
-		parseip(rem, argv[3]);
+		if (parseipandmask(ip, mask, argv[1], argv[2]) == -1 || parseip(rem, argv[3]) == -1)
+			return Ebadip;
 		maskip(rem, mask, net);
 		break;
 	case 3:
-		parseip(ip, argv[1]);
-		parseipmask(mask, argv[2]);
+		if (parseipandmask(ip, mask, argv[1], argv[2]) == -1)
+			return Ebadip;
 		maskip(ip, mask, rem);
 		maskip(rem, mask, net);
 		break;
 	case 2:
-		parseip(ip, argv[1]);
+		if (parseip(ip, argv[1]) == -1)
+			return Ebadip;
 		memmove(mask, defmask(ip), IPaddrlen);
 		maskip(ip, mask, rem);
 		maskip(rem, mask, net);
@@ -415,26 +490,55 @@
 		break;
 	default:
 		return Ebadarg;
-		break;
 	}
-	if(isv4(ip))
+
+	/* check for point-to-point interface */
+	if(ipcmp(ip, v6loopback) != 0) /* skip v6 loopback, it's a special address */
+	if(ipcmp(mask, IPallbits) == 0)
+		type |= Rptpt;
+
+	if(isv4(ip) || ipcmp(ip, IPnoaddr) == 0){
+		type |= Rv4;
 		tentative = 0;
+	}
+
 	wlock(ifc);
+	if(ifc->m == nil){
+		wunlock(ifc);
+		return Eunbound;
+	}
+	f = ifc->conv->p->f;
+	if(waserror()){
+		wunlock(ifc);
+		return up->errstr;
+	}
 
+	if(mtu > 0)
+		ipifcsetmtu(ifc, mtu);
+
 	/* ignore if this is already a local address for this ifc */
-	for(lifc = ifc->lifc; lifc; lifc = lifc->next) {
-		if(ipcmp(lifc->local, ip) == 0) {
-			if(lifc->tentative != tentative)
-				lifc->tentative = tentative;
-			if(lifcp != nil) {
-				lifc->onlink = lifcp->onlink;
-				lifc->autoflag = lifcp->autoflag;
-				lifc->validlt = lifcp->validlt;
-				lifc->preflt = lifcp->preflt;
-				lifc->origint = lifcp->origint;
+	if((lifc = iplocalonifc(ifc, ip)) != nil){
+		if(lifcp != nil) {
+			if(!lifc->onlink && lifcp->onlink){
+				lifc->onlink = 1;
+				addroute(f, lifc->remote, lifc->mask, ip, IPallbits,
+					lifc->remote, lifc->type, ifc, tifc);
+				if(v6addrtype(ip) != linklocalv6)
+					addroute(f, lifc->remote, lifc->mask, ip, IPnoaddr,
+						lifc->remote, lifc->type, ifc, tifc);
 			}
-			goto out;
+			lifc->autoflag = lifcp->autoflag;
+			lifc->validlt = lifcp->validlt;
+			lifc->preflt = lifcp->preflt;
+			lifc->origint = lifcp->origint;
 		}
+		if(lifc->tentative != tentative){
+			lifc->tentative = tentative;
+			goto done;
+		}
+		wunlock(ifc);
+		poperror();
+		return nil;
 	}
 
 	/* add the address to the list of logical ifc's for this ifc */
@@ -443,6 +547,7 @@
 	ipmove(lifc->mask, mask);
 	ipmove(lifc->remote, rem);
 	ipmove(lifc->net, net);
+	lifc->type = type;
 	lifc->tentative = tentative;
 	if(lifcp != nil) {
 		lifc->onlink = lifcp->onlink;
@@ -450,39 +555,34 @@
 		lifc->validlt = lifcp->validlt;
 		lifc->preflt = lifcp->preflt;
 		lifc->origint = lifcp->origint;
+	} else {		/* default values */
+		lifc->onlink = lifc->autoflag = 1;
+		lifc->validlt = lifc->preflt = ~0UL;
+		lifc->origint = NOW / 1000;
 	}
-	else {		// default values
-		lifc->onlink = 1;
-		lifc->autoflag = 1;
-		lifc->validlt = 0xffffffff;
-		lifc->preflt = 0xffffffff;
-		lifc->origint = NOW / 10^3;
-	}
 	lifc->next = nil;
 
-	for(l = &ifc->lifc; *l; l = &(*l)->next)
+	for(l = &ifc->lifc; *l != nil; l = &(*l)->next)
 		;
 	*l = lifc;
 
-	/* check for point-to-point interface */
-	if(ipcmp(ip, v6loopback))  /* skip v6 loopback, it's a special address */
-	if(ipcmp(mask, IPallbits) == 0)
-		type |= Rptpt;
+	/* add route for this logical interface */
+	if(lifc->onlink){
+		addroute(f, rem, mask, ip, IPallbits, rem, type, ifc, tifc);
+		if(v6addrtype(ip) != linklocalv6)
+			addroute(f, rem, mask, ip, IPnoaddr, rem, type, ifc, tifc);
+	}
 
-	/* add local routes */
-	if(isv4(ip))
-		v4addroute(f, tifc, rem+IPv4off, mask+IPv4off, rem+IPv4off, type);
-	else
-		v6addroute(f, tifc, rem, mask, rem, type);
-
 	addselfcache(f, ifc, lifc, ip, Runi);
 
-	if((type & (Rproxy|Rptpt)) == (Rproxy|Rptpt)){
-		ipifcregisterproxy(f, ifc, rem);
-		goto out;
+	/* register proxy */
+	if(type & Rptpt){
+		if(type & Rproxy)
+			ipifcregisterproxy(f, ifc, rem, 1);
+		goto done;
 	}
 
-	if(isv4(ip) || ipcmp(ip, IPnoaddr) == 0) {
+	if(type & Rv4) {
 		/* add subnet directed broadcast address to the self cache */
 		for(i = 0; i < IPaddrlen; i++)
 			bcast[i] = (ip[i] & mask[i]) | ~mask[i];
@@ -504,174 +604,135 @@
 		for(i = 0; i < IPaddrlen; i++)
 			bcast[i] = (ip[i] & mask[i]) & mask[i];
 		addselfcache(f, ifc, lifc, bcast, Rbcast);
-		
+
 		addselfcache(f, ifc, lifc, IPv4bcast, Rbcast);
-	}
-	else {
+	} else {
 		if(ipcmp(ip, v6loopback) == 0) {
 			/* add node-local mcast address */
 			addselfcache(f, ifc, lifc, v6allnodesN, Rmulti);
 
 			/* add route for all node multicast */
-			v6addroute(f, tifc, v6allnodesN, v6allnodesNmask, v6allnodesN, Rmulti);
+			addroute(f, v6allnodesN, v6allnodesNmask,
+				ip, IPallbits,
+				v6allnodesN, Rmulti, ifc, tifc);
 		}
 
 		/* add all nodes multicast address */
 		addselfcache(f, ifc, lifc, v6allnodesL, Rmulti);
-		
+
 		/* add route for all nodes multicast */
-		v6addroute(f, tifc, v6allnodesL, v6allnodesLmask, v6allnodesL, Rmulti);
-		
+		addroute(f, v6allnodesL, v6allnodesLmask,
+			ip, IPallbits,
+			v6allnodesL, Rmulti, ifc, tifc);
+
 		/* add solicited-node multicast address */
 		ipv62smcast(bcast, ip);
 		addselfcache(f, ifc, lifc, bcast, Rmulti);
-
-		sendnbrdisc = 1;
 	}
 
-	/* register the address on this network for address resolution */
-	if(isv4(ip) && ifc->m->areg != nil)
-		(*ifc->m->areg)(ifc, ip);
-
-out:
+done:
 	wunlock(ifc);
-	if(tentative && sendnbrdisc)
-		icmpns(f, 0, SRC_UNSPEC, ip, TARG_MULTI, ifc->mac);
+	poperror();
+
+	rlock(ifc);
+	ipifcregisteraddr(f, ifc, lifc, ip);
+	runlock(ifc);
+
 	return nil;
 }
 
 /*
  *  remove a logical interface from an ifc
- *  always called with ifc wlock'd
+ *	called with ifc wlock'd
  */
 static char*
-ipifcremlifc(Ipifc *ifc, Iplifc *lifc)
+ipifcremlifc(Ipifc *ifc, Iplifc **l)
 {
-	Iplifc **l;
-	Fs *f;
+	Iplifc *lifc = *l;
+	Fs *f = ifc->conv->p->f;
 
-	f = ifc->conv->p->f;
-
-	/*
-	 *  find address on this interface and remove from chain.
-	 *  for pt to pt we actually specify the remote address as the
-	 *  addresss to remove.
-	 */
-	for(l = &ifc->lifc; *l != nil && *l != lifc; l = &(*l)->next)
-		;
-	if(*l == nil)
+	if(lifc == nil)
 		return "address not on this interface";
 	*l = lifc->next;
 
 	/* disassociate any addresses */
-	while(lifc->link)
+	while(lifc->link != nil)
 		remselfcache(f, ifc, lifc, lifc->link->self->a);
 
 	/* remove the route for this logical interface */
-	if(isv4(lifc->local))
-		v4delroute(f, lifc->remote+IPv4off, lifc->mask+IPv4off, 1);
-	else {
-		v6delroute(f, lifc->remote, lifc->mask, 1);
+	if(lifc->onlink){
+		remroute(f, lifc->remote, lifc->mask,
+			lifc->local, IPallbits,
+			lifc->remote, lifc->type, ifc, tifc);
+		if(v6addrtype(lifc->local) != linklocalv6)
+			remroute(f, lifc->remote, lifc->mask,
+				lifc->local, IPnoaddr,
+				lifc->remote, lifc->type, ifc, tifc);
+	}
+
+	/* unregister proxy */
+	if(lifc->type & Rptpt){
+		if(lifc->type & Rproxy)
+			ipifcregisterproxy(f, ifc, lifc->remote, 0);
+		goto done;
+	}
+
+	/* remove route for all nodes multicast */
+	if((lifc->type & Rv4) == 0){
 		if(ipcmp(lifc->local, v6loopback) == 0)
-			/* remove route for all node multicast */
-			v6delroute(f, v6allnodesN, v6allnodesNmask, 1);
-		else if(memcmp(lifc->local, v6linklocal, v6llpreflen) == 0)
-			/* remove route for all link multicast */
-			v6delroute(f, v6allnodesL, v6allnodesLmask, 1);
+			remroute(f, v6allnodesN, v6allnodesNmask,
+				lifc->local, IPallbits,
+				v6allnodesN, Rmulti, ifc, tifc);
+
+		remroute(f, v6allnodesL, v6allnodesLmask,
+			lifc->local, IPallbits,
+			v6allnodesL, Rmulti, ifc, tifc);
 	}
 
+done:
 	free(lifc);
 	return nil;
-
 }
 
 /*
  *  remove an address from an interface.
- *  called with c locked
  */
 char*
 ipifcrem(Ipifc *ifc, char **argv, int argc)
 {
-	uchar ip[IPaddrlen];
-	uchar mask[IPaddrlen];
-	uchar rem[IPaddrlen];
-	Iplifc *lifc;
-	char *rv;
+	uchar ip[IPaddrlen], mask[IPaddrlen], rem[IPaddrlen];
+	Iplifc *lifc, **l;
+	char *err;
 
 	if(argc < 3)
 		return Ebadarg;
-
-	parseip(ip, argv[1]);
-	parseipmask(mask, argv[2]);
+	if(parseipandmask(ip, mask, argv[1], argv[2]) == -1)
+		return Ebadip;
 	if(argc < 4)
 		maskip(ip, mask, rem);
-	else
-		parseip(rem, argv[3]);
+	else if(parseip(rem, argv[3]) == -1)
+		return Ebadip;
 
-	wlock(ifc);
-
 	/*
 	 *  find address on this interface and remove from chain.
 	 *  for pt to pt we actually specify the remote address as the
 	 *  addresss to remove.
 	 */
+	wlock(ifc);
+	l = &ifc->lifc;
 	for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next) {
-		if (memcmp(ip, lifc->local, IPaddrlen) == 0
-		&& memcmp(mask, lifc->mask, IPaddrlen) == 0
-		&& memcmp(rem, lifc->remote, IPaddrlen) == 0)
+		if(ipcmp(ip, lifc->local) == 0
+		&& ipcmp(mask, lifc->mask) == 0
+		&& ipcmp(rem, lifc->remote) == 0)
 			break;
+		l = &lifc->next;
 	}
-
-	rv = ipifcremlifc(ifc, lifc);
+	err = ipifcremlifc(ifc, l);
 	wunlock(ifc);
-	return rv;
+	return err;
 }
 
 /*
- * distribute routes to active interfaces like the
- * TRIP linecards
- */
-void
-ipifcaddroute(Fs *f, int vers, uchar *addr, uchar *mask, uchar *gate, int type)
-{
-	Medium *m;
-	Conv **cp, **e;
-	Ipifc *ifc;
-
-	e = &f->ipifc->conv[f->ipifc->nc];
-	for(cp = f->ipifc->conv; cp < e; cp++){
-		if(*cp != nil) {
-			ifc = (Ipifc*)(*cp)->ptcl;
-			m = ifc->m;
-			if(m == nil)
-				continue;
-			if(m->addroute != nil)
-				m->addroute(ifc, vers, addr, mask, gate, type);
-		}
-	}
-}
-
-void
-ipifcremroute(Fs *f, int vers, uchar *addr, uchar *mask)
-{
-	Medium *m;
-	Conv **cp, **e;
-	Ipifc *ifc;
-
-	e = &f->ipifc->conv[f->ipifc->nc];
-	for(cp = f->ipifc->conv; cp < e; cp++){
-		if(*cp != nil) {
-			ifc = (Ipifc*)(*cp)->ptcl;
-			m = ifc->m;
-			if(m == nil)
-				continue;
-			if(m->remroute != nil)
-				m->remroute(ifc, vers, addr, mask);
-		}
-	}
-}
-
-/*
  *  associate an address with the interface.  This wipes out any previous
  *  addresses.  This is a macro that means, remove all the old interfaces
  *  and add a new one.
@@ -679,170 +740,89 @@
 static char*
 ipifcconnect(Conv* c, char **argv, int argc)
 {
+	Ipifc *ifc = (Ipifc*)c->ptcl;
 	char *err;
-	Ipifc *ifc;
 
-	ifc = (Ipifc*)c->ptcl;
-
-	if(ifc->m == nil)
-		 return "ipifc not yet bound to device";
-
-	if(waserror()){
-		wunlock(ifc);
-		nexterror();
-	}
 	wlock(ifc);
-	while(ifc->lifc){
-		err = ipifcremlifc(ifc, ifc->lifc);
-		if(err)
-			error(err);
-	}
+	while(ifc->lifc != nil)
+		ipifcremlifc(ifc, &ifc->lifc);
 	wunlock(ifc);
-	poperror();
 
 	err = ipifcadd(ifc, argv, argc, 0, nil);
-	if(err)
+	if(err != nil)
 		return err;
 
 	Fsconnected(c, nil);
-
 	return nil;
 }
 
 char*
-ipifcsetpar6(Ipifc *ifc, char **argv, int argc)
+ipifcra6(Ipifc *ifc, char **argv, int argc)
 {
-	int i, argsleft, vmax = ifc->rp.maxraint, vmin = ifc->rp.minraint;
+	int i, argsleft;
+	uchar sendra, recvra;
+	Routerparams rp;
 
-	argsleft = argc - 1;
 	i = 1;
-
-	if(argsleft % 2 != 0)
+	argsleft = argc - 1;
+	if((argsleft % 2) != 0)
 		return Ebadarg;
 
+	sendra = ifc->sendra6;
+	recvra = ifc->recvra6;
+	rp = ifc->rp;
+
 	while (argsleft > 1) {
-		if(strcmp(argv[i],"recvra")==0)
-			ifc->recvra6 = (atoi(argv[i+1]) != 0);
-		else if(strcmp(argv[i],"sendra")==0)
-			ifc->sendra6 = (atoi(argv[i+1]) != 0);
-		else if(strcmp(argv[i],"mflag")==0)
-			ifc->rp.mflag = (atoi(argv[i+1]) != 0);
-		else if(strcmp(argv[i],"oflag")==0)
-			ifc->rp.oflag = (atoi(argv[i+1]) != 0);
-		else if(strcmp(argv[i],"maxraint")==0)
-			ifc->rp.maxraint = atoi(argv[i+1]);
-		else if(strcmp(argv[i],"minraint")==0)
-			ifc->rp.minraint = atoi(argv[i+1]);
-		else if(strcmp(argv[i],"linkmtu")==0)
-			ifc->rp.linkmtu = atoi(argv[i+1]);
-		else if(strcmp(argv[i],"reachtime")==0)
-			ifc->rp.reachtime = atoi(argv[i+1]);
-		else if(strcmp(argv[i],"rxmitra")==0)
-			ifc->rp.rxmitra = atoi(argv[i+1]);
-		else if(strcmp(argv[i],"ttl")==0)
-			ifc->rp.ttl = atoi(argv[i+1]);
-		else if(strcmp(argv[i],"routerlt")==0)
-			ifc->rp.routerlt = atoi(argv[i+1]);
+		if(strcmp(argv[i], "recvra") == 0)
+			recvra = atoi(argv[i+1]) != 0;
+		else if(strcmp(argv[i], "sendra") == 0)
+			sendra = atoi(argv[i+1]) != 0;
+		else if(strcmp(argv[i], "mflag") == 0)
+			rp.mflag = atoi(argv[i+1]) != 0;
+		else if(strcmp(argv[i], "oflag") == 0)
+			rp.oflag = atoi(argv[i+1]) != 0;
+		else if(strcmp(argv[i], "maxraint") == 0)
+			rp.maxraint = atoi(argv[i+1]);
+		else if(strcmp(argv[i], "minraint") == 0)
+			rp.minraint = atoi(argv[i+1]);
+		else if(strcmp(argv[i], "linkmtu") == 0)
+			rp.linkmtu = atoi(argv[i+1]);
+		else if(strcmp(argv[i], "reachtime") == 0)
+			rp.reachtime = atoi(argv[i+1]);
+		else if(strcmp(argv[i], "rxmitra") == 0)
+			rp.rxmitra = atoi(argv[i+1]);
+		else if(strcmp(argv[i], "ttl") == 0)
+			rp.ttl = atoi(argv[i+1]);
+		else if(strcmp(argv[i], "routerlt") == 0)
+			rp.routerlt = atoi(argv[i+1]);
 		else
-			return Ebadarg;	
+			return Ebadarg;
 
 		argsleft -= 2;
 		i += 2;
 	}
 
-	// consistency check
-	if(ifc->rp.maxraint < ifc->rp.minraint) {
-		ifc->rp.maxraint = vmax;
-		ifc->rp.minraint = vmin;
+	/* consistency check */
+	if(rp.maxraint < rp.minraint)
 		return Ebadarg;
-	}
 
-	return nil;
-}
+	ifc->rp = rp;
+	ifc->sendra6 = sendra;
+	ifc->recvra6 = recvra;
 
-char*
-ipifcsendra6(Ipifc *ifc, char **argv, int argc)
-{
-	int i;
-	
-	i = 0;
-	if(argc > 1)
-		i = atoi(argv[1]);
-	ifc->sendra6 = (i!=0);
 	return nil;
 }
 
-char*
-ipifcrecvra6(Ipifc *ifc, char **argv, int argc)
-{
-	int i;
-	
-	i = 0;
-	if(argc > 1)
-		i = atoi(argv[1]);
-	ifc->recvra6 = (i!=0);	
-	return nil;
-}
-
-char*
-ipifcnat(Ipifc *ifc, char **argv, int argc)
-{
-	uchar src[IPaddrlen], mask[IPaddrlen], dst[IPaddrlen];
-	Iplifc *lifc;
-
-	if(argc == 2){
-		if((strcmp(argv[1], "show") == 0)){
-			shownataddr();
-			return nil;
-		}else if((strcmp(argv[1], "flush") == 0)){
-			flushnataddr();
-			return nil;
-		}else
-			return Ebadarg;
-	}
-
-	if(argc != 5)
-		return Ebadarg;
-
-	if (parseip(src, argv[2]) == -1)
-		return Ebadip;
-
-	if (parseipmask(mask, argv[3]) == -1)
-		return Ebadip;
-
-	if (parseip(dst, argv[4]) == -1)
-		return Ebadip;
-
-	if((lifc=iplocalonifc(ifc, dst)) == nil)
-		return Ebadip;
-
-	if(strcmp(argv[1], "add") == 0){
-		if(addnataddr(src, mask, lifc) != 0)
-			return Ebadarg;
-	}else if(strcmp(argv[1], "remove") == 0){
-		if(removenataddr(src, mask, lifc) != 0)
-			return Ebadarg;
-	}else
-		return Ebadarg;
-
-	return nil;
-}
-
 /*
  *  non-standard control messages.
- *  called with c locked.
  */
 static char*
-ipifcctl(Conv* c, char**argv, int argc)
+ipifcctl(Conv* c, char **argv, int argc)
 {
-	Ipifc *ifc;
-	int i;
+	Ipifc *ifc = (Ipifc*)c->ptcl;
 
-	ifc = (Ipifc*)c->ptcl;
 	if(strcmp(argv[0], "add") == 0)
 		return ipifcadd(ifc, argv, argc, 0, nil);
-	else if(strcmp(argv[0], "bootp") == 0)
-		return bootp(ifc);
 	else if(strcmp(argv[0], "try") == 0)
 		return ipifcadd(ifc, argv, argc, 1, nil);
 	else if(strcmp(argv[0], "remove") == 0)
@@ -849,36 +829,38 @@
 		return ipifcrem(ifc, argv, argc);
 	else if(strcmp(argv[0], "unbind") == 0)
 		return ipifcunbind(ifc);
-	else if(strcmp(argv[0], "joinmulti") == 0)
-		return ipifcjoinmulti(ifc, argv, argc);
-	else if(strcmp(argv[0], "leavemulti") == 0)
-		return ipifcleavemulti(ifc, argv, argc);
 	else if(strcmp(argv[0], "mtu") == 0)
-		return ipifcsetmtu(ifc, argv, argc);
-	else if(strcmp(argv[0], "reassemble") == 0){
-		ifc->reassemble = 1;
+		return ipifcsetmtu(ifc, argc>1? strtoul(argv[1], 0, 0): 0);
+	else if(strcmp(argv[0], "speed") == 0){
+		ipifcsetspeed(ifc, argc>1? atoi(argv[1]): 0);
 		return nil;
 	}
+	else if(strcmp(argv[0], "delay") == 0){
+		ipifcsetdelay(ifc, argc>1? atoi(argv[1]): 0);
+		return nil;
+	}
 	else if(strcmp(argv[0], "iprouting") == 0){
-		i = 1;
-		if(argc > 1)
-			i = atoi(argv[1]);
-		iprouting(c->p->f, i);
+		iprouting(c->p->f, argc>1? atoi(argv[1]): 1);
 		return nil;
 	}
-	else if(strcmp(argv[0], "addpref6") == 0)
-		return ipifcaddpref6(ifc, argv, argc);
-	else if(strcmp(argv[0], "setpar6") == 0)
-		return ipifcsetpar6(ifc, argv, argc);
-	else if(strcmp(argv[0], "sendra6") == 0)
-		return ipifcsendra6(ifc, argv, argc);
-	else if(strcmp(argv[0], "recvra6") == 0)
-		return ipifcrecvra6(ifc, argv, argc);
-	else if(strcmp(argv[0], "nat") == 0)
-		return ipifcnat(ifc, argv, argc);
+	else if(strcmp(argv[0], "reflect") == 0){
+		ifc->reflect = argc>1? atoi(argv[1]): 1;
+		return nil;
+	}
+	else if(strcmp(argv[0], "reassemble") == 0){
+		ifc->reassemble = argc>1? atoi(argv[1]): 1;
+		return nil;
+	}
+	else if(strcmp(argv[0], "add6") == 0)
+		return ipifcadd6(ifc, argv, argc);
+	else if(strcmp(argv[0], "remove6") == 0)
+		return ipifcremove6(ifc, argv, argc);
+	else if(strcmp(argv[0], "ra6") == 0)
+		return ipifcra6(ifc, argv, argc);
 	return "unsupported ctl";
 }
 
+int
 ipifcstats(Proto *ipifc, char *buf, int len)
 {
 	return ipstats(ipifc->f, buf, len);
@@ -907,7 +889,7 @@
 	ipifc->nc = Maxmedia;
 	ipifc->ptclsize = sizeof(Ipifc);
 
-	f->ipifc = ipifc;			/* hack for ipifcremroute, findipifc, ... */
+	f->ipifc = ipifc;	/* hack for ipifcremroute, findipifc, ... */
 	f->self = smalloc(sizeof(Ipselftab));	/* hack for ipforme */
 
 	Fsproto(f, ipifc);
@@ -915,21 +897,25 @@
 
 /*
  *  add to self routing cache
- *	called with c locked
  */
 static void
 addselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a, int type)
 {
-	Ipself *p;
 	Iplink *lp;
+	Ipself *p;
 	int h;
 
+	type |= (lifc->type & Rv4);
 	qlock(f->self);
+	if(waserror()){
+		qunlock(f->self);
+		nexterror();
+	}
 
 	/* see if the address already exists */
 	h = hashipa(a);
-	for(p = f->self->hash[h]; p; p = p->next)
-		if(memcmp(a, p->a, IPaddrlen) == 0)
+	for(p = f->self->hash[h]; p != nil; p = p->next)
+		if(ipcmp(a, p->a) == 0)
 			break;
 
 	/* allocate a local address and add to hash chain */
@@ -946,7 +932,7 @@
 	}
 
 	/* look for a link for this lifc */
-	for(lp = p->link; lp; lp = lp->selflink)
+	for(lp = p->link; lp != nil; lp = lp->selflink)
 		if(lp->lifc == lifc)
 			break;
 
@@ -962,18 +948,19 @@
 		lifc->link = lp;
 
 		/* add to routing table */
-		if(isv4(a))
-			v4addroute(f, tifc, a+IPv4off, IPallbits+IPv4off, a+IPv4off, type);
-		else
-			v6addroute(f, tifc, a, IPallbits, a, type);
+		addroute(f, a, IPallbits,
+			lifc->local, 
+			((type & (Rbcast|Rmulti)) != 0 || v6addrtype(a) == linklocalv6) ?
+				IPallbits : IPnoaddr,
+			a, type, ifc, tifc);
 
 		if((type & Rmulti) && ifc->m->addmulti != nil)
 			(*ifc->m->addmulti)(ifc, a, lifc->local);
-	} else {
+	} else
 		lp->ref++;
-	}
 
 	qunlock(f->self);
+	poperror();
 }
 
 /*
@@ -992,8 +979,8 @@
 	ulong now = NOW;
 
 	l = &freeiplink;
-	for(np = *l; np; np = *l){
-		if(np->expire > now){
+	for(np = *l; np != nil; np = *l){
+		if((long)(now - np->expire) >= 0){
 			*l = np->next;
 			free(np);
 			continue;
@@ -1000,10 +987,11 @@
 		}
 		l = &np->next;
 	}
-	p->expire = now + 5000;		/* give other threads 5 secs to get out */
+	p->expire = now + 5000;	/* give other threads 5 secs to get out */
 	p->next = nil;
 	*l = p;
 }
+
 static void
 ipselffree(Ipself *p)
 {
@@ -1011,8 +999,8 @@
 	ulong now = NOW;
 
 	l = &freeipself;
-	for(np = *l; np; np = *l){
-		if(np->expire > now){
+	for(np = *l; np != nil; np = *l){
+		if((long)(now - np->expire) >= 0){
 			*l = np->next;
 			free(np);
 			continue;
@@ -1019,7 +1007,7 @@
 		}
 		l = &np->next;
 	}
-	p->expire = now + 5000;		/* give other threads 5 secs to get out */
+	p->expire = now + 5000;	/* give other threads 5 secs to get out */
 	p->next = nil;
 	*l = p;
 }
@@ -1027,7 +1015,6 @@
 /*
  *  Decrement reference for this address on this link.
  *  Unlink from selftab if this is the last ref.
- *	called with c locked
  */
 static void
 remselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a)
@@ -1039,7 +1026,7 @@
 
 	/* find the unique selftab entry */
 	l = &f->self->hash[hashipa(a)];
-	for(p = *l; p; p = *l){
+	for(p = *l; p != nil; p = *l){
 		if(ipcmp(p->a, a) == 0)
 			break;
 		l = &p->next;
@@ -1053,7 +1040,7 @@
 	 *  that matches the selftab entry
 	 */
 	l_lifc = &lifc->link;
-	for(link = *l_lifc; link; link = *l_lifc){
+	for(link = *l_lifc; link != nil; link = *l_lifc){
 		if(link->self == p)
 			break;
 		l_lifc = &link->lifclink;
@@ -1067,8 +1054,8 @@
 	 *  the one we just found
 	 */
 	l_self = &p->link;
-	for(link = *l_self; link; link = *l_self){
-		if(link == *(l_lifc))
+	for(link = *l_self; link != nil; link = *l_self){
+		if(link == *l_lifc)
 			break;
 		l_self = &link->selflink;
 	}
@@ -1079,9 +1066,20 @@
 	if(--(link->ref) != 0)
 		goto out;
 
-	if((p->type & Rmulti) && ifc->m->remmulti != nil)
-		(*ifc->m->remmulti)(ifc, a, lifc->local);
+	/* remove from routing table */
+	remroute(f, a, IPallbits,
+		lifc->local, 
+		((p->type & (Rbcast|Rmulti)) != 0 || v6addrtype(a) == linklocalv6) ?
+			IPallbits : IPnoaddr,
+		a, p->type, ifc, tifc);
 
+	if((p->type & Rmulti) && ifc->m->remmulti != nil){
+		if(!waserror()){
+			(*ifc->m->remmulti)(ifc, a, lifc->local);
+			poperror();
+		}
+	}
+
 	/* ref == 0, remove from both chains and free the link */
 	*l_lifc = link->lifclink;
 	*l_self = link->selflink;
@@ -1090,30 +1088,18 @@
 	if(p->link != nil)
 		goto out;
 
-	/* remove from routing table */
-	if(isv4(a))
-		v4delroute(f, a+IPv4off, IPallbits+IPv4off, 1);
-	else
-		v6delroute(f, a, IPallbits, 1);
-	
+	/* if null address, forget */
+	if(ipcmp(a, v4prefix) == 0 || ipcmp(a, IPnoaddr) == 0)
+		f->self->acceptall = 0;
+
 	/* no more links, remove from hash and free */
 	*l = p->next;
 	ipselffree(p);
 
-	/* if IPnoaddr, forget */
-	if(ipcmp(a, v4prefix) == 0 || ipcmp(a, IPnoaddr) == 0)
-		f->self->acceptall = 0;
-
 out:
 	qunlock(f->self);
 }
 
-static char *stformat = "%-44.44I %2.2d %4.4s\n";
-enum
-{
-	Nstformat= 41,
-};
-
 long
 ipselftabread(Fs *f, char *cp, ulong offset, int n)
 {
@@ -1124,14 +1110,14 @@
 
 	m = 0;
 	off = offset;
-	qlock(f->self);
 	for(i = 0; i < NHASH && m < n; i++){
 		for(p = f->self->hash[i]; p != nil && m < n; p = p->next){
 			nifc = 0;
-			for(link = p->link; link; link = link->selflink)
+			for(link = p->link; link != nil; link = link->selflink)
 				nifc++;
 			routetype(p->type, state);
-			m += snprint(cp + m, n - m, stformat, p->a, nifc, state);
+			m += snprint(cp + m, n - m, "%-44.44I %2.2d %4.4s\n",
+				p->a, nifc, state);
 			if(off > 0){
 				off -= m;
 				m = 0;
@@ -1138,30 +1124,15 @@
 			}
 		}
 	}
-	qunlock(f->self);
 	return m;
 }
 
-int
-iptentative(Fs *f, uchar *addr)
-{
- 	Ipself *p;
-
-	p = f->self->hash[hashipa(addr)];
-	for(; p; p = p->next){
-		if(ipcmp(addr, p->a) == 0) {
-			return p->link->lifc->tentative;
-		}
-	}
-	return 0;
-}
-
 /*
  *  returns
  *	0		- no match
  *	Runi
  *	Rbcast
- *	Rmcast
+ *	Rmulti
  */
 int
 ipforme(Fs *f, uchar *addr)
@@ -1168,11 +1139,9 @@
 {
 	Ipself *p;
 
-	p = f->self->hash[hashipa(addr)];
-	for(; p; p = p->next){
+	for(p = f->self->hash[hashipa(addr)]; p != nil; p = p->next)
 		if(ipcmp(addr, p->a) == 0)
-			return p->type;
-	}
+			return p->type & (Runi|Rbcast|Rmulti);
 
 	/* hack to say accept anything */
 	if(f->self->acceptall)
@@ -1186,254 +1155,237 @@
  *  return nil.
  */
 Ipifc*
-findipifc(Fs *f, uchar *remote, int type)
+findipifc(Fs *f, uchar *local, uchar *remote, int type)
 {
+	uchar gnet[IPaddrlen];
+	int spec, xspec;
 	Ipifc *ifc, *x;
 	Iplifc *lifc;
-	Conv **cp, **e;
-	uchar gnet[IPaddrlen];
-	uchar xmask[IPaddrlen];
+	Conv **cp;
 
-	x = nil; memset(xmask, 0, IPaddrlen);
-
-	/* find most specific match */
-	e = &f->ipifc->conv[f->ipifc->nc];
-	for(cp = f->ipifc->conv; cp < e; cp++){
-		if(*cp == 0)
-			continue;
-
+	x = nil;
+	xspec = 0;
+	for(cp = f->ipifc->conv; *cp != nil; cp++){
 		ifc = (Ipifc*)(*cp)->ptcl;
-
-		for(lifc = ifc->lifc; lifc; lifc = lifc->next){
+		if(!canrlock(ifc))
+			continue;
+		for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next){
+			if(type & Runi){
+				if(ipcmp(remote, lifc->local) == 0){
+				Found:
+					runlock(ifc);
+					return ifc;
+				}
+			} else if(type & (Rbcast|Rmulti)) {
+				if(ipcmp(local, lifc->local) == 0)
+					goto Found;
+			}
 			maskip(remote, lifc->mask, gnet);
 			if(ipcmp(gnet, lifc->net) == 0){
-				if(x == nil || ipcmp(lifc->mask, xmask) > 0){
+				spec = comprefixlen(remote, lifc->local, IPaddrlen);
+				if(spec > xspec){
 					x = ifc;
-					ipmove(xmask, lifc->mask);
+					xspec = spec;
 				}
 			}
 		}
+		runlock(ifc);
 	}
-	if(x != nil)
-		return x;
+	return x;
+}
 
-	/* for now for broadcast and multicast, just use first interface */
-	if(type & (Rbcast|Rmulti)){
-		for(cp = f->ipifc->conv; cp < e; cp++){
-			if(*cp == 0)
-				continue;
-			ifc = (Ipifc*)(*cp)->ptcl;
-			if(ifc->lifc != nil)
-				return ifc;
-		}
+Ipifc*
+findipifcstr(Fs *f, char *s)
+{
+	uchar ip[IPaddrlen];
+	Conv *c;
+	char *p;
+	long x;
+
+	x = strtol(s, &p, 10);
+	if(p > s && *p == '\0'){
+		if(x < 0)
+			return nil;
+		if(x < f->ipifc->nc && (c = f->ipifc->conv[x]) != nil && ipifcinuse(c))
+			return (Ipifc*)c->ptcl;
 	}
-		
+	if(parseip(ip, s) != -1)
+		return findipifc(f, ip, ip, Runi);
 	return nil;
 }
 
-enum {
-	unknownv6,
-	multicastv6,
-	unspecifiedv6,
-	linklocalv6,
-	sitelocalv6,
-	globalv6,
-};
-
-int
-v6addrtype(uchar *addr)
-{
-	if(isv6global(addr))
-		return globalv6;
-	if(islinklocal(addr))
-		return linklocalv6;
-	if(isv6mcast(addr))
-		return multicastv6;
-	if(issitelocal(addr))
-		return sitelocalv6;
-	return unknownv6;
-}
-
-#define v6addrcurr(lifc) (( (lifc)->origint + (lifc)->preflt >= (NOW/10^3) ) || ( (lifc)->preflt == 0xffffffff ))
-
+/*
+ *  find "best" (global > link local > unspecified)
+ *  local address; address must be current.
+ */
 static void
 findprimaryipv6(Fs *f, uchar *local)
 {
-	Conv **cp, **e;
-	Ipifc *ifc;
-	Iplifc *lifc;
+	ulong now = NOW/1000;
 	int atype, atypel;
+	Iplifc *lifc;
+	Ipifc *ifc;
+	Conv **cp;
 
 	ipmove(local, v6Unspecified);
 	atype = unspecifiedv6;
 
-	/* find "best" (global > sitelocal > link local > unspecified)
-	 * local address; address must be current */
-
-	e = &f->ipifc->conv[f->ipifc->nc];
-	for(cp = f->ipifc->conv; cp < e; cp++){
-		if(*cp == 0)
-			continue;
+	for(cp = f->ipifc->conv; *cp != nil; cp++){
 		ifc = (Ipifc*)(*cp)->ptcl;
-		for(lifc = ifc->lifc; lifc; lifc = lifc->next){
+		rlock(ifc);
+		for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next){
 			atypel = v6addrtype(lifc->local);
 			if(atypel > atype)
-			if(v6addrcurr(lifc)) {
+			if(lifc->preflt == ~0UL || lifc->preflt >= now-lifc->origint) {
 				ipmove(local, lifc->local);
 				atype = atypel;
-				if(atype == globalv6)
+				if(atype == globalv6){
+					runlock(ifc);
 					return;
+				}
 			}
 		}
+		runlock(ifc);
 	}
 }
 
 /*
- *  returns first ip address configured
+ *  returns first v4 address configured
  */
 static void
 findprimaryipv4(Fs *f, uchar *local)
 {
-	Conv **cp, **e;
-	Ipifc *ifc;
 	Iplifc *lifc;
+	Ipifc *ifc;
+	Conv **cp;
 
 	/* find first ifc local address */
-	e = &f->ipifc->conv[f->ipifc->nc];
-	for(cp = f->ipifc->conv; cp < e; cp++){
-		if(*cp == 0)
-			continue;
+	for(cp = f->ipifc->conv; *cp != nil; cp++){
 		ifc = (Ipifc*)(*cp)->ptcl;
-		if((lifc = ifc->lifc) != nil){
-			ipmove(local, lifc->local);
-			return;
+		rlock(ifc);
+		for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next){
+			if((lifc->type & Rv4) != 0){
+				ipmove(local, lifc->local);
+				runlock(ifc);
+				return;
+			}
 		}
+		runlock(ifc);
 	}
+	ipmove(local, IPnoaddr);
 }
 
 /*
- *  find the local address 'closest' to the remote system, copy it to
- *  local and return the ifc for that address
+ * ipv4local, ipv6local:
+ *  return a local address associated with an interface close to remote.
+ *  prefixlen is the number of leading bits in the local address that
+ *  have to match an interface address to be considered. this is used
+ *  by source specific routes to filter on the source address.
+ *  return non-zero on success or zero when no address was found.
+ *
+ *  for ipv4local, all addresses are 4 byte format.
  */
-void
-findlocalip(Fs *f, uchar *local, uchar *remote)
+int
+ipv4local(Ipifc *ifc, uchar *local, int prefixlen, uchar *remote)
 {
-	Ipifc *ifc;
 	Iplifc *lifc;
-	Route *r;
-	uchar gate[IPaddrlen];
-	uchar gnet[IPaddrlen];
-	int version;
-	int atype = unspecifiedv6, atypel = unknownv6;
+	int a, b;
 
-	USED(atype);
-	USED(atypel);
-	qlock(f->ipifc);
-	r = v6lookup(f, remote, nil);
- 	version = (memcmp(remote, v4prefix, IPv4off) == 0) ? V4 : V6;
-	
-	if(r != nil){
-		ifc = r->ifc;
-		if(r->type & Rv4)
-			v4tov6(gate, r->v4.gate);
-		else {
-			ipmove(gate, r->v6.gate);
-			ipmove(local, v6Unspecified);
-		}
+	b = -1;
+	for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next){
+		if((lifc->type & Rv4) == 0 || ipcmp(lifc->local, IPnoaddr) == 0)
+			continue;
 
-		/* find ifc address closest to the gateway to use */
-		switch(version) {
-		case V4:
-			for(lifc = ifc->lifc; lifc; lifc = lifc->next){
-				maskip(gate, lifc->mask, gnet);
-				if(ipcmp(gnet, lifc->net) == 0){
-					ipmove(local, lifc->local);
-					goto out;
-				}
-			}
-			break;
-		case V6:
-			for(lifc = ifc->lifc; lifc; lifc = lifc->next){
-				atypel = v6addrtype(lifc->local);
-				maskip(gate, lifc->mask, gnet);
-				if(ipcmp(gnet, lifc->net) == 0)
-				if(atypel > atype)
-				if(v6addrcurr(lifc)) {
-					ipmove(local, lifc->local);
-					atype = atypel;
-					if(atype == globalv6)
-						break;
-				}
-			}
-			if(atype > unspecifiedv6)
-				goto out;
-			break;
-		default:
-			panic("findlocalip: version %d", version);
+		if(prefixlen && comprefixlen(lifc->local+IPv4off, local, IPv4addrlen) < prefixlen)
+			continue;
+		
+		a = comprefixlen(lifc->local+IPv4off, remote, IPv4addrlen);
+		if(a > b){
+			b = a;
+			memmove(local, lifc->local+IPv4off, IPv4addrlen);
 		}
 	}
-
-	switch(version){
-	case V4:
-		findprimaryipv4(f, local);
-		break;
-	case V6:
-		findprimaryipv6(f, local);
-		break;
-	default:
-		panic("findlocalip2: version %d", version);
-	}
-
-out:
-	qunlock(f->ipifc);
+	return b >= 0;
 }
 
-/*
- *  return first v4 address associated with an interface
- */
 int
-ipv4local(Ipifc *ifc, uchar *addr)
+ipv6local(Ipifc *ifc, uchar *local, int prefixlen, uchar *remote)
 {
+	struct {
+		int	atype;
+		int	deprecated;
+		int	comprefixlen;
+	} a, b;
+	int atype;
+	ulong now;
 	Iplifc *lifc;
 
-	for(lifc = ifc->lifc; lifc; lifc = lifc->next){
-		if(isv4(lifc->local)){
-			memmove(addr, lifc->local+IPv4off, IPv4addrlen);
-			return 1;
-		}
+	if(isv4(remote)){
+		memmove(local, v4prefix, IPv4off);
+		if((prefixlen -= IPv4off*8) < 0)
+			prefixlen = 0;
+		return ipv4local(ifc, local+IPv4off, prefixlen, remote+IPv4off);
 	}
-	return 0;
-}
 
-/*
- *  return first v6 address associated with an interface
- */
-int
-ipv6local(Ipifc *ifc, uchar *addr)
-{
-	Iplifc *lifc;
+	atype = v6addrtype(remote);
+	b.atype = unknownv6;
+	b.deprecated = 1;
+	b.comprefixlen = 0;
 
-	for(lifc = ifc->lifc; lifc; lifc = lifc->next){
-		if(!isv4(lifc->local) && !(lifc->tentative)){
-			ipmove(addr, lifc->local);
-			return 1;
+	now = NOW/1000;
+	for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next){
+		if(lifc->tentative)
+			continue;
+
+		if(prefixlen && comprefixlen(lifc->local, local, IPaddrlen) < prefixlen)
+			continue;
+
+		a.atype = v6addrtype(lifc->local);
+		a.deprecated = lifc->preflt != ~0UL && lifc->preflt < now-lifc->origint;
+		a.comprefixlen = comprefixlen(lifc->local, remote, IPaddrlen);
+
+		/* prefer appropriate scope */
+		if(a.atype != b.atype){
+			if(a.atype > b.atype && b.atype < atype ||
+			   a.atype < b.atype && b.atype > atype)
+				goto Good;
+			continue;
 		}
+		/* prefer non-deprecated addresses */
+		if(a.deprecated != b.deprecated){
+			if(b.deprecated)
+				goto Good;
+			continue;
+		}
+		/* prefer longer common prefix */
+		if(a.comprefixlen != b.comprefixlen){
+			if(a.comprefixlen > b.comprefixlen)
+				goto Good;
+			continue;
+		}
+		continue;
+	Good:
+		b = a;
+		ipmove(local, lifc->local);
 	}
-	return 0;
+
+	return b.atype >= atype;
 }
 
-int
-ipv6anylocal(Ipifc *ifc, uchar *addr)
+/*
+ *  find the local address for a remote destination
+ */
+void
+findlocalip(Fs *f, uchar *local, uchar *remote)
 {
-	Iplifc *lifc;
-
-	for(lifc = ifc->lifc; lifc; lifc = lifc->next){
-		if(!isv4(lifc->local)){
-			ipmove(addr, lifc->local);
-			return SRC_UNI;
-		}
+	if(isv4(remote)) {
+		memmove(local, v4prefix, IPv4off);
+		if(v4source(f, remote+IPv4off, local+IPv4off) == nil)
+			findprimaryipv4(f, local);
+	} else {
+		if(v6source(f, remote, local) == nil)
+			findprimaryipv6(f, local);
 	}
-	return SRC_UNSPEC;
 }
 
 /*
@@ -1444,13 +1396,28 @@
 {
 	Iplifc *lifc;
 
-	for(lifc = ifc->lifc; lifc; lifc = lifc->next)
+	for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next)
 		if(ipcmp(ip, lifc->local) == 0)
 			return lifc;
+
 	return nil;
 }
 
+Iplifc*
+ipremoteonifc(Ipifc *ifc, uchar *ip)
+{
+	uchar net[IPaddrlen];
+	Iplifc *lifc;
 
+	for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next){
+		maskip(ip, lifc->mask, net);
+		if(ipcmp(net, lifc->remote) == 0)
+			return lifc;
+	}
+	return nil;
+}
+
+
 /*
  *  See if we're proxying for this address on this interface
  */
@@ -1458,24 +1425,13 @@
 ipproxyifc(Fs *f, Ipifc *ifc, uchar *ip)
 {
 	Route *r;
-	uchar net[IPaddrlen];
-	Iplifc *lifc;
 
 	/* see if this is a direct connected pt to pt address */
-	r = v6lookup(f, ip, nil);
-	if(r == nil)
+	r = v6lookup(f, ip, ip, nil);
+	if(r == nil || (r->type & (Rifc|Rproxy)) != (Rifc|Rproxy))
 		return 0;
-	if((r->type & (Rifc|Rproxy)) != (Rifc|Rproxy))
-		return 0;
 
-	/* see if this is on the right interface */
-	for(lifc = ifc->lifc; lifc; lifc = lifc->next){
-		maskip(ip, lifc->mask, net);
-		if(ipcmp(net, lifc->remote) == 0)
-			return 1;
-	}
-
-	return 0;
+	return ipremoteonifc(ifc, ip) != nil;
 }
 
 /*
@@ -1487,73 +1443,53 @@
 	if(isv4(ip)){
 		if(ip[IPv4off] >= 0xe0 && ip[IPv4off] < 0xf0)
 			return V4;
-	} else {
-		if(ip[0] == 0xff)
-			return V6;
 	}
+	else if(ip[0] == 0xff)
+		return V6;
 	return 0;
 }
 
-int
-ipisbm(uchar *ip)
-{
-	if(isv4(ip)){
-		if(ip[IPv4off] >= 0xe0 && ip[IPv4off] < 0xf0)
-			return V4;
-		if(ipcmp(ip, IPv4bcast) == 0)
-			return V4;
-	} else {
-		if(ip[0] == 0xff)
-			return V6;
-	}
-	return 0;
-}
-
-
 /*
- *  add a multicast address to an interface, called with c locked
+ *  add a multicast address to an interface.
  */
 void
 ipifcaddmulti(Conv *c, uchar *ma, uchar *ia)
 {
-	Ipifc *ifc;
-	Iplifc *lifc;
-	Conv **p;
 	Ipmulti *multi, **l;
+	Iplifc *lifc;
+	Ipifc *ifc;
 	Fs *f;
 
-	f = c->p->f;
-	
-	for(l = &c->multi; *l; l = &(*l)->next)
-		if(ipcmp(ma, (*l)->ma) == 0)
-		if(ipcmp(ia, (*l)->ia) == 0)
+	if(isv4(ma) != isv4(ia))
+		error("incompatible multicast/interface ip address");
+
+	for(l = &c->multi; *l != nil; l = &(*l)->next)
+		if(ipcmp(ma, (*l)->ma) == 0 && ipcmp(ia, (*l)->ia) == 0)
 			return;		/* it's already there */
 
-	multi = *l = smalloc(sizeof(*multi));
-	ipmove(multi->ma, ma);
-	ipmove(multi->ia, ia);
-	multi->next = nil;
-
-	for(p = f->ipifc->conv; *p; p++){
-		if((*p)->inuse == 0)
-			continue;
-		ifc = (Ipifc*)(*p)->ptcl;
+	f = c->p->f;
+	if((ifc = findipifc(f, ia, ma, Rmulti)) != nil){
+		rlock(ifc);
 		if(waserror()){
-			wunlock(ifc);
+			runlock(ifc);
 			nexterror();
 		}
-		wlock(ifc);
-		for(lifc = ifc->lifc; lifc; lifc = lifc->next)
-			if(ipcmp(ia, lifc->local) == 0)
-				addselfcache(f, ifc, lifc, ma, Rmulti);
-		wunlock(ifc);
+		if((lifc = iplocalonifc(ifc, ia)) != nil)
+			addselfcache(f, ifc, lifc, ma, Rmulti);
+		runlock(ifc);
 		poperror();
 	}
+
+	multi = smalloc(sizeof(*multi));
+	ipmove(multi->ma, ma);
+	ipmove(multi->ia, ia);
+	multi->next = nil;
+	*l = multi;
 }
 
 
 /*
- *  remove a multicast address from an interface, called with c locked
+ *  remove a multicast address from an interface.
  */
 void
 ipifcremmulti(Conv *c, uchar *ma, uchar *ia)
@@ -1560,15 +1496,11 @@
 {
 	Ipmulti *multi, **l;
 	Iplifc *lifc;
-	Conv **p;
 	Ipifc *ifc;
 	Fs *f;
 
-	f = c->p->f;
-	
-	for(l = &c->multi; *l; l = &(*l)->next)
-		if(ipcmp(ma, (*l)->ma) == 0)
-		if(ipcmp(ia, (*l)->ia) == 0)
+	for(l = &c->multi; *l != nil; l = &(*l)->next)
+		if(ipcmp(ma, (*l)->ma) == 0 && ipcmp(ia, (*l)->ia) == 0)
 			break;
 
 	multi = *l;
@@ -1576,161 +1508,101 @@
 		return; 	/* we don't have it open */
 
 	*l = multi->next;
+	multi->next = nil;
 
-	for(p = f->ipifc->conv; *p; p++){
-		if((*p)->inuse == 0)
-			continue;
-
-		ifc = (Ipifc*)(*p)->ptcl;
-		if(waserror()){
-			wunlock(ifc);
-			nexterror();
-		}
-		wlock(ifc);
-		for(lifc = ifc->lifc; lifc; lifc = lifc->next)
-			if(ipcmp(ia, lifc->local) == 0)
+	f = c->p->f;
+	if((ifc = findipifc(f, ia, ma, Rmulti)) != nil){
+		rlock(ifc);
+		if(!waserror()){
+			if((lifc = iplocalonifc(ifc, ia)) != nil)
 				remselfcache(f, ifc, lifc, ma);
-		wunlock(ifc);
-		poperror();
+			poperror();
+		}
+		runlock(ifc);
 	}
-
 	free(multi);
 }
 
-/*
- *  make lifc's join and leave multicast groups
- */
-static char*
-ipifcjoinmulti(Ipifc *ifc, char **argv, int argc)
+/* register the address on this network for address resolution */
+static void
+ipifcregisteraddr(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *ip)
 {
-	USED(ifc, argv, argc);
-	return nil;
+	if(waserror()){
+		print("ipifcregisteraddr %s %I %I: %s\n", ifc->dev, lifc->local, ip, up->errstr);
+		return;
+	}
+	if(ifc->m != nil && ifc->m->areg != nil)
+		(*ifc->m->areg)(f, ifc, lifc, ip);
+	poperror();
 }
 
-static char*
-ipifcleavemulti(Ipifc *ifc, char **argv, int argc)
-{
-	USED(ifc, argv, argc);
-	return nil;
-}
-
 static void
-ipifcregisterproxy(Fs *f, Ipifc *ifc, uchar *ip)
+ipifcregisterproxy(Fs *f, Ipifc *ifc, uchar *ip, int add)
 {
-	Conv **cp, **e;
-	Ipifc *nifc;
+	uchar a[IPaddrlen];
 	Iplifc *lifc;
-	Medium *m;
-	uchar net[IPaddrlen];
+	Ipifc *nifc;
+	Conv **cp;
 
-	/* register the address on any network that will proxy for us */
-	e = &f->ipifc->conv[f->ipifc->nc];
+	/* register the address on any interface that will proxy for the ip */
+	for(cp = f->ipifc->conv; *cp != nil; cp++){
+		nifc = (Ipifc*)(*cp)->ptcl;
+		if(nifc == ifc || !canrlock(nifc))
+			continue;
 
-	if(!isv4(ip)) { // V6
-		for(cp = f->ipifc->conv; cp < e; cp++){
-			if(*cp == nil)
-				continue;
-			nifc = (Ipifc*)(*cp)->ptcl;
-			if(nifc == ifc)
-				continue;
-	
-			rlock(nifc);
-			m = nifc->m;
-			if(m == nil || m->addmulti == nil) {
-				runlock(nifc);
-				continue;
-			}
-			for(lifc = nifc->lifc; lifc; lifc = lifc->next){
-				maskip(ip, lifc->mask, net);
-				if(ipcmp(net, lifc->remote) == 0) { /* add solicited-node multicast address */
-					ipv62smcast(net, ip);
-					addselfcache(f, nifc, lifc, net, Rmulti);
-					arpenter(f, V6, ip, nifc->mac, 6, 0);
-					//(*m->addmulti)(nifc, net, ip);
-					break;
-				}
-			}
+		if(nifc->m == nil
+		|| (lifc = ipremoteonifc(nifc, ip)) == nil
+		|| (lifc->type & Rptpt) != 0
+		|| waserror()){
 			runlock(nifc);
+			continue;
 		}
-		return;
-	}
-	else { // V4
-		for(cp = f->ipifc->conv; cp < e; cp++){
-			if(*cp == nil)
-				continue;
-			nifc = (Ipifc*)(*cp)->ptcl;
-			if(nifc == ifc)
-				continue;
-	
-			rlock(nifc);
-			m = nifc->m;
-			if(m == nil || m->areg == nil){
-				runlock(nifc);
-				continue;
-			}
-			for(lifc = nifc->lifc; lifc; lifc = lifc->next){
-				maskip(ip, lifc->mask, net);
-				if(ipcmp(net, lifc->remote) == 0){
-					(*m->areg)(nifc, ip);
-					break;
-				}
-			}
-			runlock(nifc);
+		if((lifc->type & Rv4) == 0){
+			/* add solicited-node multicast addr */
+			ipv62smcast(a, ip);
+			if(add)
+				addselfcache(f, nifc, lifc, a, Rmulti);
+			else
+				remselfcache(f, nifc, lifc, a);
 		}
+		if(add)
+			ipifcregisteraddr(f, nifc, lifc, ip);
+		runlock(nifc);
+		poperror();
 	}
 }
 
-
-// added for new v6 mesg types
-static void
-adddefroute6(Fs *f, uchar *gate, int force)
-{
-	Route *r;
-
-	r = v6lookup(f, v6Unspecified, nil);
-	if(r!=nil)
-	if(!(force) && (strcmp(r->tag,"ra")!=0))	// route entries generated
-		return;			// by all other means take
-					// precedence over router annc
-
-	v6delroute(f, v6Unspecified, v6Unspecified, 1);
-	v6addroute(f, "ra", v6Unspecified, v6Unspecified, gate, 0);
-}
-
-enum
-{
-	Ngates = 3,
-};
-
 char*
-ipifcaddpref6(Ipifc *ifc, char**argv, int argc)
+ipifcadd6(Ipifc *ifc, char **argv, int argc)
 {
-	uchar	onlink = 1;
-	uchar	autoflag = 1;
-	long 	validlt = 0xffffffff;
-	long 	preflt = 0xffffffff;
-	long	origint = NOW / 10^3;
-	uchar	prefix[IPaddrlen];
-	int	plen = 64;
-	Iplifc	*lifc;
-	char	addr[40], preflen[6];
-	char	*params[3];
+	int plen = 64;
+	char addr[40], preflen[6];
+	char *params[3];
+	uchar prefix[IPaddrlen];
+	Iplifc lifc;
+	Medium *m;
 
+	lifc.onlink = 1;
+	lifc.autoflag = 1;
+	lifc.validlt = lifc.preflt = ~0UL;
+	lifc.origint = NOW / 1000;
+
 	switch(argc) {
 	case 7:
-		preflt = atoi(argv[6]);
+		lifc.preflt = strtoul(argv[6], 0, 10);
 		/* fall through */
 	case 6:
-		validlt = atoi(argv[5]);
+		lifc.validlt = strtoul(argv[5], 0, 10);
 		/* fall through */
 	case 5:
-		autoflag =  atoi(argv[4]);
+		lifc.autoflag = atoi(argv[4]) != 0;
 		/* fall through */
 	case 4:
-		onlink = atoi(argv[3]);
+		lifc.onlink = atoi(argv[3]) != 0;
 		/* fall through */
 	case 3:
 		plen = atoi(argv[2]);
+		/* fall through */
 	case 2:
 		break;
 	default:
@@ -1737,25 +1609,16 @@
 		return Ebadarg;
 	}
 
-	if((parseip(prefix, argv[1])!=6) ||
-	 	(validlt < preflt) ||
-		(plen < 0) || (plen > 64) ||
-		(islinklocal(prefix))
-	)
+	if (parseip(prefix, argv[1]) != 6 || lifc.validlt < lifc.preflt || plen < 0 ||
+	    plen > 64 || islinklocal(prefix))
 		return Ebadarg;
 
-	lifc = smalloc(sizeof(Iplifc));
-	lifc->onlink = (onlink!=0);
-	lifc->autoflag = (autoflag!=0);
-	lifc->validlt = validlt;
-	lifc->preflt = preflt;
-	lifc->origint = origint;
+	/* issue "add" ctl msg for v6 link-local addr and prefix len */
+	m = ifc->m;
+	if(m == nil || m->pref2addr == nil)
+		return Eunbound;
+	(*m->pref2addr)(prefix, ifc->mac);	/* mac → v6 link-local addr */
 
-	if(ifc->m->pref2addr!=nil)
-		ifc->m->pref2addr(prefix, ifc->mac);
-	else
-		return Ebadarg;
-	
 	sprint(addr, "%I", prefix);
 	sprint(preflen, "/%d", plen);
 	params[0] = "add";
@@ -1762,6 +1625,28 @@
 	params[1] = addr;
 	params[2] = preflen;
 
-	return ipifcadd(ifc, params, 3, 0, lifc);
+	return ipifcadd(ifc, params, 3, 0, &lifc);
 }
 
+char*
+ipifcremove6(Ipifc *ifc, char**, int argc)
+{
+	Iplifc *lifc, **l;
+	ulong now;
+
+	if(argc != 1)
+		return Ebadarg;
+
+	wlock(ifc);
+	now = NOW/1000;
+	for(l = &ifc->lifc; (lifc = *l) != nil;) {
+		if((lifc->type & Rv4) == 0)
+		if(lifc->validlt != ~0UL && lifc->validlt < now-lifc->origint)
+			if(ipifcremlifc(ifc, l) == nil)
+				continue;
+		l = &lifc->next;
+	}
+	wunlock(ifc);
+
+	return nil;
+}
--- a/os/ip/ipmux.c
+++ b/os/ip/ipmux.c
@@ -1,3 +1,6 @@
+/*
+ * IP packet filter
+ */
 #include "u.h"
 #include "../port/lib.h"
 #include "mem.h"
@@ -6,30 +9,14 @@
 #include "../port/error.h"
 
 #include "ip.h"
-#define DPRINT if(0)print
+#include "ipv6.h"
 
 typedef struct Ipmuxrock  Ipmuxrock;
 typedef struct Ipmux      Ipmux;
-typedef struct Ip6hdr     Ip6hdr;
 
 enum
 {
-	IPHDR		= 20,		/* sizeof(Ip4hdr) */
-};
-
-struct Ip6hdr
-{
-	uchar vcf[4];		/* version, class label, and flow label */ 
-	uchar ploadlen[2];	/* payload length */
-	uchar proto;		/* next header, i.e. proto */
-	uchar ttl;		/* hop limit, i.e. ttl */
-	uchar src[16];		/* IP source */
-	uchar dst[16];		/* IP destination */
-};
-
-
-enum
-{
+	Tver,
 	Tproto,
 	Tdata,
 	Tiph,
@@ -36,28 +23,8 @@
 	Tdst,
 	Tsrc,
 	Tifc,
-
-	Cother = 0,
-	Cbyte,		/* single byte */
-	Cmbyte,		/* single byte with mask */
-	Cshort,		/* single short */
-	Cmshort,	/* single short with mask */
-	Clong,		/* single long */
-	Cmlong,		/* single long with mask */
-	Cifc,
-	Cmifc,
 };
 
-char *ftname[] = 
-{
-[Tproto]	"proto",
-[Tdata]		"data",
-[Tiph]	 	"iph",
-[Tdst]		"dst",
-[Tsrc]		"src",
-[Tifc]		"ifc",
-};
-
 /*
  *  a node in the decision tree
  */
@@ -66,16 +33,12 @@
 	Ipmux	*yes;
 	Ipmux	*no;
 	uchar	type;		/* type of field(Txxxx) */
-	uchar	ctype;		/* tupe of comparison(Cxxxx) */
 	uchar	len;		/* length in bytes of item to compare */
 	uchar	n;		/* number of items val points to */
-	short	off;		/* offset of comparison */
-	short	eoff;		/* end offset of comparison */
-	uchar	skiphdr;	/* should offset start after ipheader */
+	int	off;		/* offset of comparison */
 	uchar	*val;
 	uchar	*mask;
 	uchar	*e;		/* val+n*len*/
-
 	int	ref;		/* so we can garbage collect */
 	Conv	*conv;
 };
@@ -90,6 +53,7 @@
 
 static int	ipmuxsprint(Ipmux*, int, char*, int);
 static void	ipmuxkick(void *x);
+static void	ipmuxfree(Ipmux *f);
 
 static char*
 skipwhite(char *p)
@@ -122,27 +86,33 @@
 	Ipmux *f;
 
 	p = skipwhite(p);
-	if(strncmp(p, "dst", 3) == 0){
+	if(strncmp(p, "ver", 3) == 0){
+		type = Tver;
+		off = 0;
+		len = 1;
+		p += 3;
+	}
+	else if(strncmp(p, "dst", 3) == 0){
 		type = Tdst;
-		off = offsetof(Ip4hdr, dst[0]);
-		len = IPv4addrlen;
+		off = offsetof(Ip6hdr, dst[0]);
+		len = IPaddrlen;
 		p += 3;
 	}
 	else if(strncmp(p, "src", 3) == 0){
 		type = Tsrc;
-		off = offsetof(Ip4hdr, src[0]);
-		len = IPv4addrlen;
+		off = offsetof(Ip6hdr, src[0]);
+		len = IPaddrlen;
 		p += 3;
 	}
 	else if(strncmp(p, "ifc", 3) == 0){
 		type = Tifc;
-		off = -IPv4addrlen;
-		len = IPv4addrlen;
+		off = -IPaddrlen;
+		len = IPaddrlen;
 		p += 3;
 	}
 	else if(strncmp(p, "proto", 5) == 0){
 		type = Tproto;
-		off = offsetof(Ip4hdr, proto);
+		off = offsetof(Ip6hdr, proto);
 		len = 1;
 		p += 5;
 	}
@@ -160,7 +130,7 @@
 			return nil;
 		p++;
 		off = strtoul(p, &p, 0);
-		if(off < 0 || off > (64-IPHDR))
+		if(off < 0)
 			return nil;
 		p = skipwhite(p);
 		if(*p != ':')
@@ -189,11 +159,6 @@
 	f->mask = nil;
 	f->n = 1;
 	f->ref = 1;
-	if(type == Tdata)
-		f->skiphdr = 1;
-	else
-		f->skiphdr = 0;
-
 	return f;	
 }
 
@@ -229,7 +194,7 @@
 static Ipmux*
 parsemux(char *p)
 {
-	int n, nomask;
+	int n;
 	Ipmux *f;
 	char *val;
 	char *mask;
@@ -247,7 +212,7 @@
 		goto parseerror;
 
 	/* parse mask */
-	mask = follows(val, '&');
+	mask = follows(p, '&');
 	if(mask != nil){
 		switch(f->type){
 		case Tsrc:
@@ -254,7 +219,7 @@
 		case Tdst:
 		case Tifc:
 			f->mask = smalloc(f->len);
-			v4parseip(f->mask, mask);
+			parseipmask(f->mask, mask, 0);
 			break;
 		case Tdata:
 		case Tiph:
@@ -264,15 +229,13 @@
 		default:
 			goto parseerror;
 		}
-		nomask = 0;
-	} else {
-		nomask = 1;
+	} else if(f->type == Tver){
 		f->mask = smalloc(f->len);
-		memset(f->mask, 0xff, f->len);
+		f->mask[0] = 0xF0;
 	}
 
 	/* parse vals */
-	f->n = getfields(val, vals, sizeof(vals)/sizeof(char*), 1, "|");
+	f->n = getfields(val, vals, nelem(vals), 1, "|");
 	if(f->n == 0)
 		goto parseerror;
 	f->val = smalloc(f->n*f->len);
@@ -279,10 +242,21 @@
 	v = f->val;
 	for(n = 0; n < f->n; n++){
 		switch(f->type){
+		case Tver:
+			if(f->n != 1)
+				goto parseerror;
+			if(strcmp(vals[n], "6") == 0)
+				*v = IP_VER6;
+			else if(strcmp(vals[n], "4") == 0)
+				*v = IP_VER4;
+			else
+				goto parseerror;
+			break;
 		case Tsrc:
 		case Tdst:
 		case Tifc:
-			v4parseip(v, vals[n]);
+			if(parseip(v, vals[n]) == -1)
+				goto parseerror;
 			break;
 		case Tproto:
 		case Tdata:
@@ -292,34 +266,11 @@
 		}
 		v += f->len;
 	}
-
-	f->eoff = f->off + f->len;
 	f->e = f->val + f->n*f->len;
-	f->ctype = Cother;
-	if(f->n == 1){
-		switch(f->len){
-		case 1:
-			f->ctype = nomask ? Cbyte : Cmbyte;
-			break;
-		case 2:
-			f->ctype = nomask ? Cshort : Cmshort;
-			break;
-		case 4:
-			if(f->type == Tifc)
-				f->ctype = nomask ? Cifc : Cmifc;
-			else
-				f->ctype = nomask ? Clong : Cmlong;
-			break;
-		}
-	}
 	return f;
 
 parseerror:
-	if(f->mask)
-		free(f->mask);
-	if(f->val)
-		free(f->val);
-	free(f);
+	ipmuxfree(f);
 	return nil;
 }
 
@@ -342,8 +293,7 @@
 		return n;
 
 	/* compare offsets, call earlier ones more specific */
-	n = (a->off+((int)a->skiphdr)*offsetof(Ip4hdr, data[0])) - 
-		(b->off+((int)b->skiphdr)*offsetof(Ip4hdr, data[0]));
+	n = a->off - b->off;
 	if(n != 0)
 		return n;
 
@@ -413,6 +363,10 @@
 	*nf = *f;
 	nf->no = ipmuxcopy(f->no);
 	nf->yes = ipmuxcopy(f->yes);
+	if(f->mask != nil){
+		nf->mask = smalloc(f->len);
+		memmove(nf->mask, f->mask, f->len);
+	}
 	nf->val = smalloc(f->n*f->len);
 	nf->e = nf->val + f->len*f->n;
 	memmove(nf->val, f->val, f->n*f->len);
@@ -422,8 +376,10 @@
 static void
 ipmuxfree(Ipmux *f)
 {
-	if(f->val != nil)
-		free(f->val);
+	if(f == nil)
+		return;
+	free(f->val);
+	free(f->mask);
 	free(f);
 }
 
@@ -432,10 +388,8 @@
 {
 	if(f == nil)
 		return;
-	if(f->no != nil)
-		ipmuxfree(f->no);
-	if(f->yes != nil)
-		ipmuxfree(f->yes);
+	ipmuxfree(f->no);
+	ipmuxfree(f->yes);
 	ipmuxfree(f);
 }
 
@@ -510,6 +464,8 @@
 		return ipmuxremove(&ft->no, f);
 	}
 
+	ipmuxremove(&ft->no, f->no);
+
 	/* we found a match */
 	if(--(ft->ref) == 0){
 		/*
@@ -531,8 +487,55 @@
 }
 
 /*
+ * convert to ipv4 filter
+ */
+static Ipmux*
+ipmuxconv4(Ipmux *f)
+{
+	int i, n;
+
+	if(f == nil)
+		return nil;
+
+	switch(f->type){
+	case Tproto:
+		f->off = offsetof(Ip4hdr, proto);
+		break;
+	case Tdst:
+		f->off = offsetof(Ip4hdr, dst[0]);
+		if(0){
+	case Tsrc:
+		f->off = offsetof(Ip4hdr, src[0]);
+		}
+		if(f->len != IPaddrlen)
+			break;
+		n = 0;
+		for(i = 0; i < f->n; i++){
+			if(isv4(f->val + i*IPaddrlen)){
+				memmove(f->val + n*IPv4addrlen, f->val + i*IPaddrlen + IPv4off, IPv4addrlen);
+				n++;
+			}
+		}
+		if(n == 0){
+			ipmuxtreefree(f);
+			return nil;
+		}
+		f->n = n;
+		f->len = IPv4addrlen;
+		if(f->mask != nil)
+			memmove(f->mask, f->mask+IPv4off, IPv4addrlen);
+	}
+	f->e = f->val + f->n*f->len;
+
+	f->yes = ipmuxconv4(f->yes);
+	f->no = ipmuxconv4(f->no);
+
+	return f;
+}
+
+/*
  *  connection request is a semi separated list of filters
- *  e.g. proto=17;dat[0:4]=11aa22bb;ifc=135.104.9.2&255.255.255.0
+ *  e.g. ver=4;proto=17;data[0:4]=11aa22bb;ifc=135.104.9.2&255.255.255.0
  *
  *  there's no protection against overlapping specs.
  */
@@ -568,6 +571,18 @@
 		return Ebadarg;
 	mux->conv = c;
 
+	if(chain->type != Tver) {
+		char ver6[] = "ver=6";
+		mux = parsemux(ver6);
+		mux->yes = chain;
+		mux->no = ipmuxcopy(chain);
+		chain = mux;
+	}
+	if(*chain->val == IP_VER4)
+		chain->yes = ipmuxconv4(chain->yes);
+	else
+		chain->no = ipmuxconv4(chain->no);
+
 	/* save a copy of the chain so we can later remove it */
 	mux = ipmuxcopy(chain);
 	r = (Ipmuxrock*)(c->ptcl);
@@ -642,95 +657,84 @@
 	Block *bp;
 
 	bp = qget(c->wq);
-	if(bp == nil)
-		return;
-	else {
+	if(bp != nil) {
 		Ip4hdr *ih4 = (Ip4hdr*)(bp->rp);
-		if((ih4->vihl)&0xF0 != 0x60)
+
+		if((ih4->vihl & 0xF0) != IP_VER6)
 			ipoput4(c->p->f, bp, 0, ih4->ttl, ih4->tos, nil);
-		else {
-			Ip6hdr *ih6 = (Ip6hdr*)(bp->rp);
-			ipoput6(c->p->f, bp, 0, ih6->ttl, 0, nil);
-		}
+		else
+			ipoput6(c->p->f, bp, 0, ((Ip6hdr*)ih4)->ttl, 0, nil);
 	}
 }
 
+static int
+maskmemcmp(uchar *m, uchar *v, uchar *c, int n)
+{
+	int i;
+
+	if(m == nil)
+		return memcmp(v, c, n) != 0;
+
+	for(i = 0; i < n; i++)
+		if((v[i] & m[i]) != c[i])
+			return 1;
+	return 0;
+}
+
 static void
 ipmuxiput(Proto *p, Ipifc *ifc, Block *bp)
 {
-	int len, hl;
 	Fs *f = p->f;
-	uchar *m, *h, *v, *e, *ve, *hp;
 	Conv *c;
+	Iplifc *lifc;
 	Ipmux *mux;
-	Ip4hdr *ip;
+	uchar *v;
+	Ip4hdr *ip4;
 	Ip6hdr *ip6;
+	int off, hl;
 
-	ip = (Ip4hdr*)bp->rp;
-	hl = (ip->vihl&0x0F)<<2;
+	ip4 = (Ip4hdr*)bp->rp;
+	if((ip4->vihl & 0xF0) == IP_VER4) {
+		hl = (ip4->vihl&0x0F)<<2;
+		ip6 = nil;
+	} else {
+		hl = IP6HDR;
+		ip6 = (Ip6hdr*)ip4;
+	}
 
 	if(p->priv == nil)
 		goto nomatch;
 
-	h = bp->rp;
-	len = BLEN(bp);
+	c = nil;
+	lifc = nil;
 
-	/* run the v4 filter */
+	/* run the filter */
 	rlock(f);
-	c = nil;
 	mux = f->ipmux->priv;
 	while(mux != nil){
-		if(mux->eoff > len){
-			mux = mux->no;
-			continue;
-		}
-		hp = h + mux->off + ((int)mux->skiphdr)*hl;
-		switch(mux->ctype){
-		case Cbyte:
-			if(*mux->val == *hp)
-				goto yes;
+		switch(mux->type){
+		case Tifc:
+			if(mux->len != IPaddrlen)
+				goto no;
+			for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next)
+				for(v = mux->val; v < mux->e; v += IPaddrlen)
+					if(maskmemcmp(mux->mask, lifc->local, v, IPaddrlen) == 0)
+						goto yes;
+			goto no;
+		case Tdata:
+			off = hl;
 			break;
-		case Cmbyte:
-			if((*hp & *mux->mask) == *mux->val)
-				goto yes;
-			break;
-		case Cshort:
-			if(*((ushort*)mux->val) == *(ushort*)hp)
-				goto yes;
-			break;
-		case Cmshort:
-			if((*(ushort*)hp & (*((ushort*)mux->mask))) == *((ushort*)mux->val))
-				goto yes;
-			break;
-		case Clong:
-			if(*((ulong*)mux->val) == *(ulong*)hp)
-				goto yes;
-			break;
-		case Cmlong:
-			if((*(ulong*)hp & (*((ulong*)mux->mask))) == *((ulong*)mux->val))
-				goto yes;
-			break;
-		case Cifc:
-			if(*((ulong*)mux->val) == *(ulong*)(ifc->lifc->local + IPv4off))
-				goto yes;
-			break;
-		case Cmifc:
-			if((*(ulong*)(ifc->lifc->local + IPv4off) & (*((ulong*)mux->mask))) == *((ulong*)mux->val))
-				goto yes;
-			break;
 		default:
-			v = mux->val;
-			for(e = mux->e; v < e; v = ve){
-				m = mux->mask;
-				hp = h + mux->off;
-				for(ve = v + mux->len; v < ve; v++){
-					if((*hp++ & *m++) != *v)
-						break;
-				}
-				if(v == ve)
-					goto yes;
-			}
+			off = 0;
+			break;
 		}
+		off += mux->off;
+		if(off < 0 || off + mux->len > BLEN(bp))
+			goto no;
+		for(v = mux->val; v < mux->e; v += mux->len)
+			if(maskmemcmp(mux->mask, bp->rp + off, v, mux->len) == 0)
+				goto yes;
+no:
 		mux = mux->no;
 		continue;
 yes:
@@ -743,28 +747,24 @@
 	if(c != nil){
 		/* tack on interface address */
 		bp = padblock(bp, IPaddrlen);
-		ipmove(bp->rp, ifc->lifc->local);
-		bp = concatblock(bp);
-		if(bp != nil)
-			if(qpass(c->rq, bp) < 0)
-				print("Q");
+		if(lifc == nil)
+			lifc = ifc->lifc;
+		ipmove(bp->rp, lifc != nil ? lifc->local : IPnoaddr);
+		qpass(c->rq, concatblock(bp));
 		return;
 	}
 
 nomatch:
 	/* doesn't match any filter, hand it to the specific protocol handler */
-	ip = (Ip4hdr*)bp->rp;
-	if((ip->vihl&0xF0)==0x40) {
-		p = f->t2p[ip->proto];
-	} else {
-		ip6 = (Ip6hdr*)bp->rp;
+	if(ip6 != nil)
 		p = f->t2p[ip6->proto];
-	}
-	if(p && p->rcv)
-		(*p->rcv)(p, ifc, bp);
 	else
-		freeblist(bp);
-	return;
+		p = f->t2p[ip4->proto];
+	if(p != nil && p->rcv != nil){
+		(*p->rcv)(p, ifc, bp);
+		return;
+	}
+	freeblist(bp);
 }
 
 static int
@@ -780,11 +780,14 @@
 		n += snprint(buf+n, len-n, "\n");
 		return n;
 	}
-	n += snprint(buf+n, len-n, "h[%d:%d]&", 
-               mux->off+((int)mux->skiphdr)*((int)offsetof(Ip4hdr, data[0])), 
-               mux->off+(((int)mux->skiphdr)*((int)offsetof(Ip4hdr, data[0])))+mux->len-1);
-	for(i = 0; i < mux->len; i++)
-		n += snprint(buf+n, len - n, "%2.2ux", mux->mask[i]);
+	n += snprint(buf+n, len-n, "%s[%d:%d]", 
+		mux->type == Tdata ? "data": "iph",
+		mux->off, mux->off+mux->len-1);
+	if(mux->mask != nil){
+		n += snprint(buf+n, len-n, "&");
+		for(i = 0; i < mux->len; i++)
+			n += snprint(buf+n, len - n, "%2.2ux", mux->mask[i]);
+	}
 	n += snprint(buf+n, len-n, "=");
 	v = mux->val;
 	for(j = 0; j < mux->n; j++){
--- a/os/ip/iproute.c
+++ b/os/ip/iproute.c
@@ -12,10 +12,10 @@
 static void	calcd(Route*);
 
 /* these are used for all instances of IP */
-Route*	v4freelist;
-Route*	v6freelist;
-RWlock	routelock;
-ulong	v4routegeneration, v6routegeneration;
+static Route*	v4freelist;
+static Route*	v6freelist;
+static RWlock	routelock;
+static ulong	v4routegeneration, v6routegeneration;
 
 static void
 freeroute(Route *r)
@@ -22,6 +22,7 @@
 {
 	Route **l;
 
+	r->ref = 0;
 	r->left = nil;
 	r->right = nil;
 	if(r->type & Rv4)
@@ -35,9 +36,8 @@
 static Route*
 allocroute(int type)
 {
-	Route *r;
+	Route *r, **l;
 	int n;
-	Route **l;
 
 	if(type & Rv4){
 		n = sizeof(RouteTree) + sizeof(V4route);
@@ -72,9 +72,9 @@
 		return;
 
 	l = allocroute(r->type);
+	l->left = r;
 	l->mid = *q;
 	*q = l;
-	l->left = r;
 }
 
 /*
@@ -99,11 +99,11 @@
  */
 enum
 {
-	Rpreceeds,
-	Rfollows,
-	Requals,
-	Rcontains,
-	Rcontained,
+	Rpreceeds,	/* a left of b */
+	Rfollows,	/* a right of b */
+	Requals,	/* a equals b */
+	Rcontains,	/* a contains b */
+	Roverlaps,	/* a overlaps b */
 };
 
 static int
@@ -112,44 +112,88 @@
 	if(a->type & Rv4){
 		if(a->v4.endaddress < b->v4.address)
 			return Rpreceeds;
-
 		if(a->v4.address > b->v4.endaddress)
 			return Rfollows;
-
 		if(a->v4.address <= b->v4.address
 		&& a->v4.endaddress >= b->v4.endaddress){
 			if(a->v4.address == b->v4.address
-			&& a->v4.endaddress == b->v4.endaddress)
-				return Requals;
+			&& a->v4.endaddress == b->v4.endaddress){
+				if(a->v4.source <= b->v4.source
+				&& a->v4.endsource >= b->v4.endsource){
+					if(a->v4.source == b->v4.source
+					&& a->v4.endsource == b->v4.endsource)
+						return Requals;
+					return Rcontains;
+				}
+				return Roverlaps;
+			}
 			return Rcontains;
 		}
-		return Rcontained;
+		return Roverlaps;
 	}
 
 	if(lcmp(a->v6.endaddress, b->v6.address) < 0)
 		return Rpreceeds;
-
 	if(lcmp(a->v6.address, b->v6.endaddress) > 0)
 		return Rfollows;
-
 	if(lcmp(a->v6.address, b->v6.address) <= 0
 	&& lcmp(a->v6.endaddress, b->v6.endaddress) >= 0){
 		if(lcmp(a->v6.address, b->v6.address) == 0
-		&& lcmp(a->v6.endaddress, b->v6.endaddress) == 0)
-				return Requals;
+		&& lcmp(a->v6.endaddress, b->v6.endaddress) == 0){
+			if(lcmp(a->v6.source, b->v6.source) <= 0
+			&& lcmp(a->v6.endsource, b->v6.endsource) >= 0){
+				if(lcmp(a->v6.source, b->v6.source) == 0
+				&& lcmp(a->v6.endsource, b->v6.endsource) == 0)
+					return Requals;
+				return Rcontains;
+			}
+			return Roverlaps;
+		}
 		return Rcontains;
 	}
+	return Roverlaps;
+}
 
-	return Rcontained;
+/* return 1 if a matches b, otherwise 0 */
+static int
+matchroute(Route *a, Route *b)
+{
+	if(a == b)
+		return 1;
+
+	if((a->type^b->type) & (Rifc|Runi|Rmulti|Rbcast))
+		return 0;
+
+	if(a->type & Rv4){
+		if(memcmp(a->v4.gate, IPnoaddr+IPv4off, IPv4addrlen) != 0
+		&& memcmp(a->v4.gate, b->v4.gate, IPv4addrlen) != 0)
+			return 0;
+	} else {
+		if(ipcmp(a->v6.gate, IPnoaddr) != 0
+		&& ipcmp(a->v6.gate, b->v6.gate) != 0)
+			return 0;
+	}
+
+	if(a->ifc != nil && b->ifc != nil && (a->ifc != b->ifc || a->ifcid != b->ifcid))
+		return 0;
+
+	if(*a->tag != 0 && strncmp(a->tag, b->tag, sizeof(a->tag)) != 0)
+		return 0;
+
+	return 1;
 }
 
 static void
 copygate(Route *old, Route *new)
 {
+	old->type = new->type;
+	old->ifc = new->ifc;
+	old->ifcid = new->ifcid;
 	if(new->type & Rv4)
 		memmove(old->v4.gate, new->v4.gate, IPv4addrlen);
 	else
-		memmove(old->v6.gate, new->v6.gate, IPaddrlen);
+		ipmove(old->v6.gate, new->v6.gate);
+	strncpy(old->tag, new->tag, sizeof(new->tag));
 }
 
 /*
@@ -162,12 +206,12 @@
 
 	l = p->left;
 	r = p->right;
-	p->left = 0;
-	p->right = 0;
+	p->left = nil;
+	p->right = nil;
 	addnode(f, root, p);
-	if(l)
+	if(l != nil)
 		walkadd(f, root, l);
-	if(r)
+	if(r != nil)
 		walkadd(f, root, r);
 }
 
@@ -180,16 +224,16 @@
 	Route *q;
 	int d;
 
-	if(p) {
+	if(p != nil) {
 		d = 0;
 		q = p->left;
-		if(q)
+		if(q != nil)
 			d = q->depth;
 		q = p->right;
-		if(q && q->depth > d)
+		if(q != nil && q->depth > d)
 			d = q->depth;
 		q = p->mid;
-		if(q && q->depth > d)
+		if(q != nil && q->depth > d)
 			d = q->depth;
 		p->depth = d+1;
 	}
@@ -210,8 +254,8 @@
 	 * rotate tree node
 	 */
 	p = *cur;
-	dl = 0; if(l = p->left) dl = l->depth;
-	dr = 0; if(r = p->right) dr = r->depth;
+	dl = 0; if((l = p->left) != nil) dl = l->depth;
+	dr = 0; if((r = p->right) != nil) dr = r->depth;
 
 	if(dl > dr+1) {
 		p->left = l->right;
@@ -239,7 +283,7 @@
 	Route *p;
 
 	p = *cur;
-	if(p == 0) {
+	if(p == nil) {
 		*cur = new;
 		new->depth = 1;
 		return;
@@ -269,15 +313,13 @@
 		 *  supercede the old entry if the old one isn't
 		 *  a local interface.
 		 */
-		if((p->type & Rifc) == 0){
-			p->type = new->type;
-			p->ifcid = -1;
+		if((p->type & Rifc) == 0)
 			copygate(p, new);
-		} else if(new->type & Rifc)
+		else if(new->type & Rifc)
 			p->ref++;
 		freeroute(new);
 		break;
-	case Rcontained:
+	case Roverlaps:
 		addnode(f, &p->mid, new);
 		break;
 	}
@@ -285,270 +327,477 @@
 	balancetree(cur);
 }
 
-#define	V4H(a)	((a&0x07ffffff)>>(32-Lroot-5))
-
-void
-v4addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type)
+/*
+ *  find node matching r
+ */
+static Route**
+looknode(Route **cur, Route *r)
 {
 	Route *p;
-	ulong sa;
-	ulong m;
-	ulong ea;
-	int h, eh;
 
-	m = nhgetl(mask);
-	sa = nhgetl(a) & m;
-	ea = sa | ~m;
+	for(;;){
+		p = *cur;
+		if(p == nil)
+			return nil;
+		switch(rangecompare(r, p)){
+		case Rcontains:
+			return nil;
+		case Rpreceeds:
+			cur = &p->left;
+			break;
+		case Rfollows:
+			cur = &p->right;
+			break;
+		case Roverlaps:
+			cur = &p->mid;
+			break;
+		case Requals:
+			if((p->type & Rifc) == 0 && !matchroute(r, p))
+				return nil;
+			return cur;
+		}
+	}
+}
 
-	eh = V4H(ea);
-	for(h=V4H(sa); h<=eh; h++) {
-		p = allocroute(Rv4 | type);
-		p->v4.address = sa;
-		p->v4.endaddress = ea;
-		memmove(p->v4.gate, gate, sizeof(p->v4.gate));
-		memmove(p->tag, tag, sizeof(p->tag));
+static Route*
+looknodetag(Route *r, char *tag)
+{
+	Route *x;
 
-		wlock(&routelock);
-		addnode(f, &f->v4root[h], p);
-		while(p = f->queue) {
-			f->queue = p->mid;
-			walkadd(f, &f->v4root[h], p->left);
-			freeroute(p);
-		}
-		wunlock(&routelock);
+	if(r == nil)
+		return nil;
+
+	if((x = looknodetag(r->mid, tag)) != nil)
+		return x;
+	if((x = looknodetag(r->left, tag)) != nil)
+		return x;
+	if((x = looknodetag(r->right, tag)) != nil)
+		return x;
+
+	if((r->type & Rifc) == 0){
+		if(tag == nil || strncmp(tag, r->tag, sizeof(r->tag)) == 0)
+			return r;
 	}
-	v4routegeneration++;
 
-	ipifcaddroute(f, Rv4, a, mask, gate, type);
+	return nil;
 }
 
-#define	V6H(a)	(((a)[IPllen-1] & 0x07ffffff)>>(32-Lroot-5))
-#define ISDFLT(a, mask, tag) ((ipcmp((a),v6Unspecified)==0) && (ipcmp((mask),v6Unspecified)==0) && (strcmp((tag), "ra")!=0))
+#define	V4H(a)	((a&0x07ffffff)>>(32-Lroot-5))
+#define	V6H(a)	(((a)[IPllen-1]&0x07ffffff)>>(32-Lroot-5))
 
-void
-v6addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type)
+static void
+routeadd(Fs *f, Route *r)
 {
-	Route *p;
-	ulong sa[IPllen], ea[IPllen];
-	ulong x, y;
-	int h, eh;
+	Route **h, **e, *p;
 
-	/*
-	if(ISDFLT(a, mask, tag))
-		f->v6p->cdrouter = -1;
-	*/
+	if(r->type & Rv4){
+		h = &f->v4root[V4H(r->v4.address)];
+		e = &f->v4root[V4H(r->v4.endaddress)];
+	} else {
+		h = &f->v6root[V6H(r->v6.address)];
+		e = &f->v6root[V6H(r->v6.endaddress)];
+	}
 
+	for(; h <= e; h++) {
+		p = allocroute(r->type);
 
-	for(h = 0; h < IPllen; h++){
-		x = nhgetl(a+4*h);
-		y = nhgetl(mask+4*h);
-		sa[h] = x & y;
-		ea[h] = x | ~y;
-	}
+		p->ifc = r->ifc;
+		p->ifcid = r->ifcid;
 
-	eh = V6H(ea);
-	for(h = V6H(sa); h <= eh; h++) {
-		p = allocroute(type);
-		memmove(p->v6.address, sa, IPaddrlen);
-		memmove(p->v6.endaddress, ea, IPaddrlen);
-		memmove(p->v6.gate, gate, IPaddrlen);
-		memmove(p->tag, tag, sizeof(p->tag));
+		if(r->type & Rv4)
+			memmove(&p->v4, &r->v4, sizeof(r->v4));
+		else
+			memmove(&p->v6, &r->v6, sizeof(r->v6));
 
-		wlock(&routelock);
-		addnode(f, &f->v6root[h], p);
-		while(p = f->queue) {
+		memmove(p->tag, r->tag, sizeof(r->tag));
+
+		addnode(f, h, p);
+		while((p = f->queue) != nil) {
 			f->queue = p->mid;
-			walkadd(f, &f->v6root[h], p->left);
+			walkadd(f, h, p->left);
 			freeroute(p);
 		}
-		wunlock(&routelock);
 	}
-	v6routegeneration++;
 
-	ipifcaddroute(f, 0, a, mask, gate, type);
+	if(r->type & Rv4)
+		v4routegeneration++;
+	else
+		v6routegeneration++;
 }
 
-Route**
-looknode(Route **cur, Route *r)
+static void
+routerem(Fs *f, Route *r)
 {
-	Route *p;
+	Route **h, **e, **l, *p;
 
-	for(;;){
-		p = *cur;
-		if(p == 0)
-			return 0;
-	
-		switch(rangecompare(r, p)){
-		case Rcontains:
-			return 0;
-		case Rpreceeds:
-			cur = &p->left;
-			break;
-		case Rfollows:
-			cur = &p->right;
-			break;
-		case Rcontained:
-			cur = &p->mid;
-			break;
-		case Requals:
-			return cur;
+	if(r->type & Rv4){
+		h = &f->v4root[V4H(r->v4.address)];
+		e = &f->v4root[V4H(r->v4.endaddress)];
+	} else {
+		h = &f->v6root[V6H(r->v6.address)];
+		e = &f->v6root[V6H(r->v6.endaddress)];
+	}
+
+	for(; h <= e; h++) {
+		if((l = looknode(h, r)) == nil)
+			continue;
+		p = *l;
+		if(--(p->ref) != 0)
+			continue;
+		*l = nil;
+		addqueue(&f->queue, p->left);
+		addqueue(&f->queue, p->mid);
+		addqueue(&f->queue, p->right);
+		freeroute(p);
+
+		while((p = f->queue) != nil) {
+			f->queue = p->mid;
+			walkadd(f, h, p->left);
+			freeroute(p);
 		}
 	}
+
+	if(r->type & Rv4)
+		v4routegeneration++;
+	else
+		v6routegeneration++;
 }
 
-void
-v4delroute(Fs *f, uchar *a, uchar *mask, int dolock)
+static Route
+mkroute(uchar *a, uchar *mask, uchar *s, uchar *smask, uchar *gate, int type, Ipifc *ifc, char *tag)
 {
-	Route **r, *p;
-	Route rt;
-	int h, eh;
-	ulong m;
+	ulong x, y;
+	Route r;
+	int h;
 
-	m = nhgetl(mask);
-	rt.v4.address = nhgetl(a) & m;
-	rt.v4.endaddress = rt.v4.address | ~m;
-	rt.type = Rv4;
+	memset(&r, 0, sizeof(r));
 
-	eh = V4H(rt.v4.endaddress);
-	for(h=V4H(rt.v4.address); h<=eh; h++) {
-		if(dolock)
-			wlock(&routelock);
-		r = looknode(&f->v4root[h], &rt);
-		if(r) {
-			p = *r;
-			if(--(p->ref) == 0){
-				*r = 0;
-				addqueue(&f->queue, p->left);
-				addqueue(&f->queue, p->mid);
-				addqueue(&f->queue, p->right);
-				freeroute(p);
-				while(p = f->queue) {
-					f->queue = p->mid;
-					walkadd(f, &f->v4root[h], p->left);
-					freeroute(p);
-				}
-			}
+	r.type = type;
+
+	if(type & Rv4){
+		x = nhgetl(a+IPv4off);
+		y = nhgetl(mask+IPv4off);
+		r.v4.address = x & y;
+		r.v4.endaddress = x | ~y;
+
+		x = nhgetl(s+IPv4off);
+		y = nhgetl(smask+IPv4off);
+		if(y != 0)
+			r.type |= Rsrc;
+		r.v4.source = x & y;
+		r.v4.endsource = x | ~y;
+
+		memmove(r.v4.gate, gate+IPv4off, IPv4addrlen);
+	} else {
+		for(h = 0; h < IPllen; h++){
+			x = nhgetl(a+4*h);
+			y = nhgetl(mask+4*h);
+			r.v6.address[h] = x & y;
+			r.v6.endaddress[h] = x | ~y;
+
+			x = nhgetl(s+4*h);
+			y = nhgetl(smask+4*h);
+			if(y != 0)
+				r.type |= Rsrc;
+			r.v6.source[h] = x & y;
+			r.v6.endsource[h] = x | ~y;
 		}
-		if(dolock)
-			wunlock(&routelock);
+
+		memmove(r.v6.gate, gate, IPaddrlen);
 	}
-	v4routegeneration++;
 
-	ipifcremroute(f, Rv4, a, mask);
+	if(ifc != nil){
+		r.ifc = ifc;
+		r.ifcid = ifc->ifcid;
+	}
+
+	if(tag != nil)
+		strncpy(r.tag, tag, sizeof(r.tag));
+
+	return r;
 }
 
 void
-v6delroute(Fs *f, uchar *a, uchar *mask, int dolock)
+addroute(Fs *f, uchar *a, uchar *mask, uchar *s, uchar *smask, uchar *gate, int type, Ipifc *ifc, char *tag)
 {
-	Route **r, *p;
-	Route rt;
-	int h, eh;
-	ulong x, y;
+	Route r = mkroute(a, mask, s, smask, gate, type, ifc, tag);
+	wlock(&routelock);
+	routeadd(f, &r);
+	wunlock(&routelock);
+}
 
-	for(h = 0; h < IPllen; h++){
-		x = nhgetl(a+4*h);
-		y = nhgetl(mask+4*h);
-		rt.v6.address[h] = x & y;
-		rt.v6.endaddress[h] = x | ~y;
+void
+remroute(Fs *f, uchar *a, uchar *mask, uchar *s, uchar *smask, uchar *gate, int type, Ipifc *ifc, char *tag)
+{
+	Route r = mkroute(a, mask, s, smask, gate, type, ifc, tag);
+	wlock(&routelock);
+	routerem(f, &r);
+	wunlock(&routelock);
+}
+
+/* get the outgoing interface for route r */
+static Ipifc*
+routefindipifc(Route *r, Fs *f)
+{
+	uchar local[IPaddrlen], gate[IPaddrlen];
+	Ipifc *ifc;
+	int i;
+
+	ifc = r->ifc;
+	if(ifc != nil && ifc->ifcid == r->ifcid)
+		return ifc;
+
+	if(r->type & Rsrc) {
+		if(r->type & Rv4) {
+			hnputl(local+IPv4off, r->v4.source);
+			memmove(local, v4prefix, IPv4off);
+		} else {
+			for(i = 0; i < IPllen; i++)
+				hnputl(local+4*i, r->v6.source[i]);
+		}
+	} else {
+		ipmove(local, IPnoaddr);
 	}
-	rt.type = 0;
 
-	eh = V6H(rt.v6.endaddress);
-	for(h=V6H(rt.v6.address); h<=eh; h++) {
-		if(dolock)
-			wlock(&routelock);
-		r = looknode(&f->v6root[h], &rt);
-		if(r) {
-			p = *r;
-			if(--(p->ref) == 0){
-				*r = 0;
-				addqueue(&f->queue, p->left);
-				addqueue(&f->queue, p->mid);
-				addqueue(&f->queue, p->right);
-				freeroute(p);
-				while(p = f->queue) {
-					f->queue = p->mid;
-					walkadd(f, &f->v6root[h], p->left);
-					freeroute(p);
-				}
-			}
+	if(r->type & Rifc) {
+		if(r->type & Rv4) {
+			hnputl(gate+IPv4off, r->v4.address);
+			memmove(gate, v4prefix, IPv4off);
+		} else {
+			for(i = 0; i < IPllen; i++)
+				hnputl(gate+4*i, r->v6.address[i]);
 		}
-		if(dolock)
-			wunlock(&routelock);
+	} else {
+		if(r->type & Rv4)
+			v4tov6(gate, r->v4.gate);
+		else
+			ipmove(gate, r->v6.gate);
 	}
-	v6routegeneration++;
 
-	ipifcremroute(f, 0, a, mask);
+	if((ifc = findipifc(f, local, gate, r->type)) == nil)
+		return nil;
+
+	r->ifc = ifc;
+	r->ifcid = ifc->ifcid;
+	return ifc;
 }
 
+/*
+ * v4lookup, v6lookup:
+ *  lookup a route to destination address a from source address s
+ *  and return the route. returns nil if no route was found.
+ *  an optional Routehint can be passed in rh to cache the lookup.
+ *
+ *  for v4lookup, addresses are in 4 byte format.
+ */
 Route*
-v4lookup(Fs *f, uchar *a, Conv *c)
+v4lookup(Fs *f, uchar *a, uchar *s, Routehint *rh)
 {
+	ulong la, ls;
 	Route *p, *q;
-	ulong la;
-	uchar gate[IPaddrlen];
 	Ipifc *ifc;
 
-	if(c != nil && c->r != nil && c->r->ifc != nil && c->rgen == v4routegeneration)
-		return c->r;
+	if(rh != nil
+	&& rh->rgen == v4routegeneration
+	&& (q = rh->r) != nil
+	&& (ifc = q->ifc) != nil
+	&& q->ifcid == ifc->ifcid
+	&& q->ref > 0)
+		return q;
 
 	la = nhgetl(a);
+	ls = nhgetl(s);
 	q = nil;
-	for(p=f->v4root[V4H(la)]; p;)
-		if(la >= p->v4.address) {
-			if(la <= p->v4.endaddress) {
-				q = p;
+	for(p = f->v4root[V4H(la)]; p != nil;){
+		if(la < p->v4.address){
+			p = p->left;
+			continue;
+		}
+		if(la > p->v4.endaddress){
+			p = p->right;
+			continue;
+		}
+		if(p->type & Rsrc){
+			if(ls < p->v4.source){
 				p = p->mid;
-			} else
+				continue;
+			}
+			if(ls > p->v4.endsource){
+				p = p->mid;
+				continue;
+			}
+		}
+		q = p;
+		p = p->mid;
+	}
+
+	if(q == nil || q->ref == 0 || routefindipifc(q, f) == nil)
+		return nil;
+
+	if(rh != nil){
+		rh->r = q;
+		rh->rgen = v4routegeneration;
+	}
+
+	return q;
+}
+
+Route*
+v6lookup(Fs *f, uchar *a, uchar *s, Routehint *rh)
+{
+	ulong la[IPllen], ls[IPllen];
+	ulong x, y;
+	Route *p, *q;
+	Ipifc *ifc;
+	int h;
+
+	if(isv4(s)){
+		if(isv4(a))
+			return v4lookup(f, a+IPv4off, s+IPv4off, rh);
+		return nil;
+	}
+
+	if(rh != nil
+	&& rh->rgen == v6routegeneration
+	&& (q = rh->r) != nil
+	&& (ifc = q->ifc) != nil
+	&& q->ifcid == ifc->ifcid
+	&& q->ref > 0)
+		return q;
+
+	for(h = 0; h < IPllen; h++){
+		la[h] = nhgetl(a+4*h);
+		ls[h] = nhgetl(s+4*h);
+	}
+
+	q = nil;
+	for(p = f->v6root[V6H(la)]; p != nil;){
+		for(h = 0; h < IPllen; h++){
+			x = la[h];
+			y = p->v6.address[h];
+			if(x == y)
+				continue;
+			if(x < y){
+				p = p->left;
+				goto next;
+			}
+			break;
+		}
+		for(h = 0; h < IPllen; h++){
+			x = la[h];
+			y = p->v6.endaddress[h];
+			if(x == y)
+				continue;
+			if(x > y){
 				p = p->right;
-		} else
-			p = p->left;
-
-	if(q && (q->ifc == nil || q->ifcid != q->ifc->ifcid)){
-		if(q->type & Rifc) {
-			hnputl(gate+IPv4off, q->v4.address);
-			memmove(gate, v4prefix, IPv4off);
-		} else
-			v4tov6(gate, q->v4.gate);
-		ifc = findipifc(f, gate, q->type);
-		if(ifc == nil)
-			return nil;
-		q->ifc = ifc;
-		q->ifcid = ifc->ifcid;
+				goto next;
+			}
+			break;
+		}
+		if(p->type & Rsrc){
+			for(h = 0; h < IPllen; h++){
+				x = ls[h];
+				y = p->v6.source[h];
+				if(x == y)
+					continue;
+				if(x < y){
+					p = p->mid;
+					goto next;
+				}
+				break;
+			}
+			for(h = 0; h < IPllen; h++){
+				x = ls[h];
+				y = p->v6.endsource[h];
+				if(x == y)
+					continue;
+				if(x > y){
+					p = p->mid;
+					goto next;
+				}
+				break;
+			}
+		}
+		q = p;
+		p = p->mid;
+next:		;
 	}
 
-	if(c != nil){
-		c->r = q;
-		c->rgen = v4routegeneration;
-	}
+	if(q == nil || q->ref == 0 || routefindipifc(q, f) == nil)
+		return nil;
 
+	if(rh != nil){
+		rh->r = q;
+		rh->rgen = v6routegeneration;
+	}
+	
 	return q;
 }
 
+/*
+ * v4source, v6source:
+ *  lookup a route to destination address a and also find
+ *  a suitable source address s on the outgoing interface.
+ *  return the route on success or nil when no route
+ *  was found.
+ *
+ *  for v4source, addresses are in 4 byte format.
+ */
 Route*
-v6lookup(Fs *f, uchar *a, Conv *c)
+v4source(Fs *f, uchar *a, uchar *s)
 {
+	uchar src[IPv4addrlen];
+	int splen;
+	ulong x, la;
 	Route *p, *q;
-	ulong la[IPllen];
-	int h;
-	ulong x, y;
-	uchar gate[IPaddrlen];
 	Ipifc *ifc;
 
-	if(memcmp(a, v4prefix, IPv4off) == 0){
-		q = v4lookup(f, a+IPv4off, c);
-		if(q != nil)
-			return q;
+	q = nil;
+	la = nhgetl(a);
+	rlock(&routelock);
+	for(p = f->v4root[V4H(la)]; p != nil;){
+		if(la < p->v4.address){
+			p = p->left;
+			continue;
+		}
+		if(la > p->v4.endaddress){
+			p = p->right;
+			continue;
+		}
+		splen = 0;
+		if(p->type & Rsrc){
+			/* calculate local prefix length for source specific routes */
+			for(x = ~(p->v4.endsource ^ p->v4.source); x & 0x80000000UL; x <<= 1)
+				splen++;
+			hnputl(src, p->v4.source);
+		}
+		if((ifc = routefindipifc(p, f)) == nil
+		|| !ipv4local(ifc, src, splen, (p->type & (Rifc|Rbcast|Rmulti|Rv4))==Rv4? p->v4.gate: a)){
+			p = p->mid;
+			continue;
+		}
+		memmove(s, src, IPv4addrlen);
+		q = p;
+		p = p->mid;
 	}
+	runlock(&routelock);
+	return q;
+}
 
-	if(c != nil && c->r != nil && c->r->ifc != nil && c->rgen == v6routegeneration)
-		return c->r;
+Route*
+v6source(Fs *f, uchar *a, uchar *s)
+{
+	uchar src[IPaddrlen];
+	int splen, h;
+	ulong x, y, la[IPllen];
+	Route *p, *q;
+	Ipifc *ifc;
 
+	q = nil;
 	for(h = 0; h < IPllen; h++)
 		la[h] = nhgetl(a+4*h);
-
-	q = 0;
-	for(p=f->v6root[V6H(la)]; p;){
+	rlock(&routelock);
+	for(p = f->v6root[V6H(la)]; p != nil;){
 		for(h = 0; h < IPllen; h++){
 			x = la[h];
 			y = p->v6.address[h];
@@ -571,42 +820,76 @@
 			}
 			break;
 		}
+		splen = 0;
+		if(p->type & Rsrc){
+			/* calculate local prefix length for source specific routes */
+			for(h = 0; h < IPllen; h++){
+				hnputl(src+4*h, p->v6.source[h]);
+				if((x = ~(p->v6.endsource[h] ^ p->v6.source[h])) != ~0UL){
+					for(; x & 0x80000000UL; x <<= 1)
+						splen++;
+					break;
+				}
+				splen += 32;
+			}
+		}
+		if((ifc = routefindipifc(p, f)) == nil
+		|| !ipv6local(ifc, src, splen, a)){
+			p = p->mid;
+			continue;
+		}
+		ipmove(s, src);
 		q = p;
 		p = p->mid;
 next:		;
 	}
+	runlock(&routelock);
+	return q;
+}
 
-	if(q && (q->ifc == nil || q->ifcid != q->ifc->ifcid)){
-		if(q->type & Rifc) {
-			for(h = 0; h < IPllen; h++)
-				hnputl(gate+4*h, q->v6.address[h]);
-			ifc = findipifc(f, gate, q->type);
-		} else
-			ifc = findipifc(f, q->v6.gate, q->type);
-		if(ifc == nil)
-			return nil;
-		q->ifc = ifc;
-		q->ifcid = ifc->ifcid;
+static int
+parseroutetype(char *p)
+{
+	int type = 0;
+	switch(*p++){
+	default:	return -1;	
+	case '4':	type |= Rv4;
+	case '6':	break;
 	}
-	if(c != nil){
-		c->r = q;
-		c->rgen = v6routegeneration;
+	for(;;) switch(*p++){
+	default: 
+		return -1;
+	case 'i':
+		if(((type ^= Rifc) & Rifc) != Rifc) return -1;
+		break;
+	case 'u':
+		if(((type ^= Runi) & (Runi|Rbcast|Rmulti)) != Runi) return -1;
+		break;
+	case 'b':
+		if(((type ^= Rbcast) & (Runi|Rbcast|Rmulti)) != Rbcast) return -1;
+		break;
+	case 'm':
+		if(((type ^= Rmulti) & (Runi|Rbcast|Rmulti)) != Rmulti) return -1;
+		break;
+	case 'p':
+		if(((type ^= Rptpt) & Rptpt) != Rptpt) return -1;
+		break;
+	case '\0':
+		return type;
 	}
-	
-	return q;
 }
 
 void
-routetype(int type, char *p)
+routetype(int type, char p[8])
 {
-	memset(p, ' ', 4);
-	p[4] = 0;
 	if(type & Rv4)
 		*p++ = '4';
 	else
 		*p++ = '6';
+
 	if(type & Rifc)
 		*p++ = 'i';
+
 	if(type & Runi)
 		*p++ = 'u';
 	else if(type & Rbcast)
@@ -613,14 +896,14 @@
 		*p++ = 'b';
 	else if(type & Rmulti)
 		*p++ = 'm';
+
 	if(type & Rptpt)
-		*p = 'p';
+		*p++ = 'p';
+	*p = 0;
 }
 
-char *rformat = "%-15I %-4M %-15I %4.4s %4.4s %3s\n";
-
-void
-convroute(Route *r, uchar *addr, uchar *mask, uchar *gate, char *t, int *nifc)
+static void
+convroute(Route *r, uchar *addr, uchar *mask, uchar *src, uchar *smask, uchar *gate)
 {
 	int i;
 
@@ -627,8 +910,16 @@
 	if(r->type & Rv4){
 		memmove(addr, v4prefix, IPv4off);
 		hnputl(addr+IPv4off, r->v4.address);
+
 		memset(mask, 0xff, IPv4off);
 		hnputl(mask+IPv4off, ~(r->v4.endaddress ^ r->v4.address));
+
+		memmove(src, v4prefix, IPv4off);
+		hnputl(src+IPv4off, r->v4.source);
+
+		memset(smask, 0xff, IPv4off);
+		hnputl(smask+IPv4off, ~(r->v4.endsource ^ r->v4.source));
+
 		memmove(gate, v4prefix, IPv4off);
 		memmove(gate+IPv4off, r->v4.gate, IPv4addrlen);
 	} else {
@@ -635,162 +926,186 @@
 		for(i = 0; i < IPllen; i++){
 			hnputl(addr + 4*i, r->v6.address[i]);
 			hnputl(mask + 4*i, ~(r->v6.endaddress[i] ^ r->v6.address[i]));
+			hnputl(src + 4*i, r->v6.source[i]);
+			hnputl(smask + 4*i, ~(r->v6.endsource[i] ^ r->v6.source[i]));
 		}
 		memmove(gate, r->v6.gate, IPaddrlen);
 	}
+}
 
-	routetype(r->type, t);
+static char*
+seprintroute(char *p, char *e, Route *r)
+{
+	uchar addr[IPaddrlen], mask[IPaddrlen], src[IPaddrlen], smask[IPaddrlen], gate[IPaddrlen];
+	char type[8], ifbuf[4], *iname;
 
-	if(r->ifc)
-		*nifc = r->ifc->conv->x;
+	convroute(r, addr, mask, src, smask, gate);
+	routetype(r->type, type);
+	if(r->ifc != nil && r->ifcid == r->ifc->ifcid)
+		snprint(iname = ifbuf, sizeof ifbuf, "%d", r->ifc->conv->x);
 	else
-		*nifc = -1;
+		iname = "-";
+	return seprint(p, e, "%-15I %-4M %-15I %-4s %4.4s %3s %-15I %-4M\n",
+		addr, mask, gate, type, r->tag, iname, src, smask);
 }
 
-/*
- *  this code is not in rr to reduce stack size
- */
-static void
-sprintroute(Route *r, Routewalk *rw)
+typedef struct Routewalk Routewalk;
+struct Routewalk
 {
-	int nifc, n;
-	char t[5], *iname, ifbuf[5];
-	uchar addr[IPaddrlen], mask[IPaddrlen], gate[IPaddrlen];
-	char *p;
+	int	o;
+	int	h;
+	char*	p;
+	char*	e;
+};
 
-	convroute(r, addr, mask, gate, t, &nifc);
-	iname = "-";
-	if(nifc != -1) {
-		iname = ifbuf;
-		snprint(ifbuf, sizeof ifbuf, "%d", nifc);
-	}
-	p = seprint(rw->p, rw->e, rformat, addr, mask, gate, t, r->tag, iname);
+static int
+rr1(Routewalk *rw, Route *r)
+{
+	int n = seprintroute(rw->p, rw->e, r) - rw->p;
 	if(rw->o < 0){
-		n = p - rw->p;
 		if(n > -rw->o){
-			memmove(rw->p, rw->p-rw->o, n+rw->o);
-			rw->p = p + rw->o;
+			memmove(rw->p, rw->p - rw->o, n + rw->o);
+			rw->p += n + rw->o;
 		}
 		rw->o += n;
 	} else
-		rw->p = p;
+		rw->p += n;
+	return rw->p < rw->e;
 }
 
-/*
- *  recurse descending tree, applying the function in Routewalk
- */
 static int
 rr(Route *r, Routewalk *rw)
 {
 	int h;
 
-	if(rw->e <= rw->p)
-		return 0;
 	if(r == nil)
 		return 1;
-
 	if(rr(r->left, rw) == 0)
 		return 0;
-
 	if(r->type & Rv4)
 		h = V4H(r->v4.address);
 	else
 		h = V6H(r->v6.address);
-
-	if(h == rw->h)
-		rw->walk(r, rw);
-
+	if(h == rw->h){
+		if(rr1(rw, r) == 0)
+			return 0;
+	}
 	if(rr(r->mid, rw) == 0)
 		return 0;
-
 	return rr(r->right, rw);
 }
 
-void
-ipwalkroutes(Fs *f, Routewalk *rw)
+long
+routeread(Fs *f, char *p, ulong offset, int n)
 {
+	Routewalk rw[1];
+
+	rw->p = p;
+	rw->e = p+n;
+	rw->o = -offset;
+	if(rw->o > 0)
+		return 0;
+
 	rlock(&routelock);
-	if(rw->e > rw->p) {
+	if(rw->p < rw->e) {
 		for(rw->h = 0; rw->h < nelem(f->v4root); rw->h++)
 			if(rr(f->v4root[rw->h], rw) == 0)
 				break;
 	}
-	if(rw->e > rw->p) {
+	if(rw->p < rw->e) {
 		for(rw->h = 0; rw->h < nelem(f->v6root); rw->h++)
 			if(rr(f->v6root[rw->h], rw) == 0)
 				break;
 	}
 	runlock(&routelock);
-}
 
-long
-routeread(Fs *f, char *p, ulong offset, int n)
-{
-	Routewalk rw;
-
-	rw.p = p;
-	rw.e = p+n;
-	rw.o = -offset;
-	rw.walk = sprintroute;
-
-	ipwalkroutes(f, &rw);
-
-	return rw.p - p;
+	return rw->p - p;
 }
 
 /*
- *  this code is not in routeflush to reduce stack size
+ *	4	add	addr	mask	gate
+ *	5	add	addr	mask	gate			ifc
+ *	6	add	addr	mask	gate				src	smask
+ *	7	add	addr	mask	gate			ifc	src	smask
+ *	8	add	addr	mask	gate		tag	ifc	src	smask
+ *	9	add	addr	mask	gate	type	tag	ifc	src	smask
+ *	3	remove	addr	mask
+ *	4	remove	addr	mask	gate
+ *	5	remove	addr	mask					src	smask
+ *	6	remove	addr	mask	gate				src	smask
+ *	7	remove	addr	mask	gate			ifc	src	smask
+ *	8	remove	addr	mask	gate		tag	ifc	src	smask
+ *	9	remove	addr	mask	gate	type	tag	ifc	src	smask
  */
-void
-delroute(Fs *f, Route *r, int dolock)
+static Route
+parseroute(Fs *f, char **argv, int argc)
 {
-	uchar addr[IPaddrlen];
-	uchar mask[IPaddrlen];
+	uchar addr[IPaddrlen], mask[IPaddrlen];
+	uchar src[IPaddrlen], smask[IPaddrlen];
 	uchar gate[IPaddrlen];
-	char t[5];
-	int nifc;
+	Ipifc *ifc;
+	char *tag;
+	int type;
 
-	convroute(r, addr, mask, gate, t, &nifc);
-	if(r->type & Rv4)
-		v4delroute(f, addr+IPv4off, mask+IPv4off, dolock);
-	else
-		v6delroute(f, addr, mask, dolock);
-}
+	type = 0;
+	tag = nil;
+	ifc = nil;
+	ipmove(gate, IPnoaddr);
+	ipmove(src, IPnoaddr);
+	ipmove(smask, IPnoaddr);
 
-/*
- *  recurse until one route is deleted
- *    returns 0 if nothing is deleted, 1 otherwise
- */
-int
-routeflush(Fs *f, Route *r, char *tag)
-{
-	if(r == nil)
-		return 0;
-	if(routeflush(f, r->mid, tag))
-		return 1;
-	if(routeflush(f, r->left, tag))
-		return 1;
-	if(routeflush(f, r->right, tag))
-		return 1;
-	if((r->type & Rifc) == 0){
-		if(tag == nil || strncmp(tag, r->tag, sizeof(r->tag)) == 0){
-			delroute(f, r, 0);
-			return 1;
-		}
+	if(argc < 3)
+		error(Ebadctl);
+	if(parseipandmask(addr, mask, argv[1], argv[2]) == -1)
+		error(Ebadip);
+
+	if(strcmp(argv[0], "add") == 0 || (argc > 3 && argc != 5)){
+		if(argc < 4)
+			error(Ebadctl);
+		if(parseip(gate, argv[3]) == -1)
+			error(Ebadip);
 	}
-	return 0;
+	if(argc > 4 && (strcmp(argv[0], "add") != 0 || argc != 5)){
+		if(parseipandmask(src, smask, argv[argc-2], argv[argc-1]) == -1)
+			error(Ebadip);
+	}
+	if(argc == 5 && strcmp(argv[0], "add") == 0)
+		ifc = findipifcstr(f, argv[4]);
+	if(argc > 6)
+		ifc = findipifcstr(f, argv[argc-3]);
+	if(argc > 7)
+		tag = argv[argc-4];
+	if(argc > 8){
+		if((type = parseroutetype(argv[argc-5])) < 0)
+			error(Ebadctl);
+	} else {
+		if(isv4(addr))
+			type |= Rv4;
+	}
+	if(argc > 9)
+		error(Ebadctl);
+
+	if(type & Rv4){
+		if(!isv4(addr))
+			error(Ebadip);
+		if(ipcmp(smask, IPnoaddr) != 0 && !isv4(src))
+			error(Ebadip);
+		if(ipcmp(gate, IPnoaddr) != 0 && !isv4(gate))
+			error(Ebadip);
+	} else {
+		if(isv4(addr))
+			error(Ebadip);
+	}
+
+	return mkroute(addr, mask, src, smask, gate, type, ifc, tag);	
 }
 
 long
 routewrite(Fs *f, Chan *c, char *p, int n)
 {
-	int h, changed;
-	char *tag;
 	Cmdbuf *cb;
-	uchar addr[IPaddrlen];
-	uchar mask[IPaddrlen];
-	uchar gate[IPaddrlen];
-	IPaux *a, *na;
+	IPaux *a;
+	Route *x, r;
 
 	cb = parsecmd(p, n);
 	if(waserror()){
@@ -797,54 +1112,44 @@
 		free(cb);
 		nexterror();
 	}
-
+	if(cb->nf < 1)
+		error("short control request");
 	if(strcmp(cb->f[0], "flush") == 0){
-		tag = cb->f[1];
+		char *tag = cb->nf < 2 ? nil : cb->f[1];
+		int h;
+
+		wlock(&routelock);
 		for(h = 0; h < nelem(f->v4root); h++)
-			for(changed = 1; changed;){
-				wlock(&routelock);
-				changed = routeflush(f, f->v4root[h], tag);
-				wunlock(&routelock);
+			while((x = looknodetag(f->v4root[h], tag)) != nil){
+				memmove(&r, x, sizeof(RouteTree) + sizeof(V4route));
+				routerem(f, &r);
 			}
 		for(h = 0; h < nelem(f->v6root); h++)
-			for(changed = 1; changed;){
-				wlock(&routelock);
-				changed = routeflush(f, f->v6root[h], tag);
-				wunlock(&routelock);
+			while((x = looknodetag(f->v6root[h], tag)) != nil){
+				memmove(&r, x, sizeof(RouteTree) + sizeof(V6route));
+				routerem(f, &r);
 			}
-	} else if(strcmp(cb->f[0], "remove") == 0){
-		if(cb->nf < 3)
-			error(Ebadarg);
-		parseip(addr, cb->f[1]);
-		parseipmask(mask, cb->f[2]);
-		if(memcmp(addr, v4prefix, IPv4off) == 0)
-			v4delroute(f, addr+IPv4off, mask+IPv4off, 1);
-		else
-			v6delroute(f, addr, mask, 1);
-	} else if(strcmp(cb->f[0], "add") == 0){
-		if(cb->nf < 4)
-			error(Ebadarg);
-		parseip(addr, cb->f[1]);
-		parseipmask(mask, cb->f[2]);
-		parseip(gate, cb->f[3]);
-		tag = "none";
-		if(c != nil){
+		wunlock(&routelock);
+	} else if(strcmp(cb->f[0], "add") == 0 || strcmp(cb->f[0], "remove") == 0){
+		r = parseroute(f, cb->f, cb->nf);
+		if(*r.tag == 0){
 			a = c->aux;
-			tag = a->tag;
+			strncpy(r.tag, a->tag, sizeof(r.tag));
 		}
-		if(memcmp(addr, v4prefix, IPv4off) == 0)
-			v4addroute(f, tag, addr+IPv4off, mask+IPv4off, gate+IPv4off, 0);
+		wlock(&routelock);
+		if(strcmp(cb->f[0], "add") == 0)
+			routeadd(f, &r);
 		else
-			v6addroute(f, tag, addr, mask, gate, 0);
+			routerem(f, &r);
+		wunlock(&routelock);
 	} else if(strcmp(cb->f[0], "tag") == 0) {
 		if(cb->nf < 2)
 			error(Ebadarg);
-
 		a = c->aux;
-		na = newipaux(a->owner, cb->f[1]);
-		c->aux = na;
+		c->aux = newipaux(a->owner, cb->f[1]);
 		free(a);
-	}
+	} else
+		error(Ebadctl);
 
 	poperror();
 	free(cb);
--- a/os/ip/iprouter.c
+++ /dev/null
@@ -1,56 +1,0 @@
-#include	"u.h"
-#include	"../port/lib.h"
-#include	"mem.h"
-#include	"dat.h"
-#include	"fns.h"
-#include	"../port/error.h"
-#include	"../ip/ip.h"
-
-IProuter iprouter;
-
-/*
- *  User level routing.  Ip packets we don't know what to do with
- *  come here.
- */
-void
-useriprouter(Fs *f, Ipifc *ifc, Block *bp)
-{
-	qlock(&f->iprouter);
-	if(f->iprouter.q != nil){
-		bp = padblock(bp, IPaddrlen);
-		if(bp == nil)
-			return;
-		ipmove(bp->rp, ifc->lifc->local);
-		qpass(f->iprouter.q, bp);
-	}else
-		freeb(bp);
-	qunlock(&f->iprouter);
-}
-
-void
-iprouteropen(Fs *f)
-{
-	qlock(&f->iprouter);
-	f->iprouter.opens++;
-	if(f->iprouter.q == nil)
-		f->iprouter.q = qopen(64*1024, 0, 0, 0);
-	else if(f->iprouter.opens == 1)
-		qreopen(f->iprouter.q);
-	qunlock(&f->iprouter);
-}
-
-void
-iprouterclose(Fs *f)
-{
-	qlock(&f->iprouter);
-	f->iprouter.opens--;
-	if(f->iprouter.opens == 0)
-		qclose(f->iprouter.q);
-	qunlock(&f->iprouter);
-}
-
-long
-iprouterread(Fs *f, void *a, int n)
-{
-	return qread(f->iprouter.q, a, n);
-}
--- a/os/ip/ipv6.c
+++ b/os/ip/ipv6.c
@@ -8,250 +8,127 @@
 #include	"ip.h"
 #include	"ipv6.h"
 
-enum
-{
-	IP4HDR		= 20,		/* sizeof(Ip4hdr) */
-	IP6HDR		= 40,		/* sizeof(Ip6hdr) */
-	IP_HLEN4	= 0x05,		/* Header length in words */
-	IP_DF		= 0x4000,	/* Don't fragment */
-	IP_MF		= 0x2000,	/* More fragments */
-	IP6FHDR		= 8, 		/* sizeof(Fraghdr6) */
-	IP_MAX		= (32*1024),	/* Maximum Internet packet size */
-};
-
-#define IPV6CLASS(hdr) ((hdr->vcf[0]&0x0F)<<2 | (hdr->vcf[1]&0xF0)>>2)
-#define BLKIPVER(xp)	(((Ip6hdr*)((xp)->rp))->vcf[0]&0xF0)
-/*
- * This sleazy macro is stolen shamelessly from ip.c, see comment there.
- */
-#define BKFG(xp)	((Ipfrag*)((xp)->base))
-
-typedef struct	IP	IP;
-typedef struct	Fragment4	Fragment4;
-typedef struct	Fragment6	Fragment6;
-typedef struct	Ipfrag	Ipfrag;
-
-Block*		ip6reassemble(IP*, int, Block*, Ip6hdr*);
-void		ipfragfree6(IP*, Fragment6*);
-Fragment6*	ipfragallo6(IP*);
+static Block*		ip6reassemble(IP*, int, Block*);
+static Fragment6*	ipfragallo6(IP*);
+static void		ipfragfree6(IP*, Fragment6*);
+static Block*		procopts(Block *bp);
 static Block*		procxtns(IP *ip, Block *bp, int doreasm);
-int		unfraglen(Block *bp, uchar *nexthdr, int setfh);
-Block*		procopts(Block *bp);
+static int		unfraglen(Block *bp, uchar *nexthdr, int setfh, int popfh);
 
-/* MIB II counters */
-enum
+void
+ip_init_6(Fs *f)
 {
-	Forwarding,
-	DefaultTTL,
-	InReceives,
-	InHdrErrors,
-	InAddrErrors,
-	ForwDatagrams,
-	InUnknownProtos,
-	InDiscards,
-	InDelivers,
-	OutRequests,
-	OutDiscards,
-	OutNoRoutes,
-	ReasmTimeout,
-	ReasmReqds,
-	ReasmOKs,
-	ReasmFails,
-	FragOKs,
-	FragFails,
-	FragCreates,
+	v6params *v6p;
 
-	Nstats,
-};
+	v6p = smalloc(sizeof(v6params));
 
-static char *statnames[] =
-{
-[Forwarding]	"Forwarding",
-[DefaultTTL]	"DefaultTTL",
-[InReceives]	"InReceives",
-[InHdrErrors]	"InHdrErrors",
-[InAddrErrors]	"InAddrErrors",
-[ForwDatagrams]	"ForwDatagrams",
-[InUnknownProtos]	"InUnknownProtos",
-[InDiscards]	"InDiscards",
-[InDelivers]	"InDelivers",
-[OutRequests]	"OutRequests",
-[OutDiscards]	"OutDiscards",
-[OutNoRoutes]	"OutNoRoutes",
-[ReasmTimeout]	"ReasmTimeout",
-[ReasmReqds]	"ReasmReqds",
-[ReasmOKs]	"ReasmOKs",
-[ReasmFails]	"ReasmFails",
-[FragOKs]	"FragOKs",
-[FragFails]	"FragFails",
-[FragCreates]	"FragCreates",
-};
+	v6p->rp.mflag		= 0;		/* default not managed */
+	v6p->rp.oflag		= 0;
+	v6p->rp.maxraint	= 600000;	/* millisecs */
+	v6p->rp.minraint	= 200000;
+	v6p->rp.linkmtu		= 0;		/* no mtu sent */
+	v6p->rp.reachtime	= 0;
+	v6p->rp.rxmitra		= 0;
+	v6p->rp.ttl		= MAXTTL;
+	v6p->rp.routerlt	= (3 * v6p->rp.maxraint) / 1000;
 
-struct Fragment4
-{
-	Block*	blist;
-	Fragment4*	next;
-	ulong 	src;
-	ulong 	dst;
-	ushort	id;
-	ulong 	age;
-};
+	v6p->hp.rxmithost	= 1000;		/* v6 RETRANS_TIMER */
 
-struct Fragment6
-{
-	Block*	blist;
-	Fragment6*	next;
-	uchar 	src[IPaddrlen];
-	uchar 	dst[IPaddrlen];
-	uint	id;
-	ulong 	age;
-};
+	f->v6p			= v6p;
+}
 
-struct Ipfrag
-{
-	ushort	foff;
-	ushort	flen;
-};
-
-/* an instance of IP */
-struct IP
-{
-	ulong		stats[Nstats];
-
-	QLock		fraglock4;
-	Fragment4*	flisthead4;
-	Fragment4*	fragfree4;
-	Ref		id4;
-
-	QLock		fraglock6;
-	Fragment6*	flisthead6;
-	Fragment6*	fragfree6;
-	Ref		id6;
-
-	int		iprouting;	/* true if we route like a gateway */
-};
-
 int
-ipoput6(Fs *f, Block *bp, int gating, int ttl, int tos, Conv *c)
+ipoput6(Fs *f, Block *bp, int gating, int ttl, int tos, Routehint *rh)
 {
-	int tentative;
-	Ipifc *ifc;
+	int medialen, len, chunk, uflen, flen, seglen, lid, offset, fragoff;
+	int morefrags, blklen, rv = 0;
 	uchar *gate, nexthdr;
-	Ip6hdr *eh;
-	int medialen, len, chunk, uflen, flen, seglen, lid, offset, fragoff, morefrags, blklen;
-	Route *r, *sr;
-	Fraghdr6 fraghdr;
 	Block *xp, *nb;
+	Fraghdr6 fraghdr;
 	IP *ip;
-	int rv = 0;
+	Ip6hdr *eh;
+	Ipifc *ifc;
+	Route *r;
 
 	ip = f->ip;
-
-	/* Fill out the ip header */
-	eh = (Ip6hdr*)(bp->rp);
-
 	ip->stats[OutRequests]++;
 
-	/* Number of uchars in data and ip header to write */
+	/* Fill out the ip header */
+	eh = (Ip6hdr*)bp->rp;
+	assert(BLEN(bp) >= IP6HDR);
 	len = blocklen(bp);
-	
-	tentative = iptentative(f, eh->src);
-	if(tentative){
-		netlog(f, Logip, "reject tx of packet with tentative src address\n");
-		goto free;
-	}
-
-	if(gating){
-		chunk = nhgets(eh->ploadlen);
-		if(chunk > len){
-			ip->stats[OutDiscards]++;
-			netlog(f, Logip, "short gated packet\n");
-			goto free;
-		}
-		if(chunk + IPV6HDR_LEN < len)
-			len = chunk + IPV6HDR_LEN;
-	}
-
 	if(len >= IP_MAX){
-//		print("len > IP_MAX, free\n");
 		ip->stats[OutDiscards]++;
-		netlog(f, Logip, "exceeded ip max size %I\n", eh->dst);
+		netlog(f, Logip, "%I -> %I: exceeded ip max size: %d\n", eh->src, eh->dst, len);
 		goto free;
 	}
 
-	r = v6lookup(f, eh->dst, c);
-	if(r == nil){
-//		print("no route for %I, src %I free\n", eh->dst, eh->src);
+	r = v6lookup(f, eh->dst, eh->src, rh);
+	if(r == nil || (r->type & Rv4) != 0 || (ifc = r->ifc) == nil){
 		ip->stats[OutNoRoutes]++;
-		netlog(f, Logip, "no interface %I\n", eh->dst);
+		netlog(f, Logip, "%I -> %I: no interface\n", eh->src, eh->dst);
 		rv = -1;
 		goto free;
 	}
 
-	ifc = r->ifc;
-	if(r->type & (Rifc|Runi))
+	if(r->type & (Rifc|Runi|Rbcast|Rmulti))
 		gate = eh->dst;
 	else
-	if(r->type & (Rbcast|Rmulti)) {
-		gate = eh->dst;
-		sr = v6lookup(f, eh->src, nil);
-		if(sr != nil && (sr->type & Runi))
-			ifc = sr->ifc;
-	}
-	else
 		gate = r->v6.gate;
 
-	if(!gating)
-		eh->vcf[0] = IP_VER6;
-	eh->ttl = ttl;
-	if(!gating) {
-		eh->vcf[0] |= (tos >> 4);
-		eh->vcf[1] = (tos << 4);
-	}
-
-	if(!canrlock(ifc)) {
+	if(!canrlock(ifc)){
+		ip->stats[OutDiscards]++;
 		goto free;
 	}
-
 	if(waserror()){
 		runlock(ifc);
 		nexterror();
 	}
 
-	if(ifc->m == nil) {
+	if(ifc->m == nil)
 		goto raise;
+
+	if(!gating){
+		eh->vcf[0] = IP_VER6;
+		eh->vcf[0] |= tos >> 4;
+		eh->vcf[1]  = tos << 4;
 	}
+	eh->ttl = ttl;
 
 	/* If we dont need to fragment just send it */
 	medialen = ifc->maxtu - ifc->m->hsize;
 	if(len <= medialen) {
-		hnputs(eh->ploadlen, len-IPV6HDR_LEN);
-		ifc->m->bwrite(ifc, bp, V6, gate);
+		hnputs(eh->ploadlen, len - IP6HDR);
+		ipifcoput(ifc, bp, V6, gate);
 		runlock(ifc);
 		poperror();
 		return 0;
 	}
 
-	if(gating) 
-	if(ifc->reassemble <= 0) {
-
-		/* v6 intermediate nodes are not supposed to fragment pkts;
-		   we fragment if ifc->reassemble is turned on; an exception
-		   needed for nat.
+	if(gating && !ifc->reassemble) {
+		/*
+		 * v6 intermediate nodes are not supposed to fragment pkts;
+		 * we fragment if ifc->reassemble is turned on; an exception
+		 * needed for nat.
 		 */
-
 		ip->stats[OutDiscards]++;
 		icmppkttoobig6(f, ifc, bp);
-		netlog(f, Logip, "%I: gated pkts not fragmented\n", eh->dst);
+		netlog(f, Logip, "%I -> %I: gated pkts not fragmented\n", eh->src, eh->dst);
 		goto raise;
 	}
-		
+
 	/* start v6 fragmentation */
-	uflen = unfraglen(bp, &nexthdr, 1);
+	uflen = unfraglen(bp, &nexthdr, 1, 0);
+	if(uflen < IP6HDR || nexthdr == FH) {
+		ip->stats[FragFails]++;
+		ip->stats[OutDiscards]++;
+		netlog(f, Logip, "%I -> %I: fragment header botch\n", eh->src, eh->dst);
+		goto raise;
+	}
 	if(uflen > medialen) {
 		ip->stats[FragFails]++;
 		ip->stats[OutDiscards]++;
-		netlog(f, Logip, "%I: unfragmentable part too big\n", eh->dst);
+		netlog(f, Logip, "%I -> %I: unfragmentable part too big: %d\n", eh->src, eh->dst, uflen);
 		goto raise;
 	}
 
@@ -260,7 +137,7 @@
 	if(seglen < 8) {
 		ip->stats[FragFails]++;
 		ip->stats[OutDiscards]++;
-		netlog(f, Logip, "%I: seglen < 8\n", eh->dst);
+		netlog(f, Logip, "%I -> %I: seglen < 8\n", eh->src, eh->dst);
 		goto raise;
 	}
 
@@ -271,13 +148,13 @@
 
 	xp = bp;
 	offset = uflen;
-	while (xp != nil && offset && offset >= BLEN(xp)) {
+	while (offset && offset >= BLEN(xp)) {
 		offset -= BLEN(xp);
 		xp = xp->next;
 	}
 	xp->rp += offset;
 
-	fragoff = 0; 
+	fragoff = 0;
 	morefrags = 1;
 
 	for(; fragoff < flen; fragoff += seglen) {
@@ -292,7 +169,7 @@
 		memmove(nb->wp, eh, uflen);
 		nb->wp += uflen;
 
-		hnputs(fraghdr.offsetRM, fragoff); // last 3 bits must be 0
+		hnputs(fraghdr.offsetRM, fragoff); /* last 3 bits must be 0 */
 		fraghdr.offsetRM[1] |= morefrags;
 		memmove(nb->wp, &fraghdr, IP6FHDR);
 		nb->wp += IP6FHDR;
@@ -300,11 +177,11 @@
 		/* Copy data */
 		chunk = seglen;
 		while (chunk) {
-			if(!xp) {
+			if(xp == nil) {
 				ip->stats[OutDiscards]++;
 				ip->stats[FragFails]++;
 				freeblist(nb);
-				netlog(f, Logip, "!xp: chunk in v6%d\n", chunk);
+				netlog(f, Logip, "xp == nil: chunk in v6%d\n", chunk);
 				goto raise;
 			}
 			blklen = chunk;
@@ -316,10 +193,9 @@
 			xp->rp += blklen;
 			chunk -= blklen;
 			if(xp->rp == xp->wp)
-				xp = xp->next; 
+				xp = xp->next;
 		}
-
-		ifc->m->bwrite(ifc, nb, V6, gate);
+		ipifcoput(ifc, nb, V6, gate);
 		ip->stats[FragCreates]++;
 	}
 	ip->stats[FragOKs]++;
@@ -328,7 +204,7 @@
 	runlock(ifc);
 	poperror();
 free:
-	freeblist(bp);	
+	freeblist(bp);
 	return rv;
 }
 
@@ -335,16 +211,10 @@
 void
 ipiput6(Fs *f, Ipifc *ifc, Block *bp)
 {
-	int hl;
-	int hop, tos;
-	uchar proto;
+	int hl, len, hop, tos;
+	IP *ip;
 	Ip6hdr *h;
 	Proto *p;
-	int notforme;
-	int tentative;
-	uchar v6dst[IPaddrlen];
-	IP *ip;
-	Route *r, *sr;
 
 	ip = f->ip;
 	ip->stats[InReceives]++;
@@ -365,40 +235,44 @@
 			return;
 	}
 
-	h = (Ip6hdr *)(bp->rp);
-
-	memmove(&v6dst[0], &(h->dst)[0], IPaddrlen);
-	notforme = ipforme(f, v6dst) == 0;
-	tentative = iptentative(f, v6dst);
-  
-	if(tentative && (h->proto != ICMPv6)) {
-		print("tentative addr, drop\n");
-		freeblist(bp);
-		return;
-	}
-
 	/* Check header version */
-	if(BLKIPVER(bp) != IP_VER6) {
+	h = (Ip6hdr*)bp->rp;
+	if((h->vcf[0] & 0xF0) != IP_VER6) {
 		ip->stats[InHdrErrors]++;
 		netlog(f, Logip, "ip: bad version %ux\n", (h->vcf[0]&0xF0)>>2);
-		freeblist(bp);
+		goto drop;
+	}
+	len = IP6HDR + nhgets(h->ploadlen);
+	if((bp = trimblock(bp, 0, len)) == nil){
+		ip->stats[InHdrErrors]++;
+		netlog(f, Logip, "%I -> %I: bogus packet length: %d\n", h->src, h->dst, len);
 		return;
 	}
+	h = (Ip6hdr*)bp->rp;
 
 	/* route */
-	if(notforme) {
-		if(!ip->iprouting){
-			freeb(bp);
-			return;
+	if(!ipforme(f, h->dst)) {
+		Route *r;
+		Routehint rh;
+		Ipifc *nifc;
+
+		if(!ip->iprouting)
+			goto drop;
+
+		/* don't forward to link-local destinations */
+		if(islinklocal(h->dst) ||
+		   (isv6mcast(h->dst) && (h->dst[1]&0xF) <= Link_local_scop)){
+			ip->stats[OutDiscards]++;
+			goto drop;
 		}
+			
 		/* don't forward to source's network */
-		sr = v6lookup(f, h->src, nil);
-		r = v6lookup(f, h->dst, nil);
-
-		if(r == nil || sr == r){
+		rh.r = nil;
+		r  = v6lookup(f, h->dst, h->src, &rh);
+		if(r == nil || (nifc = r->ifc) == nil || (r->type & Rv4) != 0
+		|| (nifc == ifc && !ifc->reflect)){
 			ip->stats[OutDiscards]++;
-			freeblist(bp);
-			return;
+			goto drop;
 		}
 
 		/* don't forward if packet has timed out */
@@ -406,33 +280,29 @@
 		if(hop < 1) {
 			ip->stats[InHdrErrors]++;
 			icmpttlexceeded6(f, ifc, bp);
-			freeblist(bp);
-			return;
+			goto drop;
 		}
 
 		/* process headers & reassemble if the interface expects it */
-		bp = procxtns(ip, bp, r->ifc->reassemble);
-
+		bp = procxtns(ip, bp, nifc->reassemble);
 		if(bp == nil)
 			return;
 
 		ip->stats[ForwDatagrams]++;
-		h = (Ip6hdr *) (bp->rp);
-		tos = IPV6CLASS(h);
+		h = (Ip6hdr*)bp->rp;
+		tos = (h->vcf[0]&0x0F)<<2 | (h->vcf[1]&0xF0)>>2;
 		hop = h->ttl;
-		ipoput6(f, bp, 1, hop-1, tos, nil);
+		ipoput6(f, bp, 1, hop-1, tos, &rh);
 		return;
 	}
 
 	/* reassemble & process headers if needed */
 	bp = procxtns(ip, bp, 1);
-
 	if(bp == nil)
 		return;
 
-	h = (Ip6hdr *) (bp->rp);
-	proto = h->proto;
-	p = Fsrcvpcol(f, proto);
+	h = (Ip6hdr*)bp->rp;
+	p = Fsrcvpcol(f, h->proto);
 	if(p != nil && p->rcv != nil) {
 		ip->stats[InDelivers]++;
 		(*p->rcv)(p, ifc, bp);
@@ -441,6 +311,7 @@
 
 	ip->stats[InDiscards]++;
 	ip->stats[InUnknownProtos]++;
+drop:
 	freeblist(bp);
 }
 
@@ -447,20 +318,20 @@
 /*
  * ipfragfree6 - copied from ipfragfree4 - assume hold fraglock6
  */
-void
+static void
 ipfragfree6(IP *ip, Fragment6 *frag)
 {
 	Fragment6 *fl, **l;
 
-	if(frag->blist)
+	if(frag->blist != nil)
 		freeblist(frag->blist);
-
-	memset(frag->src, 0, IPaddrlen);
-	frag->id = 0;
 	frag->blist = nil;
+	frag->id = 0;
+	memset(frag->src, 0, IPaddrlen);
+	memset(frag->dst, 0, IPaddrlen);
 
 	l = &ip->flisthead6;
-	for(fl = *l; fl; fl = fl->next) {
+	for(fl = *l; fl != nil; fl = fl->next) {
 		if(fl == frag) {
 			*l = frag->next;
 			break;
@@ -470,13 +341,12 @@
 
 	frag->next = ip->fragfree6;
 	ip->fragfree6 = frag;
-
 }
 
 /*
  * ipfragallo6 - copied from ipfragalloc4
  */
-Fragment6*
+static Fragment6*
 ipfragallo6(IP *ip)
 {
 	Fragment6 *f;
@@ -483,7 +353,7 @@
 
 	while(ip->fragfree6 == nil) {
 		/* free last entry on fraglist */
-		for(f = ip->flisthead6; f->next; f = f->next)
+		for(f = ip->flisthead6; f->next != nil; f = f->next)
 			;
 		ipfragfree6(ip, f);
 	}
@@ -497,108 +367,109 @@
 }
 
 static Block*
-procxtns(IP *ip, Block *bp, int doreasm) {
-
-	int offset;
+procxtns(IP *ip, Block *bp, int doreasm)
+{
 	uchar proto;
-	Ip6hdr *h;
+	int offset;
 
-	h = (Ip6hdr *) (bp->rp);
-	offset = unfraglen(bp, &proto, 0);
-
-	if((proto == FH) && (doreasm != 0)) {
-		bp = ip6reassemble(ip, offset, bp, h);
-		if(bp == nil) 
-			return nil; 
-		offset = unfraglen(bp, &proto, 0);
+	offset = unfraglen(bp, &proto, 0, doreasm);
+	if(offset >= IP6HDR && proto == FH && doreasm) {
+		bp = ip6reassemble(ip, offset, bp);
+		if(bp == nil)
+			return nil;
+		offset = unfraglen(bp, &proto, 0, 0);
+		if(proto == FH)
+			offset = -1;
 	}
-
-	if(proto == DOH || offset > IP6HDR) 
+	if(offset < IP6HDR){
+		ip->stats[InHdrErrors]++;
+		ip->stats[InDiscards]++;
+		freeblist(bp);
+		return nil;
+	}
+	if(proto == DOH || offset > IP6HDR)
 		bp = procopts(bp);
-
 	return bp;
 }
 
-
-/*	returns length of "Unfragmentable part", i.e., sum of lengths of ipv6 hdr,
- *	hop-by-hop & routing headers if present; *nexthdr is set to nexthdr value
- *	of the last header in the "Unfragmentable part"; if setfh != 0, nexthdr
- *	field of the last header in the "Unfragmentable part" is set to FH.
+/*
+ * returns length of "Unfragmentable part", i.e., sum of lengths of ipv6 hdr,
+ * hop-by-hop & routing headers if present; *nexthdr is set to nexthdr value
+ * of the last header in the "Unfragmentable part"; if setfh != 0 and that
+ * value is not already FH, the nexthdr field of that last header is set to FH.
+ * When the next header is a fragment header and popfh != 0, the nexthdr field
+ * of the previous header is set to the fragment header's own nexthdr value.
+ * Returns -1 if a header runs past bp->wp or a fragment header follows another.
  */
-int
-unfraglen(Block *bp, uchar *nexthdr, int setfh)
+static int
+unfraglen(Block *bp, uchar *nexthdr, int setfh, int popfh)
 {
-	uchar *p, *q;
-	int ufl, hs;
+	uchar *e, *p, *q;
 
+	e = bp->wp;
 	p = bp->rp;
-	q = p+6;	/* proto, = p+sizeof(Ip6hdr.vcf)+sizeof(Ip6hdr.ploadlen) */
+	q = p+6;   /* proto, = p+sizeof(Ip6hdr.vcf)+sizeof(Ip6hdr.ploadlen) */
 	*nexthdr = *q;
-	ufl = IP6HDR;
-	p += ufl;
-
-	for(;;) {
-		if(*nexthdr == HBH || *nexthdr == RH) {
-			*nexthdr = *p;
-			hs = ((int)*(p+1) + 1) * 8;
-			ufl += hs;
-			q = p;
-			p += hs;
-		}
-		else
-			break;
+	p += IP6HDR;
+	while(*nexthdr == HBH || *nexthdr == RH){
+		if(p+2 > e)
+			return -1;
+		q = p;
+		*nexthdr = *q;
+		p += ((int)p[1] + 1) * 8;
 	}
-
-	if(*nexthdr == FH)
-		*q = *p;
-
-	if(setfh)
+	if(p > e)
+		return -1;
+	if(*nexthdr == FH){
+		if(p+IP6FHDR > e || *p == FH)
+			return -1;
+		if(popfh)
+			*q = *p;
+	} else if(setfh)
 		*q = FH;
-
-	return ufl;
+	return p - bp->rp;
 }
 
-Block*
+static Block*
 procopts(Block *bp)
 {
 	return bp;
 }
 
-Block*
-ip6reassemble(IP* ip, int uflen, Block* bp, Ip6hdr* ih)
+static Block*
+ip6reassemble(IP* ip, int uflen, Block* bp)
 {
-
-	int fend, offset;
+	int offset, ovlap, fragsize, len;
+	uchar src[IPaddrlen], dst[IPaddrlen];
 	uint id;
-	Fragment6 *f, *fnext;
+	Block *bl, **l, *prev;
 	Fraghdr6 *fraghdr;
-	uchar src[IPaddrlen], dst[IPaddrlen];
-	Block *bl, **l, *last, *prev;
-	int ovlap, len, fragsize, pktposn;
+	Fragment6 *f, *fnext;
+	Ipfrag *fp, *fq;
+	Ip6hdr* ih;
 
-	fraghdr = (Fraghdr6 *) (bp->rp + uflen);
-	memmove(src, ih->src, IPaddrlen);
-	memmove(dst, ih->dst, IPaddrlen);
-	id = nhgetl(fraghdr->id);
-	offset = nhgets(fraghdr->offsetRM) & ~7;
-
 	/*
-	 *  block lists are too hard, pullupblock into a single block
+	 *  block lists are too hard, concatblock into a single block
 	 */
-	if(bp->next){
-		bp = pullupblock(bp, blocklen(bp));
-		ih = (Ip6hdr *)(bp->rp);
-	}
+	bp = concatblock(bp);
 
+	ih = (Ip6hdr*)bp->rp;
+	fraghdr = (Fraghdr6*)(bp->rp + uflen);
+	id = nhgetl(fraghdr->id);
+	offset = nhgets(fraghdr->offsetRM);
+	fragsize = BLEN(bp) - uflen - IP6FHDR;
 
+	memmove(src, ih->src, IPaddrlen);
+	memmove(dst, ih->dst, IPaddrlen);
+
 	qlock(&ip->fraglock6);
 
 	/*
 	 *  find a reassembly queue for this fragment
 	 */
-	for(f = ip->flisthead6; f; f = fnext){
+	for(f = ip->flisthead6; f != nil; f = fnext){
 		fnext = f->next;
-		if(ipcmp(f->src, src)==0 && ipcmp(f->dst, dst)==0 && f->id == id)
+		if(f->id == id && ipcmp(f->src, src) == 0 && ipcmp(f->dst, dst) == 0)
 			break;
 		if(f->age < NOW){
 			ip->stats[ReasmTimeout]++;
@@ -606,28 +477,35 @@
 		}
 	}
 
-
 	/*
 	 *  if this isn't a fragmented packet, accept it
 	 *  and get rid of any fragments that might go
 	 *  with it.
 	 */
-	if(nhgets(fraghdr->offsetRM)==0) {	// first frag is also the last
+	if((offset & ~6) == 0) {	/* 1st frag is also last */
 		if(f != nil) {
-			ipfragfree6(ip, f);
 			ip->stats[ReasmFails]++;
+			ipfragfree6(ip, f);
 		}
 		qunlock(&ip->fraglock6);
+
+		/* get rid of frag header */
+		memmove(bp->rp + IP6FHDR, bp->rp, uflen);
+		bp->rp += IP6FHDR;
+		ih = (Ip6hdr*)bp->rp;
+		hnputs(ih->ploadlen, BLEN(bp)-IP6HDR);
+
 		return bp;
 	}
 
-	if(bp->base+sizeof(Ipfrag) >= bp->rp){
-		bp = padblock(bp, sizeof(Ipfrag));
-		bp->rp += sizeof(Ipfrag);
+	if(bp->base+IPFRAGSZ > bp->rp){
+		bp = padblock(bp, IPFRAGSZ);
+		bp->rp += IPFRAGSZ;
 	}
 
-	BKFG(bp)->foff = offset;
-	BKFG(bp)->flen = nhgets(ih->ploadlen) + IP6HDR - uflen - IP6FHDR;
+	fp = (Ipfrag*)bp->base;
+	fp->foff = offset & ~7;
+	fp->flen = fragsize;
 
 	/* First fragment allocates a reassembly queue */
 	if(f == nil) {
@@ -638,8 +516,9 @@
 
 		f->blist = bp;
 
-		qunlock(&ip->fraglock6);
 		ip->stats[ReasmReqds]++;
+		qunlock(&ip->fraglock6);
+
 		return nil;
 	}
 
@@ -649,7 +528,7 @@
 	prev = nil;
 	l = &f->blist;
 	bl = f->blist;
-	while(bl != nil && BKFG(bp)->foff > BKFG(bl)->foff) {
+	while(bl != nil && fp->foff > ((Ipfrag*)bl->base)->foff) {
 		prev = bl;
 		l = &bl->next;
 		bl = bl->next;
@@ -656,15 +535,16 @@
 	}
 
 	/* Check overlap of a previous fragment - trim away as necessary */
-	if(prev) {
-		ovlap = BKFG(prev)->foff + BKFG(prev)->flen - BKFG(bp)->foff;
+	if(prev != nil) {
+		fq = (Ipfrag*)prev->base;
+		ovlap = fq->foff + fq->flen - fp->foff;
 		if(ovlap > 0) {
-			if(ovlap >= BKFG(bp)->flen) {
-				freeblist(bp);
+			if(ovlap >= fp->flen) {
 				qunlock(&ip->fraglock6);
+				freeb(bp);
 				return nil;
 			}
-			BKFG(prev)->flen -= ovlap;
+			fq->flen -= ovlap;
 		}
 	}
 
@@ -673,29 +553,27 @@
 	*l = bp;
 
 	/* Check to see if succeeding segments overlap */
-	if(bp->next) {
+	if(bp->next != nil) {
 		l = &bp->next;
-		fend = BKFG(bp)->foff + BKFG(bp)->flen;
+		offset = fp->foff + fp->flen;
 
 		/* Take completely covered segments out */
-
-		while(*l) {
-			ovlap = fend - BKFG(*l)->foff;
-
-			if(ovlap <= 0) 
-				break; 
-			if(ovlap < BKFG(*l)->flen) {
-				BKFG(*l)->flen -= ovlap;
-				BKFG(*l)->foff += ovlap;
-				/* move up ih hdrs */
-				memmove((*l)->rp + ovlap, (*l)->rp, uflen);
-				(*l)->rp += ovlap;
+		while((bl = *l) != nil) {
+			fq = (Ipfrag*)bl->base;
+			ovlap = offset - fq->foff;
+			if(ovlap <= 0)
 				break;
+			if(ovlap < fq->flen) {
+				/* move up ip and frag header */
+				memmove(bl->rp + ovlap, bl->rp, BLEN(bl) - fq->flen);
+				bl->rp += ovlap;
+				fq->flen -= ovlap;
+				fq->foff += ovlap;
+				break;
 			}
-			last = (*l)->next;
-			(*l)->next = nil;
-			freeblist(*l);
-			*l = last;
+			*l = bl->next;
+			bl->next = nil;
+			freeb(bl);
 		}
 	}
 
@@ -703,45 +581,55 @@
 	 *  look for a complete packet.  if we get to a fragment
 	 *  with the trailing bit of fraghdr->offsetRM[1] set, we're done.
 	 */
-	pktposn = 0;
-	for(bl = f->blist; bl; bl = bl->next) {
-		if(BKFG(bl)->foff != pktposn)
+	offset = 0;
+	for(bl = f->blist; bl != nil; bl = bl->next, offset += fp->flen) {
+		fp = (Ipfrag*)bl->base;
+		if(fp->foff != offset)
 			break;
-	
-		fraghdr = (Fraghdr6 *) (bl->rp + uflen);
-		if((fraghdr->offsetRM[1] & 1) == 0) {
 
-			bl = f->blist;
+		fraghdr = (Fraghdr6*)(bl->wp - fp->flen - IP6FHDR);
+		if(fraghdr->offsetRM[1] & 1)
+			continue;
 
-			/* get rid of frag header in first fragment */
+		bl = f->blist;
+		fq = (Ipfrag*)bl->base;
 
-			memmove(bl->rp + IP6FHDR, bl->rp, uflen);
-			bl->rp += IP6FHDR;
-			len = nhgets(((Ip6hdr*)(bl->rp))->ploadlen) - IP6FHDR;
-			bl->wp = bl->rp + len + IP6HDR;
+		/* get rid of frag header in first fragment */
+		memmove(bl->rp + IP6FHDR, bl->rp, BLEN(bl) - fq->flen - IP6FHDR);
+		bl->rp += IP6FHDR;
+		len = BLEN(bl);
 
-			/* Pullup all the fragment headers and
-			 * return a complete packet
-			 */
-			for(bl = bl->next; bl; bl = bl->next) {
-				fragsize = BKFG(bl)->flen;
-				len += fragsize;
-				bl->rp += uflen + IP6FHDR;
-				bl->wp = bl->rp + fragsize;
-			}
+		/*
+		 * Pullup all the fragment headers and
+		 * return a complete packet
+		 */
+		for(bl = bl->next; bl != nil && len < IP_MAX; bl = bl->next) {
+			fq = (Ipfrag*)bl->base;
+			fragsize = fq->flen;
+			bl->rp = bl->wp - fragsize;
+			len += fragsize;
+		}
 
-			bl = f->blist;
-			f->blist = nil;
+		if(len >= IP_MAX){
 			ipfragfree6(ip, f);
-			ih = (Ip6hdr*)(bl->rp);
-			hnputs(ih->ploadlen, len);
+			ip->stats[ReasmFails]++;
 			qunlock(&ip->fraglock6);
-			ip->stats[ReasmOKs]++;
-			return bl;		
+
+			return nil;
 		}
-		pktposn += BKFG(bl)->flen;
+
+		bl = f->blist;
+		f->blist = nil;
+		ipfragfree6(ip, f);
+
+		ih = (Ip6hdr*)bl->rp;
+		hnputs(ih->ploadlen, len-IP6HDR);
+
+		ip->stats[ReasmOKs]++;
+		qunlock(&ip->fraglock6);
+
+		return bl;
 	}
 	qunlock(&ip->fraglock6);
 	return nil;
 }
-
--- a/os/ip/ipv6.h
+++ b/os/ip/ipv6.h
@@ -1,53 +1,31 @@
-#undef MIN
-#define MIN(a, b) ((a) <= (b) ? (a) : (b))
-
-/* rfc 3513 defines the address prefices */
+/*
+ * Internet Protocol Version 6
+ *
+ * rfc2460 defines the protocol, rfc2461 neighbour discovery, and
+ * rfc2462 address autoconfiguration.  rfc4443 defines ICMP; was rfc2463.
+ * rfc4291 defines the address architecture (including prefices), was rfc3513.
+ * rfc4007 defines the scoped address architecture.
+ *
+ * global unicast is anything but unspecified (::), loopback (::1),
+ * multicast (ff00::/8), and link-local unicast (fe80::/10).
+ *
+ * site-local (fec0::/10) is now deprecated, originally by rfc3879.
+ *
+ * Unique Local IPv6 Unicast Addresses are defined by rfc4193.
+ * prefix is fc00::/7, scope is global, routing is limited to roughly a site.
+ */
 #define isv6mcast(addr)	  ((addr)[0] == 0xff)
 #define islinklocal(addr) ((addr)[0] == 0xfe && ((addr)[1] & 0xc0) == 0x80)
-#define issitelocal(addr) ((addr)[0] == 0xfe && ((addr)[1] & 0xc0) == 0xc0)
-#define isv6global(addr) (((addr)[0] & 0xe0) == 0x20)
 
-#define optexsts(np) (nhgets((np)->ploadlen) > 24)
-#define issmcast(addr) (memcmp((addr), v6solicitednode, 13) == 0)
+#define optexsts(np)	(nhgets((np)->ploadlen) > 24)
+#define issmcast(addr)	(memcmp((addr), v6solicitednode, 13) == 0)
 
-/* from RFC 2460 */
+#ifndef MIN
+#define MIN(a, b) ((a) <= (b)? (a): (b))
+#endif
 
-typedef struct Ip6hdr     Ip6hdr;
-typedef struct Opthdr     Opthdr;
-typedef struct Routinghdr Routinghdr;
-typedef struct Fraghdr6    Fraghdr6;
-
-struct Ip6hdr {
-	uchar vcf[4];       	// version:4, traffic class:8, flow label:20
-	uchar ploadlen[2];  	// payload length: packet length - 40
-	uchar proto;		// next header type
-	uchar ttl;          	// hop limit
-	uchar src[IPaddrlen];
-	uchar dst[IPaddrlen];
-};
-
-struct Opthdr {
-	uchar nexthdr;
-	uchar len;
-};
-
-struct Routinghdr {
-	uchar nexthdr;
-	uchar len;
-	uchar rtetype;
-	uchar segrem;
-};
-
-struct Fraghdr6 {
-	uchar nexthdr;
-	uchar res;
-	uchar offsetRM[2];	// Offset, Res, M flag
-	uchar id[4];
-};
-
-
-enum {			/* Header Types */
-	HBH		= 0,	//?
+enum {				/* Header Types */
+	HBH		= 0,	/* IPv6 hop-by-hop options header (rfc2460) */
 	ICMP		= 1,
 	IGMP		= 2,
 	GGP		= 3,
@@ -72,89 +50,113 @@
 	Maxhdrtype	= 256,
 };
 
-
 enum {
-	//	multicast flgs and scop
+	/* multicast flags and scopes */
 
-	well_known_flg				= 0,
-	transient_flg				= 1,
+//	Well_known_flg	= 0,
+//	Transient_flg	= 1,
 
-	node_local_scop 			= 1,
-	link_local_scop 			= 2,
-	site_local_scop 			= 5,
-	org_local_scop				= 8,
-	global_scop				= 14,
+//	Interface_local_scop = 1,
+	Link_local_scop	= 2,
+//	Site_local_scop	= 5,
+//	Org_local_scop	= 8,
+	Global_scop	= 14,
 
-	//	various prefix lengths
+	/* various prefix lengths */
+	SOLN_PREF_LEN	= 13,
 
-	SOLN_PREF_LEN				= 13,
+	/* icmpv6 unreachability codes */
+	Icmp6_no_route		= 0,
+	Icmp6_ad_prohib		= 1,
+	Icmp6_out_src_scope	= 2,
+	Icmp6_adr_unreach	= 3,
+	Icmp6_port_unreach	= 4,
+	Icmp6_gress_src_fail	= 5,
+	Icmp6_rej_route		= 6,
+	Icmp6_unknown		= 7,  /* our own invention for internal use */
 
-	//	icmpv6 unreach codes
-	icmp6_no_route				= 0,
-	icmp6_ad_prohib				= 1,
-	icmp6_unassigned			= 2,
-	icmp6_adr_unreach			= 3,
-	icmp6_port_unreach			= 4,
-	icmp6_unkn_code				= 5,
+	/* various flags & constants */
+	v6MINTU		= 1280,
+	HOP_LIMIT	= 255,
+	IP6HDR		= 40,		/* sizeof(Ip6hdr) = 8 + 2*16 */
+	IP6FHDR		= 8, 		/* sizeof(Fraghdr6) */
 
-	// 	various flags & constants
+	/* option types */
 
-	v6MINTU      				= 1280,
-	HOP_LIMIT    				= 255,
-	ETHERHDR_LEN 				= 14,
-	IPV6HDR_LEN  				= 40,
-	IPV4HDR_LEN  				= 20,
+	/* neighbour discovery */
+	SRC_LLADDR	= 1,
+	TARGET_LLADDR	= 2,
+	PREFIX_INFO	= 3,
+	REDIR_HEADER	= 4,
+	MTU_OPTION	= 5,
+	/* new since rfc2461; see iana.org/assignments/icmpv6-parameters */
+	V6nd_home	= 8,
+	V6nd_srcaddrs	= 9,		/* rfc3122 */
+	V6nd_ip		= 17,
+	/* /lib/rfc/drafts/draft-jeong-dnsop-ipv6-dns-discovery-12.txt */
+	V6nd_rdns	= 25,
+	/* plan 9 extensions */
+	V6nd_9fs	= 250,
+	V6nd_9auth	= 251,
 
-	// 	option types
+	SRC_UNSPEC	= 0,
+	SRC_UNI		= 1,
+	TARG_UNI	= 2,
+	TARG_MULTI	= 3,
 
-	SRC_LLADDRESS    			= 1,
-	TARGET_LLADDRESS 			= 2,
-	PREFIX_INFO      			= 3,
-	REDIR_HEADER     			= 4,
-	MTU_OPTION       			= 5,
+	Tunitent	= 1,
+	Tuniproxy	= 2,
+	Tunirany	= 3,
 
-	SRC_UNSPEC  				= 0,
-	SRC_UNI     				= 1,
-	TARG_UNI    				= 2,
-	TARG_MULTI  				= 3,
+	/* Node constants */
+	MAX_MULTICAST_SOLICIT	= 3,
+	RETRANS_TIMER		= 1000,
+};
 
-	t_unitent   				= 1,
-	t_uniproxy  				= 2,
-	t_unirany   				= 3,
+typedef struct Ip6hdr	Ip6hdr;
+typedef struct Opthdr	Opthdr;
+typedef struct Routinghdr Routinghdr;
+typedef struct Fraghdr6	Fraghdr6;
 
-	//	Router constants (all times in milliseconds)
+/* we do this in case there's padding at the end of Ip6hdr */
+#define IPV6HDR \
+	uchar	vcf[4];		/* version:4, traffic class:8, flow label:20 */\
+	uchar	ploadlen[2];	/* payload length: packet length - 40 */ \
+	uchar	proto;		/* next header type */ \
+	uchar	ttl;		/* hop limit */ \
+	uchar	src[IPaddrlen]; \
+	uchar	dst[IPaddrlen]
 
-	MAX_INITIAL_RTR_ADVERT_INTERVAL 	= 16000,
-	MAX_INITIAL_RTR_ADVERTISEMENTS  	= 3,
-	MAX_FINAL_RTR_ADVERTISEMENTS    	= 3,
-	MIN_DELAY_BETWEEN_RAS 			= 3000,
-	MAX_RA_DELAY_TIME     			= 500,
+struct	Ip6hdr {
+	IPV6HDR;
+	uchar	payload[];
+};
 
-	//	Host constants
+struct	Opthdr {		/* unused */
+	uchar	nexthdr;
+	uchar	len;
+};
 
-	MAX_RTR_SOLICITATION_DELAY 		= 1000,
-	RTR_SOLICITATION_INTERVAL  		= 4000,
-	MAX_RTR_SOLICITATIONS      		= 3,
+/*
+ * Beware routing header type 0 (loose source routing); see
+ * http://www.secdev.org/conf/IPv6_RH_security-csw07.pdf.
+ * Type 1 is unused.  Type 2 is for MIPv6 (mobile IPv6) filtering
+ * against type 0 header.
+ */
+struct	Routinghdr {		/* unused */
+	uchar	nexthdr;
+	uchar	len;
+	uchar	rtetype;
+	uchar	segrem;
+};
 
-	//	Node constants
-
-	MAX_MULTICAST_SOLICIT   		= 3,
-	MAX_UNICAST_SOLICIT     		= 3,
-	MAX_ANYCAST_DELAY_TIME  		= 1000,
-	MAX_NEIGHBOR_ADVERTISEMENT 		= 3,
-	REACHABLE_TIME 				= 30000,
-	RETRANS_TIMER  				= 1000,
-	DELAY_FIRST_PROBE_TIME 			= 5000,
-
+struct	Fraghdr6 {
+	uchar	nexthdr;
+	uchar	res;
+	uchar	offsetRM[2];	/* Offset, Res, M flag */
+	uchar	id[4];
 };
 
-extern void ipv62smcast(uchar *, uchar *);
-extern void icmpns(Fs *f, uchar* src, int suni, uchar* targ, int tuni, uchar* mac);
-extern void icmpna(Fs *f, uchar* src, uchar* dst, uchar* targ, uchar* mac, uchar flags);
-extern void icmpttlexceeded6(Fs *f, Ipifc *ifc, Block *bp);
-extern void icmppkttoobig6(Fs *f, Ipifc *ifc, Block *bp);
-extern void icmphostunr(Fs *f, Ipifc *ifc, Block *bp, int code, int free);
-
 extern uchar v6allnodesN[IPaddrlen];
 extern uchar v6allnodesL[IPaddrlen];
 extern uchar v6allroutersN[IPaddrlen];
@@ -161,23 +163,16 @@
 extern uchar v6allroutersL[IPaddrlen];
 extern uchar v6allnodesNmask[IPaddrlen];
 extern uchar v6allnodesLmask[IPaddrlen];
-extern uchar v6allroutersS[IPaddrlen];
 extern uchar v6solicitednode[IPaddrlen];
 extern uchar v6solicitednodemask[IPaddrlen];
 extern uchar v6Unspecified[IPaddrlen];
 extern uchar v6loopback[IPaddrlen];
-extern uchar v6loopbackmask[IPaddrlen];
 extern uchar v6linklocal[IPaddrlen];
 extern uchar v6linklocalmask[IPaddrlen];
-extern uchar v6sitelocal[IPaddrlen];
-extern uchar v6sitelocalmask[IPaddrlen];
-extern uchar v6glunicast[IPaddrlen];
 extern uchar v6multicast[IPaddrlen];
 extern uchar v6multicastmask[IPaddrlen];
 
 extern int v6llpreflen;
-extern int v6slpreflen;
-extern int v6lbpreflen;
 extern int v6mcpreflen;
 extern int v6snpreflen;
 extern int v6aNpreflen;
@@ -184,3 +179,10 @@
 extern int v6aLpreflen;
 
 extern int ReTransTimer;
+
+void ipv62smcast(uchar *, uchar *);
+void icmpns(Fs *f, uchar* src, int suni, uchar* targ, int tuni, uchar* mac);
+void icmpna(Fs *f, uchar* src, uchar* dst, uchar* targ, uchar* mac, uchar flags);
+void icmpttlexceeded6(Fs *f, Ipifc *ifc, Block *bp);
+void icmppkttoobig6(Fs *f, Ipifc *ifc, Block *bp);
+void icmphostunr6(Fs *f, Ipifc *ifc, Block *bp, int code, int tome);
--- a/os/ip/kernel.h
+++ /dev/null
@@ -1,10 +1,0 @@
-extern	int	kclose(int);
-extern	int	kdial(char*, char*, char*, int*);
-extern	int	kannounce(char*, char*);
-extern	void	kerrstr(char*);
-extern	void	kgerrstr(char*);
-extern	int	kopen(char*, int);
-extern	long	kread(int, void*, long);
-extern	long	kseek(int, vlong, int);
-extern	long	kwrite(int, void*, long);
-extern	void	kwerrstr(char *, ...);
--- a/os/ip/loopbackmedium.c
+++ b/os/ip/loopbackmedium.c
@@ -28,13 +28,12 @@
 	LB *lb;
 
 	lb = smalloc(sizeof(*lb));
+	lb->readp = (void*)-1;
 	lb->f = ifc->conv->p->f;
-	/* TO DO: make queue size a function of kernel memory */
-	lb->q = qopen(128*1024, Qmsg, nil, nil);
+	lb->q = qopen(1024*1024, Qmsg, nil, nil);
 	ifc->arg = lb;
-	ifc->mbps = 1000;
 
-	kproc("loopbackread", loopbackread, ifc, 0);
+	kproc("loopbackread", loopbackread, ifc);
 
 }
 
@@ -43,13 +42,29 @@
 {
 	LB *lb = ifc->arg;
 
-	if(lb->readp)
+	while(waserror())
+		;
+
+	/* wait for reader to start */
+	while(lb->readp == (void*)-1)
+		tsleep(&up->sleep, return0, 0, 300);
+		
+	if(lb->readp != nil)
 		postnote(lb->readp, 1, "unbind", 0);
 
+	poperror();
+
+	wunlock(ifc);
+	while(waserror())
+		;
+
 	/* wait for reader to die */
-	while(lb->readp != 0)
+	while(lb->readp != nil)
 		tsleep(&up->sleep, return0, 0, 300);
 
+	poperror();
+	wlock(ifc);
+
 	/* clean up */
 	qfree(lb->q);
 	free(lb);
@@ -76,23 +91,14 @@
 	ifc = a;
 	lb = ifc->arg;
 	lb->readp = up;	/* hide identity under a rock for unbind */
-	if(waserror()){
-		lb->readp = 0;
-		pexit("hangup", 1);
-	}
-	for(;;){
-		bp = qbread(lb->q, Maxtu);
-		if(bp == nil)
-			continue;
-		ifc->in++;
-		if(!canrlock(ifc)){
-			freeb(bp);
-			continue;
-		}
+	if(!waserror())
+	while((bp = qbread(lb->q, Maxtu)) != nil){
+		rlock(ifc);
 		if(waserror()){
 			runlock(ifc);
 			nexterror();
 		}
+		ifc->in++;
 		if(ifc->lifc == nil)
 			freeb(bp);
 		else
@@ -100,6 +106,8 @@
 		runlock(ifc);
 		poperror();
 	}
+	lb->readp = nil;
+	pexit("hangup", 1);
 }
 
 Medium loopbackmedium =
--- a/os/ip/nat.c
+++ /dev/null
@@ -1,549 +1,0 @@
-#include		"u.h"
-#include		"../port/lib.h"
-#include		"mem.h"
-#include		"dat.h"
-#include		"fns.h"
-#include		"../port/error.h"
-
-#include		"ip.h"
-
-typedef struct NatProto NatProto;
-typedef struct NatAddr NatAddr;
-
-/*
- * NAT.
- */
-struct Nat
-{
-	uchar	src[IPv4addrlen];	/* Source address */
-	uchar	sport[2];		/* Source port */
-	uchar	lport[2];		/* Local port */
-	uchar	proto;			/* Protocol */
-	long	time;			/* Time */
-	Conv	*conv;			/* Conversation */
-	Nat	*next;			/* Next node */
-};
-
-/*
- * Protocol list.
- */
-struct NatProto
-{
-	uchar	proto;			/* Protocol */
-	int	sport;			/* Source port offset */
-	int	dport;			/* Destination port offset */
-	int	cksum;			/* Checksum offset */
-	int	timeout;		/* Timeout */
-};
-
-/*
- * Address list.
- */
-struct NatAddr
-{
-	uchar	src[IPaddrlen];		/* Source address */
-	uchar	mask[IPaddrlen];	/* Source address mask */
-	uchar	net[IPaddrlen];		/* Source network address */
-	Iplifc	*dst;			/* Destination interface */
-	NatAddr	*next;			/* Next node */
-};
-
-static Nat *head = nil;
-static NatAddr *addrhead = nil;
-
-/*
- * Timeouts for ICMP, TCP and UDP are respectively confirmed
- * in RFC 5508, RFC 5382 and RFC 4787.
- */
-static NatProto prototab[] =
-{
-	{ 1, 4, 4, 2, 60*1000 },		/* ICMP */
-	{ 6, 0, 2, 16, (2*60*60+4*60)*1000 },	/* TCP */
-	{ 17, 0, 2, 6, 2*60*1000 },		/* UDP */
-	{ 40, 6, 8, 0, 10*30*1000 },		/* IL */
-	{ 255, 0, 2, 6, 2*60*1000 },		/* RUDP */
-	{ 0 }
-};
-
-NatProto*	parseproto(uchar);
-void		natprepend(Nat*);
-Nat*		natexistout(uchar*, uchar, uchar*);
-Nat*		natexistin(uchar, uchar*);
-int		natdelete(uchar*, uchar, uchar*);
-int		natpurge(uchar);
-Nat*		natlport(Proto*, Ip4hdr*, uchar*);
-int		natgc(uchar);
-void		checksumadjust(uchar*, uchar*, int, uchar*, int);
-Iplifc*		natonifco(Ipifc*, Ip4hdr*);
-Iplifc*		natonifci(Ipifc*);
-void		nataddrprepend(NatAddr*);
-NatAddr*	nataddrexist(uchar*, uchar*, Iplifc*);
-int		addnataddr(uchar*, uchar*, Iplifc*);
-int		removenataddr(uchar*, uchar*, Iplifc*);
-void		shownataddr(void);
-void		flushnataddr(void);
-
-/*
- * Return protocol attributes if known.
- */
-NatProto*
-parseproto(uchar proto)
-{
-	NatProto *np;
-
-	for(np = prototab; np->proto; np++)
-		if(proto == np->proto)
-			return np;
-
-	return nil;
-}
-
-/*
- * Output NAT.
- * Return -1 if the packet must be NATed but the protocol is unknown.
- */
-int
-nato(Block *b, Ipifc *ifc, Fs *f)
-{
-	Nat *n;		/* NAT table */
-	NatProto *np;	/* Protocol list */
-	Iplifc *lifc;	/* Logical interface */
-	Ip4hdr *h;	/* Source IPv4 header */
-	Proto *p;	/* New protocol */
-	uchar *laddr;	/* Local address on Iplifc */
-	uchar *sport;	/* Source port */
-	uchar *cksum;	/* Source checksum */
-
-	h = (Ip4hdr*)(b->rp);
-
-	/* Verify on which logical interface NAT is enabled,
-           and if this source address must be translated */
-	if((lifc=natonifco(ifc, h)) == nil)
-		return 0;
-
-	laddr = lifc->local+IPv4off;
-	p = Fsrcvpcolx(f, h->proto);
-
-	if(ip4cmp(h->src, laddr) != 0){
-		if((np=parseproto(h->proto)) != nil){
-			/* Protocol layer */
-			sport = (b->rp)+sizeof(Ip4hdr)+np->sport;
-			cksum = (b->rp)+sizeof(Ip4hdr)+np->cksum;
-			if((n = natlport(p, h, sport)) == nil)
-				return -1;
-			memmove(sport, n->lport, 2);
-			checksumadjust(cksum, n->sport, 2, n->lport, 2);
-			if(np->proto != 1)
-				/* ICMP checksum doesn't include IP header */
-				checksumadjust(cksum, n->src, IPv4addrlen,
-					laddr, IPv4addrlen);
-			/* IP layer */
-			ip4move(h->src, laddr);
-			checksumadjust(h->cksum, n->src, IPv4addrlen,
-				h->src, IPv4addrlen);
-			return 0;
-		}else{
-			netlog(f, Lognat, "nat: unknown protocol %d\n", h->proto);
-			return -1;
-		}
-	}
-
-	return 0;
-}
-
-/*
- * Input NAT.
- */
-void
-nati(Block *b, Ipifc *ifc)
-{
-	Nat *n;		/* NAT table */
-	NatProto *np;	/* Protocol list */
-	Ip4hdr *h;	/* Source IPv4 header */
-	uchar *lport;	/* Our local port, and dst port for the packet */
-	uchar *cksum;	/* Source checksum */
-
-	h = (Ip4hdr*)(b->rp);
-
-	/* Verify if NAT is enabled on this interface */
-	if(natonifci(ifc) == nil)
-		return;
-
-	if((np=parseproto(h->proto)) != nil){
-		lport = (b->rp)+sizeof(Ip4hdr)+np->dport;
-		if((n=natexistin(h->proto, lport)) != nil){
-			/* Protocol layer */
-			cksum = (b->rp)+sizeof(Ip4hdr)+np->cksum;
-			checksumadjust(cksum, lport, 2, n->sport, 2);
-			memmove(lport, n->sport, 2);
-			if(np->proto != 1)
-				/* ICMP checksum doesn't include IP header */
-		   		checksumadjust(cksum, h->dst, IPv4addrlen,
-					n->src, IPv4addrlen);
-			/* IP layer */
-			checksumadjust(h->cksum, h->dst, IPv4addrlen,
-				n->src, IPv4addrlen);
-			ip4move(h->dst, n->src);
-		}
-	}
-}
-
-/*
- * Add Nat to Nat list.
- */
-void
-natprepend(Nat *n)
-{
-	n->next = head;
-	head = n;
-}
-
-/*
- * Return Nat if it exists in Nat list.
- */
-Nat*
-natexistout(uchar *src, uchar proto, uchar *sport)
-{
-	Nat *c;		/* Current node */
-
-	for(c=head; c!=nil; c=c->next)
-		if(ip4cmp(src, c->src) == 0 &&
-			memcmp(sport, c->sport, 2) == 0 &&
-			proto == c->proto){
-			c->time = NOW;
-			return c;
-		}
-
-	return nil;
-}
-
-/*
- * Return Nat if it exists in Nat list.
- */
-Nat*
-natexistin(uchar proto, uchar *lport)
-{
-	Nat *c;		/* Current node */
-
-	for(c=head; c!=nil; c=c->next)
-		if(memcmp(lport, c->lport, 2) == 0 &&
-			proto == c->proto){
-			c->time = NOW;
-			return c;
-		}
-
-	return nil;
-}
-
-/*
- * Delete Nat in Nat list.
- * Return -1 if it doesn't exist.
- */
-int
-natdelete(uchar src[IPv4addrlen], uchar proto, uchar sport[2])
-{
-	Nat *p;		/* Precedent node */
-	Nat *c;		/* Current node */
-
-	for(p=nil, c=head; c!=nil; p=c, c=c->next)
-		if(ip4cmp(src, c->src) == 0 &&
-			memcmp(sport, c->sport, 2) == 0 &&
-			proto == c->proto)
-			break;
-
-	if(c == nil)
-		return -1;
-
-	if(p == nil)
-		head = head->next;
-	else
-		p->next = c->next;
-
-	closeconv(c->conv);
-	free(c);
-
-	return 0;
-}
-
-/*
- * Purge Nat list.
- */
-int
-natpurge(uchar proto)
-{
-	Nat *c;		/* Current node */
-	int n;		/* Number of purged connections */
-
-	for(n = 0;; n++){
-		do{
-			if((c = head) == nil)
-				return n;
-			head = head->next;
-		}while(c->proto != proto);
-		closeconv(c->conv);
-		free(c);
-	}
-}
-
-/*
- * Create a new Nat if necessary.
- */
-Nat*
-natlport(Proto *p, Ip4hdr *h, uchar *sport)
-{
-	Nat *n;		/* New NAT node */
-	Conv *s;	/* New conversation */
-
-	if((n=natexistout(h->src, h->proto, sport)) == nil){
-		qlock(p);
-		s = Fsprotoclone(p, "network");
-		qunlock(p);
-		if(s == nil){
-			error(Enodev);
-			return nil;
-		}
-		setlport(s);
-		n = malloc(sizeof(Nat));
-		ip4move(n->src, h->src);
-		memmove(n->sport, sport, 2);
-		memmove(n->lport, &s->lport, 2);
-		n->proto = h->proto;
-		n->time = NOW;
-		n->conv = s;
-		natprepend(n);
-	}
-
-	return n;
-}
-
-/*
- * Nat list garbage collector.
- */
-int
-natgc(uchar proto){
-	Nat *p;		/* Precedent node */
-	Nat *c;		/* Current node */
-	NatProto *np;	/* Protocol list */
-	int n;		/* Number of garbage collected connections */
-
-	n = 0;
-	p = nil;
-	c = head;
-
-	np = parseproto(proto);
-
-	while(c != nil){
-		if(NOW - c->time > np->timeout){
- 			if(p == nil){
- 				head = head->next;
-				if(proto == c->proto)
-					n++;
-				closeconv(c->conv);
-				free(c);
-				p = nil;
-				c = head;
- 			}else{
- 				p->next = c->next;
-				if(proto == c->proto)
-					n++;
-				closeconv(c->conv);
-				free(c);
- 				c = p->next;
- 			}
-		}else{
-			p = c;
-			c = c->next;
-		}
-	}
-
-	if(n == 0)	/* Prevent Conv saturation */
-		n = natpurge(proto);
-
-	return n;
-}
-
-/*
- * Function checksumadjust from RFC 3022.
- */
-void
-checksumadjust(uchar *chksum, uchar *optr, int olen, uchar *nptr, int nlen)
-{
-	long x, old, new;
-
-	x=chksum[0]*256+chksum[1];
-	x=~x & 0xffff;
-	while(olen){
-		old=optr[0]*256+optr[1];
-		optr+=2;
-		x-=old & 0xffff;
-		if(x<=0){
-			x--;
-			x&=0xffff;
-		}
-		olen-=2;
-	}
-	while(nlen){
-		new=nptr[0]*256+nptr[1];
-		nptr+=2;
-		x+=new & 0xffff;
-		if(x & 0x10000){
-			x++;
-			x&=0xffff;
-		}
-		nlen-=2;
-	}
-	x=~x & 0xffff;
-	chksum[0]=x/256;
-	chksum[1]=x & 0xff;
-}
-
-/*
- * Add NatAddr to NatAddr list.
- */
-void
-nataddrprepend(NatAddr *na)
-{
-	na->next = addrhead;
-	addrhead = na;
-}
-
-/*
- * Return NatAddr if it exists in NatAddr list.
- */
-NatAddr*
-nataddrexist(uchar *src, uchar *mask, Iplifc *dst)
-{
-	NatAddr *c;	/* Current node */
-
-	for(c=addrhead; c!=nil; c=c->next)
-		if(ipcmp(src, c->src) == 0 &&
-			ipcmp(mask, c->mask) == 0 &&
-			dst == c->dst)
-			return c;
-
-	return nil;
-}
-
-/*
- * Create a new NatAddr.
- * Return -1 if it already exist.
- */
-int
-addnataddr(uchar *src, uchar *mask, Iplifc *dst)
-{
-	NatAddr *na;		/* New address node */
-	uchar net[IPaddrlen];	/* Network address */
-
-	maskip(src, mask, net);
-
-	if(nataddrexist(src, mask, dst) != nil)
-		return -1;
-
-	na = malloc(sizeof(NatAddr));
-	ipmove(na->src, src);
-	ipmove(na->mask, mask);
-	ipmove(na->net, net);
-	na->dst = dst;
-
-	nataddrprepend(na);
-
-	return 0;
-}
-
-/*
- * Remove a NatAddr.
- * Return -1 if it doesn't exist.
- */
-int
-removenataddr(uchar *src, uchar *mask, Iplifc *dst)
-{
-	NatAddr *c;	/* Current node */
-	NatAddr *p;	/* Precedent node */
-
-	for(p=nil, c=addrhead; c!=nil; p=c, c=c->next)
-		if(ipcmp(src, c->src) == 0 &&
-			ipcmp(mask, c->mask) == 0 &&
-			dst == c->dst)
-			break;
-
-	if(c == nil)
-		return -1;
-
-	if(p == nil)
-		addrhead = addrhead->next;
-	else
-		p->next = c->next;
-
-	return 0;
-}
-
-/*
- * Display NatAddr list.
- */
-void
-shownataddr(void)
-{
-	NatAddr *c;	/* Current node */
-
-	for(c=addrhead; c!=nil; c=c->next)
-		print("%I %V %I\n", c->src, c->mask+IPv4off, c->dst->local);
-}
-
-/*
- * Flush NatAddr list.
- */
-void
-flushnataddr(void)
-{
-	NatAddr *c;	/* Current node */
-
-	while((c=addrhead) != nil){
-		addrhead = addrhead->next;
-		free(c);
-	}
-}
-
-/*
- * Return logical interface if NAT is enabled on this interface,
- * and the source address must be translated.
- */
-Iplifc*
-natonifco(Ipifc *ifc, Ip4hdr* h)
-{
-	NatAddr *na;		/* Address list */
-	Iplifc *lifc;		/* Logical interface */
-	uchar src[IPaddrlen];	/* Source address */
-	uchar net[IPaddrlen];	/* Source network address */
-
-	for(lifc=ifc->lifc; lifc!=nil; lifc=lifc->next)
-		for(na=addrhead; na; na=na->next)
-			if(lifc == na->dst){
-				/* NAT enabled on this logical interface */
-				v4tov6(src, h->src);
-				maskip(src, na->mask, net);
-				if(ipcmp(net, na->net) == 0)
-					/* Source address must be translated */
-					return lifc;
-			}
-
-	return nil;
-}
-
-/*
- * Return logical interface if NAT is enabled on this interface.
- */
-Iplifc*
-natonifci(Ipifc *ifc)
-{
-	NatAddr *na;		/* Address list */
-	Iplifc *lifc;		/* Logical interface */
-
-	for(lifc=ifc->lifc; lifc!=nil; lifc=lifc->next)
-		for(na=addrhead; na; na=na->next)
-			if(lifc == na->dst){
-				/* NAT enabled on this logical interface */
-				return lifc;
-			}
-
-	return nil;
-}
--- a/os/ip/netdevmedium.c
+++ b/os/ip/netdevmedium.c
@@ -49,12 +49,13 @@
 	mchan = namec(argv[2], Aopen, ORDWR, 0);
 
 	er = smalloc(sizeof(*er));
+	er->readp = (void*)-1;
 	er->mchan = mchan;
 	er->f = ifc->conv->p->f;
 
 	ifc->arg = er;
 
-	kproc("netdevread", netdevread, ifc, 0);
+	kproc("netdevread", netdevread, ifc);
 }
 
 /*
@@ -65,13 +66,29 @@
 {
 	Netdevrock *er = ifc->arg;
 
+	while(waserror())
+		;
+
+	/* wait for reader to start */
+	while(er->readp == (void*)-1)
+		tsleep(&up->sleep, return0, 0, 300);
+
 	if(er->readp != nil)
 		postnote(er->readp, 1, "unbind", 0);
 
-	/* wait for readers to die */
+	poperror();
+
+	wunlock(ifc);
+	while(waserror())
+		;
+
+	/* wait for reader to die */
 	while(er->readp != nil)
 		tsleep(&up->sleep, return0, 0, 300);
 
+	poperror();
+	wlock(ifc);
+
 	if(er->mchan != nil)
 		cclose(er->mchan);
 
@@ -86,8 +103,6 @@
 {
 	Netdevrock *er = ifc->arg;
 
-	if(bp->next)
-		bp = concatblock(bp);
 	if(BLEN(bp) < ifc->mintu)
 		bp = adjustblock(bp, ifc->mintu);
 
@@ -104,34 +119,22 @@
 	Ipifc *ifc;
 	Block *bp;
 	Netdevrock *er;
-	char *argv[1];
 
 	ifc = a;
 	er = ifc->arg;
 	er->readp = up;	/* hide identity under a rock for unbind */
-	if(waserror()){
-		er->readp = nil;
-		pexit("hangup", 1);
-	}
+	if(!waserror())
 	for(;;){
 		bp = devtab[er->mchan->type]->bread(er->mchan, ifc->maxtu, 0);
 		if(bp == nil){
-			/*
-			 * get here if mchan is a pipe and other side hangs up
-			 * clean up this interface & get out
-ZZZ is this a good idea?
-			 */
 			poperror();
-			er->readp = nil;
-			argv[0] = "unbind";
-			if(!waserror())
+			if(!waserror()){
+				static char *argv[]  = { "unbind" };
 				ifc->conv->p->ctl(ifc->conv, argv, 1);
-			pexit("hangup", 1);
+			}
+			break;
 		}
-		if(!canrlock(ifc)){
-			freeb(bp);
-			continue;
-		}
+		rlock(ifc);
 		if(waserror()){
 			runlock(ifc);
 			nexterror();
@@ -144,6 +147,8 @@
 		runlock(ifc);
 		poperror();
 	}
+	er->readp = nil;
+	pexit("hangup", 1);
 }
 
 void
--- a/os/ip/netlog.c
+++ b/os/ip/netlog.c
@@ -7,7 +7,7 @@
 #include	"../ip/ip.h"
 
 enum {
-	Nlog		= 4*1024,
+	Nlog		= 16*1024,
 };
 
 /*
@@ -39,12 +39,12 @@
 	{ "ppp",	Logppp, },
 	{ "ip",		Logip, },
 	{ "fs",		Logfs, },
-	{ "tcp",	Logtcp, },
 	{ "il",		Logil, },
+	{ "tcp",	Logtcp, },
 	{ "icmp",	Logicmp, },
 	{ "udp",	Logudp, },
 	{ "compress",	Logcompress, },
-	{ "ilmsg",	Logil|Logilmsg, },
+	{ "logilmsg",	Logilmsg, },
 	{ "gre",	Loggre, },
 	{ "tcpwin",	Logtcp|Logtcpwin, },
 	{ "tcprxmt",	Logtcp|Logtcprxmt, },
@@ -85,8 +85,11 @@
 		nexterror();
 	}
 	if(f->alog->opens == 0){
-		if(f->alog->buf == nil)
+		if(f->alog->buf == nil){
 			f->alog->buf = malloc(Nlog);
+			if(f->alog->buf == nil)
+				error(Enomem);
+		}
 		f->alog->rptr = f->alog->buf;
 		f->alog->end = f->alog->buf + Nlog;
 	}
@@ -202,6 +205,7 @@
 		else
 			f->alog->iponlyset = 1;
 		free(cb);
+		poperror();
 		return;
 
 	default:
@@ -227,7 +231,7 @@
 void
 netlog(Fs *f, int mask, char *fmt, ...)
 {
-	char buf[128], *t, *fp;
+	char buf[256], *t, *fp;
 	int i, n;
 	va_list arg;
 
--- a/os/ip/nullmedium.c
+++ b/os/ip/nullmedium.c
@@ -19,8 +19,9 @@
 }
 
 static void
-nullbwrite(Ipifc*, Block*, int, uchar*)
+nullbwrite(Ipifc*, Block *bp, int, uchar*)
 {
+	freeb(bp);
 	error("nullbwrite");
 }
 
--- a/os/ip/pktmedium.c
+++ b/os/ip/pktmedium.c
@@ -16,10 +16,10 @@
 Medium pktmedium =
 {
 .name=		"pkt",
-.hsize=		14,
-.mintu=		40,
+.hsize=		0,
+.mintu=		0,
 .maxtu=		4*1024,
-.maclen=	6,
+.maclen=	0,
 .bind=		pktbind,
 .unbind=	pktunbind,
 .bwrite=	pktbwrite,
@@ -28,12 +28,13 @@
 };
 
 /*
- *  called to bind an IP ifc to an ethernet device
+ *  called to bind an IP ifc to a packet device
  *  called with ifc wlock'd
  */
 static void
-pktbind(Ipifc*, int, char**)
+pktbind(Ipifc*, int argc, char **argv)
 {
+	USED(argc, argv);
 }
 
 /*
@@ -51,7 +52,6 @@
 pktbwrite(Ipifc *ifc, Block *bp, int, uchar*)
 {
 	/* enqueue onto the conversation's rq */
-	bp = concatblock(bp);
 	if(ifc->conv->snoopers.ref > 0)
 		qpass(ifc->conv->sq, copyblock(bp, BLEN(bp)));
 	qpass(ifc->conv->rq, bp);
--- a/os/ip/plan9.c
+++ /dev/null
@@ -1,36 +1,0 @@
-#include	"u.h"
-#include	"../port/lib.h"
-#include	"mem.h"
-#include	"dat.h"
-#include	"fns.h"
-#include	"../port/error.h"
-#include	"ip.h"
-
-/*
- *  some hacks for commonality twixt inferno and plan9
- */
-
-char*
-commonuser(void)
-{
-	return up->env->user;
-}
-
-Chan*
-commonfdtochan(int fd, int mode, int a, int b)
-{
-	return fdtochan(up->env->fgrp, fd, mode, a, b);
-}
-
-char*
-commonerror(void)
-{
-	return up->env->errstr;
-}
-
-int
-postnote(Proc *p, int, char *, int)
-{
-	swiproc(p, 0);
-	return 0;
-}
--- a/os/ip/ppp.c
+++ /dev/null
@@ -1,1656 +1,0 @@
-#include	"u.h"
-#include	"../port/lib.h"
-#include	"mem.h"
-#include	"dat.h"
-#include	"fns.h"
-#include	"../port/error.h"
-#include	<libcrypt.h>
-#include	<kernel.h>
-#include	"ip.h"
-#include	"ppp.h"
-
-int	nocompress;
-Ipaddr	pppdns[2];
-
-/*
- * Calculate FCS - rfc 1331
- */
-ushort fcstab[256] =
-{
-      0x0000, 0x1189, 0x2312, 0x329b, 0x4624, 0x57ad, 0x6536, 0x74bf,
-      0x8c48, 0x9dc1, 0xaf5a, 0xbed3, 0xca6c, 0xdbe5, 0xe97e, 0xf8f7,
-      0x1081, 0x0108, 0x3393, 0x221a, 0x56a5, 0x472c, 0x75b7, 0x643e,
-      0x9cc9, 0x8d40, 0xbfdb, 0xae52, 0xdaed, 0xcb64, 0xf9ff, 0xe876,
-      0x2102, 0x308b, 0x0210, 0x1399, 0x6726, 0x76af, 0x4434, 0x55bd,
-      0xad4a, 0xbcc3, 0x8e58, 0x9fd1, 0xeb6e, 0xfae7, 0xc87c, 0xd9f5,
-      0x3183, 0x200a, 0x1291, 0x0318, 0x77a7, 0x662e, 0x54b5, 0x453c,
-      0xbdcb, 0xac42, 0x9ed9, 0x8f50, 0xfbef, 0xea66, 0xd8fd, 0xc974,
-      0x4204, 0x538d, 0x6116, 0x709f, 0x0420, 0x15a9, 0x2732, 0x36bb,
-      0xce4c, 0xdfc5, 0xed5e, 0xfcd7, 0x8868, 0x99e1, 0xab7a, 0xbaf3,
-      0x5285, 0x430c, 0x7197, 0x601e, 0x14a1, 0x0528, 0x37b3, 0x263a,
-      0xdecd, 0xcf44, 0xfddf, 0xec56, 0x98e9, 0x8960, 0xbbfb, 0xaa72,
-      0x6306, 0x728f, 0x4014, 0x519d, 0x2522, 0x34ab, 0x0630, 0x17b9,
-      0xef4e, 0xfec7, 0xcc5c, 0xddd5, 0xa96a, 0xb8e3, 0x8a78, 0x9bf1,
-      0x7387, 0x620e, 0x5095, 0x411c, 0x35a3, 0x242a, 0x16b1, 0x0738,
-      0xffcf, 0xee46, 0xdcdd, 0xcd54, 0xb9eb, 0xa862, 0x9af9, 0x8b70,
-      0x8408, 0x9581, 0xa71a, 0xb693, 0xc22c, 0xd3a5, 0xe13e, 0xf0b7,
-      0x0840, 0x19c9, 0x2b52, 0x3adb, 0x4e64, 0x5fed, 0x6d76, 0x7cff,
-      0x9489, 0x8500, 0xb79b, 0xa612, 0xd2ad, 0xc324, 0xf1bf, 0xe036,
-      0x18c1, 0x0948, 0x3bd3, 0x2a5a, 0x5ee5, 0x4f6c, 0x7df7, 0x6c7e,
-      0xa50a, 0xb483, 0x8618, 0x9791, 0xe32e, 0xf2a7, 0xc03c, 0xd1b5,
-      0x2942, 0x38cb, 0x0a50, 0x1bd9, 0x6f66, 0x7eef, 0x4c74, 0x5dfd,
-      0xb58b, 0xa402, 0x9699, 0x8710, 0xf3af, 0xe226, 0xd0bd, 0xc134,
-      0x39c3, 0x284a, 0x1ad1, 0x0b58, 0x7fe7, 0x6e6e, 0x5cf5, 0x4d7c,
-      0xc60c, 0xd785, 0xe51e, 0xf497, 0x8028, 0x91a1, 0xa33a, 0xb2b3,
-      0x4a44, 0x5bcd, 0x6956, 0x78df, 0x0c60, 0x1de9, 0x2f72, 0x3efb,
-      0xd68d, 0xc704, 0xf59f, 0xe416, 0x90a9, 0x8120, 0xb3bb, 0xa232,
-      0x5ac5, 0x4b4c, 0x79d7, 0x685e, 0x1ce1, 0x0d68, 0x3ff3, 0x2e7a,
-      0xe70e, 0xf687, 0xc41c, 0xd595, 0xa12a, 0xb0a3, 0x8238, 0x93b1,
-      0x6b46, 0x7acf, 0x4854, 0x59dd, 0x2d62, 0x3ceb, 0x0e70, 0x1ff9,
-      0xf78f, 0xe606, 0xd49d, 0xc514, 0xb1ab, 0xa022, 0x92b9, 0x8330,
-      0x7bc7, 0x6a4e, 0x58d5, 0x495c, 0x3de3, 0x2c6a, 0x1ef1, 0x0f78
-};
-
-static char *snames[] =
-{
-	"Sclosed",
-	"Sclosing",
-	"Sreqsent",
-	"Sackrcvd",
-	"Sacksent",
-	"Sopened",
-};
-
-static void	init(PPP*);
-static void	setphase(PPP*, int);
-static void	pinit(PPP*, Pstate*);
-static void	ppptimer(void*);
-static void	ptimer(PPP*, Pstate*);
-static int	getframe(PPP*, Block**);
-static Block*	putframe(PPP*, int, Block*);
-static uchar*	escapebyte(PPP*, ulong, uchar*, ushort*);
-static void	config(PPP*, Pstate*, int);
-static int	getopts(PPP*, Pstate*, Block*);
-static void	rejopts(PPP*, Pstate*, Block*, int);
-static void	newstate(PPP*, Pstate*, int);
-static void	rcv(PPP*, Pstate*, Block*);
-static void	getchap(PPP*, Block*);
-static void	getpap(PPP*, Block*);
-static void	sendpap(PPP*);
-static void	getlqm(PPP*, Block*);
-static void	putlqm(PPP*);
-static void	hangup(PPP*);
-static void	remove(PPP*);
-
-static	int		validv4(Ipaddr);
-static	void		invalidate(Ipaddr);
-static	void		ipconnect(PPP *);
-static	void		setdefroute(PPP *, Ipaddr);
-static	void		printopts(PPP *, Pstate*, Block*, int);
-static	void		sendtermreq(PPP*, Pstate*);
-
-static void
-errlog(PPP *ppp, char *err)
-{
-	int n;
-	char msg[64];
-
-	n = snprint(msg, sizeof(msg), "%s\n", err);
-	qproduce(ppp->ifc->conv->eq, msg, n);
-}
-
-static void
-init(PPP* ppp)
-{
-	if(ppp->inbuf == nil){
-		ppp->inbuf = allocb(4096);
-		ppp->outbuf = allocb(4096);
-
-		ppp->lcp = malloc(sizeof(Pstate));
-		ppp->ipcp = malloc(sizeof(Pstate));
-		if(ppp->lcp == nil || ppp->ipcp == nil)
-			error("ppp init: malloc");
-
-		ppp->lcp->proto = Plcp;
-		ppp->lcp->state = Sclosed;
-		ppp->ipcp->proto = Pipcp;
-		ppp->ipcp->state = Sclosed;
-
-		kproc("ppptimer", ppptimer, ppp, KPDUPPG|KPDUPFDG);
-	}
-
-	pinit(ppp, ppp->lcp);
-	setphase(ppp, Plink);
-}
-
-static void
-setphase(PPP *ppp, int phase)
-{
-	int oldphase;
-
-	oldphase = ppp->phase;
-
-	ppp->phase = phase;
-	switch(phase){
-	default:
-		panic("ppp: unknown phase %d", phase);
-	case Pdead:
-		/* restart or exit? */
-		pinit(ppp, ppp->lcp);
-		setphase(ppp, Plink);
-		break;
-	case Plink:
-		/* link down */
-		switch(oldphase) {
-		case Pnet:
-			newstate(ppp, ppp->ipcp, Sclosed);
-		}
-		break;
-	case Pauth:
-		if(ppp->usepap)
-			sendpap(ppp);
-		else if(!ppp->usechap)
-			setphase(ppp, Pnet);
-		break;
-	case Pnet:
-		pinit(ppp, ppp->ipcp);
-		break;
-	case Pterm:
-		/* what? */
-		break;
-	}
-}
-
-static void
-pinit(PPP *ppp, Pstate *p)
-{
-	p->timeout = 0;
-
-	switch(p->proto){
-	case Plcp:
-		ppp->magic = TK2MS(MACHP(0)->ticks);
-		ppp->xctlmap = 0xffffffff;
-		ppp->period = 0;
-		p->optmask = 0xffffffff;
-		ppp->rctlmap = 0;
-		ppp->ipcp->state = Sclosed;
-		ppp->ipcp->optmask = 0xffffffff;
-
-		/* quality goo */
-		ppp->timeout = 0;
-		memset(&ppp->in, 0, sizeof(ppp->in));
-		memset(&ppp->out, 0, sizeof(ppp->out));
-		memset(&ppp->pin, 0, sizeof(ppp->pin));
-		memset(&ppp->pout, 0, sizeof(ppp->pout));
-		memset(&ppp->sin, 0, sizeof(ppp->sin));
-		break;
-	case Pipcp:
-		if(ppp->localfrozen == 0)
-			invalidate(ppp->local);
-		if(ppp->remotefrozen == 0)
-			invalidate(ppp->remote);
-		p->optmask = 0xffffffff;
-		ppp->ctcp = compress_init(ppp->ctcp);
-		ppp->usedns = 3;
-		invalidate(ppp->dns1);
-		invalidate(ppp->dns2);
-		break;
-	}
-	p->confid = p->rcvdconfid = -1;
-	config(ppp, p, 1);
-	newstate(ppp, p, Sreqsent);
-}
-
-/*
- *  change protocol to a new state.
- */
-static void
-newstate(PPP *ppp, Pstate *p, int state)
-{
-	netlog(ppp->f, Logppp, "%ux %ux %s->%s ctlmap %lux/%lux flags %ux mtu %d mru %d\n", ppp, p->proto,
-		snames[p->state], snames[state], ppp->rctlmap, ppp->xctlmap, p->flags,
-		ppp->mtu, ppp->mru);
-
-	if(p->proto == Plcp) {
-		if(state == Sopened)
-			setphase(ppp, Pauth);
-		else if(state == Sclosed)
-			setphase(ppp, Pdead);
-		else if(p->state == Sopened)
-			setphase(ppp, Plink);
-	}
-
-	if(p->proto == Pipcp && state == Sopened && validv4(ppp->local) && validv4(ppp->remote)){
-		netlog(ppp->f, Logppp, "pppnewstate: local %I remote %I\n", ppp->local, ppp->remote);
-		ipmove(pppdns[0], ppp->dns1);
-		ipmove(pppdns[1], ppp->dns2);
-		ipconnect(ppp);
-		/* if this is the only network, set up a default route */
-//		if(ppp->ifc->link==nil)		/* how??? */
-			setdefroute(ppp, ppp->remote);
-		errlog(ppp, Enoerror);
-	}
-
-	p->state = state;
-}
-
-static void
-remove(PPP *ppp)
-{
-	free(ppp->ipcp);
-	ppp->ipcp = 0;
-	free(ppp->ctcp);
-	ppp->ctcp = 0;
-	free(ppp->lcp);
-	ppp->lcp = 0;
-	if (ppp->inbuf) {
-		freeb(ppp->inbuf);
-		ppp->inbuf = nil;
-	}
-	if (ppp->outbuf) {
-		freeb(ppp->outbuf);
-		ppp->outbuf = nil;
-	}
-	free(ppp);
-}
-
-void
-pppclose(PPP *ppp)
-{
-	hangup(ppp);
-	remove(ppp);
-}
-
-static void
-dumpblock(Block *b)
-{
-	char x[256];
-	int i;
-
-	for(i = 0; i < (sizeof(x)-1)/3 && b->rp+i < b->wp; i++)
-		sprint(&x[3*i], "%2.2ux ", b->rp[i]);
-	print("%s\n", x);
-}
-
-/* returns (protocol, information) */
-static int
-getframe(PPP *ppp, Block **info)
-{
-	uchar *p, *from, *to;
-	int n, len, proto;
-	ulong c;
-	ushort fcs;
-	Block *buf, *b;
-
-	buf = ppp->inbuf;
-	for(;;){
-		/* read till we hit a frame byte or run out of room */
-		for(p = buf->rp; buf->wp < buf->lim;){
-			for(; p < buf->wp; p++)
-				if(*p == HDLC_frame)
-					goto break2;
-
-			len = buf->lim - buf->wp;
-			n = 0;
-			if(ppp->dchan != nil)
-				n = kchanio(ppp->dchan, buf->wp, len, OREAD);
-				netlog(ppp->f, Logppp, "ppp kchanio %d bytes\n", n);
-			if(n <= 0){
-				buf->wp = buf->rp;
-//				if(n < 0)
-//					print("ppp kchanio(%s) returned %d: %r",
-//						ppp->dchan->path->elem, n);
-				*info = nil;
-				return 0;
-			}
-			buf->wp += n;
-		}
-break2:
-
-		/* copy into block, undoing escapes, and caculating fcs */
-		fcs = PPP_initfcs;
-		b = allocb(p - buf->rp);
-		to = b->wp;
-		for(from = buf->rp; from != p;){
-			c = *from++;
-			if(c == HDLC_esc){
-				if(from == p)
-					break;
-				c = *from++ ^ 0x20;
-			} else if((c < 0x20) && (ppp->rctlmap & (1 << c)))
-				continue;
-			*to++ = c;
-			fcs = (fcs >> 8) ^ fcstab[(fcs ^ c) & 0xff];
-		}
-
-		/* copy down what's left in buffer */
-		p++;
-		memmove(buf->rp, p, buf->wp - p);
-		n = p - buf->rp;
-		buf->wp -= n;
-		b->wp = to - 2;
-
-		/* return to caller if checksum matches */
-		if(fcs == PPP_goodfcs){
-			if(b->rp[0] == PPP_addr && b->rp[1] == PPP_ctl)
-				b->rp += 2;
-			proto = *b->rp++;
-			if((proto & 0x1) == 0)
-				proto = (proto<<8) | *b->rp++;
-			if(b->rp < b->wp){
-				ppp->in.bytes += n;
-				ppp->in.packets++;
-				*info = b;
-				return proto;
-			}
-		} else if(BLEN(b) > 0){
-			ppp->ifc->inerr++;
-			ppp->in.discards++;
-			netlog(ppp->f, Logppp, "len %d/%d cksum %ux (%ux %ux %ux %ux)\n",
-				BLEN(b), BLEN(buf), fcs, b->rp[0],
-				b->rp[1], b->rp[2], b->rp[3]);
-		}
-
-		freeblist(b);
-	}
-	*info = nil;
-	return 0;
-}
-
-/* send a PPP frame */
-static Block *
-putframe(PPP *ppp, int proto, Block *b)
-{
-	Block *buf;
-	uchar *to, *from;
-	ushort fcs;
-	ulong ctlmap;
-	int c;
-	Block *bp;
-
-	if(ppp->dchan == nil){
-		netlog(ppp->f, Logppp, "putframe: dchan down\n");
-		errlog(ppp, Ehungup);
-		return b;
-	}
-	netlog(ppp->f, Logppp, "putframe %ux %d %d (%d bytes)\n", proto, b->rp[0], b->rp[1], BLEN(b));
-
-	ppp->out.packets++;
-
-	if(proto == Plcp)
-		ctlmap = 0xffffffff;
-	else
-		ctlmap = ppp->xctlmap;
-
-	/* make sure we have head room */
-	if(b->rp - b->base < 4){
-		b = padblock(b, 4);
-		b->rp += 4;
-	}
-
-	/* add in the protocol and address, we'd better have left room */
-	from = b->rp;
-	*--from = proto;
-	if(!(ppp->lcp->flags&Fpc) || proto > 0x100 || proto == Plcp)
-		*--from = proto>>8;
-	if(!(ppp->lcp->flags&Fac) || proto == Plcp){
-		*--from = PPP_ctl;
-		*--from = PPP_addr;
-	}
-
-	qlock(&ppp->outlock);
-	buf = ppp->outbuf;
-
-	/* escape and checksum the body */
-	fcs = PPP_initfcs;
-	to = buf->rp;
-
-	*to++ = HDLC_frame;
-
-	for(bp = b; bp; bp = bp->next){
-		if(bp != b)
-			from = bp->rp;
-		for(; from < bp->wp; from++){
-			c = *from;
-			if(c == HDLC_frame || c == HDLC_esc
-			   || (c < 0x20 && ((1<<c) & ctlmap))){
-				*to++ = HDLC_esc;
-				*to++ = c ^ 0x20;
-			} else 
-				*to++ = c;
-			fcs = (fcs >> 8) ^ fcstab[(fcs ^ c) & 0xff];
-		}
-	}
-
-	/* add on and escape the checksum */
-	fcs = ~fcs;
-	c = fcs;
-	if(c == HDLC_frame || c == HDLC_esc
-	   || (c < 0x20 && ((1<<c) & ctlmap))){
-		*to++ = HDLC_esc;
-		*to++ = c ^ 0x20;
-	} else 
-		*to++ = c;
-	c = fcs>>8;
-	if(c == HDLC_frame || c == HDLC_esc
-	   || (c < 0x20 && ((1<<c) & ctlmap))){
-		*to++ = HDLC_esc;
-		*to++ = c ^ 0x20;
-	} else 
-		*to++ = c;
-
-	/* add frame marker and send */
-	*to++ = HDLC_frame;
-	buf->wp = to;
-	if(ppp->dchan == nil){
-		netlog(ppp->f, Logppp, "putframe: dchan down\n");
-		errlog(ppp, Ehungup);
-	}else{
-		kchanio(ppp->dchan, buf->rp, BLEN(buf), OWRITE);
-		ppp->out.bytes += BLEN(buf);
-	}
-
-	qunlock(&ppp->outlock);
-	return b;
-}
-
-#define IPB2LCP(b) ((Lcpmsg*)((b)->wp-4))
-
-static Block*
-alloclcp(int code, int id, int len)
-{
-	Block *b;
-	Lcpmsg *m;
-
-	/*
-	 *  leave room for header
-	 */
-	b = allocb(len);
-
-	m = (Lcpmsg*)b->wp;
-	m->code = code;
-	m->id = id;
-	b->wp += 4;
-
-	return b;
-}
-
-static void
-putao(Block *b, int type, int aproto, int alg)
-{
-	*b->wp++ = type;
-	*b->wp++ = 5;
-	hnputs(b->wp, aproto);
-	b->wp += 2;
-	*b->wp++ = alg;
-}
-
-static void
-putlo(Block *b, int type, ulong val)
-{
-	*b->wp++ = type;
-	*b->wp++ = 6;
-	hnputl(b->wp, val);
-	b->wp += 4;
-}
-
-static void
-putv4o(Block *b, int type, Ipaddr val)
-{
-	*b->wp++ = type;
-	*b->wp++ = 6;
-	if(v6tov4(b->wp, val) < 0){
-		/*panic("putv4o")*/;
-	}
-	b->wp += 4;
-}
-
-static void
-putso(Block *b, int type, ulong val)
-{
-	*b->wp++ = type;
-	*b->wp++ = 4;
-	hnputs(b->wp, val);
-	b->wp += 2;
-}
-
-static void
-puto(Block *b, int type)
-{
-	*b->wp++ = type;
-	*b->wp++ = 2;
-}
-
-/*
- *  send configuration request
- */
-static void
-config(PPP *ppp, Pstate *p, int newid)
-{
-	Block *b;
-	Lcpmsg *m;
-	int id;
-
-	if(newid){
-		id = ++(p->id);
-		p->confid = id;
-		p->timeout = Timeout;
-	} else
-		id = p->confid;
-	b = alloclcp(Lconfreq, id, 256);
-	m = IPB2LCP(b);
-	USED(m);
-
-	switch(p->proto){
-	case Plcp:
-		if(p->optmask & Fmagic)
-			putlo(b, Omagic, ppp->magic);
-		if(p->optmask & Fmtu)
-			putso(b, Omtu, ppp->mru);
-		if(p->optmask & Fac)
-			puto(b, Oac);
-		if(p->optmask & Fpc)
-			puto(b, Opc);
-		if(p->optmask & Fctlmap)
-			putlo(b, Octlmap, 0);	/* we don't want anything escaped */
-		break;
-	case Pipcp:
-		if((p->optmask & Fipaddr) /*&& validv4(ppp->local)*/)
-			putv4o(b, Oipaddr, ppp->local);
-		if(!nocompress && (p->optmask & Fipcompress)){
-			*b->wp++ = Oipcompress;
-			*b->wp++ = 6;
-			hnputs(b->wp, Pvjctcp);
-			b->wp += 2;
-			*b->wp++ = MAX_STATES-1;
-			*b->wp++ = 1;
-		}
-		if(ppp->usedns & 1)
-			putlo(b, Oipdns, 0);
-		if(ppp->usedns & 2)
-			putlo(b, Oipdns2, 0);
-		break;
-	}
-
-	hnputs(m->len, BLEN(b));
-	b = putframe(ppp, p->proto, b);
-	freeblist(b);
-}
-
-/*
- *  parse configuration request, sends an ack or reject packet
- *
- *	returns:	-1 if request was syntacticly incorrect
- *			 0 if packet was accepted
- *			 1 if packet was rejected
- */
-static int
-getopts(PPP *ppp, Pstate *p, Block *b)
-{
-	Lcpmsg *m, *repm;	
-	Lcpopt *o;
-	uchar *cp;
-	ulong rejecting, nacking, flags, proto;
-	ulong mtu, ctlmap, period;
-	ulong x;
-	Block *repb;
-	Ipaddr ipaddr;
-
-	rejecting = 0;
-	nacking = 0;
-	flags = 0;
-
-	/* defaults */
-	invalidate(ipaddr);
-	mtu = ppp->mtu;
-
-	ctlmap = 0xffffffff;
-	period = 0;
-
-	m = (Lcpmsg*)b->rp;
-	repb = alloclcp(Lconfack, m->id, BLEN(b));
-	repm = IPB2LCP(repb);
-
-	/* copy options into ack packet */
-	memmove(repm->data, m->data, b->wp - m->data);
-	repb->wp += b->wp - m->data;
-
-	/* look for options we don't recognize or like */
-	for(cp = m->data; cp < b->wp; cp += o->len){
-		o = (Lcpopt*)cp;
-		if(cp + o->len > b->wp || o->len == 0){
-			freeblist(repb);
-			netlog(ppp->f, Logppp, "ppp %s: bad option length %ux\n", ppp->ifc->dev,
-				o->type);
-			return -1;
-		}
-
-		switch(p->proto){
-		case Plcp:
-			switch(o->type){
-			case Oac:
-				flags |= Fac;
-				continue;
-			case Opc:
-				flags |= Fpc;
-				continue;
-			case Omtu:
-				mtu = nhgets(o->data);
-				if(mtu < ppp->ifc->m->mintu){
-					netlog(ppp->f, Logppp, "bogus mtu %d\n", mtu);
-					mtu = ppp->ifc->m->mintu;
-				}
-				continue;
-			case Omagic:
-				if(ppp->magic == nhgetl(o->data))
-					netlog(ppp->f, Logppp, "ppp: possible loop\n");
-				continue;
-			case Octlmap:
-				ctlmap = nhgetl(o->data);
-				continue;
-			case Oquality:
-				proto = nhgets(o->data);
-				if(proto != Plqm)
-					break;
-				x = nhgetl(o->data+2)*10;
-				period = (x+Period-1)/Period;
-				continue;
-			case Oauth:
-				proto = nhgets(o->data);
-				if(proto == Ppap && ppp->chapname[0] && ppp->secret[0]){
-					ppp->usepap = 1;
-					netlog(ppp->f, Logppp, "PPP %s: select PAP\n", ppp->ifc->dev);
-					continue;
-				}
-				if(proto != Pchap || o->data[2] != APmd5){
-					if(!nacking){
-						nacking = 1;
-						repb->wp = repm->data;
-						repm->code = Lconfnak;
-					}
-					putao(repb, Oauth, Pchap, APmd5);
-				}
-				else
-					ppp->usechap = 1;
-				ppp->usepap = 0;
-				continue;
-			}
-			break;
-		case Pipcp:
-			switch(o->type){
-			case Oipaddr:	
-				v4tov6(ipaddr, o->data);
-				if(!validv4(ppp->remote))
-					continue;
-				if(!validv4(ipaddr) && !rejecting){
-					/* other side requesting an address */
-					if(!nacking){
-						nacking = 1;
-						repb->wp = repm->data;
-						repm->code = Lconfnak;
-					}
-					putv4o(repb, Oipaddr, ppp->remote);
-				}
-				continue;
-			case Oipcompress:
-				proto = nhgets(o->data);
-				if(nocompress || proto != Pvjctcp || compress_negotiate(ppp->ctcp, o->data+2) < 0)
-					break;
-				flags |= Fipcompress;
-				continue;
-			}
-			break;
-		}
-
-		/* come here if option is not recognized */
-		if(!rejecting){
-			rejecting = 1;
-			repb->wp = repm->data;
-			repm->code = Lconfrej;
-		}
-		netlog(ppp->f, Logppp, "ppp %s: bad %ux option %d\n", ppp->ifc->dev, p->proto, o->type);
-		memmove(repb->wp, o, o->len);
-		repb->wp += o->len;
-	}
-
-	/* permanent changes only after we know that we liked the packet */
-	if(!rejecting && !nacking){
-		switch(p->proto){
-		case Plcp:
-			netlog(ppp->f, Logppp, "Plcp: mtu: %d %d x:%lux/r:%lux %lux\n", mtu, ppp->mtu, ppp->xctlmap, ppp->rctlmap, ctlmap);
-			ppp->period = period;
-			ppp->xctlmap = ctlmap;
-			if(mtu > Maxmtu)
-				mtu = Maxmtu;
-			if(mtu < Minmtu)
-				mtu = Minmtu;
-			ppp->mtu = mtu;
-			break;
-		case Pipcp:
-			if(validv4(ipaddr) && ppp->remotefrozen == 0)
- 				ipmove(ppp->remote, ipaddr);
-			break;
-		}
-		p->flags = flags;
-	}
-
-	hnputs(repm->len, BLEN(repb));
-	repb = putframe(ppp, p->proto, repb);
-	freeblist(repb);
-
-	return rejecting || nacking;
-}
-
-/*
- *  parse configuration rejection, just stop sending anything that they
- *  don't like (except for ipcp address nak).
- */
-static void
-rejopts(PPP *ppp, Pstate *p, Block *b, int code)
-{
-	Lcpmsg *m;
-	Lcpopt *o;
-
-	/* just give up trying what the other side doesn't like */
-	m = (Lcpmsg*)b->rp;
-	for(b->rp = m->data; b->rp < b->wp; b->rp += o->len){
-		o = (Lcpopt*)b->rp;
-		if(b->rp + o->len > b->wp || o->len == 0){
-			netlog(ppp->f, Logppp, "ppp %s: bad roption length %ux\n", ppp->ifc->dev,
-				o->type);
-			return;
-		}
-
-		if(code == Lconfrej){
-			if(o->type < 8*sizeof(p->optmask))
-				p->optmask &= ~(1<<o->type);
-			if(o->type == Oipdns)
-				ppp->usedns &= ~1;
-			else if(o->type == Oipdns2)
-				ppp->usedns &= ~2;
-			netlog(ppp->f, Logppp, "ppp %s: %ux rejecting %d\n", ppp->ifc->dev, p->proto,
-				o->type);
-			continue;
-		}
-
-		switch(p->proto){
-		case Plcp:
-			switch(o->type){
-			case Octlmap:
-				ppp->rctlmap = nhgetl(o->data);
-				break;
-			default:
-				if(o->type < 8*sizeof(p->optmask))
-					p->optmask &= ~(1<<o->type);
-				break;
-			};
-		case Pipcp:
-			switch(o->type){
-			case Oipaddr:
-				if(!validv4(ppp->local))
-					v4tov6(ppp->local, o->data);
-//				if(o->type < 8*sizeof(p->optmask))
-//					p->optmask &= ~(1<<o->type);
-				break;
-			case Oipdns:
-				if(!validv4(ppp->dns1))
-					v4tov6(ppp->dns1, o->data);
-				ppp->usedns &= ~1;
-				break;
-			case Oipdns2:
-				if(!validv4(ppp->dns2))
-					v4tov6(ppp->dns2, o->data);
-				ppp->usedns &= ~2;
-				break;
-			default:
-				if(o->type < 8*sizeof(p->optmask))
-					p->optmask &= ~(1<<o->type);
-				break;
-			}
-			break;
-		}
-	}
-}
-
-
-/*
- *  put a messages through the lcp or ipcp state machine.  They are
- *  very similar.
- */
-static void
-rcv(PPP *ppp, Pstate *p, Block *b)
-{
-	ulong len;
-	int err;
-	Lcpmsg *m;
-
-	if(BLEN(b) < 4){
-		netlog(ppp->f, Logppp, "ppp %s: short lcp message\n", ppp->ifc->dev);
-		freeblist(b);
-		return;
-	}
-	m = (Lcpmsg*)b->rp;
-	len = nhgets(m->len);
-	if(BLEN(b) < len){
-		netlog(ppp->f, Logppp, "ppp %s: short lcp message\n", ppp->ifc->dev);
-		freeblist(b);
-		return;
-	}
-
-	netlog(ppp->f, Logppp, "ppp: %ux rcv %d len %d id %d/%d/%d\n",
-		p->proto, m->code, len, m->id, p->confid, p->id);
-
-	if(p->proto != Plcp && ppp->lcp->state != Sopened){
-		netlog(ppp->f, Logppp, "ppp: non-lcp with lcp not open\n");
-		freeb(b);
-		return;
-	}
-
-	qlock(ppp);
-	switch(m->code){
-	case Lconfreq:
-		/* flush the output queue */
-		if(p->state == Sopened && p->proto == Plcp)
-			kchanio(ppp->cchan, "f", 1, OWRITE);
-
-		printopts(ppp, p, b, 0);
-		err = getopts(ppp, p, b);
-		if(err < 0)
-			break;
-
-		if(m->id == p->rcvdconfid)
-			break;			/* don't change state for duplicates */
-		p->rcvdconfid = m->id;
-
-		switch(p->state){
-		case Sackrcvd:
-			if(err)
-				break;
-			newstate(ppp, p, Sopened);
-			break;
-		case Sclosed:
-		case Sopened:
-			config(ppp, p, 1);
-			if(err == 0)
-				newstate(ppp, p, Sacksent);
-			else
-				newstate(ppp, p, Sreqsent);
-			break;
-			break;
-		case Sreqsent:
-		case Sacksent:
-			if(err == 0)
-				newstate(ppp, p, Sacksent);
-			else
-				newstate(ppp, p, Sreqsent);
-			break;
-		}
-		break;
-	case Lconfack:
-		if(p->confid != m->id){
-			/* ignore if it isn't the message we're sending */
-			netlog(ppp->f, Logppp, "ppp: dropping confack\n");
-			break;
-		}
-		p->confid = -1;		/* ignore duplicates */
-		p->id++;		/* avoid sending duplicates */
-
-		switch(p->state){
-		case Sopened:
-		case Sackrcvd:
-			config(ppp, p, 1);
-			newstate(ppp, p, Sreqsent);
-			break;
-		case Sreqsent:
-			newstate(ppp, p, Sackrcvd);
-			break;
-		case Sacksent:
-			newstate(ppp, p, Sopened);
-			break;
-		}
-		break;
-	case Lconfrej:
-	case Lconfnak:
-		if(p->confid != m->id) {
-			/* ignore if it isn't the message we're sending */
-			netlog(ppp->f, Logppp, "ppp: dropping confrej or confnak\n");
-			break;
-		}
-		p->confid = -1;		/* ignore duplicates */
-		p->id++;		/* avoid sending duplicates */
-
-		switch(p->state){
-		case Sopened:
-		case Sackrcvd:
-			config(ppp, p, 1);
-			newstate(ppp, p, Sreqsent);
-			break;
-		case Sreqsent:
-		case Sacksent:
-			printopts(ppp, p, b, 0);
-			rejopts(ppp, p, b, m->code);
-			config(ppp, p, 1);
-			break;
-		}
-		break;
-	case Ltermreq:
-		m->code = Ltermack;
-		b = putframe(ppp, p->proto, b);
-
-		switch(p->state){
-		case Sackrcvd:
-		case Sacksent:
-			newstate(ppp, p, Sreqsent);
-			break;
-		case Sopened:
-			newstate(ppp, p, Sclosing);
-			break;
-		}
-		break;
-	case Ltermack:
-		if(p->termid != m->id)	/* ignore if it isn't the message we're sending */
-			break;
-
-		if(p->proto == Plcp)
-			ppp->ipcp->state = Sclosed;
-		switch(p->state){
-		case Sclosing:
-			newstate(ppp, p, Sclosed);
-			break;
-		case Sackrcvd:
-			newstate(ppp, p, Sreqsent);
-			break;
-		case Sopened:
-			config(ppp, p, 0);
-			newstate(ppp, p, Sreqsent);
-			break;
-		}
-		break;
-	case Lcoderej:
-		netlog(ppp->f, Logppp, "ppp %s: code reject %d\n", ppp->ifc->dev, m->data[0]);
-		break;
-	case Lprotorej:
-		netlog(ppp->f, Logppp, "ppp %s: proto reject %lux\n", ppp->ifc->dev, nhgets(m->data));
-		break;
-	case Lechoreq:
-		m->code = Lechoack;
-		b = putframe(ppp, p->proto, b);
-		break;
-	case Lechoack:
-	case Ldiscard:
-		/* nothing to do */
-		break;
-	}
-
-	qunlock(ppp);
-	freeblist(b);
-}
-
-/*
- *  timer for protocol state machine
- */
-static void
-ptimer(PPP *ppp, Pstate *p)
-{
-	if(p->state == Sopened || p->state == Sclosed)
-		return;
-
-	p->timeout--;
-	switch(p->state){
-	case Sclosing:
-		sendtermreq(ppp, p);
-		break;
-	case Sreqsent:
-	case Sacksent:
-		if(p->timeout <= 0){
-			if(p->proto && ppp->cchan != nil)
-				kchanio(ppp->cchan, "f", 1, OWRITE); /* flush output queue */
-			newstate(ppp, p, Sclosed);
-		} else {
-			config(ppp, p, 0);
-		}
-		break;
-	case Sackrcvd:
-		if(p->timeout <= 0){
-			if(p->proto && ppp->cchan != nil)
-				kchanio(ppp->cchan, "f", 1, OWRITE); /* flush output queue */
-			newstate(ppp, p, Sclosed);
-		}
-		else {
-			config(ppp, p, 0);
-			newstate(ppp, p, Sreqsent);
-		}
-		break;
-	}
-}
-
-/*
- *  timer for ppp
- */
-static void
-ppptimer(void *arg)
-{
-	PPP *ppp;
-
-	ppp = arg;
-	ppp->timep = up;
-	if(waserror()){
-		netlog(ppp->f, Logppp, "ppptimer: %I: %s\n", ppp->local, up->env->errstr);
-		ppp->timep = 0;
-		pexit("hangup", 1);
-	}
-	for(;;){
-		tsleep(&up->sleep, return0, nil, Period);
-		if(ppp->pppup){
-			qlock(ppp);
-
-			ptimer(ppp, ppp->lcp);
-			if(ppp->lcp->state == Sopened)
-				ptimer(ppp, ppp->ipcp);
-
-			if(ppp->period && --(ppp->timeout) <= 0){
-				ppp->timeout = ppp->period;
-				putlqm(ppp);
-			}
-
-			qunlock(ppp);
-		}
-	}
-}
-
-static void
-setdefroute(PPP *ppp, Ipaddr gate)
-{
-	int fd, n;
-	char path[128], msg[128];
-
-	snprint(path, sizeof path, "#I%d/iproute", ppp->f->dev);
-	fd = kopen(path, ORDWR);
-	if(fd < 0)
-		return;
-	n = snprint(msg, sizeof(msg), "add 0 0 %I", gate);
-	kwrite(fd, msg, n);
-	kclose(fd);
-}
-
-static void
-ipconnect(PPP *ppp)
-{
-	int fd, n;
-	char path[128], msg[128];
-
-	snprint(path, sizeof path, "#I%d/ipifc/%d/ctl", ppp->f->dev, ppp->ifc->conv->x);
-	fd = kopen(path, ORDWR);
-	if(fd < 0)
-		return;
-	n = snprint(msg, sizeof(msg), "connect %I 255.255.255.255 %I", ppp->local, ppp->remote);
-	if (kwrite(fd, msg, n) != n)
-		print("ppp ipconnect: %s: %r\n", msg);
-	kclose(fd);
-}
-
-PPP*
-pppopen(PPP *ppp, char *dev,
-	Ipaddr ipaddr, Ipaddr remip,
-	int mtu, int framing,
-	char *chapname, char *secret)
-{
-	int fd, cfd;
-	char ctl[Maxpath];
-
-	invalidate(ppp->remote);
-	invalidate(ppp->local);
-	invalidate(ppp->dns1);
-	invalidate(ppp->dns2);
-	ppp->mtu = Defmtu;
-	ppp->mru = mtu;
-	ppp->framing = framing;
-
-	if(remip != nil && validv4(remip)){
-		ipmove(ppp->remote, remip);
-		ppp->remotefrozen = 1;
-	}
-	if(ipaddr != nil && validv4(ipaddr)){
-		ipmove(ppp->local, ipaddr);
-		ppp->localfrozen = 1;
-	}
-
-	/* authentication goo */
-	ppp->secret[0] = 0;
-	if(secret != nil)
-		strncpy(ppp->secret, secret, sizeof(ppp->secret));
-	ppp->chapname[0] = 0;
-	if(chapname != nil)
-		strncpy(ppp->chapname, chapname, sizeof(ppp->chapname));
-
-	if(strchr(dev, '!'))
-		fd = kdial(dev, nil, nil, nil);
-	else
-		fd = kopen(dev, ORDWR);
-	if(fd < 0){
-		netlog(ppp->f, Logppp, "ppp: can't open %s\n", dev);
-		return nil;
-	}
-	ppp->dchan = fdtochan(up->env->fgrp, fd, ORDWR, 0, 1);
-	kclose(fd);
-
-	/* set up serial line */
-/* XXX this stuff belongs in application, not driver */
-	sprint(ctl, "%sctl", dev);
-	cfd = kopen(ctl, ORDWR);
-	if(cfd >= 0){
-		ppp->cchan = fdtochan(up->env->fgrp, cfd, ORDWR, 0, 1);
-		kclose(cfd);
-		kchanio(ppp->cchan, "m1", 2, OWRITE);	/* cts/rts flow control/fifo's) on */
-		kchanio(ppp->cchan, "q64000", 6, OWRITE);/* increas q size to 64k */
-		kchanio(ppp->cchan, "n1", 2, OWRITE);	/* nonblocking writes on */
-		kchanio(ppp->cchan, "r1", 2, OWRITE);	/* rts on */
-		kchanio(ppp->cchan, "d1", 2, OWRITE);	/* dtr on */
-	}
-
-	ppp->pppup = 1;
-	init(ppp);
-	return ppp;
-}
-
-static void
-hangup(PPP *ppp)
-{
-	qlock(ppp);
-	if(waserror()){
-		qunlock(ppp);
-		nexterror();
-	}
-	netlog(ppp->f, Logppp, "PPP Hangup\n");
-	errlog(ppp, Ehungup);
-	if(ppp->pppup && ppp->cchan != nil){
-		kchanio(ppp->cchan, "f", 1, OWRITE);	/* flush */
-		kchanio(ppp->cchan, "h", 1, OWRITE);	/* hangup */
-	}
-	cclose(ppp->dchan);
-	cclose(ppp->cchan);
-	ppp->dchan = nil;
-	ppp->cchan = nil;
-	ppp->pppup = 0;
-	qunlock(ppp);
-	poperror();
-}
-
-/* return next input IP packet */
-Block*
-pppread(PPP *ppp)
-{
-	Block *b;
-	int proto;
-	Lcpmsg *m;
-
-	for(;;){
-		proto = getframe(ppp, &b);
-		if(b == nil)
-			return nil;
-		netlog(ppp->f, Logppp, "ppp: read proto %d len %d\n", proto, blocklen(b));
-		switch(proto){
-		case Plcp:
-			rcv(ppp, ppp->lcp, b);
-			break;
-		case Pipcp:
-			rcv(ppp, ppp->ipcp, b);
-			break;
-		case Pip:
-			if(ppp->ipcp->state == Sopened)
-				return b;
-			freeblist(b);
-			break;
-		case Plqm:
-			getlqm(ppp, b);
-			break;
-		case Pchap:
-			getchap(ppp, b);
-			break;
-		case Ppap:
-			getpap(ppp, b);
-			break;
-		case Pvjctcp:
-		case Pvjutcp:
-			if(ppp->ipcp->state == Sopened){
-				b = tcpuncompress(ppp->ctcp, b, proto, ppp->f);
-				if(b != nil)
-					return b;
-			}
-			freeblist(b);
-			break;
-		default:
-			netlog(ppp->f, Logppp, "unknown proto %ux\n", proto);
-			if(ppp->lcp->state == Sopened){
-				/* reject the protocol */
-				b->rp -= 6;
-				m = (Lcpmsg*)b->rp;
-				m->code = Lprotorej;
-				m->id = ++ppp->lcp->id;
-				hnputs(m->data, proto);
-				hnputs(m->len, BLEN(b));
-				b = putframe(ppp, Plcp, b);
-			}
-			freeblist(b);
-			break;
-		}
-	}
-	return nil;		/* compiler confused */
-}
-
-/* transmit an IP packet */
-int
-pppwrite(PPP *ppp, Block *b)
-{
-	ushort proto;
-	int r;
-
-	qlock(ppp);
-
-	/* can't send ip packets till we're established */
-	if(ppp->ipcp->state != Sopened)
-		goto ret;
-
-	/* link hung up */
-	if(ppp->dchan == nil)
-		goto ret;
-
-	b = concatblock(b);		/* or else compression will barf */
-
-	proto = Pip;
-	if(ppp->ipcp->flags & Fipcompress)
-		proto = compress(ppp->ctcp, b, ppp->f);
-	b = putframe(ppp, proto, b);
-
-
-ret:
-	qunlock(ppp);
-
-	r = blocklen(b);
-	netlog(ppp->f, Logppp, "ppp wrt len %d\n", r);
-
-	freeblist(b);
-	return r;
-}
-
-/*
- *  link quality management
- */
-static void
-getlqm(PPP *ppp, Block *b)
-{
-	Qualpkt *p;
-
-	p = (Qualpkt*)b->rp;
-	if(BLEN(b) == sizeof(Qualpkt)){
-		ppp->in.reports++;
-		ppp->pout.reports = nhgetl(p->peeroutreports);
-		ppp->pout.packets = nhgetl(p->peeroutpackets);
-		ppp->pout.bytes = nhgetl(p->peeroutbytes);
-		ppp->pin.reports = nhgetl(p->peerinreports);
-		ppp->pin.packets = nhgetl(p->peerinpackets);
-		ppp->pin.discards = nhgetl(p->peerindiscards);
-		ppp->pin.errors = nhgetl(p->peerinerrors);
-		ppp->pin.bytes = nhgetl(p->peerinbytes);
-
-		/* save our numbers at time of reception */
-		memmove(&ppp->sin, &ppp->in, sizeof(Qualstats));
-
-	}
-	freeblist(b);
-	if(ppp->period == 0)
-		putlqm(ppp);
-
-}
-static void
-putlqm(PPP *ppp)
-{
-	Qualpkt *p;
-	Block *b;
-
-	b = allocb(sizeof(Qualpkt));
-	b->wp += sizeof(Qualpkt);
-	p = (Qualpkt*)b->rp;
-	hnputl(p->magic, 0);
-
-	/* heresay (what he last told us) */
-	hnputl(p->lastoutreports, ppp->pout.reports);
-	hnputl(p->lastoutpackets, ppp->pout.packets);
-	hnputl(p->lastoutbytes, ppp->pout.bytes);
-
-	/* our numbers at time of last reception */
-	hnputl(p->peerinreports, ppp->sin.reports);
-	hnputl(p->peerinpackets, ppp->sin.packets);
-	hnputl(p->peerindiscards, ppp->sin.discards);
-	hnputl(p->peerinerrors, ppp->sin.errors);
-	hnputl(p->peerinbytes, ppp->sin.bytes);
-
-	/* our numbers now */
-	hnputl(p->peeroutreports, ppp->out.reports+1);
-	hnputl(p->peeroutpackets, ppp->out.packets+1);
-	hnputl(p->peeroutbytes, ppp->out.bytes+53/*hack*/);
-
-	b = putframe(ppp, Plqm, b);
-	freeblist(b);
-	ppp->out.reports++;
-}
-
-/*
- *  challenge response dialog
- */
-static void
-getchap(PPP *ppp, Block *b)
-{
-	Lcpmsg *m;
-	int len, vlen, n;
-	char md5buf[512];
-
-	m = (Lcpmsg*)b->rp;
-	len = nhgets(m->len);
-	if(BLEN(b) < len){
-		netlog(ppp->f, Logppp, "ppp %s: short chap message\n", ppp->ifc->dev);
-		freeblist(b);
-		return;
-	}
-
-	switch(m->code){
-	case Cchallenge:
-		vlen = m->data[0];
-		if(vlen > len - 5){
-			netlog(ppp->f, Logppp, "PPP %s: bad challenge len\n", ppp->ifc->dev);
-			freeblist(b);
-			break;
-		}
-
-		netlog(ppp->f, Logppp, "PPP %s: CHAP Challenge\n", ppp->ifc->dev);
-netlog(ppp->f, Logppp, "(secret %s chapname %s id %d)\n", ppp->secret, ppp->chapname, m->id);
-		/* create string to hash */
-		md5buf[0] = m->id;
-		strcpy(md5buf+1, ppp->secret);
-		n = strlen(ppp->secret) + 1;
-		memmove(md5buf+n, m->data+1, vlen);
-		n += vlen;
-		freeblist(b);
-
-		/* send reply */
-		len = 4 + 1 + 16 + strlen(ppp->chapname);
-		b = alloclcp(2, md5buf[0], len);
-		m = IPB2LCP(b);
-		m->data[0] = 16;
-		md5((uchar*)md5buf, n, m->data+1, 0);
-		memmove((char*)m->data+17, ppp->chapname, strlen(ppp->chapname));
-		hnputs(m->len, len);
-		b->wp += len-4;
-		b = putframe(ppp, Pchap, b);
-		break;
-	case Cresponse:
-		netlog(ppp->f, Logppp, "PPP %s: chap response?\n", ppp->ifc->dev);
-		break;
-	case Csuccess:
-		netlog(ppp->f, Logppp, "PPP %s: chap succeeded\n", ppp->ifc->dev);
-		setphase(ppp, Pnet);
-		break;
-	case Cfailure:
-		netlog(ppp->f, Logppp, "PPP %s: chap failed: %.*s\n", ppp->ifc->dev, len-4, m->data);
-		errlog(ppp, Eperm);
-		break;
-	default:
-		netlog(ppp->f, Logppp, "PPP %s: chap code %d?\n", ppp->ifc->dev, m->code);
-		break;
-	}
-	freeblist(b);
-}
-
-/*
- *  password authentication protocol dialog
- *	-- obsolete but all we know how to use with NT just now
- */
-static void
-sendpap(PPP *ppp)
-{
-	Lcpmsg *m;
-	int clen, slen, len;
-	Block *b;
-	uchar *p;
-
-	clen = strlen(ppp->chapname);
-	slen = strlen(ppp->secret);
-	len = 4 + 1 + clen + 1 + slen;
-	ppp->papid = ++ppp->lcp->id;
-	b = alloclcp(Cpapreq, ppp->papid, len);
-	m = IPB2LCP(b);
-	p = m->data;
-	p[0] = clen;
-	memmove(p+1, ppp->chapname, clen);
-	p += clen + 1;
-	p[0] = slen;
-	memmove(p+1, ppp->secret, slen);
-	hnputs(m->len, len);
-	b->wp += len-4;
-	b = putframe(ppp, Ppap, b);
-	netlog(ppp->f, Logppp, "PPP %s: sent pap auth req (%d)\n", ppp->ifc->dev, len);
-	freeblist(b);
-}
-
-static void
-getpap(PPP *ppp, Block *b)
-{
-	Lcpmsg *m;
-	int len;
-
-	m = (Lcpmsg*)b->rp;
-	len = nhgets(m->len);
-	if(BLEN(b) < len){
-		netlog(ppp->f, Logppp, "ppp %s: short pap message\n", ppp->ifc->dev);
-		freeblist(b);
-		return;
-	}
-
-	switch(m->code){
-	case Cpapreq:
-		netlog(ppp->f, Logppp, "PPP %s: pap request?\n", ppp->ifc->dev);
-		break;
-	case Cpapack:
-		netlog(ppp->f, Logppp, "PPP %s: PAP succeeded\n", ppp->ifc->dev);
-		setphase(ppp, Pnet);
-		break;
-	case Cpapnak:
-		if(m->data[0])
-			netlog(ppp->f, Logppp, "PPP %s: PAP failed: %.*s\n", ppp->ifc->dev, len-5, m->data+1);
-		else
-			netlog(ppp->f, Logppp, "PPP %s: PAP failed\n", ppp->ifc->dev);
-		errlog(ppp, Eperm);
-		break;
-	default:
-		netlog(ppp->f, Logppp, "PPP %s: pap code %d?\n", ppp->ifc->dev, m->code);
-		break;
-	}
-	freeblist(b);
-}
-
-static void
-printopts(PPP *ppp, Pstate *p, Block *b, int send)
-{
-	Lcpmsg *m;	
-	Lcpopt *o;
-	int proto, x, period;
-	uchar *cp;
-	char *code, *dir;
-
-	m = (Lcpmsg*)b->rp;
-	switch(m->code) {
-	default: code = "<unknown>"; break;
-	case Lconfreq: code = "confrequest"; break;
-	case Lconfack: code = "confack"; break;
-	case Lconfnak: code = "confnak"; break;
-	case Lconfrej: code = "confreject"; break;
-	}
-
-	if(send)
-		dir = "send";
-	else
-		dir = "recv";
-
-	netlog(ppp->f, Logppp, "ppp: %s %s: id=%d\n", dir, code, m->id);
-
-	for(cp = m->data; cp < b->wp; cp += o->len){
-		o = (Lcpopt*)cp;
-		if(cp + o->len > b->wp || o->len == 0){
-			netlog(ppp->f, Logppp, "\tbad option length %ux\n", o->type);
-			return;
-		}
-
-		switch(p->proto){
-		case Plcp:
-			switch(o->type){
-			default:
-				netlog(ppp->f, Logppp, "\tunknown %d len=%d\n", o->type, o->len);
-				break;
-			case Omtu:
-				netlog(ppp->f, Logppp, "\tmtu = %d\n", nhgets(o->data));
-				break;
-			case Octlmap:
-				netlog(ppp->f, Logppp, "\tctlmap = %ux\n", nhgetl(o->data));
-				break;
-			case Oauth:
-				netlog(ppp->f, Logppp, "\tauth = ", nhgetl(o->data));
-				proto = nhgets(o->data);
-				switch(proto) {
-				default:
-					netlog(ppp->f, Logppp, "unknown auth proto %d\n", proto);
-					break;
-				case Ppap:
-					netlog(ppp->f, Logppp, "password\n");
-					break;
-				case Pchap:
-					netlog(ppp->f, Logppp, "chap %ux\n", o->data[2]);
-					break;
-				}
-				break;
-			case Oquality:
-				proto = nhgets(o->data);
-				switch(proto) {
-				default:
-					netlog(ppp->f, Logppp, "\tunknown quality proto %d\n", proto);
-					break;
-				case Plqm:
-					x = nhgetl(o->data+2)*10;
-					period = (x+Period-1)/Period;
-					netlog(ppp->f, Logppp, "\tlqm period = %d\n", period);
-					break;
-				}
-			case Omagic:
-				netlog(ppp->f, Logppp, "\tmagic = %ux\n", nhgetl(o->data));
-				break;
-			case Opc:
-				netlog(ppp->f, Logppp, "\tprotocol compress\n");
-				break;
-			case Oac:
-				netlog(ppp->f, Logppp, "\taddr compress\n");
-				break;
-			}
-			break;
-		case Pccp:
-			switch(o->type){
-			default:
-				netlog(ppp->f, Logppp, "\tunknown %d len=%d\n", o->type, o->len);
-				break;
-			case Ocoui:	
-				netlog(ppp->f, Logppp, "\tOUI\n");
-				break;
-			case Ocstac:
-				netlog(ppp->f, Logppp, "\tstac LZS\n");
-				break;
-			case Ocmppc:	
-				netlog(ppp->f, Logppp, "\tMicrosoft PPC len=%d %ux\n", o->len, nhgetl(o->data));
-				break;
-			}
-			break;
-		case Pecp:
-			switch(o->type){
-			default:
-				netlog(ppp->f, Logppp, "\tunknown %d len=%d\n", o->type, o->len);
-				break;
-			case Oeoui:	
-				netlog(ppp->f, Logppp, "\tOUI\n");
-				break;
-			case Oedese:
-				netlog(ppp->f, Logppp, "\tDES\n");
-				break;
-			}
-			break;
-		case Pipcp:
-			switch(o->type){
-			default:
-				netlog(ppp->f, Logppp, "\tunknown %d len=%d\n", o->type, o->len);
-				break;
-			case Oipaddrs:	
-				netlog(ppp->f, Logppp, "\tip addrs - deprecated\n");
-				break;
-			case Oipcompress:
-				netlog(ppp->f, Logppp, "\tip compress\n");
-				break;
-			case Oipaddr:	
-				netlog(ppp->f, Logppp, "\tip addr %V\n", o->data);
-				break;
-			case Oipdns:
-				netlog(ppp->f, Logppp, "\tdns addr %V\n", o->data);
-				break;
-			case Oipwins:	
-				netlog(ppp->f, Logppp, "\twins addr %V\n", o->data);
-				break;
-			case Oipdns2:
-				netlog(ppp->f, Logppp, "\tdns2 addr %V\n", o->data);
-				break;
-			case Oipwins2:	
-				netlog(ppp->f, Logppp, "\twins2 addr %V\n", o->data);
-				break;
-			}
-			break;
-		}
-	}
-}
-
-static void
-sendtermreq(PPP *ppp, Pstate *p)
-{
-	Block *b;
-	Lcpmsg *m;
-
-	p->termid = ++(p->id);
-	b = alloclcp(Ltermreq, p->termid, 4);
-	m = IPB2LCP(b);
-	hnputs(m->len, 4);
-	putframe(ppp, p->proto, b);
-	freeb(b);
-	newstate(ppp, p, Sclosing);
-}
-
-static void
-sendechoreq(PPP *ppp, Pstate *p)
-{
-	Block *b;
-	Lcpmsg *m;
-
-	p->termid = ++(p->id);
-	b = alloclcp(Lechoreq, p->id, 4);
-	m = IPB2LCP(b);
-	hnputs(m->len, 4);
-	putframe(ppp, p->proto, b);
-	freeb(b);
-}
-
-/*
- *  return non-zero if this is a valid v4 address
- */
-static int
-validv4(Ipaddr addr)
-{
-	return memcmp(addr, v4prefix, IPv4off) == 0;
-}
-
-static void
-invalidate(Ipaddr addr)
-{
-	ipmove(addr, IPnoaddr);
-}
--- a/os/ip/ppp.h
+++ /dev/null
@@ -1,258 +1,0 @@
-typedef struct PPP	PPP;
-typedef struct Pstate	Pstate;
-typedef struct Lcpmsg	Lcpmsg;
-typedef struct Lcpopt	Lcpopt;
-typedef struct Qualpkt	Qualpkt;
-typedef struct Qualstats Qualstats;
-typedef struct Tcpc	Tcpc;
-
-typedef uchar Ipaddr[IPaddrlen];
-
-enum
-{
-	HDLC_frame=	0x7e,
-	HDLC_esc=	0x7d,
-
-	/* PPP frame fields */
-	PPP_addr=	0xff,
-	PPP_ctl=	0x3,
-	PPP_initfcs=	0xffff,
-	PPP_goodfcs=	0xf0b8,
-
-	/* PPP phases */
-	Pdead=		0,	
-	Plink,				/* doing LCP */
-	Pauth,				/* doing chap */
-	Pnet,				/* doing IPCP, CCP */
-	Pterm,				/* closing down */
-
-	/* PPP protocol types */
-	Pip=		0x21,		/* internet */
-	Pvjctcp=	0x2d,		/* compressing van jacobson tcp */
-	Pvjutcp=	0x2f,		/* uncompressing van jacobson tcp */
-	Pcdata=		0xfd,		/* compressed datagram */
-	Pipcp=		0x8021,		/* ip control */
-	Pecp=		0x8053,		/* encryption control */
-	Pccp=		0x80fd,		/* compressed datagram control */
-	Plcp=		0xc021,		/* link control */
-	Ppap=		0xc023,		/* password auth. protocol */
-	Plqm=		0xc025,		/* link quality monitoring */
-	Pchap=		0xc223,		/* challenge/response */
-
-	/* LCP codes */
-	Lconfreq=	1,
-	Lconfack=	2,
-	Lconfnak=	3,
-	Lconfrej=	4,
-	Ltermreq=	5,
-	Ltermack=	6,
-	Lcoderej=	7,
-	Lprotorej=	8,
-	Lechoreq=	9,
-	Lechoack=	10,
-	Ldiscard=	11,
-
-	/* Lcp configure options */
-	Omtu=		1,
-	Octlmap=	2,
-	Oauth=		3,
-	Oquality=	4,
-	Omagic=		5,
-	Opc=		7,
-	Oac=		8,
-	Obad=		12,		/* for testing */
-
-	/* authentication protocols */
-	APmd5=		5,
-
-	/* lcp flags */
-	Fmtu=		1<<Omtu,
-	Fctlmap=	1<<Octlmap,
-	Fauth=		1<<Oauth,
-	Fquality=	1<<Oquality,
-	Fmagic=		1<<Omagic,
-	Fpc=		1<<Opc,
-	Fac=		1<<Oac,
-	Fbad=		1<<Obad,
-
-	/* Chap codes */
-	Cchallenge=	1,
-	Cresponse=	2,
-	Csuccess=	3,
-	Cfailure=	4,
-
-	/* Pap codes */
-	Cpapreq=		1,
-	Cpapack=		2,
-	Cpapnak=		3,
-
-	/* link states */
-	Sclosed=		0,
-	Sclosing,
-	Sreqsent,
-	Sackrcvd,
-	Sacksent,
-	Sopened,
-
-	/* ccp configure options */
-	Ocoui=		0,	/* proprietary compression */
-	Ocstac=		17,	/* stac electronics LZS */
-	Ocmppc=		18,	/* microsoft ppc */
-
-	/* ccp flags */
-	Fcoui=		1<<Ocoui,
-	Fcstac=		1<<Ocstac,
-	Fcmppc=		1<<Ocmppc,
-
-	/* ecp configure options */
-	Oeoui=		0,	/* proprietary compression */
-	Oedese=		1,	/* DES */
-
-	/* ecp flags */
-	Feoui=		1<<Oeoui,
-	Fedese=		1<<Oedese,
-
-	/* ipcp configure options */
-	Oipaddrs=	1,
-	Oipcompress=	2,
-	Oipaddr=	3,
-	Oipdns=		129,
-	Oipwins=	130,
-	Oipdns2=	131,
-	Oipwins2=	132,
-
-	/* ipcp flags */
-	Fipaddrs=	1<<Oipaddrs,
-	Fipcompress=	1<<Oipcompress,
-	Fipaddr=	1<<Oipaddr,
-
-	Period=		3*1000,	/* period of retransmit process (in ms) */
-	Timeout=	10,	/* xmit timeout (in Periods) */
-
-	MAX_STATES	= 16,		/* van jacobson compression states */
-	Defmtu=		1450,		/* default that we will ask for */
-	Minmtu=		128,		/* minimum that we will accept */
-	Maxmtu=		2000,		/* maximum that we will accept */
-};
-
-
-struct Pstate
-{
-	int	proto;		/* protocol type */
-	int	timeout;		/* for current state */
-	int	rxtimeout;	/* for current retransmit */
-	ulong	flags;		/* options received */
-	uchar	id;		/* id of current message */
-	uchar	confid;		/* id of current config message */
-	uchar	termid;		/* id of current termination message */
-	uchar	rcvdconfid;	/* id of last conf message received */
-	uchar	state;		/* PPP link state */
-	ulong	optmask;		/* which options to request */
-	int	echoack;	/* recieved echo ack */
-	int	echotimeout;	/* echo timeout */
-};
-
-struct Qualstats
-{
-	ulong	reports;
-	ulong	packets;
-	ulong	bytes;
-	ulong	discards;
-	ulong	errors;
-};
-
-struct PPP
-{
-	QLock;
-
-	Chan*	dchan;			/* serial line */
-	Chan*	cchan;			/* serial line control */
-	int		framing;	/* non-zero to use framing characters */
-	Ipaddr	local;
-	int		localfrozen;
-	Ipaddr	remote;
-	int		remotefrozen;
-
-	int	pppup;
-	Fs	*f;		/* file system we belong to */
-	Ipifc*	ifc;
-	Proc*	readp;			/* reading process */
-	Proc*	timep;			/* timer process */
-	Block*	inbuf;			/* input buffer */
-	Block*	outbuf;			/* output buffer */
-	QLock	outlock;		/*  and its lock */
-
-	ulong	magic;			/* magic number to detect loop backs */
-	ulong	rctlmap;		/* map of chars to ignore in rcvr */
-	ulong	xctlmap;		/* map of chars to excape in xmit */
-	int		phase;		/* PPP phase */
-	Pstate*	lcp;			/* lcp state */
-	Pstate*	ipcp;			/* ipcp state */
-	char	secret[256];		/* md5 key */
-	char	chapname[256];		/* chap system name */
-	Tcpc*	ctcp;
-	ulong		mtu;		/* maximum xmit size */
-	ulong		mru;		/* maximum recv size */
-
-	int	baud;
-	int	usepap;	/* authentication is PAP in every sense, not CHAP */
-	int	papid;
-	int	usechap;
-
-	/* rfc */
-	int	usedns;
-	Ipaddr	dns1;
-	Ipaddr	dns2;
-
-	/* link quality monitoring */
-	int		period;		/* lqm period */
-	int		timeout;	/* time to next lqm packet */
-	Qualstats	in;		/* local */
-	Qualstats	out;
-	Qualstats	pin;		/* peer */
-	Qualstats	pout;
-	Qualstats	sin;		/* saved */
-};
-
-PPP*		pppopen(PPP*, char*, Ipaddr, Ipaddr, int, int, char*, char*);
-Block*	pppread(PPP*);
-int		pppwrite(PPP*, Block*);
-void		pppclose(PPP*);
-
-struct Lcpmsg
-{
-	uchar	code;
-	uchar	id;
-	uchar	len[2];
-	uchar	data[1];
-};
-
-struct Lcpopt
-{
-	uchar	type;
-	uchar	len;
-	uchar	data[1];
-};
-
-struct Qualpkt
-{
-	uchar	magic[4];
-
-	uchar	lastoutreports[4];
-	uchar	lastoutpackets[4];
-	uchar	lastoutbytes[4];
-	uchar	peerinreports[4];
-	uchar	peerinpackets[4];
-	uchar	peerindiscards[4];
-	uchar	peerinerrors[4];
-	uchar	peerinbytes[4];
-	uchar	peeroutreports[4];
-	uchar	peeroutpackets[4];
-	uchar	peeroutbytes[4];
-};
-
-ushort	compress(Tcpc*, Block*, Fs*);
-Tcpc*	compress_init(Tcpc*);
-int		compress_negotiate(Tcpc*, uchar*);
-ushort	tcpcompress(Tcpc*, Block*, Fs*);
-Block*	tcpuncompress(Tcpc*, Block*, ushort, Fs*);
--- a/os/ip/pppmedium.c
+++ /dev/null
@@ -1,192 +1,0 @@
-#include "u.h"
-#include "../port/lib.h"
-#include "mem.h"
-#include "dat.h"
-#include "fns.h"
-#include "../port/error.h"
-
-#include "ip.h"
-#include "kernel.h"
-#include "ppp.h"
-
-static void	pppreader(void *a);
-static void	pppbind(Ipifc *ifc, int argc, char **argv);
-static void	pppunbind(Ipifc *ifc);
-static void	pppbwrite(Ipifc *ifc, Block *bp, int version, uchar *ip);
-static void	deadremote(Ipifc *ifc);
-
-Medium pppmedium =
-{
-.name=	"ppp",
-.hsize=	4,
-.mintu=	Minmtu,
-.maxtu=	Maxmtu,
-.maclen=	0,
-.bind=	pppbind,
-.unbind=	pppunbind,
-.bwrite=	pppbwrite,
-.unbindonclose=	0,		/* don't unbind on last close */
-};
-
-/*
- *  called to bind an IP ifc to an ethernet device
- *  called with ifc wlock'd
- */
-static void
-pppbind(Ipifc *ifc, int argc, char **argv)
-{
-	PPP *ppp;
-	Ipaddr ipaddr, remip;
-	int mtu, framing;
-	char *chapname, *secret;
-
-	if(argc < 3)
-		error(Ebadarg);
-
-	ipmove(ipaddr, IPnoaddr);
-	ipmove(remip, IPnoaddr);
-	mtu = Defmtu;
-	framing = 1;
-	chapname = nil;
-	secret = nil;
-
-	switch(argc){
-	default:
-	case 9:
-		if(argv[8][0] != '-')
-			secret = argv[8];
-	case 8:
-		if(argv[7][0] != '-')
-			chapname = argv[7];
-	case 7:
-		if(argv[6][0] != '-')
-			framing = strtoul(argv[6], 0, 0);
-	case 6:
-		if(argv[5][0] != '-')
-			mtu = strtoul(argv[5], 0, 0);
-	case 5:
-		if(argv[4][0] != '-')
-			parseip(remip, argv[4]);
-	case 4:
-		if(argv[3][0] != '-')
-			parseip(ipaddr, argv[3]);
-	case 3:
-		break;
-	}
-
-	ppp = smalloc(sizeof(*ppp));
-	ppp->ifc = ifc;
-	ppp->f = ifc->conv->p->f;
-	ifc->arg = ppp;
-	if(waserror()){
-		pppunbind(ifc);
-		nexterror();
-	}
-	if(pppopen(ppp, argv[2], ipaddr, remip, mtu, framing, chapname, secret) == nil)
-		error("ppp open failed");
-	poperror();
-	kproc("pppreader", pppreader, ifc, KPDUPPG|KPDUPFDG);
-}
-
-static void
-pppreader(void *a)
-{
-	Ipifc *ifc;
-	Block *bp;
-	PPP *ppp;
-
-	ifc = a;
-	ppp = ifc->arg;
-	ppp->readp = up;	/* hide identity under a rock for unbind */
-	setpri(PriHi);
-
-	if(waserror()){
-		netlog(ppp->f, Logppp, "pppreader: %I: %s\n", ppp->local, up->env->errstr);
-		ppp->readp = 0;
-		deadremote(ifc);
-		pexit("hangup", 1);
-	}
-
-	for(;;){
-		bp = pppread(ppp);
-		if(bp == nil)
-			error("hungup");
-		if(!canrlock(ifc)){
-			freeb(bp);
-			continue;
-		}
-		if(waserror()){
-			runlock(ifc);
-			nexterror();
-		}
-		ifc->in++;
-		if(ifc->lifc == nil)
-			freeb(bp);
-		else
-			ipiput(ppp->f, ifc, bp);
-		runlock(ifc);
-		poperror();
-	}
-}
-
-/*
- *  called with ifc wlock'd
- */
-static void
-pppunbind(Ipifc *ifc)
-{
-	PPP *ppp = ifc->arg;
-
-	if(ppp == nil)
-		return;
-	if(ppp->readp)
-		postnote(ppp->readp, 1, "unbind", 0);
-	if(ppp->timep)
-		postnote(ppp->timep, 1, "unbind", 0);
-
-	/* wait for kprocs to die */
-	while(ppp->readp != 0 || ppp->timep != 0)
-		tsleep(&up->sleep, return0, 0, 300);
-
-	pppclose(ppp);
-	qclose(ifc->conv->eq);
-	ifc->arg = nil;
-}
-
-/*
- *  called by ipoput with a single packet to write with ifc rlock'd
- */
-static void
-pppbwrite(Ipifc *ifc, Block *bp, int, uchar*)
-{
-	PPP *ppp = ifc->arg;
-
-	pppwrite(ppp, bp);
-	ifc->out++;
-}
-
-/*
- *	If the other end hangs up, we have to unbind the interface.  An extra
- *	unbind (in the case where we are hanging up) won't do any harm.
- */
-static void
-deadremote(Ipifc *ifc)
-{
-	int fd;
-	char path[128];
-	PPP *ppp;
-
-	ppp = ifc->arg;
-	snprint(path, sizeof path, "#I%d/ipifc/%d/ctl", ppp->f->dev, ifc->conv->x);
-	fd = kopen(path, ORDWR);
-	if(fd < 0)
-		return;
-	kwrite(fd, "unbind", sizeof("unbind")-1);
-	kclose(fd);
-}
-
-void
-pppmediumlink(void)
-{
-	addipmedium(&pppmedium);
-}
--- a/os/ip/ptclbsum.c
+++ /dev/null
@@ -1,72 +1,0 @@
-#include	"u.h"
-#include	"../port/lib.h"
-#include	"mem.h"
-#include	"dat.h"
-#include	"fns.h"
-#include	"../port/error.h"
-#include	"ip.h"
-
-static	short	endian	= 1;
-static	uchar*	aendian	= (uchar*)&endian;
-#define	LITTLE	*aendian
-
-ushort
-ptclbsum(uchar *addr, int len)
-{
-	ulong losum, hisum, mdsum, x;
-	ulong t1, t2;
-
-	losum = 0;
-	hisum = 0;
-	mdsum = 0;
-
-	x = 0;
-	if((uintptr)addr & 1) {
-		if(len) {
-			hisum += addr[0];
-			len--;
-			addr++;
-		}
-		x = 1;
-	}
-	while(len >= 16) {
-		t1 = *(ushort*)(addr+0);
-		t2 = *(ushort*)(addr+2);	mdsum += t1;
-		t1 = *(ushort*)(addr+4);	mdsum += t2;
-		t2 = *(ushort*)(addr+6);	mdsum += t1;
-		t1 = *(ushort*)(addr+8);	mdsum += t2;
-		t2 = *(ushort*)(addr+10);	mdsum += t1;
-		t1 = *(ushort*)(addr+12);	mdsum += t2;
-		t2 = *(ushort*)(addr+14);	mdsum += t1;
-		mdsum += t2;
-		len -= 16;
-		addr += 16;
-	}
-	while(len >= 2) {
-		mdsum += *(ushort*)addr;
-		len -= 2;
-		addr += 2;
-	}
-	if(x) {
-		if(len)
-			losum += addr[0];
-		if(LITTLE)
-			losum += mdsum;
-		else
-			hisum += mdsum;
-	} else {
-		if(len)
-			hisum += addr[0];
-		if(LITTLE)
-			hisum += mdsum;
-		else
-			losum += mdsum;
-	}
-
-	losum += hisum >> 8;
-	losum += (hisum & 0xff) << 8;
-	while(hisum = losum>>16)
-		losum = hisum + (losum & 0xffff);
-
-	return losum & 0xffff;
-}
--- a/os/ip/rudp.c
+++ b/os/ip/rudp.c
@@ -1,4 +1,5 @@
 /*
+ *  Reliable User Datagram Protocol, currently only for IPv4.
  *  This protocol is compatible with UDP's packet format.
  *  It could be done over UDP if need be.
  */
@@ -25,20 +26,17 @@
 
 enum
 {
-	UDP_HDRSIZE	= 20,	/* pseudo header + udp header */
 	UDP_PHDRSIZE	= 12,	/* pseudo header */
+//	UDP_HDRSIZE	= 20,	/* pseudo header + udp header */
 	UDP_RHDRSIZE	= 36,	/* pseudo header + udp header + rudp header */
 	UDP_IPHDR	= 8,	/* ip header */
 	IP_UDPPROTO	= 254,
-	UDP_USEAD7	= 52,
-	UDP_USEAD6	= 36,
-	UDP_USEAD4	= 12,
+	UDP_USEAD7	= 52,	/* size of new ipv6 headers struct */
 
 	Rudprxms	= 200,
 	Rudptickms	= 50,
 	Rudpmaxxmit	= 10,
 	Maxunacked	= 100,
-
 };
 
 #define Hangupgen	0xffffffff	/* used only in hangup messages */
@@ -205,7 +203,7 @@
 		qlock(&rpriv->apl);
 		if(rpriv->ackprocstarted == 0){
 			sprint(kpname, "#I%drudpack", rudp->f->dev);
-			kproc(kpname, relackproc, rudp, 0);
+			kproc(kpname, relackproc, rudp);
 			rpriv->ackprocstarted = 1;
 		}
 		qunlock(&rpriv->apl);
@@ -240,6 +238,7 @@
 	qlock(ucb);
 	for(r = ucb->r; r; r = r->next)
 		m += snprint(state+m, n-m, " %I/%ld", r->addr, UNACKED(r));
+	m += snprint(state+m, n-m, "\n");
 	qunlock(ucb);
 	return m;
 }
@@ -281,7 +280,7 @@
 	/* force out any delayed acks */
 	ucb = (Rudpcb*)c->ptcl;
 	qlock(ucb);
-	for(r = ucb->r; r; r = r->next){
+	for(r = ucb->r; r != nil; r = r->next){
 		if(r->acksent != r->rcvseq)
 			relsendack(c, r, 0);
 	}
@@ -374,27 +373,10 @@
 		rport = nhgets(bp->rp);
 		bp->rp += 2+2;			/* Ignore local port */
 		break;
-	case 6:
-		/* get user specified addresses */
-		bp = pullupblock(bp, UDP_USEAD6);
-		if(bp == nil)
-			return;
-		ipmove(raddr, bp->rp);
-		bp->rp += IPaddrlen;
-		ipmove(laddr, bp->rp);
-		bp->rp += IPaddrlen;
-		/* pick interface closest to dest */
-		if(ipforme(f, laddr) != Runi)
-			findlocalip(f, laddr, raddr);
-		rport = nhgets(bp->rp);
-
-		bp->rp += 4;			/* Igonore local port */
-		break;
 	default:
 		ipmove(raddr, c->raddr);
 		ipmove(laddr, c->laddr);
 		rport = c->rport;
-
 		break;
 	}
 
@@ -402,9 +384,6 @@
 
 	/* Make space to fit rudp & ip header */
 	bp = padblock(bp, UDP_IPHDR+UDP_RHDRSIZE);
-	if(bp == nil)
-		return;
-
 	uh = (Udphdr *)(bp->rp);
 	uh->vihl = IP_VER4;
 
@@ -417,7 +396,6 @@
 	uh->frag[1] = 0;
 	hnputs(uh->udpplen, ptcllen);
 	switch(ucb->headers){
-	case 6:
 	case 7:
 		v6tov4(uh->udpdst, raddr);
 		hnputs(uh->udpdport, rport);
@@ -528,7 +506,7 @@
 
 	c = iphtlook(&upriv->ht, raddr, rport, laddr, lport);
 	if(c == nil){
-		/* no converstation found */
+		/* no conversation found */
 		upriv->ustats.rudpNoPorts++;
 		qunlock(rudp);
 		netlog(f, Logudp, "udp: no conv %I!%d -> %I!%d\n", raddr, rport,
@@ -574,45 +552,32 @@
 		p = bp->rp;
 		ipmove(p, raddr); p += IPaddrlen;
 		ipmove(p, laddr); p += IPaddrlen;
-		ipmove(p, ifc->lifc->local); p += IPaddrlen;
+		if(!ipv6local(ifc, p, 0, raddr))
+			ipmove(p, ifc->lifc != nil ? ifc->lifc->local : IPnoaddr);
+		p += IPaddrlen;
 		hnputs(p, rport); p += 2;
 		hnputs(p, lport);
 		break;
-	case 6:
-		/* pass the src address */
-		bp = padblock(bp, UDP_USEAD6);
-		p = bp->rp;
-		ipmove(p, raddr); p += IPaddrlen;
-		ipmove(p, ipforme(f, laddr)==Runi ? laddr : ifc->lifc->local); p += IPaddrlen;
-		hnputs(p, rport); p += 2;
-		hnputs(p, lport);
-		break;
 	default:
 		/* connection oriented rudp */
 		if(ipcmp(c->raddr, IPnoaddr) == 0){
-			/* save the src address in the conversation */
+			/* reply with the same ip address (if not broadcast) */
+			if(ipforme(f, laddr) != Runi)
+				ipv6local(ifc, laddr, 0, raddr);
+			ipmove(c->laddr, laddr);
 		 	ipmove(c->raddr, raddr);
 			c->rport = rport;
-
-			/* reply with the same ip address (if not broadcast) */
-			if(ipforme(f, laddr) == Runi)
-				ipmove(c->laddr, laddr);
-			else
-				v4tov6(c->laddr, ifc->lifc->local);
 		}
 		break;
 	}
-	if(bp->next)
-		bp = concatblock(bp);
 
 	if(qfull(c->rq)) {
-		netlog(f, Logrudp, "rudp: qfull %I.%d -> %I.%d\n", raddr, rport,
-			laddr, lport);
+		netlog(f, Logrudp, "rudp: qfull %I.%d -> %I.%d\n",
+			raddr, rport, laddr, lport);
 		freeblist(bp);
+	} else {
+		qpass(c->rq, concatblock(bp));
 	}
-	else
-		qpass(c->rq, bp);
-	
 	qunlock(ucb);
 }
 
@@ -629,16 +594,14 @@
 	if(n < 1)
 		return rudpunknown;
 
-	if(strcmp(f[0], "headers++4") == 0){
-		ucb->headers = 7;
+	if(strcmp(f[0], "headers") == 0){
+		ucb->headers = 7;		/* new headers format */
 		return nil;
-	} else if(strcmp(f[0], "headers") == 0){
-		ucb->headers = 6;
-		return nil;
 	} else if(strcmp(f[0], "hangup") == 0){
 		if(n < 3)
 			return "bad syntax";
-		parseip(ip, f[1]);
+		if (parseip(ip, f[1]) == -1)
+			return Ebadip;
 		x = atoi(f[2]);
 		qlock(ucb);
 		relforget(c, ip, x, 1);
@@ -645,7 +608,7 @@
 		qunlock(ucb);
 		return nil;
 	} else if(strcmp(f[0], "randdrop") == 0){
-		x = 10;		/* default is 10% */
+		x = 10;			/* default is 10% */
 		if(n > 1)
 			x = atoi(f[1]);
 		if(x > 100 || x < 0)
@@ -672,12 +635,13 @@
 	pdest = nhgets(h->udpdport);
 
 	/* Look for a connection */
-	for(p = rudp->conv; *p; p++) {
-		s = *p;
+	for(p = rudp->conv; (s = *p) != nil; p++) {
 		if(s->rport == pdest)
 		if(s->lport == psource)
 		if(ipcmp(s->raddr, dest) == 0)
 		if(ipcmp(s->laddr, source) == 0){
+			if(s->ignoreadvice)
+				break;
 			qhangup(s->rq, msg);
 			qhangup(s->wq, msg);
 			break;
@@ -701,12 +665,6 @@
 		upriv->orders);
 }
 
-int
-rudpgc(Proto *rudp)
-{
-	return natgc(rudp->ipproto);
-}
-
 void
 rudpinit(Fs *fs)
 {
@@ -725,9 +683,8 @@
 	rudp->rcv = rudpiput;
 	rudp->advise = rudpadvise;
 	rudp->stats = rudpstats;
-	rudp->gc = rudpgc;
 	rudp->ipproto = IP_UDPPROTO;
-	rudp->nc = 16;
+	rudp->nc = 32;
 	rudp->ptclsize = sizeof(Rudpcb);
 
 	Fsproto(fs, rudp);
@@ -770,6 +727,8 @@
 
 	rudp = (Proto *)a;
 
+	while(waserror())
+		;
 loop:
 	tsleep(&up->sleep, return0, 0, Rudptickms);
 
@@ -989,8 +948,6 @@
 	Fs *f;
 
 	bp = allocb(UDP_IPHDR + UDP_RHDRSIZE);
-	if(bp == nil)
-		return;
 	bp->wp += UDP_IPHDR + UDP_RHDRSIZE;
 	f = c->p->f;
 	uh = (Udphdr *)(bp->rp);
--- a/os/ip/tcp.c
+++ b/os/ip/tcp.c
@@ -41,13 +41,13 @@
 	EOLOPT		= 0,
 	NOOPOPT		= 1,
 	MSSOPT		= 2,
-	MSS_LENGTH	= 4,		/* Mean segment size */
+	MSS_LENGTH	= 4,		/* Maximum segment size */
 	WSOPT		= 3,
 	WS_LENGTH	= 3,		/* Bits to scale window size by */
 	MSL2		= 10,
 	MSPTICK		= 50,		/* Milliseconds per timer tick */
-	DEF_MSS		= 1460,		/* Default mean segment */
-	DEF_MSS6	= 1280,		/* Default mean segment (min) for v6 */
+	DEF_MSS		= 1460,		/* Default maximum segment */
+	DEF_MSS6	= 1220,		/* Default maximum segment (min) for v6 */
 	DEF_RTT		= 500,		/* Default round trip */
 	DEF_KAT		= 120000,	/* Default time (ms) between keep alives */
 	TCP_LISTEN	= 0,		/* Listen connection */
@@ -81,7 +81,13 @@
 	NLHT		= 256,		/* hash table size, must be a power of 2 */
 	LHTMASK		= NLHT-1,
 
-	HaveWS		= 1<<8,
+	/*
+	 * window is 64kb · 2ⁿ
+	 * these factors determine the ultimate bandwidth-delay product.
+	 * 64kb · 2⁵ = 2mb, or 2x overkill for 100mbps · 70ms.
+	 */
+	Maxqscale	= 4,		/* maximum queuing scale */
+	Defadvscale	= 4,		/* default advertisement */
 };
 
 /* Must correspond to the enumeration above */
@@ -169,8 +175,9 @@
 	ulong	seq;
 	ulong	ack;
 	uchar	flags;
-	ushort	ws;	/* window scale option (if not zero) */
-	ulong	wnd;
+	uchar	update;
+	ushort	ws;	/* window scale option */
+	ulong	wnd;	/* prescaled window*/
 	ushort	urg;
 	ushort	mss;	/* max segment size option (if not zero) */
 	ushort	len;	/* size of data */
@@ -205,44 +212,53 @@
 		ulong	wnd;		/* Tcp send window */
 		ulong	urg;		/* Urgent data pointer */
 		ulong	wl2;
-		int	scale;		/* how much to right shift window in xmitted packets */
+		uint	scale;		/* how much to right shift window in xmitted packets */
 		/* to implement tahoe and reno TCP */
 		ulong	dupacks;	/* number of duplicate acks rcvd */
+		ulong	partialack;
 		int	recovery;	/* loss recovery flag */
-		ulong	rxt;		/* right window marker for recovery */
+		int	retransmit;	/* retransmit 1 packet @ una flag */
+		int	rto;
+		ulong	rxt;		/* right window marker for recovery "recover" rfc3782 */
 	} snd;
 	struct {
 		ulong	nxt;		/* Receive pointer to next uchar slot */
 		ulong	wnd;		/* Receive window incoming */
+		ulong	wsnt;		/* Last wptr sent.  important to track for large bdp */
+		ulong	wptr;
 		ulong	urg;		/* Urgent pointer */
+		ulong	ackptr;		/* last acked sequence */
 		int	blocked;
-		int	una;		/* unacked data segs */
-		int	scale;		/* how much to left shift window in rcved packets */
+		uint	scale;		/* how much to left shift window in rcv'd packets */
 	} rcv;
 	ulong	iss;			/* Initial sequence number */
-	int	sawwsopt;		/* true if we saw a wsopt on the incoming SYN */
 	ulong	cwind;			/* Congestion window */
-	int	scale;			/* desired snd.scale */
-	ushort	ssthresh;		/* Slow start threshold */
+	ulong	abcbytes;		/* appropriate byte counting rfc 3465 */
+	uint	scale;			/* desired snd.scale */
+	ulong	ssthresh;		/* Slow start threshold */
 	int	resent;			/* Bytes just resent */
 	int	irs;			/* Initial received squence */
-	ushort	mss;			/* Mean segment size */
+	ushort	mss;			/* Maximum segment size */
 	int	rerecv;			/* Overlap of data rerecevived */
-	ulong	window;			/* Recevive window */
+	ulong	window;			/* Our receive window (queue) */
+	uint	qscale;			/* Log2 of our receive window (queue) */
 	uchar	backoff;		/* Exponential backoff counter */
 	int	backedoff;		/* ms we've backed off for rexmits */
 	uchar	flags;			/* State flags */
 	Reseq	*reseq;			/* Resequencing queue */
+	int	nreseq;
+	int	reseqlen;
 	Tcptimer	timer;			/* Activity timer */
 	Tcptimer	acktimer;		/* Acknowledge timer */
 	Tcptimer	rtt_timer;		/* Round trip timer */
 	Tcptimer	katimer;		/* keep alive timer */
 	ulong	rttseq;			/* Round trip sequence */
-	int	srtt;			/* Shortened round trip */
+	int	srtt;			/* Smoothed round trip */
 	int	mdev;			/* Mean deviation of round trip */
 	int	kacounter;		/* count down for keep alive */
 	uint	sndsyntime;		/* time syn sent */
 	ulong	time;			/* time Finwait2 or Syn_received was sent */
+	ulong	timeuna;			/* snd.una when time was set */
 	int	nochecksum;		/* non-zero means don't send checksums */
 	int	flgcnt;			/* number of flags in the sequence (FIN,SEQ) */
 
@@ -285,11 +301,11 @@
 };
 
 int	tcp_irtt = DEF_RTT;	/* Initial guess at round trip time */
-ushort	tcp_mss = DEF_MSS;	/* Maximum segment size to be sent */
 
 enum {
 	/* MIB stats */
 	MaxConn,
+	Mss,
 	ActiveOpens,
 	PassiveOpens,
 	EstabResets,
@@ -297,6 +313,7 @@
 	InSegs,
 	OutSegs,
 	RetransSegs,
+	RetransSegsSent,
 	RetransTimeouts,
 	InErrs,
 	OutRsts,
@@ -305,14 +322,27 @@
 	CsumErrs,
 	HlenErrs,
 	LenErrs,
+	Resequenced,
 	OutOfOrder,
+	ReseqBytelim,
+	ReseqPktlim,
+	Delayack,
+	Wopenack,
 
+	Recovery,
+	RecoveryDone,
+	RecoveryRTO,
+	RecoveryNoSeq,
+	RecoveryCwind,
+	RecoveryPA,
+
 	Nstats
 };
 
-static char *statnames[] =
+static char *statnames[Nstats] =
 {
 [MaxConn]	"MaxConn",
+[Mss]		"MaxSegment",
 [ActiveOpens]	"ActiveOpens",
 [PassiveOpens]	"PassiveOpens",
 [EstabResets]	"EstabResets",
@@ -320,6 +350,7 @@
 [InSegs]	"InSegs",
 [OutSegs]	"OutSegs",
 [RetransSegs]	"RetransSegs",
+[RetransSegsSent]	"RetransSegsSent",
 [RetransTimeouts]	"RetransTimeouts",
 [InErrs]	"InErrs",
 [OutRsts]	"OutRsts",
@@ -327,6 +358,19 @@
 [HlenErrs]	"HlenErrs",
 [LenErrs]	"LenErrs",
 [OutOfOrder]	"OutOfOrder",
+[Resequenced]	"Resequenced",
+[ReseqBytelim]	"ReseqBytelim",
+[ReseqPktlim]	"ReseqPktlim",
+[Delayack]	"Delayack",
+[Wopenack]	"Wopenack",
+
+[Recovery]	"Recovery",
+[RecoveryDone]	"RecoveryDone",
+[RecoveryRTO]	"RecoveryRTO",
+
+[RecoveryNoSeq]	"RecoveryNoSeq",
+[RecoveryCwind]	"RecoveryCwind",
+[RecoveryPA]	"RecoveryPA",
 };
 
 typedef struct Tcppriv Tcppriv;
@@ -347,7 +391,7 @@
 	QLock	apl;
 	int	ackprocstarted;
 
-	ulong	stats[Nstats];
+	uvlong	stats[Nstats];
 };
 
 /*
@@ -356,34 +400,34 @@
  *  of DoS attack.
  *
  *  To avoid stateless Conv hogs, we pick a sequence number at random.  If
- *  it that number gets acked by the other end, we shut down the connection.
- *  Look for tcpporthogedefense in the code.
+ *  that number gets acked by the other end, we shut down the connection.
+ *  Look for tcpporthogdefense in the code.
  */
 int tcpporthogdefense = 0;
 
-int	addreseq(Tcpctl*, Tcppriv*, Tcp*, Block*, ushort);
-void	getreseq(Tcpctl*, Tcp*, Block**, ushort*);
-void	localclose(Conv*, char*);
-void	procsyn(Conv*, Tcp*);
-void	tcpiput(Proto*, Ipifc*, Block*);
-void	tcpoutput(Conv*);
-int	tcptrim(Tcpctl*, Tcp*, Block**, ushort*);
-void	tcpstart(Conv*, int);
-void	tcptimeout(void*);
-void	tcpsndsyn(Conv*, Tcpctl*);
-void	tcprcvwin(Conv*);
-void	tcpacktimer(void*);
-void	tcpkeepalive(void*);
-void	tcpsetkacounter(Tcpctl*);
-void	tcprxmit(Conv*);
-void	tcpsettimer(Tcpctl*);
-void	tcpsynackrtt(Conv*);
-void	tcpsetscale(Conv*, Tcpctl*, ushort, ushort);
+static	int	addreseq(Fs*, Tcpctl*, Tcppriv*, Tcp*, Block*, ushort);
+static	int	dumpreseq(Tcpctl*);
+static	void	getreseq(Tcpctl*, Tcp*, Block**, ushort*);
+static	void	limbo(Conv*, uchar*, uchar*, Tcp*, int);
+static	void	limborexmit(Proto*);
+static	void	localclose(Conv*, char*);
+static	void	procsyn(Conv*, Tcp*);
+static	void	tcpacktimer(void*);
+static	void	tcpiput(Proto*, Ipifc*, Block*);
+static	void	tcpkeepalive(void*);
+static	void	tcpoutput(Conv*);
+static	void	tcprcvwin(Conv*);
+static	void	tcprxmit(Conv*);
+static	void	tcpsetkacounter(Tcpctl*);
+static	void	tcpsetscale(Conv*, Tcpctl*, ushort, ushort);
+static	void	tcpsettimer(Tcpctl*);
+static	void	tcpsndsyn(Conv*, Tcpctl*);
+static	void	tcpstart(Conv*, int);
+static	void	tcpsynackrtt(Conv*);
+static	void	tcptimeout(void*);
+static	int	tcptrim(Tcpctl*, Tcp*, Block**, ushort*);
 
-static void limborexmit(Proto*);
-static void limbo(Conv*, uchar*, uchar*, Tcp*, int);
-
-void
+static void
 tcpsetstate(Conv *s, uchar newstate)
 {
 	Tcpctl *tcb;
@@ -403,11 +447,6 @@
 	if(newstate == Established)
 		tpriv->stats[CurrEstab]++;
 
-	/**
-	print( "%d/%d %s->%s CurrEstab=%d\n", s->lport, s->rport,
-		tcpstates[oldstate], tcpstates[newstate], tpriv->tstats.tcpCurrEstab );
-	**/
-
 	switch(newstate) {
 	case Closed:
 		qclose(s->rq);
@@ -430,7 +469,12 @@
 tcpconnect(Conv *c, char **argv, int argc)
 {
 	char *e;
+	Tcpctl *tcb;
 
+	tcb = (Tcpctl*)(c->ptcl);
+	if(tcb->state != Closed)
+		return Econinuse;
+
 	e = Fsstdconnect(c, argv, argc);
 	if(e != nil)
 		return e;
@@ -447,12 +491,14 @@
 	s = (Tcpctl*)(c->ptcl);
 
 	return snprint(state, n,
-		"%s qin %d qout %d srtt %d mdev %d cwin %lud swin %lud>>%d rwin %lud>>%d timer.start %d timer.count %d rerecv %d katimer.start %d katimer.count %d\n",
+		"%s qin %d qout %d rq %d.%d srtt %d mdev %d sst %lud cwin %lud swin %lud>>%d rwin %lud>>%d qscale %d timer.start %d timer.count %d rerecv %d katimer.start %d katimer.count %d\n",
 		tcpstates[s->state],
 		c->rq ? qlen(c->rq) : 0,
 		c->wq ? qlen(c->wq) : 0,
-		s->srtt, s->mdev,
-		s->cwind, s->snd.wnd, s->rcv.scale, s->rcv.wnd, s->snd.scale,
+		s->nreseq, s->reseqlen,
+		s->srtt, s->mdev, s->ssthresh,
+		s->cwind, s->snd.wnd, s->snd.scale, s->rcv.wnd, s->rcv.scale,
+		s->qscale,
 		s->timer.start, s->timer.count, s->rerecv,
 		s->katimer.start, s->katimer.count);
 }
@@ -470,7 +516,12 @@
 tcpannounce(Conv *c, char **argv, int argc)
 {
 	char *e;
+	Tcpctl *tcb;
 
+	tcb = (Tcpctl*)(c->ptcl);
+	if(tcb->state != Closed)
+		return Econinuse;
+
 	e = Fsstdannounce(c, argv, argc);
 	if(e != nil)
 		return e;
@@ -524,7 +575,7 @@
 	}
 }
 
-void
+static void
 tcpkick(void *x)
 {
 	Conv *s = x;
@@ -546,7 +597,6 @@
 		/*
 		 * Push data
 		 */
-		tcprcvwin(s);
 		tcpoutput(s);
 		break;
 	default:
@@ -558,7 +608,9 @@
 	poperror();
 }
 
-void
+static int seq_lt(ulong, ulong);
+
+static void
 tcprcvwin(Conv *s)				/* Call with tcb locked */
 {
 	int w;
@@ -568,12 +620,20 @@
 	w = tcb->window - qlen(s->rq);
 	if(w < 0)
 		w = 0;
-	tcb->rcv.wnd = w;
-	if(w == 0)
+	/* RFC 1122 § 4.2.2.17 do not move right edge of window left */
+	if(seq_lt(tcb->rcv.nxt + w, tcb->rcv.wptr))
+		w = tcb->rcv.wptr - tcb->rcv.nxt;
+	if(w != tcb->rcv.wnd)
+	if(w>>tcb->rcv.scale == 0 || tcb->window > 4*tcb->mss && w < tcb->mss/4){
 		tcb->rcv.blocked = 1;
+		netlog(s->p->f, Logtcp, "tcprcvwin: window %lud qlen %d ws %ud lport %d\n",
+			tcb->window, qlen(s->rq), tcb->rcv.scale, s->lport);
+	}
+	tcb->rcv.wnd = w;
+	tcb->rcv.wptr = tcb->rcv.nxt + w;
 }
 
-void
+static void
 tcpacktimer(void *v)
 {
 	Tcpctl *tcb;
@@ -589,7 +649,6 @@
 	qlock(s);
 	if(tcb->state != Closed){
 		tcb->flags |= FORCE;
-		tcprcvwin(s);
 		tcpoutput(s);
 	}
 	qunlock(s);
@@ -597,10 +656,52 @@
 }
 
 static void
+tcpcongestion(Tcpctl *tcb)	/* recompute ssthresh: half of min(outstanding, cwind), at least 2*mss */
+{
+	ulong inflight;
+
+	inflight = tcb->snd.nxt - tcb->snd.una;	/* sequence space sent but not yet acked */
+	if(inflight > tcb->cwind)
+		inflight = tcb->cwind;
+	tcb->ssthresh = inflight / 2;
+	if(tcb->ssthresh < 2*tcb->mss)
+		tcb->ssthresh = 2*tcb->mss;	/* never drop below two segments */
+}
+
+enum {
+	L		= 2,		/* slow-start byte-counting limit, in units of mss (used by tcpabcincr); 2 is the aggressive setting; legal values ∈ [1, 2] */
+};
+
+static void
+tcpabcincr(Tcpctl *tcb, uint acked)	/* grow cwind by counting acked bytes; acked is added to abcbytes */
+{
+	uint limit;
+
+	tcb->abcbytes += acked;
+	if(tcb->cwind < tcb->ssthresh){
+		/* slow start */
+		if(tcb->snd.rto)
+			limit = 1*tcb->mss;	/* snd.rto set: grow by at most one mss per call */
+		else
+			limit = L*tcb->mss;
+		tcb->cwind += MIN(tcb->abcbytes, limit);
+		tcb->abcbytes = 0;
+	}
+	else{
+		tcb->snd.rto = 0;	/* cwind has reached ssthresh; clear the rto marker */
+		/* avoidance */
+		if(tcb->abcbytes >= tcb->cwind){
+			tcb->abcbytes -= tcb->cwind;	/* a full cwind of bytes was acked: add one mss */
+			tcb->cwind += tcb->mss;
+		}
+	}
+}
+
+static void	/* open the conversation's rq and wq queues */
 tcpcreate(Conv *c)
 {
 	c->rq = qopen(QMAX, Qcoalesce, tcpacktimer, c);
-	c->wq = qopen((3*QMAX)/2, Qkick, tcpkick, c);
+	c->wq = qopen(QMAX, Qkick, tcpkick, c);	/* same QMAX argument as rq (was 3*QMAX/2) */
 }
 
 static void
@@ -608,7 +709,7 @@
 {
 	if(newstate != TcptimerON){
 		if(t->state == TcptimerON){
-			// unchain
+			/* unchain */
 			if(priv->timers == t){
 				priv->timers = t->next;
 				if(t->prev != nil)
@@ -622,7 +723,7 @@
 		}
 	} else {
 		if(t->state != TcptimerON){
-			// chain
+			/* chain */
 			if(t->prev != nil || t->next != nil)
 				panic("timerstate2");
 			t->prev = nil;
@@ -635,7 +736,7 @@
 	t->state = newstate;
 }
 
-void
+static void
 tcpackproc(void *a)
 {
 	Tcptimer *t, *tp, *timeo;
@@ -646,6 +747,9 @@
 	tcp = a;
 	priv = tcp->priv;
 
+	while(waserror())
+		;
+
 	for(;;) {
 		tsleep(&up->sleep, return0, 0, MSPTICK);
 
@@ -681,7 +785,7 @@
 	}
 }
 
-void
+static void
 tcpgo(Tcppriv *priv, Tcptimer *t)
 {
 	if(t == nil || t->start == 0)
@@ -693,7 +797,7 @@
 	qunlock(&priv->tl);
 }
 
-void
+static void
 tcphalt(Tcppriv *priv, Tcptimer *t)
 {
 	if(t == nil)
@@ -704,17 +808,16 @@
 	qunlock(&priv->tl);
 }
 
-int
+static int	/* returns 2^n */
 backoff(int n)
 {
 	return 1 << n;
 }
 
-void
+static void
 localclose(Conv *s, char *reason)	/* called with tcb locked */
 {
 	Tcpctl *tcb;
-	Reseq *rp,*rp1;
 	Tcppriv *tpriv;
 
 	tpriv = s->p->priv;
@@ -728,12 +831,7 @@
 	tcphalt(tpriv, &tcb->katimer);
 
 	/* Flush reassembly queue; nothing more can arrive */
-	for(rp = tcb->reseq; rp != nil; rp = rp1) {
-		rp1 = rp->next;
-		freeblist(rp->bp);
-		free(rp);
-	}
-	tcb->reseq = nil;
+	dumpreseq(tcb);
 
 	if(tcb->state == Syn_sent)
 		Fsconnected(s, reason);
@@ -747,45 +845,46 @@
 }
 
 /* mtu (- TCP + IP hdr len) of 1st hop */
-int
-tcpmtu(Proto *tcp, uchar *addr, int version, int *scale)
+static int
+tcpmtu(Route *r, int version, uint *scale)	/* returns the segment size to use; always stores Defadvscale in *scale */
 {
 	Ipifc *ifc;
 	int mtu;
 
-	ifc = findipifc(tcp->f, addr, 0);
-	switch(version){
-	default:
-	case V4:
-		mtu = DEF_MSS;
-		if(ifc != nil)
-			mtu = ifc->maxtu - ifc->m->hsize - (TCP4_PKT + TCP4_HDRSIZE);
-		break;
-	case V6:
-		mtu = DEF_MSS6;
-		if(ifc != nil)
-			mtu = ifc->maxtu - ifc->m->hsize - (TCP6_PKT + TCP6_HDRSIZE);
-		break;
-	}
-	if(ifc != nil){
-		if(ifc->mbps > 100)
-			*scale = HaveWS | 3;
-		else if(ifc->mbps > 10)
-			*scale = HaveWS | 1;
-		else
-			*scale = HaveWS | 0;
-	} else
-		*scale = HaveWS | 0;
+	/*
+	 * set the ws.  it doesn't commit us to anything.
+	 * ws is the ultimate limit to the bandwidth-delay product.
+	 */
+	*scale = Defadvscale;
 
-	return mtu;
+	/*
+	 * currently we do not implement path MTU discovery
+	 * so use interface MTU *only* if directly reachable
+	 * or when we use V4 which allows routers to fragment.
+	 * otherwise, we use the default MSS which assumes a
+	 * safe minimum MTU of 1280 bytes for V6.
+	 */  
+	if(r != nil && (ifc = r->ifc) != nil){
+		mtu = ifc->maxtu - ifc->m->hsize;	/* interface maxtu less the medium's hsize */
+		if(version == V4)
+			return mtu - (TCP4_PKT + TCP4_HDRSIZE);
+		mtu -= TCP6_PKT + TCP6_HDRSIZE;
+		if((r->type & (Rifc|Runi)) != 0 || mtu <= DEF_MSS6)	/* route has Rifc or Runi set, or value is no larger than the V6 default */
+			return mtu;
+	}
+	if(version == V6)	/* no route or interface, or a V6 route that failed the test above */
+		return DEF_MSS6;
+	else
+		return DEF_MSS;
 }
 
-void
+static void
 inittcpctl(Conv *s, int mode)
 {
 	Tcpctl *tcb;
 	Tcp4hdr* h4;
 	Tcp6hdr* h6;
+	Tcppriv *tpriv;
 	int mss;
 
 	tcb = (Tcpctl*)s->ptcl;
@@ -792,7 +891,7 @@
 
 	memset(tcb, 0, sizeof(Tcpctl));
 
-	tcb->ssthresh = 65535;
+	tcb->ssthresh = QMAX;			/* reset by tcpsetscale() */
 	tcb->srtt = tcp_irtt<<LOGAGAIN;
 	tcb->mdev = 0;
 
@@ -841,19 +940,18 @@
 	}
 
 	tcb->mss = tcb->cwind = mss;
+	tcb->abcbytes = 0;
+	tpriv = s->p->priv;
+	tpriv->stats[Mss] = tcb->mss;
 
 	/* default is no window scaling */
-	tcb->window = QMAX;
-	tcb->rcv.wnd = QMAX;
-	tcb->rcv.scale = 0;
-	tcb->snd.scale = 0;
-	qsetlimit(s->rq, QMAX);
+	tcpsetscale(s, tcb, 0, 0);
 }
 
 /*
  *  called with s qlocked
  */
-void
+static void
 tcpstart(Conv *s, int mode)
 {
 	Tcpctl *tcb;
@@ -865,8 +963,8 @@
 	if(tpriv->ackprocstarted == 0){
 		qlock(&tpriv->apl);
 		if(tpriv->ackprocstarted == 0){
-			sprint(kpname, "#I%dtcpack", s->p->f->dev);
-			kproc(kpname, tcpackproc, s->p, 0);
+			snprint(kpname, sizeof(kpname), "#I%dtcpack", s->p->f->dev);
+			kproc(kpname, tcpackproc, s->p);
 			tpriv->ackprocstarted = 1;
 		}
 		qunlock(&tpriv->apl);
@@ -895,28 +993,28 @@
 }
 
 static char*
-tcpflag(ushort flag)
+tcpflag(char *buf, char *e, ushort flag)	/* format flag>>10 and the names of the set flag bits into buf, bounded by e; returns buf */
 {
-	static char buf[128];
+	char *p;
 
-	sprint(buf, "%d", flag>>10);	/* Head len */
+	p = seprint(buf, e, "%d", flag>>10);	/* Head len */
 	if(flag & URG)
-		strcat(buf, " URG");
+		p = seprint(p, e, " URG");
 	if(flag & ACK)
-		strcat(buf, " ACK");
+		p = seprint(p, e, " ACK");
 	if(flag & PSH)
-		strcat(buf, " PSH");
+		p = seprint(p, e, " PSH");
 	if(flag & RST)
-		strcat(buf, " RST");
+		p = seprint(p, e, " RST");
 	if(flag & SYN)
-		strcat(buf, " SYN");
+		p = seprint(p, e, " SYN");
 	if(flag & FIN)
-		strcat(buf, " FIN");
-
+		p = seprint(p, e, " FIN");
+	USED(p);	/* the last value assigned to p is not read again */
 	return buf;
 }
 
-Block *
+static Block*
 htontcp6(Tcp *tcph, Block *data, Tcp6hdr *ph, Tcpctl *tcb)
 {
 	int dlen;
@@ -940,14 +1038,10 @@
 	if(data) {
 		dlen = blocklen(data);
 		data = padblock(data, hdrlen + TCP6_PKT);
-		if(data == nil)
-			return nil;
 	}
 	else {
 		dlen = 0;
 		data = allocb(hdrlen + TCP6_PKT + 64);	/* the 64 pad is to meet mintu's */
-		if(data == nil)
-			return nil;
 		data->wp += hdrlen + TCP6_PKT;
 	}
 
@@ -1000,7 +1094,7 @@
 	return data;
 }
 
-Block *
+static Block*
 htontcp4(Tcp *tcph, Block *data, Tcp4hdr *ph, Tcpctl *tcb)
 {
 	int dlen;
@@ -1013,7 +1107,7 @@
 	if(tcph->flags & SYN){
 		if(tcph->mss)
 			hdrlen += MSS_LENGTH;
-		if(tcph->ws)
+		if(1)
 			hdrlen += WS_LENGTH;
 		optpad = hdrlen & 3;
 		if(optpad)
@@ -1024,14 +1118,10 @@
 	if(data) {
 		dlen = blocklen(data);
 		data = padblock(data, hdrlen + TCP4_PKT);
-		if(data == nil)
-			return nil;
 	}
 	else {
 		dlen = 0;
 		data = allocb(hdrlen + TCP4_PKT + 64);	/* the 64 pad is to meet mintu's */
-		if(data == nil)
-			return nil;
 		data->wp += hdrlen + TCP4_PKT;
 	}
 
@@ -1055,7 +1145,8 @@
 			hnputs(opt, tcph->mss);
 			opt += 2;
 		}
-		if(tcph->ws != 0){
+		/* always offer.  rfc1323 §2.2 */
+		if(1){
 			*opt++ = WSOPT;
 			*opt++ = WS_LENGTH;
 			*opt++ = tcph->ws;
@@ -1074,7 +1165,7 @@
 	return data;
 }
 
-int
+static int
 ntohtcp6(Tcp *tcph, Block **bpp)
 {
 	Tcp6hdr *h;
@@ -1103,6 +1194,7 @@
 	tcph->urg = nhgets(h->tcpurg);
 	tcph->mss = 0;
 	tcph->ws = 0;
+	tcph->update = 0;
 	tcph->len = nhgets(h->ploadlen) - hdrlen;
 
 	*bpp = pullupblock(*bpp, hdrlen+TCP6_PKT);
@@ -1127,7 +1219,7 @@
 			break;
 		case WSOPT:
 			if(optlen == WS_LENGTH && *(optr+2) <= 14)
-				tcph->ws = HaveWS | *(optr+2);
+				tcph->ws = *(optr+2);
 			break;
 		}
 		n -= optlen;
@@ -1136,7 +1228,7 @@
 	return hdrlen;
 }
 
-int
+static int
 ntohtcp4(Tcp *tcph, Block **bpp)
 {
 	Tcp4hdr *h;
@@ -1166,6 +1258,7 @@
 	tcph->urg = nhgets(h->tcpurg);
 	tcph->mss = 0;
 	tcph->ws = 0;
+	tcph->update = 0;
 	tcph->len = nhgets(h->length) - (hdrlen + TCP4_PKT);
 
 	*bpp = pullupblock(*bpp, hdrlen+TCP4_PKT);
@@ -1190,7 +1283,7 @@
 			break;
 		case WSOPT:
 			if(optlen == WS_LENGTH && *(optr+2) <= 14)
-				tcph->ws = HaveWS | *(optr+2);
+				tcph->ws = *(optr+2);
 			break;
 		}
 		n -= optlen;
@@ -1200,16 +1293,19 @@
 }
 
 /*
- *  For outgiing calls, generate an initial sequence
+ *  For outgoing calls, generate an initial sequence
  *  number and put a SYN on the send queue
  */
-void
+static void
 tcpsndsyn(Conv *s, Tcpctl *tcb)
 {
+	Tcppriv *tpriv;
+
 	tcb->iss = (nrand(1<<16)<<16)|nrand(1<<16);
 	tcb->rttseq = tcb->iss;
 	tcb->snd.wl2 = tcb->iss;
 	tcb->snd.una = tcb->iss;
+	tcb->snd.rxt = tcb->iss;
 	tcb->snd.ptr = tcb->rttseq;
 	tcb->snd.nxt = tcb->rttseq;
 	tcb->flgcnt++;
@@ -1217,7 +1313,9 @@
 	tcb->sndsyntime = NOW;
 
 	/* set desired mss and scale */
-	tcb->mss = tcpmtu(s->p, s->laddr, s->ipversion, &tcb->scale);
+	tcb->mss = tcpmtu(v6lookup(s->p->f, s->raddr, s->laddr, s), s->ipversion, &tcb->scale);
+	tpriv = s->p->priv;
+	tpriv->stats[Mss] = tcb->mss;
 }
 
 void
@@ -1229,7 +1327,7 @@
 	Tcp4hdr ph4;
 	Tcp6hdr ph6;
 
-	netlog(tcp->f, Logtcp, "sndrst: %s", reason);
+	netlog(tcp->f, Logtcp, "sndrst: %s\n", reason);
 
 	tpriv = tcp->priv;
 
@@ -1307,7 +1405,7 @@
  *  send a reset to the remote side and close the conversation
  *  called with s qlocked
  */
-char*
+static char*
 tcphangup(Conv *s)
 {
 	Tcp seg;
@@ -1322,7 +1420,7 @@
 			memset(&seg, 0, sizeof seg);
 			seg.flags = RST | ACK;
 			seg.ack = tcb->rcv.nxt;
-			tcb->rcv.una = 0;
+			tcb->rcv.ackptr = seg.ack;
 			seg.seq = tcb->snd.ptr;
 			seg.wnd = 0;
 			seg.urg = 0;
@@ -1353,7 +1451,7 @@
 /*
  *  (re)send a SYN ACK
  */
-int
+static int
 sndsynack(Proto *tcp, Limbo *lp)
 {
 	Block *hbp;
@@ -1360,7 +1458,7 @@
 	Tcp4hdr ph4;
 	Tcp6hdr ph6;
 	Tcp seg;
-	int scale;
+	uint scale;
 
 	/* make pseudo header */
 	switch(lp->version) {
@@ -1388,11 +1486,12 @@
 		panic("sndrst: version %d", lp->version);
 	}
 
+	memset(&seg, 0, sizeof seg);
 	seg.seq = lp->iss;
 	seg.ack = lp->irs+1;
 	seg.flags = SYN|ACK;
 	seg.urg = 0;
-	seg.mss = tcpmtu(tcp, lp->laddr, lp->version, &scale);
+	seg.mss = tcpmtu(v6lookup(tcp->f, lp->raddr, lp->laddr, nil), lp->version, &scale);
 	seg.wnd = QMAX;
 
 	/* if the other side set scale, we should too */
@@ -1570,6 +1669,18 @@
 	}
 }
 
+static void
+initialwindow(Tcpctl *tcb)	/* set the starting cwind from the current mss */
+{
+	/* RFC 3390 initial window; the three cases equal min(4*mss, max(2*mss, 4380)) */
+	if(tcb->mss < 1095)
+		tcb->cwind = 4*tcb->mss;
+	else if(tcb->mss < 2190)
+		tcb->cwind = 4380;
+	else
+		tcb->cwind = 2*tcb->mss;
+}
+
 /*
  *  come here when we finally get an ACK to our SYN-ACK.
  *  lookup call in limbo.  if found, create a new conversation
@@ -1596,7 +1707,7 @@
 	/* find a call in limbo */
 	h = hashipa(src, segp->source);
 	for(l = &tpriv->lht[h]; (lp = *l) != nil; l = &lp->next){
-		netlog(s->p->f, Logtcp, "tcpincoming s %I,%ux/%I,%ux d %I,%ux/%I,%ux v %d/%d",
+		netlog(s->p->f, Logtcp, "tcpincoming s %I!%ud/%I!%ud d %I!%ud/%I!%ud v %d/%d\n",
 			src, segp->source, lp->raddr, lp->rport,
 			dst, segp->dest, lp->laddr, lp->lport,
 			version, lp->version
@@ -1611,7 +1722,7 @@
 
 		/* we're assuming no data with the initial SYN */
 		if(segp->seq != lp->irs+1 || segp->ack != lp->iss+1){
-			netlog(s->p->f, Logtcp, "tcpincoming s %lux/%lux a %lux %lux",
+			netlog(s->p->f, Logtcp, "tcpincoming s %lux/%lux a %lux %lux\n",
 				segp->seq, lp->irs+1, segp->ack, lp->iss+1);
 			lp = nil;
 		} else {
@@ -1641,6 +1752,8 @@
 
 	tcb->irs = lp->irs;
 	tcb->rcv.nxt = tcb->irs+1;
+	tcb->rcv.wptr = tcb->rcv.nxt;
+	tcb->rcv.wsnt = 0;
 	tcb->rcv.urg = tcb->rcv.nxt;
 
 	tcb->iss = lp->iss;
@@ -1649,19 +1762,24 @@
 	tcb->snd.una = tcb->iss+1;
 	tcb->snd.ptr = tcb->iss+1;
 	tcb->snd.nxt = tcb->iss+1;
+	tcb->snd.rxt = tcb->iss+1;
 	tcb->flgcnt = 0;
 	tcb->flags |= SYNACK;
 
+	/* set desired mss and scale */
+	tcb->mss = tcpmtu(v6lookup(s->p->f, src, dst, s), version, &tcb->scale);
+
 	/* our sending max segment size cannot be bigger than what he asked for */
 	if(lp->mss != 0 && lp->mss < tcb->mss)
 		tcb->mss = lp->mss;
+	tpriv->stats[Mss] = tcb->mss;
 
 	/* window scaling */
 	tcpsetscale(new, tcb, lp->rcvscale, lp->sndscale);
 
-	/* the congestion window always starts out as a single segment */
+	/* congestion window */
 	tcb->snd.wnd = segp->wnd;
-	tcb->cwind = tcb->mss;
+	initialwindow(tcb);
 
 	/* set initial round trip time */
 	tcb->sndsyntime = lp->lastsend+lp->rexmits*SYNACK_RXTIMER;
@@ -1700,7 +1818,7 @@
 	return new;
 }
 
-int
+static int
 seq_within(ulong x, ulong low, ulong high)
 {
 	if(low <= high){
@@ -1714,25 +1832,25 @@
 	return 0;
 }
 
-int
+static int	/* nonzero when (int)(x-y) is negative; the comparison is modular, not absolute */
 seq_lt(ulong x, ulong y)
 {
 	return (int)(x-y) < 0;
 }
 
-int
+static int	/* nonzero when (int)(x-y) is negative or zero; the comparison is modular, not absolute */
 seq_le(ulong x, ulong y)
 {
 	return (int)(x-y) <= 0;
 }
 
-int
+static int	/* nonzero when (int)(x-y) is positive; the comparison is modular, not absolute */
 seq_gt(ulong x, ulong y)
 {
 	return (int)(x-y) > 0;
 }
 
-int
+static int
 seq_ge(ulong x, ulong y)
 {
 	return (int)(x-y) >= 0;
@@ -1742,7 +1860,7 @@
  *  use the time between the first SYN and it's ack as the
  *  initial round trip time
  */
-void
+static void
 tcpsynackrtt(Conv *s)
 {
 	Tcpctl *tcb;
@@ -1760,46 +1878,59 @@
 	tcphalt(tpriv, &tcb->rtt_timer);
 }
 
-void
+static void
 update(Conv *s, Tcp *seg)
 {
 	int rtt, delta;
 	Tcpctl *tcb;
 	ulong acked;
-	ulong expand;
 	Tcppriv *tpriv;
 
+	if(seg->update)
+		return;
+	seg->update = 1;
+
 	tpriv = s->p->priv;
 	tcb = (Tcpctl*)s->ptcl;
 
-	/* if everything has been acked, force output(?) */
-	if(seq_gt(seg->ack, tcb->snd.nxt)) {
-		tcb->flags |= FORCE;
-		return;
+	/* catch zero-window updates, update window & recover */
+	if(tcb->snd.wnd == 0 && seg->wnd > 0)
+	if(seq_lt(seg->ack,  tcb->snd.ptr)){
+		netlog(s->p->f, Logtcp, "tcp: zwu ack %lud una %lud ptr %lud win %lud\n",
+			seg->ack,  tcb->snd.una, tcb->snd.ptr, seg->wnd);
+		tcb->snd.wnd = seg->wnd;
+		goto recovery;
 	}
 
-	/* added by Dong Lin for fast retransmission */
-	if(seg->ack == tcb->snd.una
-	&& tcb->snd.una != tcb->snd.nxt
-	&& seg->len == 0
-	&& seg->wnd == tcb->snd.wnd) {
-
-		/* this is a pure ack w/o window update */
-		netlog(s->p->f, Logtcprxmt, "dupack %lud ack %lud sndwnd %d advwin %d\n",
-			tcb->snd.dupacks, seg->ack, tcb->snd.wnd, seg->wnd);
-
-		if(++tcb->snd.dupacks == TCPREXMTTHRESH) {
-			/*
-			 *  tahoe tcp rxt the packet, half sshthresh,
- 			 *  and set cwnd to one packet
-			 */
+	/* newreno fast retransmit */
+	if(seg->ack == tcb->snd.una)
+	if(tcb->snd.una != tcb->snd.nxt)
+	if(++tcb->snd.dupacks == 3){
+recovery:
+		if(tcb->snd.recovery){
+			tpriv->stats[RecoveryCwind]++;
+			tcb->cwind += tcb->mss;
+		}else if(seq_le(tcb->snd.rxt, seg->ack)){
+			tpriv->stats[Recovery]++;
+			tcb->abcbytes = 0;
 			tcb->snd.recovery = 1;
+			tcb->snd.partialack = 0;
 			tcb->snd.rxt = tcb->snd.nxt;
-			netlog(s->p->f, Logtcprxmt, "fast rxt %lud, nxt %lud\n", tcb->snd.una, tcb->snd.nxt);
+			tcpcongestion(tcb);
+			tcb->cwind = tcb->ssthresh + 3*tcb->mss;
+			netlog(s->p->f, Logtcpwin, "recovery inflate %ld ss %ld @%lud\n",
+				tcb->cwind, tcb->ssthresh, tcb->snd.rxt);
 			tcprxmit(s);
-		} else {
-			/* do reno tcp here. */
+		}else{
+			tpriv->stats[RecoveryNoSeq]++;
+			netlog(s->p->f, Logtcpwin, "!recov %lud not ≤ %lud %ld\n",
+				tcb->snd.rxt, seg->ack, tcb->snd.rxt - seg->ack);
+			/* do not enter fast retransmit */
+			/* do not change ssthresh */
 		}
+	}else if(tcb->snd.recovery){
+		tpriv->stats[RecoveryCwind]++;
+		tcb->cwind += tcb->mss;
 	}
 
 	/*
@@ -1807,6 +1938,9 @@
 	 */
 	if(seq_gt(seg->ack, tcb->snd.wl2)
 	|| (tcb->snd.wl2 == seg->ack && seg->wnd > tcb->snd.wnd)){
+		/* clear dupack if we advance wl2 */
+		if(tcb->snd.wl2 != seg->ack)
+			tcb->snd.dupacks = 0;
 		tcb->snd.wnd = seg->wnd;
 		tcb->snd.wl2 = seg->ack;
 	}
@@ -1816,22 +1950,11 @@
 		 *  don't let us hangup if sending into a closed window and
 		 *  we're still getting acks
 		 */
-		if((tcb->flags&RETRAN) && tcb->snd.wnd == 0){
+		if((tcb->flags&RETRAN) && tcb->snd.wnd == 0)
 			tcb->backedoff = MAXBACKMS/4;
-		}
 		return;
 	}
 
-	/*
-	 *  any positive ack turns off fast rxt,
-	 *  (should we do new-reno on partial acks?)
-	 */
-	if(!tcb->snd.recovery || seq_ge(seg->ack, tcb->snd.rxt)) {
-		tcb->snd.dupacks = 0;
-		tcb->snd.recovery = 0;
-	} else
-		netlog(s->p->f, Logtcp, "rxt next %lud, cwin %ud\n", seg->ack, tcb->cwind);
-
 	/* Compute the new send window size */
 	acked = seg->ack - tcb->snd.una;
 
@@ -1843,24 +1966,41 @@
 		goto done;
 	}
 
-	/* slow start as long as we're not recovering from lost packets */
-	if(tcb->cwind < tcb->snd.wnd && !tcb->snd.recovery) {
-		if(tcb->cwind < tcb->ssthresh) {
-			expand = tcb->mss;
-			if(acked < expand)
-				expand = acked;
+	/*
+	 *  congestion control
+	 */
+	if(tcb->snd.recovery){
+		if(seq_ge(seg->ack, tcb->snd.rxt)){
+			/* recovery finished; deflate window */
+			tpriv->stats[RecoveryDone]++;
+			tcb->snd.dupacks = 0;
+			tcb->snd.recovery = 0;
+			tcb->cwind = (tcb->snd.nxt - tcb->snd.una) + tcb->mss;
+			if(tcb->ssthresh < tcb->cwind)
+				tcb->cwind = tcb->ssthresh;
+			netlog(s->p->f, Logtcpwin, "recovery deflate %ld %ld\n",
+				tcb->cwind, tcb->ssthresh);
+		} else {
+			/* partial ack; we lost more than one segment */
+			tpriv->stats[RecoveryPA]++;
+			if(tcb->cwind > acked)
+				tcb->cwind -= acked;
+			else{
+				netlog(s->p->f, Logtcpwin, "partial ack neg\n");
+				tcb->cwind = tcb->mss;
+			}
+			netlog(s->p->f, Logtcpwin, "partial ack %ld left %ld cwind %ld\n",
+				acked, tcb->snd.rxt - seg->ack, tcb->cwind);
+
+			if(acked >= tcb->mss)
+				tcb->cwind += tcb->mss;
+			tcb->snd.partialack++;
 		}
-		else
-			expand = ((int)tcb->mss * tcb->mss) / tcb->cwind;
+	} else
+		tcpabcincr(tcb, acked);
 
-		if(tcb->cwind + expand < tcb->cwind)
-			expand = tcb->snd.wnd - tcb->cwind;
-		if(tcb->cwind + expand > tcb->snd.wnd)
-			expand = tcb->snd.wnd - tcb->cwind;
-		tcb->cwind += expand;
-	}
-
 	/* Adjust the timers according to the round trip time */
+	/* todo: fix sloppy treatment of overflow cases here. */
 	if(tcb->rtt_timer.state == TcptimerON && seq_ge(seg->ack, tcb->rttseq)) {
 		tcphalt(tpriv, &tcb->rtt_timer);
 		if((tcb->flags&RETRAN) == 0) {
@@ -1891,13 +2031,23 @@
 done:
 	if(qdiscard(s->wq, acked) < acked)
 		tcb->flgcnt--;
-
 	tcb->snd.una = seg->ack;
+
+	/* newreno fast recovery */
+	if(tcb->snd.recovery)
+		tcprxmit(s);
+
 	if(seq_gt(seg->ack, tcb->snd.urg))
 		tcb->snd.urg = seg->ack;
 
-	if(tcb->snd.una != tcb->snd.nxt)
-		tcpgo(tpriv, &tcb->timer);
+	if(tcb->snd.una != tcb->snd.nxt){
+		/* “impatient” variant */
+		if(!tcb->snd.recovery || tcb->snd.partialack == 1){
+			tcb->time = NOW;
+			tcb->timeuna = tcb->snd.una;
+			tcpgo(tpriv, &tcb->timer);
+		}
+	}
 	else
 		tcphalt(tpriv, &tcb->timer);
 
@@ -1904,12 +2054,13 @@
 	if(seq_lt(tcb->snd.ptr, tcb->snd.una))
 		tcb->snd.ptr = tcb->snd.una;
 
-	tcb->flags &= ~RETRAN;
+	if(!tcb->snd.recovery)
+		tcb->flags &= ~RETRAN;
 	tcb->backoff = 0;
 	tcb->backedoff = 0;
 }
 
-void
+static void
 tcpiput(Proto *tcp, Ipifc*, Block *bp)
 {
 	Tcp seg;
@@ -1917,7 +2068,7 @@
 	Tcp6hdr *h6;
 	int hdrlen;
 	Tcpctl *tcb;
-	ushort length;
+	ushort length, csum;
 	uchar source[IPaddrlen], dest[IPaddrlen];
 	Conv *s;
 	Fs *f;
@@ -1980,10 +2131,12 @@
 		h6->ttl = proto;
 		hnputl(h6->vcf, length);
 		if((h6->tcpcksum[0] || h6->tcpcksum[1]) &&
-			ptclcsum(bp, TCP6_IPLEN, length+TCP6_PHDRSIZE)) {
+		    (csum = ptclcsum(bp, TCP6_IPLEN, length+TCP6_PHDRSIZE)) != 0) {
 			tpriv->stats[CsumErrs]++;
 			tpriv->stats[InErrs]++;
-			netlog(f, Logtcp, "bad tcp proto cksum\n");
+			netlog(f, Logtcp,
+			    "bad tcpv6 proto cksum: got %#ux, computed %#ux\n",
+				h6->tcpcksum[0]<<8 | h6->tcpcksum[1], csum);
 			freeblist(bp);
 			return;
 		}
@@ -1995,7 +2148,7 @@
 		if(hdrlen < 0){
 			tpriv->stats[HlenErrs]++;
 			tpriv->stats[InErrs]++;
-			netlog(f, Logtcp, "bad tcp hdr len\n");
+			netlog(f, Logtcp, "bad tcpv6 hdr len\n");
 			return;
 		}
 
@@ -2005,7 +2158,7 @@
 		if(bp == nil){
 			tpriv->stats[LenErrs]++;
 			tpriv->stats[InErrs]++;
-			netlog(f, Logtcp, "tcp len < 0 after trim\n");
+			netlog(f, Logtcp, "tcpv6 len < 0 after trim\n");
 			return;
 		}
 	}
@@ -2016,7 +2169,8 @@
 	/* Look for a matching conversation */
 	s = iphtlook(&tpriv->ht, source, seg.source, dest, seg.dest);
 	if(s == nil){
-		netlog(f, Logtcp, "iphtlook failed");
+		netlog(f, Logtcp, "iphtlook(src %I!%d, dst %I!%d) failed\n",
+			source, seg.source, dest, seg.dest);
 reset:
 		qunlock(tcp);
 		sndrst(tcp, source, dest, length, &seg, version, "no conversation");
@@ -2136,8 +2290,12 @@
 	}
 
 	/* Cut the data to fit the receive window */
+	tcprcvwin(s);
 	if(tcptrim(tcb, &seg, &bp, &length) == -1) {
-		netlog(f, Logtcp, "tcp len < 0, %lud %d\n", seg.seq, length);
+		if(seg.seq+1 != tcb->rcv.nxt || length != 1)
+		netlog(f, Logtcp, "tcp: trim: !inwind: seq %lud-%lud win %lud-%lud l %d from %I\n", 
+			seg.seq, seg.seq + length - 1, 
+			tcb->rcv.nxt, tcb->rcv.nxt + tcb->rcv.wnd-1, length, s->raddr);
 		update(s, &seg);
 		if(qlen(s->wq)+tcb->flgcnt == 0 && tcb->state == Closing) {
 			tcphalt(tpriv, &tcb->rtt_timer);
@@ -2168,12 +2326,15 @@
 	if(seg.seq != tcb->rcv.nxt)
 	if(length != 0 || (seg.flags & (SYN|FIN))) {
 		update(s, &seg);
-		if(addreseq(tcb, tpriv, &seg, bp, length) < 0)
+		if(addreseq(f, tcb, tpriv, &seg, bp, length) < 0)
 			print("reseq %I.%d -> %I.%d\n", s->raddr, s->rport, s->laddr, s->lport);
-		tcb->flags |= FORCE;
+		tcb->flags |= FORCE;		/* force duplicate ack; RFC 5681 §3.2 */
 		goto output;
 	}
 
+	if(tcb->nreseq > 0)
+		tcb->flags |= FORCE;		/* filled hole in sequence space; RFC 5681 §3.2 */
+
 	/*
 	 *  keep looping till we've processed this packet plus any
 	 *  adjacent packets in the resequence queue
@@ -2238,7 +2399,8 @@
 				goto raise;
 			}
 		case Time_wait:
-			tcb->flags |= FORCE;
+			if(seg.flags & FIN)
+				tcb->flags |= FORCE;
 			if(tcb->timer.state != TcptimerON)
 				tcpgo(tpriv, &tcb->timer);
 		}
@@ -2272,34 +2434,12 @@
 				 * receive queue
 				 */
 				if(bp) {
-					bp = packblock(bp);
-					if(bp == nil)
-						panic("tcp packblock");
-					qpassnolim(s->rq, bp);
+					qpassnolim(s->rq, packblock(bp));
 					bp = nil;
-
-					/*
-					 *  Force an ack every 2 data messages.  This is
-					 *  a hack for rob to make his home system run
-					 *  faster.
-					 *
-					 *  this also keeps the standard TCP congestion
-					 *  control working since it needs an ack every
-					 *  2 max segs worth.  This is not quite that,
-					 *  but under a real stream is equivalent since
-					 *  every packet has a max seg in it.
-					 */
-					if(++(tcb->rcv.una) >= 2)
-						tcb->flags |= FORCE;
 				}
 				tcb->rcv.nxt += length;
 
 				/*
-				 *  update our rcv window
-				 */
-				tcprcvwin(s);
-
-				/*
 				 *  turn on the acktimer if there's something
 				 *  to ack
 				 */
@@ -2373,8 +2513,11 @@
 
 			getreseq(tcb, &seg, &bp, &length);
 
-			if(tcptrim(tcb, &seg, &bp, &length) == 0)
+			tcprcvwin(s);
+			if(tcptrim(tcb, &seg, &bp, &length) == 0){
+				tcb->flags |= FORCE;
 				break;
+			}
 		}
 	}
 output:
@@ -2394,15 +2537,15 @@
  *  the lock to ipoput the packet so some care has to be
  *  taken by callers.
  */
-void
+static void
 tcpoutput(Conv *s)
 {
 	Tcp seg;
-	int msgs;
+	uint msgs;
 	Tcpctl *tcb;
 	Block *hbp, *bp;
-	int sndcnt, n;
-	ulong ssize, dsize, usable, sent;
+	int sndcnt;
+	ulong ssize, dsize, sent;
 	Fs *f;
 	Tcppriv *tpriv;
 	uchar version;
@@ -2411,9 +2554,26 @@
 	tpriv = s->p->priv;
 	version = s->ipversion;
 
-	for(msgs = 0; msgs < 100; msgs++) {
-		tcb = (Tcpctl*)s->ptcl;
+	tcb = (Tcpctl*)s->ptcl;
 
+	/* force ack every 2*mss */
+	if((tcb->flags & FORCE) == 0)
+	if(tcb->rcv.nxt - tcb->rcv.ackptr >= 2*tcb->mss){
+		tpriv->stats[Delayack]++;
+		tcb->flags |= FORCE;
+	}
+
+	/* force ack if window opening */
+	if(0)
+	if((tcb->flags & FORCE) == 0){
+		tcprcvwin(s);
+		if((int)(tcb->rcv.wptr - tcb->rcv.wsnt) >= 2*tcb->mss){
+			tpriv->stats[Wopenack]++;
+			tcb->flags |= FORCE;
+		}
+	}
+
+	for(msgs = 0; msgs < 100; msgs++) {
 		switch(tcb->state) {
 		case Listen:
 		case Closed:
@@ -2421,7 +2581,12 @@
 			return;
 		}
 
+		/* Don't send anything else until our SYN has been acked */
+		if(tcb->snd.ptr != tcb->iss && (tcb->flags & SYNACK) == 0)
+			break;
+
 		/* force an ack when a window has opened up */
+		tcprcvwin(s);
 		if(tcb->rcv.blocked && tcb->rcv.wnd > 0){
 			tcb->rcv.blocked = 0;
 			tcb->flags |= FORCE;
@@ -2429,54 +2594,57 @@
 
 		sndcnt = qlen(s->wq)+tcb->flgcnt;
 		sent = tcb->snd.ptr - tcb->snd.una;
-
-		/* Don't send anything else until our SYN has been acked */
-		if(tcb->snd.ptr != tcb->iss && (tcb->flags & SYNACK) == 0)
-			break;
-
-		/* Compute usable segment based on offered window and limit
-		 * window probes to one
-		 */
+		ssize = sndcnt;
 		if(tcb->snd.wnd == 0){
-			if(sent != 0) {
-				if((tcb->flags&FORCE) == 0)
-					break;
-//				tcb->snd.ptr = tcb->snd.una;
+			/* zero window probe */
+			if(sent > 0)
+			if(!(tcb->flags & FORCE))
+				break;	/* already probing, rto re-probes */
+			if(ssize < sent)
+				ssize = 0;
+			else{
+				ssize -= sent;
+				if(ssize > 0)
+					ssize = 1;
 			}
-			usable = 1;
+		} else {
+			/* calculate usable segment size */
+			if(ssize > tcb->cwind)
+				ssize = tcb->cwind;
+			if(ssize > tcb->snd.wnd)
+				ssize = tcb->snd.wnd;
+
+			if(ssize < sent)
+				ssize = 0;
+			else {
+				ssize -= sent;
+				if(ssize > tcb->mss)
+					ssize = tcb->mss;
+			}
 		}
-		else {
-			usable = tcb->cwind;
-			if(tcb->snd.wnd < usable)
-				usable = tcb->snd.wnd;
-			usable -= sent;
-		}
-		ssize = sndcnt-sent;
-		if(ssize && usable < 2)
-			netlog(s->p->f, Logtcp, "throttled snd.wnd %lud cwind %lud\n",
-				tcb->snd.wnd, tcb->cwind);
-		if(usable < ssize)
-			ssize = usable;
-		if(tcb->mss < ssize)
-			ssize = tcb->mss;
+
 		dsize = ssize;
 		seg.urg = 0;
 
-		if(ssize == 0)
-		if((tcb->flags&FORCE) == 0)
-			break;
+		if(!(tcb->flags & FORCE)){
+			if(ssize == 0)
+				break;
+			if(ssize < tcb->mss)
+			if(tcb->snd.nxt == tcb->snd.ptr)
+			if(sent > TCPREXMTTHRESH*tcb->mss)
+				break;
+		}
 
 		tcb->flags &= ~FORCE;
-		tcprcvwin(s);
 
 		/* By default we will generate an ack */
 		tcphalt(tpriv, &tcb->acktimer);
-		tcb->rcv.una = 0;
 		seg.source = s->lport;
 		seg.dest = s->rport;
 		seg.flags = ACK;
 		seg.mss = 0;
 		seg.ws = 0;
+		seg.update = 0;
 		switch(tcb->state){
 		case Syn_sent:
 			seg.flags = 0;
@@ -2516,20 +2684,9 @@
 			}
 		}
 
-		if(sent+dsize == sndcnt)
+		if(sent+dsize == sndcnt && dsize)
 			seg.flags |= PSH;
 
-		/* keep track of balance of resent data */
-		if(seq_lt(tcb->snd.ptr, tcb->snd.nxt)) {
-			n = tcb->snd.nxt - tcb->snd.ptr;
-			if(ssize < n)
-				n = ssize;
-			tcb->resent += n;
-			netlog(f, Logtcp, "rexmit: %I.%d -> %I.%d ptr %lux nxt %lux\n",
-				s->raddr, s->rport, s->laddr, s->lport, tcb->snd.ptr, tcb->snd.nxt);
-			tpriv->stats[RetransSegs]++;
-		}
-
 		tcb->snd.ptr += ssize;
 
 		/* Pull up the send pointer so we can accept acks
@@ -2565,13 +2722,17 @@
 		 * expect acknowledges
 		 */
 		if(ssize != 0){
-			if(tcb->timer.state != TcptimerON)
+			if(tcb->timer.state != TcptimerON){
+				tcb->time = NOW;
+				tcb->timeuna = tcb->snd.una;
 				tcpgo(tpriv, &tcb->timer);
+			}
 
 			/*  If round trip timer isn't running, start it.
 			 *  measure the longest packet only in case the
 			 *  transmission time dominates RTT
 			 */
+			if(tcb->snd.retransmit == 0)
 			if(tcb->rtt_timer.state != TcptimerON)
 			if(ssize == tcb->mss) {
 				tcpgo(tpriv, &tcb->rtt_timer);
@@ -2580,6 +2741,10 @@
 		}
 
 		tpriv->stats[OutSegs]++;
+		if(tcb->snd.retransmit)
+			tpriv->stats[RetransSegsSent]++;
+		tcb->rcv.ackptr = seg.ack;
+		tcb->rcv.wsnt = tcb->rcv.wptr;
 
 		/* put off the next keep alive */
 		tcpgo(tpriv, &tcb->katimer);
@@ -2600,9 +2765,8 @@
 		default:
 			panic("tcpoutput2: version %d", version);
 		}
-		if((msgs%4) == 1){
+		if((msgs%4) == 3){
 			qunlock(s);
-			sched();
 			qlock(s);
 		}
 	}
@@ -2611,7 +2775,7 @@
 /*
  *  the BSD convention (hack?) for keep alives.  resend last uchar acked.
  */
-void
+static void
 tcpsendka(Conv *s)
 {
 	Tcp seg;
@@ -2621,6 +2785,7 @@
 	tcb = (Tcpctl*)s->ptcl;
 
 	dbp = nil;
+	memset(&seg, 0, sizeof seg);
 	seg.urg = 0;
 	seg.source = s->lport;
 	seg.dest = s->rport;
@@ -2632,7 +2797,8 @@
 	else
 		seg.seq = tcb->snd.una-1;
 	seg.ack = tcb->rcv.nxt;
-	tcb->rcv.una = 0;
+	tcb->rcv.ackptr = seg.ack;
+	tcprcvwin(s);
 	seg.wnd = tcb->rcv.wnd;
 	if(tcb->state == Finwait2){
 		seg.flags |= FIN;
@@ -2666,7 +2832,7 @@
 /*
  *  set connection to time out after 12 minutes
  */
-void
+static void
 tcpsetkacounter(Tcpctl *tcb)
 {
 	tcb->kacounter = (12 * 60 * 1000) / (tcb->katimer.start*MSPTICK);
@@ -2678,7 +2844,7 @@
  *  if we've timed out, close the connection
  *  otherwise, send a keepalive and restart the timer
  */
-void
+static void
 tcpkeepalive(void *v)
 {
 	Tcpctl *tcb;
@@ -2706,7 +2872,7 @@
 /*
  *  start keepalive timer
  */
-char*
+static char*
 tcpstartka(Conv *s, char **f, int n)
 {
 	Tcpctl *tcb;
@@ -2729,7 +2895,7 @@
 /*
  *  turn checksums on/off
  */
-char*
+static char*
 tcpsetchecksum(Conv *s, char **f, int)
 {
 	Tcpctl *tcb;
@@ -2740,30 +2906,38 @@
 	return nil;
 }
 
-void
+/*
+ *  retransmit (at most) one segment at snd.una.
+ *  preserve cwind & snd.ptr
+ */
+static void
 tcprxmit(Conv *s)
 {
 	Tcpctl *tcb;
+	Tcppriv *tpriv;
+	ulong tcwind, tptr;
 
 	tcb = (Tcpctl*)s->ptcl;
-
 	tcb->flags |= RETRAN|FORCE;
-	tcb->snd.ptr = tcb->snd.una;
 
-	/*
-	 *  We should be halving the slow start threshhold (down to one
-	 *  mss) but leaving it at mss seems to work well enough
-	 */
- 	tcb->ssthresh = tcb->mss;
-
-	/*
-	 *  pull window down to a single packet
-	 */
+	tptr = tcb->snd.ptr;	/* saved here, restored after tcpoutput */
+	tcwind = tcb->cwind;	/* saved here, restored after tcpoutput */
+	tcb->snd.ptr = tcb->snd.una;	/* send from the oldest unacked byte */
 	tcb->cwind = tcb->mss;
+	tcb->snd.retransmit = 1;	/* set only for the duration of the tcpoutput call */
 	tcpoutput(s);
+	tcb->snd.retransmit = 0;
+	tcb->cwind = tcwind;
+	tcb->snd.ptr = tptr;
+
+	tpriv = s->p->priv;
+	tpriv->stats[RetransSegs]++;	/* counted once per call, whether or not tcpoutput sent anything */
 }
 
-void
+/*
+ *  todo: RFC 4138 F-RTO
+ */
+static void
 tcptimeout(void *arg)
 {
 	Conv *s;
@@ -2792,11 +2966,29 @@
 			localclose(s, Etimedout);
 			break;
 		}
-		netlog(s->p->f, Logtcprxmt, "timeout rexmit 0x%lux %d/%d\n", tcb->snd.una, tcb->timer.start, NOW);
+		netlog(s->p->f, Logtcprxmt, "rxm %d/%d %ldms %lud rto %d %lud %s\n",
+			tcb->srtt, tcb->mdev, NOW-tcb->time,
+			tcb->snd.una-tcb->timeuna, tcb->snd.rto, tcb->snd.ptr,
+			tcpstates[s->state]);
 		tcpsettimer(tcb);
+		if(tcb->snd.rto == 0)
+			tcpcongestion(tcb);
 		tcprxmit(s);
+		tcb->snd.ptr = tcb->snd.una;
+		tcb->cwind = tcb->mss;
+		tcb->snd.rto = 1;
 		tpriv->stats[RetransTimeouts]++;
-		tcb->snd.dupacks = 0;
+
+		if(tcb->snd.recovery){
+			tcb->snd.dupacks = 0;			/* reno rto */
+			tcb->snd.recovery = 0;
+			tpriv->stats[RecoveryRTO]++;
+			tcb->snd.rxt = tcb->snd.nxt;
+			netlog(s->p->f, Logtcpwin,
+				"rto recovery rxt @%lud\n", tcb->snd.nxt);
+		}
+
+		tcb->abcbytes = 0;
 		break;
 	case Time_wait:
 		localclose(s, nil);
@@ -2808,7 +3000,7 @@
 	poperror();
 }
 
-int
+static int
 inwindow(Tcpctl *tcb, int seq)
 {
 	return seq_within(seq, tcb->rcv.nxt, tcb->rcv.nxt+tcb->rcv.wnd-1);
@@ -2817,36 +3009,83 @@
 /*
  *  set up state for a received SYN (or SYN ACK) packet
  */
-void
+static void
 procsyn(Conv *s, Tcp *seg)
 {
 	Tcpctl *tcb;
+	Tcppriv *tpriv;
 
 	tcb = (Tcpctl*)s->ptcl;
 	tcb->flags |= FORCE;
 
 	tcb->rcv.nxt = seg->seq + 1;
+	tcb->rcv.wptr = tcb->rcv.nxt;
+	tcb->rcv.wsnt = 0;
 	tcb->rcv.urg = tcb->rcv.nxt;
 	tcb->irs = seg->seq;
 
 	/* our sending max segment size cannot be bigger than what he asked for */
-	if(seg->mss != 0 && seg->mss < tcb->mss)
+	if(seg->mss != 0 && seg->mss < tcb->mss) {
 		tcb->mss = seg->mss;
+		tpriv = s->p->priv;
+		tpriv->stats[Mss] = tcb->mss;
+	}
 
-	/* the congestion window always starts out as a single segment */
+	/* if the server does not support ws option, disable window scaling */
+	if(seg->ws == 0){
+		tcb->scale = 0;
+		tcb->snd.scale = 0;
+	}
+
 	tcb->snd.wnd = seg->wnd;
-	tcb->cwind = tcb->mss;
+	initialwindow(tcb);
 }
 
-int
-addreseq(Tcpctl *tcb, Tcppriv *tpriv, Tcp *seg, Block *bp, ushort length)
+static int
+dumpreseq(Tcpctl *tcb)
 {
-	Reseq *rp, *rp1;
-	int i, rqlen, qmax;
+	Reseq *r, *next;
 
+	for(r = tcb->reseq; r != nil; r = next){
+		next = r->next;
+		freeblist(r->bp);
+		free(r);
+	}
+	tcb->reseq = nil;
+	tcb->nreseq = 0;
+	tcb->reseqlen = 0;
+	return -1;
+}
+
+static void
+logreseq(Fs *f, Reseq *r, ulong n)
+{
+	char *s;
+
+	for(; r != nil; r = r->next){
+		s = nil;
+		if(r->next == nil && r->seg.seq != n)
+			s = "hole/end";
+		else if(r->next == nil)
+			s = "end";
+		else if(r->seg.seq != n)
+			s = "hole";
+		if(s != nil)
+			netlog(f, Logtcp, "%s %lud-%lud (%ld) %#ux\n", s,
+				n, r->seg.seq, r->seg.seq-n, r->seg.flags);
+		n = r->seg.seq + r->seg.len;
+	}
+}
+
+static int
+addreseq(Fs *f, Tcpctl *tcb, Tcppriv *tpriv, Tcp *seg, Block *bp, ushort length)
+{
+	Reseq *rp, **rr;
+	int qmax;
+
 	rp = malloc(sizeof(Reseq));
 	if(rp == nil){
-		freeblist(bp);	/* bp always consumed by add_reseq */
+		freeblist(bp);	/* bp always consumed by addreseq */
 		return 0;
 	}
 
@@ -2854,56 +3093,39 @@
 	rp->bp = bp;
 	rp->length = length;
 
-	/* Place on reassembly list sorting by starting seq number */
-	rp1 = tcb->reseq;
-	if(rp1 == nil || seq_lt(seg->seq, rp1->seg.seq)) {
-		rp->next = rp1;
-		tcb->reseq = rp;
-		if(rp->next != nil)
-			tpriv->stats[OutOfOrder]++;
-		return 0;
-	}
+	tcb->reseqlen += length;
+	tcb->nreseq++;
 
-	rqlen = 0;
-	for(i = 0;; i++) {
-		rqlen += rp1->length;
-		if(rp1->next == nil || seq_lt(seg->seq, rp1->next->seg.seq)) {
-			rp->next = rp1->next;
-			rp1->next = rp;
+	/* Place on reassembly list sorting by starting seq number */
+	for(rr = &tcb->reseq;; rr = &(*rr)->next)
+		if(*rr == nil || seq_lt(seg->seq, (*rr)->seg.seq)){
+			rp->next = *rr;
+			*rr = rp;
+			tpriv->stats[Resequenced]++;
 			if(rp->next != nil)
 				tpriv->stats[OutOfOrder]++;
 			break;
 		}
-		rp1 = rp1->next;
-	}
-	qmax = QMAX<<tcb->rcv.scale;
-	if(rqlen > qmax){
-		print("resequence queue > window: %d > %d\n", rqlen, qmax);
-		i = 0;
-	  	for(rp1 = tcb->reseq; rp1 != nil; rp1 = rp1->next){
-	  		print("%#lux %#lux %#ux\n", rp1->seg.seq,
-	  			rp1->seg.ack, rp1->seg.flags);
-			if(i++ > 10){
-				print("...\n");
-				break;
-			}
-		}
 
-		// delete entire reassembly queue; wait for retransmit.
-		// - should we be smarter and only delete the tail?
-		for(rp = tcb->reseq; rp != nil; rp = rp1){
-			rp1 = rp->next;
-			freeblist(rp->bp);
-			free(rp);
-		}
-		tcb->reseq = nil;
-
-	  	return -1;
+	qmax = tcb->window;
+	if(tcb->reseqlen > qmax){
+		netlog(f, Logtcp, "tcp: reseq: queue > window: %d > %d; %d packets\n", tcb->reseqlen, qmax, tcb->nreseq);
+		logreseq(f, tcb->reseq, tcb->rcv.nxt);
+		tpriv->stats[ReseqBytelim]++;
+		return dumpreseq(tcb);
 	}
+	qmax = tcb->window / tcb->mss;		/* ~190 for qscale==2, 390 for qscale=3 */
+	if(tcb->nreseq > qmax){
+		netlog(f, Logtcp, "resequence queue > packets: %d %d; %d bytes\n", tcb->nreseq, qmax, tcb->reseqlen);
+		logreseq(f, tcb->reseq, tcb->rcv.nxt);
+		tpriv->stats[ReseqPktlim]++;
+		return dumpreseq(tcb);
+	}
+
 	return 0;
 }
 
-void
+static void
 getreseq(Tcpctl *tcb, Tcp *seg, Block **bp, ushort *length)
 {
 	Reseq *rp;
@@ -2918,10 +3140,13 @@
 	*bp = rp->bp;
 	*length = rp->length;
 
+	tcb->nreseq--;
+	tcb->reseqlen -= rp->length;
+
 	free(rp);
 }
 
-int
+static int
 tcptrim(Tcpctl *tcb, Tcp *seg, Block **bp, ushort *length)
 {
 	ushort len;
@@ -2992,7 +3217,7 @@
 	return 0;
 }
 
-void
+static void
 tcpadvise(Proto *tcp, Block *bp, char *msg)
 {
 	Tcp4hdr *h4;
@@ -3011,8 +3236,7 @@
 		v4tov6(source, h4->tcpsrc);
 		psource = nhgets(h4->tcpsport);
 		pdest = nhgets(h4->tcpdport);
-	}
-	else {
+	} else {
 		ipmove(dest, h6->tcpdst);
 		ipmove(source, h6->tcpsrc);
 		psource = nhgets(h6->tcpsport);
@@ -3021,8 +3245,7 @@
 
 	/* Look for a connection */
 	qlock(tcp);
-	for(p = tcp->conv; *p; p++) {
-		s = *p;
+	for(p = tcp->conv; (s = *p) != nil; p++) {
 		tcb = (Tcpctl*)s->ptcl;
 		if(s->rport == pdest)
 		if(s->lport == psource)
@@ -3029,6 +3252,8 @@
 		if(tcb->state != Closed)
 		if(ipcmp(s->raddr, dest) == 0)
 		if(ipcmp(s->laddr, source) == 0){
+			if(s->ignoreadvice)
+				break;
 			qlock(s);
 			qunlock(tcp);
 			switch(tcb->state){
@@ -3058,9 +3283,11 @@
 }
 
 /* called with c qlocked */
-char*
+static char*
 tcpctl(Conv* c, char** f, int n)
 {
+	if(n == 1 && strcmp(f[0], "close") == 0)
+		return tcpclose(c), nil;
 	if(n == 1 && strcmp(f[0], "hangup") == 0)
 		return tcphangup(c);
 	if(n >= 1 && strcmp(f[0], "keepalive") == 0)
@@ -3072,7 +3299,7 @@
 	return "unknown control request";
 }
 
-int
+static int
 tcpstats(Proto *tcp, char *buf, int len)
 {
 	Tcppriv *priv;
@@ -3083,7 +3310,7 @@
 	p = buf;
 	e = p+len;
 	for(i = 0; i < Nstats; i++)
-		p = seprint(p, e, "%s: %lud\n", statnames[i], priv->stats[i]);
+		p = seprint(p, e, "%s: %llud\n", statnames[i], priv->stats[i]);
 	return p - buf;
 }
 
@@ -3096,7 +3323,7 @@
  *  of questionable validity so we try to use them only when we're
  *  up against the wall.
  */
-int
+static int
 tcpgc(Proto *tcp)
 {
 	Conv *c, **pp, **ep;
@@ -3104,7 +3331,7 @@
 	Tcpctl *tcb;
 
 
-	n = natgc(tcp->ipproto);
+	n = 0;
 	ep = &tcp->conv[tcp->nc];
 	for(pp = tcp->conv; pp < ep; pp++) {
 		c = *pp;
@@ -3116,13 +3343,13 @@
 		switch(tcb->state){
 		case Syn_received:
 			if(NOW - tcb->time > 5000){
-				localclose(c, "timed out");
+				localclose(c, Etimedout);
 				n++;
 			}
 			break;
 		case Finwait2:
 			if(NOW - tcb->time > 5*60*1000){
-				localclose(c, "timed out");
+				localclose(c, Etimedout);
 				n++;
 			}
 			break;
@@ -3132,7 +3359,7 @@
 	return n;
 }
 
-void
+static void
 tcpsettimer(Tcpctl *tcb)
 {
 	int x;
@@ -3141,9 +3368,9 @@
 	x = backoff(tcb->backoff) *
 		(tcb->mdev + (tcb->srtt>>LOGAGAIN) + MSPTICK) / MSPTICK;
 
-	/* bounded twixt 1/2 and 64 seconds */
-	if(x < 500/MSPTICK)
-		x = 500/MSPTICK;
+	/* bounded twixt 0.3 and 64 seconds */
+	if(x < 300/MSPTICK)
+		x = 300/MSPTICK;
 	else if(x > (64000/MSPTICK))
 		x = 64000/MSPTICK;
 	tcb->timer.start = x;
@@ -3177,18 +3404,37 @@
 	Fsproto(fs, tcp);
 }
 
-void
+static void
 tcpsetscale(Conv *s, Tcpctl *tcb, ushort rcvscale, ushort sndscale)
 {
-	if(rcvscale){
-		tcb->rcv.scale = rcvscale & 0xff;
-		tcb->snd.scale = sndscale & 0xff;
-		tcb->window = QMAX<<tcb->snd.scale;
-		qsetlimit(s->rq, tcb->window);
-	} else {
-		tcb->rcv.scale = 0;
-		tcb->snd.scale = 0;
-		tcb->window = QMAX;
-		qsetlimit(s->rq, tcb->window);
-	}
+	/*
+	 * guess at reasonable queue sizes.  there's no current way 
+	 * to know how many nic receive buffers we can safely tie up in the
+	 * tcp stack, and we don't adjust our queues to maximize throughput
+	 * and minimize bufferbloat.  n.b. the offer (rcvscale) needs to be
+	 * respected, but we still control our own buffer commitment by
+	 * keeping a separate qscale.
+	 */
+	tcb->rcv.scale = rcvscale & 0xff;
+	tcb->snd.scale = sndscale & 0xff;
+	tcb->qscale = rcvscale & 0xff;
+	if(rcvscale > Maxqscale)
+		tcb->qscale = Maxqscale;
+
+	if(rcvscale != tcb->rcv.scale)
+		netlog(s->p->f, Logtcp, "tcpsetscale: window %lud qlen %d >> window %ud lport %d\n",
+			tcb->window, qlen(s->rq), QMAX<<tcb->qscale, s->lport);
+	tcb->window = QMAX<<tcb->qscale;
+	tcb->ssthresh = tcb->window;
+
+	/*
+	 * it's important to set wq large enough to cover the full
+	 * bandwidth-delay product.  it's possible to be in loss
+	 * recovery with a big window, and we need to keep sending
+	 * into the inflated window.  the difference can be huge
+	 * for even modest (70ms) ping times.
+	 */
+	qsetlimit(s->rq, QMAX<<tcb->qscale);
+	qsetlimit(s->wq, QMAX<<tcb->qscale);
+	tcprcvwin(s);
 }
--- a/os/ip/udp.c
+++ b/os/ip/udp.c
@@ -24,7 +24,6 @@
 
 	IP_UDPPROTO	= 17,
 	UDP_USEAD7	= 52,
-	UDP_USEAD6	= 36,
 
 	Udprxms		= 200,
 	Udptickms	= 100,
@@ -40,7 +39,7 @@
 	uchar	length[2];	/* packet length */
 	uchar	id[2];		/* Identification */
 	uchar	frag[2];	/* Fragment information */
-	uchar	Unused;	
+	uchar	Unused;
 	uchar	udpproto;	/* Protocol */
 	uchar	udpplen[2];	/* Header plus data length */
 	uchar	udpsrc[IPv4addrlen];	/* Ip source */
@@ -73,10 +72,10 @@
 typedef struct Udpstats Udpstats;
 struct Udpstats
 {
-	ulong	udpInDatagrams;
+	uvlong	udpInDatagrams;
 	ulong	udpNoPorts;
 	ulong	udpInErrors;
-	ulong	udpOutDatagrams;
+	uvlong	udpOutDatagrams;
 };
 
 typedef struct Udppriv Udppriv;
@@ -101,7 +100,6 @@
 typedef struct Udpcb Udpcb;
 struct Udpcb
 {
-	QLock;
 	uchar	headers;
 };
 
@@ -125,7 +123,7 @@
 static int
 udpstate(Conv *c, char *state, int n)
 {
-	return snprint(state, n, "%s qin %d qout %d",
+	return snprint(state, n, "%s qin %d qout %d\n",
 		c->inuse ? "Open" : "Closed",
 		c->rq ? qlen(c->rq) : 0,
 		c->wq ? qlen(c->wq) : 0
@@ -151,7 +149,7 @@
 static void
 udpcreate(Conv *c)
 {
-	c->rq = qopen(64*1024, Qmsg, 0, 0);
+	c->rq = qopen(512*1024, Qmsg, 0, 0);
 	c->wq = qbypass(udpkick, c);
 }
 
@@ -175,8 +173,6 @@
 
 	ucb = (Udpcb*)c->ptcl;
 	ucb->headers = 0;
-
-	qunlock(c);
 }
 
 void
@@ -192,12 +188,13 @@
 	Udppriv *upriv;
 	Fs *f;
 	int version;
-	Conv *rc;
+	Routehint *rh;
+	ushort csum;
 
 	upriv = c->p->priv;
 	f = c->p->f;
 
-	netlog(c->p->f, Logudp, "udp: kick\n");
+//	netlog(c->p->f, Logudp, "udp: kick\n");	/* frequent and uninteresting */
 	if(bp == nil)
 		return;
 
@@ -219,21 +216,6 @@
 		rport = nhgets(bp->rp);
 		bp->rp += 2+2;			/* Ignore local port */
 		break;
-	case 6:
-		/* get user specified addresses */
-		bp = pullupblock(bp, UDP_USEAD6);
-		if(bp == nil)
-			return;
-		ipmove(raddr, bp->rp);
-		bp->rp += IPaddrlen;
-		ipmove(laddr, bp->rp);
-		bp->rp += IPaddrlen;
-		/* pick interface closest to dest */
-		if(ipforme(f, laddr) != Runi)
-			findlocalip(f, laddr, raddr);
-		rport = nhgets(bp->rp);
-		bp->rp += 2+2;			/* Ignore local port */
-		break;
 	default:
 		rport = 0;
 		break;
@@ -240,18 +222,12 @@
 	}
 
 	if(ucb->headers) {
-		if(memcmp(laddr, v4prefix, IPv4off) == 0 ||
-		    ipcmp(laddr, IPnoaddr) == 0)
+		if(isv4(laddr) || ipcmp(laddr, IPnoaddr) == 0)
 			version = V4;
 		else
 			version = V6;
 	} else {
-		if( (memcmp(c->raddr, v4prefix, IPv4off) == 0 &&
-			memcmp(c->laddr, v4prefix, IPv4off) == 0)
-			|| ipcmp(c->raddr, IPnoaddr) == 0)
-			version = V4;
-		else
-			version = V6;
+		version = convipvers(c);
 	}
 
 	dlen = blocklen(bp);
@@ -260,9 +236,6 @@
 	switch(version){
 	case V4:
 		bp = padblock(bp, UDP4_IPHDR_SZ+UDP_UDPHDR_SZ);
-		if(bp == nil)
-			return;
-
 		uh4 = (Udp4hdr *)(bp->rp);
 		ptcllen = dlen + UDP_UDPHDR_SZ;
 		uh4->Unused = 0;
@@ -274,7 +247,7 @@
 			v6tov4(uh4->udpdst, raddr);
 			hnputs(uh4->udpdport, rport);
 			v6tov4(uh4->udpsrc, laddr);
-			rc = nil;
+			rh = nil;
 		} else {
 			v6tov4(uh4->udpdst, c->raddr);
 			hnputs(uh4->udpdport, c->rport);
@@ -281,25 +254,26 @@
 			if(ipcmp(c->laddr, IPnoaddr) == 0)
 				findlocalip(f, c->laddr, c->raddr);
 			v6tov4(uh4->udpsrc, c->laddr);
-			rc = c;
+			rh = c;
 		}
 		hnputs(uh4->udpsport, c->lport);
 		hnputs(uh4->udplen, ptcllen);
 		uh4->udpcksum[0] = 0;
 		uh4->udpcksum[1] = 0;
-		hnputs(uh4->udpcksum, 
-		       ptclcsum(bp, UDP4_PHDR_OFF, dlen+UDP_UDPHDR_SZ+UDP4_PHDR_SZ));
+		csum = ptclcsum(bp, UDP4_PHDR_OFF, dlen+UDP_UDPHDR_SZ+UDP4_PHDR_SZ);
+		if(csum == 0)
+			csum = 0xffff;	/* -0 */
+		hnputs(uh4->udpcksum, csum);
 		uh4->vihl = IP_VER4;
-		ipoput4(f, bp, 0, c->ttl, c->tos, rc);
+		ipoput4(f, bp, 0, c->ttl, c->tos, rh);
 		break;
 
 	case V6:
+		/*
+		 * using the v6 ip header to create pseudo header
+		 * first then reset it to the normal ip header
+		 */
 		bp = padblock(bp, UDP6_IPHDR_SZ+UDP_UDPHDR_SZ);
-		if(bp == nil)
-			return;
-
-		// using the v6 ip header to create pseudo header 
-		// first then reset it to the normal ip header
 		uh6 = (Udp6hdr *)(bp->rp);
 		memset(uh6, 0, 8);
 		ptcllen = dlen + UDP_UDPHDR_SZ;
@@ -309,7 +283,7 @@
 			ipmove(uh6->udpdst, raddr);
 			hnputs(uh6->udpdport, rport);
 			ipmove(uh6->udpsrc, laddr);
-			rc = nil;
+			rh = nil;
 		} else {
 			ipmove(uh6->udpdst, c->raddr);
 			hnputs(uh6->udpdport, c->rport);
@@ -316,19 +290,21 @@
 			if(ipcmp(c->laddr, IPnoaddr) == 0)
 				findlocalip(f, c->laddr, c->raddr);
 			ipmove(uh6->udpsrc, c->laddr);
-			rc = c;
+			rh = c;
 		}
 		hnputs(uh6->udpsport, c->lport);
 		hnputs(uh6->udplen, ptcllen);
 		uh6->udpcksum[0] = 0;
 		uh6->udpcksum[1] = 0;
-		hnputs(uh6->udpcksum, 
-		       ptclcsum(bp, UDP6_PHDR_OFF, dlen+UDP_UDPHDR_SZ+UDP6_PHDR_SZ));
+		csum = ptclcsum(bp, UDP6_PHDR_OFF, dlen+UDP_UDPHDR_SZ+UDP6_PHDR_SZ);
+		if(csum == 0)
+			csum = 0xffff;	/* -0 */
+		hnputs(uh6->udpcksum, csum);
 		memset(uh6, 0, 8);
 		uh6->viclfl[0] = IP_VER6;
 		hnputs(uh6->len, ptcllen);
 		uh6->nextheader = IP_UDPPROTO;
-		ipoput6(f, bp, 0, c->ttl, c->tos, rc);
+		ipoput6(f, bp, 0, c->ttl, c->tos, rh);
 		break;
 
 	default:
@@ -360,10 +336,8 @@
 	uh4 = (Udp4hdr*)(bp->rp);
 	version = ((uh4->vihl&0xF0)==IP_VER6) ? V6 : V4;
 
-	/*
-	 * Put back pseudo header for checksum 
-	 * (remember old values for icmpnoconv())
-	 */
+	/* Put back pseudo header for checksum
+	 * (remember old values for icmpnoconv()) */
 	switch(version) {
 	case V4:
 		ottl = uh4->Unused;
@@ -423,7 +397,7 @@
 
 	c = iphtlook(&upriv->ht, raddr, rport, laddr, lport);
 	if(c == nil){
-		/* no converstation found */
+		/* no conversation found */
 		upriv->ustats.udpNoPorts++;
 		qunlock(udp);
 		netlog(f, Logudp, "udp: no conv %I!%d -> %I!%d\n", raddr, rport,
@@ -434,7 +408,7 @@
 			icmpnoconv(f, bp);
 			break;
 		case V6:
-			icmphostunr(f, ifc, bp, icmp6_port_unreach, 0);
+			icmphostunr6(f, ifc, bp, Icmp6_port_unreach, 0);
 			break;
 		default:
 			panic("udpiput2: version %d", version);
@@ -448,18 +422,8 @@
 	if(c->state == Announced){
 		if(ucb->headers == 0){
 			/* create a new conversation */
-			if(ipforme(f, laddr) != Runi) {
-				switch(version){
-				case V4:
-					v4tov6(laddr, ifc->lifc->local);
-					break;
-				case V6:
-					ipmove(laddr, ifc->lifc->local);
-					break;
-				default:
-					panic("udpiput3: version %d", version);
-				}
-			}
+			if(ipforme(f, laddr) != Runi)
+				ipv6local(ifc, laddr, 0, raddr);
 			c = Fsnewcall(c, raddr, rport, laddr, lport, version);
 			if(c == nil){
 				qunlock(udp);
@@ -507,33 +471,21 @@
 		p = bp->rp;
 		ipmove(p, raddr); p += IPaddrlen;
 		ipmove(p, laddr); p += IPaddrlen;
-		ipmove(p, ifc->lifc->local); p += IPaddrlen;
+		if(!ipv6local(ifc, p, 0, raddr))
+			ipmove(p, ifc->lifc != nil ? ifc->lifc->local : IPnoaddr);
+		p += IPaddrlen;
 		hnputs(p, rport); p += 2;
 		hnputs(p, lport);
 		break;
-	case 6:
-		/* pass the src address */
-		bp = padblock(bp, UDP_USEAD6);
-		p = bp->rp;
-		ipmove(p, raddr); p += IPaddrlen;
-		ipmove(p, ipforme(f, laddr)==Runi ? laddr : ifc->lifc->local); p += IPaddrlen;
-		hnputs(p, rport); p += 2;
-		hnputs(p, lport);
-		break;
 	}
 
-	if(bp->next)
-		bp = concatblock(bp);
-
 	if(qfull(c->rq)){
-		qunlock(c);
-		netlog(f, Logudp, "udp: qfull %I.%d -> %I.%d\n", raddr, rport,
-		       laddr, lport);
+		netlog(f, Logudp, "udp: qfull %I.%d -> %I.%d\n",
+			raddr, rport, laddr, lport);
 		freeblist(bp);
-		return;
+	} else {
+		qpass(c->rq, concatblock(bp));
 	}
-
-	qpass(c->rq, bp);
 	qunlock(c);
 
 }
@@ -545,11 +497,13 @@
 
 	ucb = (Udpcb*)c->ptcl;
 	if(n == 1){
-		if(strcmp(f[0], "oldheaders") == 0){
-			ucb->headers = 6;
+		if(strcmp(f[0], "hangup") == 0){
+			qhangup(c->rq, nil);
+			qhangup(c->wq, nil);
 			return nil;
-		} else if(strcmp(f[0], "headers") == 0){
-			ucb->headers = 7;
+		}
+		if(strcmp(f[0], "headers") == 0){
+			ucb->headers = 7;	/* new headers format */
 			return nil;
 		}
 	}
@@ -564,34 +518,25 @@
 	uchar source[IPaddrlen], dest[IPaddrlen];
 	ushort psource, pdest;
 	Conv *s, **p;
-	int version;
 
 	h4 = (Udp4hdr*)(bp->rp);
-	version = ((h4->vihl&0xF0)==IP_VER6) ? V6 : V4;
+	h6 = (Udp6hdr*)(bp->rp);
 
-	switch(version) {
-	case V4:
+	if((h4->vihl&0xF0)==IP_VER4) {
 		v4tov6(dest, h4->udpdst);
 		v4tov6(source, h4->udpsrc);
 		psource = nhgets(h4->udpsport);
 		pdest = nhgets(h4->udpdport);
-		break;
-	case V6:
-		h6 = (Udp6hdr*)(bp->rp);
+	} else {
 		ipmove(dest, h6->udpdst);
 		ipmove(source, h6->udpsrc);
 		psource = nhgets(h6->udpsport);
 		pdest = nhgets(h6->udpdport);
-		break;
-	default:
-		panic("udpadvise: version %d", version);
-		return;  /* to avoid a warning */
 	}
 
 	/* Look for a connection */
 	qlock(udp);
-	for(p = udp->conv; *p; p++) {
-		s = *p;
+	for(p = udp->conv; (s = *p) != nil; p++) {
 		if(s->rport == pdest)
 		if(s->lport == psource)
 		if(ipcmp(s->raddr, dest) == 0)
@@ -617,7 +562,8 @@
 	Udppriv *upriv;
 
 	upriv = udp->priv;
-	return snprint(buf, len, "InDatagrams: %lud\nNoPorts: %lud\nInErrors: %lud\nOutDatagrams: %lud\n",
+	return snprint(buf, len, "InDatagrams: %llud\nNoPorts: %lud\n"
+		"InErrors: %lud\nOutDatagrams: %llud\n",
 		upriv->ustats.udpInDatagrams,
 		upriv->ustats.udpNoPorts,
 		upriv->ustats.udpInErrors,
@@ -624,12 +570,6 @@
 		upriv->ustats.udpOutDatagrams);
 }
 
-int
-udpgc(Proto *udp)
-{
-	return natgc(udp->ipproto);
-}
-
 void
 udpinit(Fs *fs)
 {
@@ -647,7 +587,6 @@
 	udp->rcv = udpiput;
 	udp->advise = udpadvise;
 	udp->stats = udpstats;
-	udp->gc = udpgc;
 	udp->ipproto = IP_UDPPROTO;
 	udp->nc = Nchans;
 	udp->ptclsize = sizeof(Udpcb);
diff -u a/os/ip//arp.c b/os/ip//arp.c
--- a/os/ip//arp.c
+++ b/os/ip//arp.c
@@ -47,7 +47,8 @@
 
 #define haship(s) ((s)[IPaddrlen-1]%NHASH)
 
-extern int 	ReTransTimer = RETRANS_TIMER;
+int 	ReTransTimer = RETRANS_TIMER;
+
 static void 	rxmitproc(void *v);
 
 void
@@ -57,145 +58,121 @@
 	f->arp->f = f;
 	f->arp->rxmt = nil;
 	f->arp->dropf = f->arp->dropl = nil;
-	kproc("rxmitproc", rxmitproc, f->arp, 0);
+	kproc("rxmitproc", rxmitproc, f->arp);
 }
 
-/*
- *  create a new arp entry for an ip address.
- */
-static Arpent*
-newarp6(Arp *arp, uchar *ip, Ipifc *ifc, int addrxt)
+static void
+freeblistchain(Block *bp)
 {
-	uint t;
-	Block *next, *xp;
-	Arpent *a, *e, *f, **l;
-	Medium *m = ifc->m;
-	int empty;
+	Block *next;
 
-	/* find oldest entry */
-	e = &arp->cache[NCACHE];
-	a = arp->cache;
-	t = a->utime;
-	for(f = a; f < e; f++){
-		if(f->utime < t){
-			t = f->utime;
-			a = f;
-		}
+	while(bp != nil){
+		next = bp->list;
+		freeblist(bp);
+		bp = next;
 	}
+}
 
-	/* dump waiting packets */
-	xp = a->hold;
-	a->hold = nil;
+/* take out of re-transmit chain */
+static Arpent**
+rxmtunchain(Arp *arp, Arpent *a)
+{
+	Arpent **l;
 
-	if(isv4(a->ip)){
-		while(xp){
-			next = xp->list;
-			freeblist(xp);
-			xp = next;
+	for(l = &arp->rxmt; *l != nil; l = &((*l)->nextrxt)){
+		if(*l == a){
+			*l = a->nextrxt;
+			break;
 		}
 	}
-	else {	// queue icmp unreachable for rxmitproc later on, w/o arp lock
-		if(xp){
-			if(arp->dropl == nil) 
-				arp->dropf = xp;
-			else
-				arp->dropl->list = xp;
+	a->nextrxt = nil;
+	return l;
+}
 
-			for(next = xp->list; next; next = next->list)
-				xp = next;
-			arp->dropl = xp;
-			wakeup(&arp->rxmtq);
-		}
-	}
+static void
+cleanarpent(Arp *arp, Arpent *a)
+{
+	Arpent **l;
+	Block *bp;
 
 	/* take out of current chain */
-	l = &arp->hash[haship(a->ip)];
-	for(f = *l; f; f = f->hash){
-		if(f == a){
+	for(l = &arp->hash[haship(a->ip)]; *l != nil; l = &((*l)->hash)){
+		if(*l == a){
 			*l = a->hash;
 			break;
 		}
-		l = &f->hash;
 	}
+	a->hash = nil;
 
-	/* insert into new chain */
-	l = &arp->hash[haship(ip)];
-	a->hash = *l;
-	*l = a;
+	/* dump waiting packets */
+	bp = a->hold;
+	a->hold = nil;
+	if(isv4(a->ip))
+		freeblistchain(bp);
+	else {
+		rxmtunchain(arp, a);
 
-	memmove(a->ip, ip, sizeof(a->ip));
-	a->utime = NOW;
-	a->ctime = 0;
-	a->type = m;
+		/* queue icmp unreachable for rxmitproc later on, w/o arp lock */
+		if(bp != nil){
+			if(arp->dropf == nil)
+				arp->dropf = bp;
+			else
+				arp->dropl->list = bp;
+			arp->dropl = a->last;
 
-	a->rtime = NOW + ReTransTimer;
-	a->rxtsrem = MAX_MULTICAST_SOLICIT;
-	a->ifc = ifc;
-	a->ifcid = ifc->ifcid;
-
-	/* put to the end of re-transmit chain; addrxt is 0 when isv4(a->ip) */
-	if(!ipismulticast(a->ip) && addrxt){
-		l = &arp->rxmt;
-		empty = (*l==nil);
-
-		for(f = *l; f; f = f->nextrxt){
-			if(f == a){
-				*l = a->nextrxt;
-				break;
-			}
-			l = &f->nextrxt;
+			if(bp == arp->dropf)
+				wakeup(&arp->rxmtq);
 		}
-		for(f = *l; f; f = f->nextrxt){
-			l = &f->nextrxt;
-		}
-		*l = a;
-		if(empty) 
-			wakeup(&arp->rxmtq);
 	}
+	a->last = nil;
 
-	a->nextrxt = nil;
+	a->ifc = nil;
+	a->ifcid = 0;
 
-	return a;
-}
+	a->state = 0;
+	a->rxtsrem = 0;
 
-/* called with arp qlocked */
+	a->utime = 0;
+	a->ctime = 0;
 
-void
-cleanarpent(Arp *arp, Arpent *a)
+	memset(a->ip, 0, sizeof(a->ip));
+	memset(a->mac, 0, sizeof(a->mac));
+}
+
+/*
+ *  create a new arp entry for an ip address on ifc.
+ */
+static Arpent*
+newarpent(Arp *arp, uchar *ip, Ipifc *ifc)
 {
-	Arpent *f, **l;
+	Arpent *a, *e, *f, **l;
+	ulong t;
 
-	a->utime = 0;
-	a->ctime = 0;
-	a->type = 0;
-	a->state = 0;
-	
-	/* take out of current chain */
-	l = &arp->hash[haship(a->ip)];
-	for(f = *l; f; f = f->hash){
-		if(f == a){
-			*l = a->hash;
-			break;
+	/* find oldest entry */
+	e = &arp->cache[NCACHE];
+	a = arp->cache;
+	t = a->utime;
+	for(f = a; f < e; f++){
+		if(f->utime < t){
+			t = f->utime;
+			a = f;
 		}
-		l = &f->hash;
 	}
+	cleanarpent(arp, a);
 
-	/* take out of re-transmit chain */
-	l = &arp->rxmt;
-	for(f = *l; f; f = f->nextrxt){
-		if(f == a){
-			*l = a->nextrxt;
-			break;
-		}
-		l = &f->nextrxt;
-	}
-	a->nextrxt = nil;
-	a->hash = nil;
-	a->hold = nil;
-	a->last = nil;
-	a->ifc = nil;
+	ipmove(a->ip, ip);
+	a->ifc = ifc;
+	a->ifcid = ifc->ifcid;
+
+	/* insert into new chain */
+	l = &arp->hash[haship(ip)];
+	a->hash = *l;
+	*l = a;
+
+	return a;
 }
 
+
 /*
  *  fill in the media address if we have it.  Otherwise return an
  *  Arpent that represents the state of the address resolution FSM
@@ -207,7 +184,6 @@
 {
 	int hash;
 	Arpent *a;
-	Medium *type = ifc->m;
 	uchar v6ip[IPaddrlen];
 
 	if(version == V4){
@@ -217,30 +193,28 @@
 
 	qlock(arp);
 	hash = haship(ip);
-	for(a = arp->hash[hash]; a; a = a->hash){
-		if(memcmp(ip, a->ip, sizeof(a->ip)) == 0)
-		if(type == a->type)
+	for(a = arp->hash[hash]; a != nil; a = a->hash){
+		if(a->ifc == ifc && a->ifcid == ifc->ifcid && ipcmp(ip, a->ip) == 0)
 			break;
 	}
-
 	if(a == nil){
-		a = newarp6(arp, ip, ifc, (version != V4));
+		a = newarpent(arp, ip, ifc);
 		a->state = AWAIT;
 	}
 	a->utime = NOW;
 	if(a->state == AWAIT){
 		if(bp != nil){
-			if(a->hold)
-				a->last->list = bp;
-			else
+			bp->list = nil; 
+			if(a->hold == nil)
 				a->hold = bp;
+			else
+				a->last->list = bp;
 			a->last = bp;
-			bp->list = nil; 
 		}
 		return a;		/* return with arp qlocked */
 	}
 
-	memmove(mac, a->mac, a->type->maclen);
+	memmove(mac, a->mac, ifc->m->maclen);
 
 	/* remove old entries */
 	if(NOW - a->ctime > 15*60*1000)
@@ -269,132 +243,82 @@
 arpresolve(Arp *arp, Arpent *a, Medium *type, uchar *mac)
 {
 	Block *bp;
-	Arpent *f, **l;
 
-	if(!isv4(a->ip)){
-		l = &arp->rxmt;
-		for(f = *l; f; f = f->nextrxt){
-			if(f == a){
-				*l = a->nextrxt;
-				break;
-			}
-			l = &f->nextrxt;
-		}
-	}
-
 	memmove(a->mac, mac, type->maclen);
-	a->type = type;
+	if(a->state == AWAIT && !isv4(a->ip)){
+		rxmtunchain(arp, a);
+		a->rxtsrem = 0;
+	}
 	a->state = AOK;
-	a->utime = NOW;
+	a->ctime = a->utime = NOW;
 	bp = a->hold;
-	a->hold = nil;
+	a->hold = a->last = nil;
 	qunlock(arp);
 
 	return bp;
 }
 
-void
-arpenter(Fs *fs, int version, uchar *ip, uchar *mac, int n, int refresh)
+int
+arpenter(Fs *fs, int version, uchar *ip, uchar *mac, int n, uchar *ia, Ipifc *ifc, int refresh)
 {
-	Arp *arp;
-	Route *r;
-	Arpent *a, *f, **l;
-	Ipifc *ifc;
-	Medium *type;
-	Block *bp, *next;
 	uchar v6ip[IPaddrlen];
+	Block *bp, *next;
+	Arpent *a;
+	Route *r;
+	Arp *arp;
 
-	arp = fs->arp;
+	if(ifc->m == nil || ifc->m->maclen != n || ifc->m->maclen == 0)
+		return -1;
 
-	if(n != 6){
-//		print("arp: len = %d\n", n);
-		return;
-	}
-
 	switch(version){
 	case V4:
-		r = v4lookup(fs, ip, nil);
+		r = v4lookup(fs, ip, ia, nil);
 		v4tov6(v6ip, ip);
 		ip = v6ip;
 		break;
 	case V6:
-		r = v6lookup(fs, ip, nil);
+		r = v6lookup(fs, ip, ia, nil);
 		break;
 	default:
 		panic("arpenter: version %d", version);
-		return;	/* to supress warnings */
+		return -1;	/* to suppress warnings */
 	}
 
-	if(r == nil){
-//		print("arp: no route for entry\n");
-		return;
-	}
+	if(r == nil || r->ifc != ifc || (r->type & (Rbcast|Rmulti)) != 0)
+		return -1;
 
-	ifc = r->ifc;
-	type = ifc->m;
-
+	arp = fs->arp;
 	qlock(arp);
-	for(a = arp->hash[haship(ip)]; a; a = a->hash){
-		if(a->type != type || (a->state != AWAIT && a->state != AOK))
+	for(a = arp->hash[haship(ip)]; a != nil; a = a->hash){
+		if(a->ifc != ifc || a->ifcid != ifc->ifcid)
 			continue;
-
 		if(ipcmp(a->ip, ip) == 0){
-			a->state = AOK;
-			memmove(a->mac, mac, type->maclen);
-
-			if(version == V6){
-				/* take out of re-transmit chain */
-				l = &arp->rxmt;
-				for(f = *l; f; f = f->nextrxt){
-					if(f == a){
-						*l = a->nextrxt;
-						break;
-					}
-					l = &f->nextrxt;
-				}
-			}
-
-			a->ifc = ifc;
-			a->ifcid = ifc->ifcid;
-			bp = a->hold;
-			a->hold = nil;
 			if(version == V4)
 				ip += IPv4off;
-			a->utime = NOW;
-			a->ctime = a->utime;
-			qunlock(arp);
-
-			while(bp){
+			bp = arpresolve(arp, a, ifc->m, mac);	/* unlocks arp */
+			for(; bp != nil; bp = next){
 				next = bp->list;
-				if(ifc != nil){
-					if(waserror()){
-						runlock(ifc);
-						nexterror();
-					}
-					rlock(ifc);
-					if(ifc->m != nil)
-						ifc->m->bwrite(ifc, bp, version, ip);
-					else
-						freeb(bp);
-					runlock(ifc);
-					poperror();
-				} else
-					freeb(bp);
-				bp = next;
+				bp->list = nil;
+				if(waserror()){
+					freeblistchain(next);
+					break;
+				}
+				ipifcoput(ifc, bp, version, ip);
+				poperror();
 			}
-			return;
+			return 1;
 		}
 	}
 
 	if(refresh == 0){
-		a = newarp6(arp, ip, ifc, 0);
+		a = newarpent(arp, ip, ifc);
 		a->state = AOK;
-		a->type = type;
-		a->ctime = NOW;
-		memmove(a->mac, mac, type->maclen);
+		a->ctime = a->utime = NOW;
+		memmove(a->mac, mac, n);
 	}
-
 	qunlock(arp);
+
+	return refresh == 0;
 }
 
 int
@@ -401,13 +325,12 @@
 arpwrite(Fs *fs, char *s, int len)
 {
 	int n;
-	Route *r;
 	Arp *arp;
-	Block *bp;
-	Arpent *a, *fl, **l;
+	Arpent *a, *x;
 	Medium *m;
-	char *f[4], buf[256];
-	uchar ip[IPaddrlen], mac[MAClen];
+	Ipifc *ifc;
+	char *f[5], buf[256];
+	uchar ip[IPaddrlen], ia[IPaddrlen], mac[MAClen];
 
 	arp = fs->arp;
 
@@ -420,7 +343,7 @@
 	if(len > 0 && buf[len-1] == '\n')
 		buf[len-1] = 0;
 
-	n = getfields(buf, f, 4, 1, " ");
+	n = getfields(buf, f, nelem(f), 1, " ");
 	if(strcmp(f[0], "flush") == 0){
 		qlock(arp);
 		for(a = arp->cache; a < &arp->cache[NCACHE]; a++){
@@ -427,19 +350,20 @@
 			memset(a->ip, 0, sizeof(a->ip));
 			memset(a->mac, 0, sizeof(a->mac));
 			a->hash = nil;
+			a->nextrxt = nil;
+			a->ifc = nil;
+			a->ifcid = 0;
 			a->state = 0;
+			a->rxtsrem = 0;
+			a->ctime = 0;
 			a->utime = 0;
-			while(a->hold != nil){
-				bp = a->hold->list;
-				freeblist(a->hold);
-				a->hold = bp;
-			}
+			freeblistchain(a->hold);
+			a->hold = a->last = nil;
 		}
 		memset(arp->hash, 0, sizeof(arp->hash));
-// clear all pkts on these lists (rxmt, dropf/l)
+		freeblistchain(arp->dropf);
+		arp->dropf = arp->dropl = nil;
 		arp->rxmt = nil;
-		arp->dropf = nil;
-		arp->dropl = nil;
 		qunlock(arp);
 	} else if(strcmp(f[0], "add") == 0){
 		switch(n){
@@ -446,64 +370,53 @@
 		default:
 			error(Ebadarg);
 		case 3:
-			parseip(ip, f[1]);
-			if(isv4(ip))
-				r = v4lookup(fs, ip+IPv4off, nil);
-			else
-				r = v6lookup(fs, ip, nil);
-			if(r == nil)
-				error("Destination unreachable");
-			m = r->ifc->m;
-			n = parsemac(mac, f[2], m->maclen);
+			if(parseip(ip, f[1]) == -1)
+				error(Ebadip);
+			if((n = parsemac(mac, f[2], sizeof(mac))) <= 0)
+				error(Ebadarp);
+			findlocalip(fs, ia, ip);
 			break;
 		case 4:
 			m = ipfindmedium(f[1]);
-			if(m == nil)
+			if(m == nil || m->maclen == 0)
 				error(Ebadarp);
-			parseip(ip, f[2]);
-			n = parsemac(mac, f[3], m->maclen);
+			if(parseip(ip, f[2]) == -1)
+				error(Ebadip);
+			if((n = parsemac(mac, f[3], sizeof(mac))) != m->maclen)
+				error(Ebadarp);
+			findlocalip(fs, ia, ip);
 			break;
+		case 5:
+			m = ipfindmedium(f[1]);
+			if(m == nil || m->maclen == 0)
+				error(Ebadarp);
+			if(parseip(ip, f[2]) == -1)
+				error(Ebadip);
+			if((n = parsemac(mac, f[3], sizeof(mac))) != m->maclen)
+				error(Ebadarp);
+			if(parseip(ia, f[4]) == -1)
+				error(Ebadip);
+			break;
 		}
-
-		if(m->ares == nil)
-			error(Ebadarp);
-
-		m->ares(fs, V6, ip, mac, n, 0);
+		if((ifc = findipifc(fs, ia, ia, Runi)) == nil)
+			error("no interface");
+		rlock(ifc);
+		if(!ipv6local(ifc, ia, 0, ip) || arpenter(fs, V6, ip, mac, n, ia, ifc, 0) < 0){
+			runlock(ifc);
+			error("destination unreachable");
+		}
+		runlock(ifc);
 	} else if(strcmp(f[0], "del") == 0){
-		if(n != 2)
+		if (n != 2)
 			error(Ebadarg);
-
-		parseip(ip, f[1]);
+		if (parseip(ip, f[1]) == -1)
+			error(Ebadip);
 		qlock(arp);
-
-		l = &arp->hash[haship(ip)];
-		for(a = *l; a; a = a->hash){
-			if(memcmp(ip, a->ip, sizeof(a->ip)) == 0){
-				*l = a->hash;
-				break;
-			}
-			l = &a->hash;
+		for(a = arp->hash[haship(ip)]; a != nil; a = x){
+			x = a->hash;
+			if(ipcmp(ip, a->ip) == 0)
+				cleanarpent(arp, a);
 		}
-	
-		if(a){
-			/* take out of re-transmit chain */
-			l = &arp->rxmt;
-			for(fl = *l; fl; fl = fl->nextrxt){
-				if(fl == a){
-					*l = a->nextrxt;
-					break;
-				}
-				l = &fl->nextrxt;
-			}
-
-			a->nextrxt = nil;
-			a->hash = nil;
-			a->hold = nil;
-			a->last = nil;
-			a->ifc = nil;
-			memset(a->ip, 0, sizeof(a->ip));
-			memset(a->mac, 0, sizeof(a->mac));
-		}
 		qunlock(arp);
 	} else
 		error(Ebadarp);
@@ -511,13 +424,6 @@
 	return len;
 }
 
-enum
-{
-	Alinelen=	90,
-};
-
-char *aformat = "%-6.6s %-8.8s %-40.40I %-32.32s\n";
-
 static void
 convmac(char *p, uchar *mac, int n)
 {
@@ -526,136 +432,136 @@
 }
 
 int
-arpread(Arp *arp, char *p, ulong offset, int len)
+arpread(Arp *arp, char *s, ulong offset, int len)
 {
+	char mac[2*MAClen+1], *state, *mname, *p;
+	uchar ip[IPaddrlen], ia[IPaddrlen];
+	Ipifc *ifc;
 	Arpent *a;
-	int n;
-	char mac[2*MAClen+1];
+	long n, o;
 
-	if(offset % Alinelen)
-		return 0;
-
-	offset = offset/Alinelen;
-	len = len/Alinelen;
-
-	n = 0;
+	p = s;
+	o = -offset;
 	for(a = arp->cache; len > 0 && a < &arp->cache[NCACHE]; a++){
-		if(a->state == 0)
+		if(a->state == 0 || (ifc = a->ifc) == nil)
 			continue;
-		if(offset > 0){
-			offset--;
+
+		rlock(ifc);
+		qlock(arp);
+		state = arpstate[a->state];
+		ipmove(ip, a->ip);
+		if(ifc->m == nil || a->ifcid != ifc->ifcid || !ipv6local(ifc, ia, 0, ip)){
+			qunlock(arp);
+			runlock(ifc);
 			continue;
 		}
-		len--;
-		qlock(arp);
-		convmac(mac, a->mac, a->type->maclen);
-		n += sprint(p+n, aformat, a->type->name, arpstate[a->state], a->ip, mac);
+		mname = ifc->m->name;
+		convmac(mac, a->mac, ifc->m->maclen);
 		qunlock(arp);
+		runlock(ifc);
+
+		n = snprint(up->genbuf, sizeof up->genbuf,
+			"%-6.6s %-4.4s %-40.40I %-16.16s %I\n",
+			mname, state, ip, mac, ia);
+		o += n;
+		if(o <= 0)
+			continue;
+		if(n > len)
+			break;
+		memmove(p, up->genbuf, n);
+		len -= n;
+		p += n;
 	}
 
-	return n;
+	return p - s;
 }
 
-extern int
-rxmitsols(Arp *arp)
+void
+ndpsendsol(Fs *f, Ipifc *ifc, Arpent *a)
 {
-	uint sflag;
-	Block *next, *xp;
-	Arpent *a, *b, **l;
-	Fs *f;
-	uchar ipsrc[IPaddrlen];
-	Ipifc *ifc = nil;
-	long nrxt;
+	uchar targ[IPaddrlen], src[IPaddrlen];
+	Arpent **l;
 
-	qlock(arp);
-	f = arp->f;
+	a->ctime = NOW;
+	if(a->rxtsrem == 0)
+		a->rxtsrem = MAX_MULTICAST_SOLICIT;
+	else
+		a->rxtsrem--;
 
-	a = arp->rxmt;
-	if(a==nil){
-		nrxt = 0;
-		goto dodrops; 		//return nrxt;
-	}
-	nrxt = a->rtime - NOW;
-	if(nrxt > 3*ReTransTimer/4) 
-		goto dodrops; 		//return nrxt;
+	/* put on end of re-transmit chain */
+	for(l = rxmtunchain(f->arp, a); *l != nil; l = &(*l)->nextrxt)
+		;
+	*l = a;
 
-	for(; a; a = a->nextrxt){
-		ifc = a->ifc;
-		assert(ifc != nil);
-		if((a->rxtsrem <= 0) || !(canrlock(ifc)) || (a->ifcid != ifc->ifcid)){
-			xp = a->hold;
-			a->hold = nil;
+	if(l == &f->arp->rxmt)
+		wakeup(&f->arp->rxmtq);
 
-			if(xp){
-				if(arp->dropl == nil) 
-					arp->dropf = xp;
-				else
-					arp->dropl->list = xp;
-			}
+	/* try to use source address of original packet */
+	ipmove(targ, a->ip);
+	if(a->last != nil){
+		ipmove(src, ((Ip6hdr*)a->last->rp)->src);
+		arprelease(f->arp, a);
 
-			cleanarpent(arp, a);
-		}
-		else
-			break;
+		if(iplocalonifc(ifc, src) != nil || ipproxyifc(f, ifc, src))
+			goto send;
+	} else {
+		arprelease(f->arp, a);
 	}
-	if(a == nil)
-		goto dodrops;
+	if(!ipv6local(ifc, src, 0, targ))
+		return;
+send:
+	if(!waserror()){
+		icmpns(f, src, SRC_UNI, targ, TARG_MULTI, ifc->mac);
+		poperror();
+	}
+}
 
+static void
+rxmitsols(Arp *arp)
+{
+	Block *next, *bp;
+	Arpent *a;
+	Ipifc *ifc;
+	Route *r;
 
-	qunlock(arp);	/* for icmpns */
-	if((sflag = ipv6anylocal(ifc, ipsrc)) != SRC_UNSPEC) 
-		icmpns(f, ipsrc, sflag, a->ip, TARG_MULTI, ifc->mac); 
-
-	runlock(ifc);
-	qlock(arp);	
-
-	/* put to the end of re-transmit chain */
-	l = &arp->rxmt;
-	for(b = *l; b; b = b->nextrxt){
-		if(b == a){
-			*l = a->nextrxt;
-			break;
+	qlock(arp);
+	while((a = arp->rxmt) != nil && NOW - a->ctime > 3*ReTransTimer/4){
+		if(a->rxtsrem > 0 && (ifc = a->ifc) != nil && canrlock(ifc)){
+			if(a->ifcid == ifc->ifcid){
+				ndpsendsol(arp->f, ifc, a);	/* unlocks arp */
+				runlock(ifc);
+				qlock(arp);
+				continue;
+			}
+			runlock(ifc);
 		}
-		l = &b->nextrxt;
+		cleanarpent(arp, a);
 	}
-	for(b = *l; b; b = b->nextrxt){
-		l = &b->nextrxt;
-	}
-	*l = a;
-	a->rxtsrem--;
-	a->nextrxt = nil;
-	a->rtime = NOW + ReTransTimer;
-
-	a = arp->rxmt;
-	if(a==nil)
-		nrxt = 0;
-	else 
-		nrxt = a->rtime - NOW;
-
-dodrops:
-	xp = arp->dropf;
-	arp->dropf = nil;
-	arp->dropl = nil;
+	bp = arp->dropf;
+	arp->dropf = arp->dropl = nil;
 	qunlock(arp);
 
-	for(; xp; xp = next){
-		next = xp->list;
-		icmphostunr(f, ifc, xp, icmp6_adr_unreach, 1);
+	for(; bp != nil; bp = next){
+		next = bp->list;
+		bp->list = nil;
+		r = v6lookup(arp->f, ((Ip6hdr*)bp->rp)->src, ((Ip6hdr*)bp->rp)->dst, nil);
+		if(r != nil && (ifc = r->ifc) != nil && canrlock(ifc)){
+			if(!waserror()){
+				icmphostunr6(arp->f, ifc, bp, Icmp6_adr_unreach, (r->type & Runi) != 0);
+				poperror();
+			}
+			runlock(ifc);
+		}
+		freeblist(bp);
 	}
-
-	return nrxt;
-
 }
 
 static int
 rxready(void *v)
 {
-	Arp *arp = (Arp *) v;
-	int x;
+	Arp *arp = (Arp *)v;
 
-	x = ((arp->rxmt != nil) || (arp->dropf != nil));
-
-	return x;
+	return arp->rxmt != nil || arp->dropf != nil;
 }
 
 static void
@@ -662,20 +568,15 @@
 rxmitproc(void *v)
 {
 	Arp *arp = v;
-	long wakeupat;
 
 	arp->rxmitp = up;
-	//print("arp rxmitproc started\n");
 	if(waserror()){
-		arp->rxmitp = 0;
+		arp->rxmitp = nil;
 		pexit("hangup", 1);
 	}
 	for(;;){
-		wakeupat = rxmitsols(arp);
-		if(wakeupat == 0) 
-			sleep(&arp->rxmtq, rxready, v); 
-		else if(wakeupat > ReTransTimer/4) 
-			tsleep(&arp->rxmtq, return0, 0, wakeupat); 
+		sleep(&arp->rxmtq, rxready, v);
+		rxmitsols(arp);
+		tsleep(&arp->rxmtq, return0, nil, ReTransTimer/4);
 	}
 }
-
diff -u a/os/ip//devip.c b/os/ip//devip.c
--- a/os/ip//devip.c
+++ b/os/ip//devip.c
@@ -14,7 +14,6 @@
 	Qbootp,
 	Qndb,
 	Qiproute,
-	Qiprouter,
 	Qipselftab,
 	Qlog,
 
@@ -43,11 +42,11 @@
 	Maskproto=	(1<<Logproto)-1,
 	Shiftproto=	Logtype + Logconv,
 
-	Nfs=		32,
+	Nfs=		128,
 };
-#define TYPE(x) 	( ((u32)(x).path) & Masktype )
-#define CONV(x) 	( (((u32)(x).path) >> Shiftconv) & Maskconv )
-#define PROTO(x) 	( (((u32)(x).path) >> Shiftproto) & Maskproto )
+#define TYPE(x) 	( ((ulong)(x).path) & Masktype )
+#define CONV(x) 	( (((ulong)(x).path) >> Shiftconv) & Maskconv )
+#define PROTO(x) 	( (((ulong)(x).path) >> Shiftproto) & Maskproto )
 #define QID(p, c, y) 	( ((p)<<(Shiftproto)) | ((c)<<Shiftconv) | (y) )
 
 static char network[] = "network";
@@ -58,8 +57,7 @@
 
 extern	void nullmediumlink(void);
 extern	void pktmediumlink(void);
-static	long ndbwrite(Fs*, char*, ulong, int);
-extern void    closeconv(Conv*);
+	long ndbwrite(Fs *f, char *a, ulong off, int n);
 
 static int
 ip3gen(Chan *c, int i, Dir *dp)
@@ -121,7 +119,7 @@
 		mkqid(&q, QID(PROTO(c->qid), 0, Qstats), 0, QTFILE);
 		devdir(c, q, "stats", 0, network, 0444, dp);
 		return 1;
-	}	
+	}
 	return -1;
 }
 
@@ -144,11 +142,10 @@
 		return -1;
 	case Qarp:
 		p = "arp";
+		prot = 0664;
 		break;
 	case Qbootp:
 		p = "bootp";
-		if(bootp == nil)
-			return 0;
 		break;
 	case Qndb:
 		p = "ndb";
@@ -157,14 +154,12 @@
 		break;
 	case Qiproute:
 		p = "iproute";
+		prot = 0664;
 		break;
 	case Qipselftab:
 		p = "ipselftab";
 		prot = 0444;
 		break;
-	case Qiprouter:
-		p = "iprouter";
-		break;
 	case Qlog:
 		p = "log";
 		break;
@@ -188,7 +183,7 @@
 	case Qtopdir:
 		if(s == DEVDOTDOT){
 			mkqid(&q, QID(0, 0, Qtopdir), 0, QTDIR);
-			sprint(up->genbuf, "#I%ud", c->dev);
+			snprint(up->genbuf, sizeof up->genbuf, "#I%lud", c->dev);
 			devdir(c, q, up->genbuf, 0, network, 0555, dp);
 			return 1;
 		}
@@ -206,19 +201,18 @@
 	case Qndb:
 	case Qlog:
 	case Qiproute:
-	case Qiprouter:
 	case Qipselftab:
 		return ip1gen(c, TYPE(c->qid), dp);
 	case Qprotodir:
 		if(s == DEVDOTDOT){
 			mkqid(&q, QID(0, 0, Qtopdir), 0, QTDIR);
-			sprint(up->genbuf, "#I%ud", c->dev);
+			snprint(up->genbuf, sizeof up->genbuf, "#I%lud", c->dev);
 			devdir(c, q, up->genbuf, 0, network, 0555, dp);
 			return 1;
 		}
 		if(s < f->p[PROTO(c->qid)]->ac) {
 			cv = f->p[PROTO(c->qid)]->conv[s];
-			sprint(up->genbuf, "%d", s);
+			snprint(up->genbuf, sizeof up->genbuf, "%d", s);
 			mkqid(&q, QID(PROTO(c->qid), s, Qconvdir), 0, QTDIR);
 			devdir(c, q, up->genbuf, 0, cv->owner, 0555, dp);
 			return 1;
@@ -262,45 +256,14 @@
 	fmtinstall('M', eipfmt);
 }
 
-static Fs*
-ipgetfs(int dev)
-{
-	extern void (*ipprotoinit[])(Fs*);
-	Fs *f;
-	int i;
-
-	if(dev >= Nfs)
-		return nil;
-
-	qlock(&fslock);
-	if(ipfs[dev] == nil){
-		f = smalloc(sizeof(Fs));
-		ip_init(f);
-		arpinit(f);
-		netloginit(f);
-		for(i = 0; ipprotoinit[i]; i++)
-			ipprotoinit[i](f);
-		f->dev = dev;
-		ipfs[dev] = f;
-	}
-	qunlock(&fslock);
-
-	return ipfs[dev];
-}
-
 IPaux*
 newipaux(char *owner, char *tag)
 {
 	IPaux *a;
-	int n;
 
 	a = smalloc(sizeof(*a));
 	kstrdup(&a->owner, owner);
-	memset(a->tag, ' ', sizeof(a->tag));
-	n = strlen(tag);
-	if(n > sizeof(a->tag))
-		n = sizeof(a->tag);
-	memmove(a->tag, tag, n);
+	strncpy(a->tag, tag, sizeof(a->tag));
 	return a;
 }
 
@@ -310,13 +273,29 @@
 ipattach(char* spec)
 {
 	Chan *c;
-	int dev;
+	ulong dev;
 
-	dev = atoi(spec);
+	dev = strtoul(spec, nil, 10);
 	if(dev >= Nfs)
-		error("bad specification");
+		error(Enodev);
 
-	ipgetfs(dev);
+	qlock(&fslock);
+	if(ipfs[dev] == nil){
+		extern void (*ipprotoinit[])(Fs*);
+		Fs *f;
+		int i;
+
+		f = smalloc(sizeof(Fs));
+		ip_init(f);
+		arpinit(f);
+		netloginit(f);
+		for(i = 0; ipprotoinit[i]; i++)
+			ipprotoinit[i](f);
+		f->dev = dev;
+		ipfs[dev] = f;
+	}
+	qunlock(&fslock);
+
 	c = devattach('I', spec);
 	mkqid(&c->qid, QID(0, 0, Qtopdir), 0, QTDIR);
 	c->dev = dev;
@@ -327,7 +306,7 @@
 }
 
 static Walkqid*
-ipwalk(Chan* c, Chan *nc, char **name, s32 nname)
+ipwalk(Chan* c, Chan *nc, char **name, int nname)
 {
 	IPaux *a = c->aux;
 	Walkqid* w;
@@ -338,8 +317,9 @@
 	return w;
 }
 
-static s32
-ipstat(Chan* c, uchar* db, s32 n)
+
+static int
+ipstat(Chan* c, uchar* db, int n)
 {
 	return devstat(c, db, n, nil, 0, ipgen);
 }
@@ -360,7 +340,7 @@
 };
 
 static Chan*
-ipopen(Chan* c, u32 omode)
+ipopen(Chan* c, int omode)
 {
 	Conv *cv, *nc;
 	Proto *p;
@@ -375,7 +355,7 @@
 	default:
 		break;
 	case Qndb:
-		if(omode & (OWRITE|OTRUNC) && !iseve())
+		if((omode & (OWRITE|OTRUNC)) != 0 && !iseve())
 			error(Eperm);
 		if((omode & (OWRITE|OTRUNC)) == (OWRITE|OTRUNC))
 			f->ndb[0] = 0;
@@ -383,10 +363,10 @@
 	case Qlog:
 		netlogopen(f);
 		break;
-	case Qiprouter:
-		iprouteropen(f);
-		break;
 	case Qiproute:
+	case Qarp:
+		if(omode != OREAD && !iseve())
+			error(Eperm);
 		break;
 	case Qtopdir:
 	case Qprotodir:
@@ -412,13 +392,8 @@
 	case Qclone:
 		p = f->p[PROTO(c->qid)];
 		qlock(p);
-		if(waserror()){
-			qunlock(p);
-			nexterror();
-		}
 		cv = Fsprotoclone(p, ATTACHER(c));
 		qunlock(p);
-		poperror();
 		if(cv == nil) {
 			error(Enodev);
 			break;
@@ -437,15 +412,12 @@
 			qunlock(p);
 			nexterror();
 		}
-		if((perm & (cv->perm>>6)) != perm) {
-			if(strcmp(ATTACHER(c), cv->owner) != 0)
-				error(Eperm);
-		 	if((perm & cv->perm) != perm)
-				error(Eperm); 
+		if(strcmp(ATTACHER(c), cv->owner) == 0)
+			perm <<= 6;
+		if((perm & cv->perm) != perm && !iseve())
+			error(Eperm);
 
-		}
-		cv->inuse++;
-		if(cv->inuse == 1){
+		if(++cv->inuse == 1){
 			kstrdup(&cv->owner, ATTACHER(c));
 			cv->perm = 0660;
 		}
@@ -455,24 +427,26 @@
 		break;
 	case Qlisten:
 		cv = f->p[PROTO(c->qid)]->conv[CONV(c->qid)];
-		if((perm & (cv->perm>>6)) != perm) {
-			if(strcmp(ATTACHER(c), cv->owner) != 0)
-				error(Eperm);
-		 	if((perm & cv->perm) != perm)
-				error(Eperm); 
-
+		qlock(cv);
+		if(waserror()){
+			qunlock(cv);
+			nexterror();
 		}
+		if(strcmp(ATTACHER(c), cv->owner) == 0)
+			perm <<= 6;
+		if((perm & cv->perm) != perm && !iseve())
+			error(Eperm);
 
 		if(cv->state != Announced)
 			error("not announced");
 
+		cv->inuse++;
+		qunlock(cv);
+		poperror();
 		if(waserror()){
 			closeconv(cv);
 			nexterror();
 		}
-		qlock(cv);
-		cv->inuse++;
-		qunlock(cv);
 
 		nc = nil;
 		while(nc == nil) {
@@ -494,7 +468,6 @@
 			if(nc != nil){
 				cv->incall = nc->next;
 				mkqid(&c->qid, QID(PROTO(c->qid), nc->x, Qctl), 0, QTFILE);
-				kstrdup(&cv->owner, ATTACHER(c));
 			}
 			qunlock(cv);
 
@@ -511,13 +484,25 @@
 	return c;
 }
 
-static s32
-ipwstat(Chan *c, uchar *dp, s32 n)
+static Chan*
+ipcreate(Chan*, char*, int, ulong)
 {
-	Dir *d;
+	error(Eperm);
+	return 0;
+}
+
+static void
+ipremove(Chan*)
+{
+	error(Eperm);
+}
+
+static int
+ipwstat(Chan *c, uchar *dp, int n)
+{
+	Dir *dir;
 	Conv *cv;
 	Fs *f;
-	Proto *p;
 
 	f = ipfs[c->dev];
 	switch(TYPE(c->qid)) {
@@ -529,28 +514,40 @@
 		break;
 	}
 
-	d = smalloc(sizeof(*d)+n);
+	dir = smalloc(sizeof(Dir)+n);
 	if(waserror()){
-		free(d);
+		free(dir);
 		nexterror();
 	}
-	n = convM2D(dp, n, d, (char*)&d[1]);
+	n = convM2D(dp, n, &dir[0], (char*)&dir[1]);
 	if(n == 0)
 		error(Eshortstat);
-	p = f->p[PROTO(c->qid)];
-	cv = p->conv[CONV(c->qid)];
-	if(!iseve() && strcmp(ATTACHER(c), cv->owner) != 0)
+
+	cv = f->p[PROTO(c->qid)]->conv[CONV(c->qid)];
+	qlock(cv);
+	if(waserror()){
+		qunlock(cv);
+		nexterror();
+	}
+	if(strcmp(ATTACHER(c), cv->owner) != 0 && !iseve())
 		error(Eperm);
-	if(!emptystr(d->uid))
-		kstrdup(&cv->owner, d->uid);
-	if(d->mode != ~0UL)
-		cv->perm = d->mode & 0777;
+	if(!emptystr(dir->uid)){
+		if(strcmp(dir->uid, commonuser()) != 0 && !iseve())
+			error(Eperm);
+		kstrdup(&cv->owner, dir->uid);
+	}
+	if(dir->mode != ~0UL)
+		cv->perm = dir->mode & 0666;
+	qunlock(cv);
 	poperror();
-	free(d);
+
+	free(dir);
+	poperror();
+
 	return n;
 }
 
-extern void
+void
 closeconv(Conv *cv)
 {
 	Conv *nc;
@@ -564,7 +561,7 @@
 	}
 
 	/* close all incoming calls since no listen will ever happen */
-	for(nc = cv->incall; nc; nc = cv->incall){
+	for(nc = cv->incall; nc != nil; nc = cv->incall){
 		cv->incall = nc->next;
 		closeconv(nc);
 	}
@@ -576,9 +573,9 @@
 	while((mp = cv->multi) != nil)
 		ipifcremmulti(cv, mp->ma, mp->ia);
 
-	cv->r = nil;
-	cv->rgen = 0;
-	cv->p->close(cv);
+	if(cv->p->close != nil)
+		(*cv->p->close)(cv);
+
 	cv->state = Idle;
 	qunlock(cv);
 }
@@ -596,10 +593,6 @@
 		if(c->flag & COPEN)
 			netlogclose(f);
 		break;
-	case Qiprouter:
-		if(c->flag & COPEN)
-			iprouterclose(f);
-		break;
 	case Qdata:
 	case Qctl:
 	case Qerr:
@@ -620,13 +613,13 @@
 	Statelen=	32*1024,
 };
 
-static s32
-ipread(Chan *ch, void *a, s32 n, s64 off)
+static long
+ipread(Chan *ch, void *a, long n, vlong off)
 {
 	Conv *c;
 	Proto *x;
 	char *buf, *p;
-	s32 rv;
+	long rv;
 	Fs *f;
 	ulong offset = off;
 
@@ -648,21 +641,22 @@
 		return readstr(offset, a, n, f->ndb);
 	case Qiproute:
 		return routeread(f, a, offset, n);
-	case Qiprouter:
-		return iprouterread(f, a, n);
 	case Qipselftab:
 		return ipselftabread(f, a, offset, n);
 	case Qlog:
 		return netlogread(f, a, offset, n);
 	case Qctl:
-		sprint(up->genbuf, "%ud", CONV(ch->qid));
-		return readstr(offset, p, n, up->genbuf);
+		buf = smalloc(16);
+		snprint(buf, 16, "%lud", CONV(ch->qid));
+		rv = readstr(offset, p, n, buf);
+		free(buf);
+		return rv;
 	case Qremote:
 		buf = smalloc(Statelen);
 		x = f->p[PROTO(ch->qid)];
 		c = x->conv[CONV(ch->qid)];
 		if(x->remote == nil) {
-			sprint(buf, "%I!%d\n", c->raddr, c->rport);
+			snprint(buf, Statelen, "%I!%d\n", c->raddr, c->rport);
 		} else {
 			(*x->remote)(c, buf, Statelen-2);
 		}
@@ -674,7 +668,7 @@
 		x = f->p[PROTO(ch->qid)];
 		c = x->conv[CONV(ch->qid)];
 		if(x->local == nil) {
-			sprint(buf, "%I!%d\n", c->laddr, c->lport);
+			snprint(buf, Statelen, "%I!%d\n", c->laddr, c->lport);
 		} else {
 			(*x->local)(c, buf, Statelen-2);
 		}
@@ -711,7 +705,7 @@
 }
 
 static Block*
-ipbread(Chan* ch, s32 n, u32 offset)
+ipbread(Chan* ch, long n, ulong offset)
 {
 	Conv *c;
 	Proto *x;
@@ -740,7 +734,7 @@
 /*
  *  set a local port making sure the quad of raddr,rport,laddr,lport is unique
  */
-static char*
+char*
 setluniqueport(Conv* c, int lport)
 {
 	Proto *p;
@@ -771,51 +765,63 @@
 }
 
 /*
+ * is lport already in use by any conversation of protocol p? (1 if so, else 0)
+ */
+static int
+lportinuse(Proto *p, ushort lport)
+{
+	int x;
+
+	for(x = 0; x < p->nc && p->conv[x]; x++)
+		if(p->conv[x]->lport == lport)
+			return 1;
+	return 0;
+}
+
+/*
  *  pick a local port and set it
  */
-extern void
+char *
 setlport(Conv* c)
 {
 	Proto *p;
-	ushort *pp;
-	int x, found;
+	int i, port;
 
 	p = c->p;
-	if(c->restricted)
-		pp = &p->nextrport;
-	else
-		pp = &p->nextport;
 	qlock(p);
-	for(;;(*pp)++){
+	if(c->restricted){
+		/* Restricted ports cycle through 600..1023; reaching 1024 wraps back to 600. */
+		for(i=0; i<1024-600; i++){
+			if(p->nextrport >= 1024 || p->nextrport < 600)
+				p->nextrport = 600;
+			port = p->nextrport++;
+			if(!lportinuse(p, port))
+				goto chosen;
+		}
+	}else{
 		/*
-		 * Fsproto initialises p->nextport to 0 and the restricted
-		 * ports (p->nextrport) to 600.
-		 * Restricted ports must lie between 600 and 1024.
-		 * For the initial condition or if the unrestricted port number
-		 * has wrapped round, select a random port between 5000 and 1<<15
-		 * to start at.
+		 * Unrestricted ports are chosen randomly
+		 * between 2^15 and 2^16.  There are at most
+		 * 4*Nchan = 4096 ports in use at any given time,
+		 * so even in the worst case, a random probe has a
+		 * 1 - 4096/2^15 = 87% chance of success.
+		 * If 64 successive probes fail, there is a bug somewhere
+		 * (or a once in 10^58 event has happened, but that's
+		 * less likely than a venti collision).
 		 */
-		if(c->restricted){
-			if(*pp >= 1024)
-				*pp = 600;
+		for(i=0; i<64; i++){
+			port = (1<<15) + nrand(1<<15);
+			if(!lportinuse(p, port))
+				goto chosen;
 		}
-		else while(*pp < 5000)
-			*pp = nrand(1<<15);
-
-		found = 0;
-		for(x = 0; x < p->nc; x++){
-			if(p->conv[x] == nil)
-				break;
-			if(p->conv[x]->lport == *pp){
-				found = 1;
-				break;
-			}
-		}
-		if(found == 0)
-			break;
 	}
-	c->lport = (*pp)++;
 	qunlock(p);
+	return "no ports available";
+
+chosen:
+	c->lport = port;
+	qunlock(p);
+	return nil;
 }
 
 /*
@@ -822,7 +828,7 @@
  *  set a local address and port from a string of the form
  *	[address!]port[!r]
  */
-static char*
+char*
 setladdrport(Conv* c, char* str, int announcing)
 {
 	char *p;
@@ -830,8 +836,6 @@
 	ushort lport;
 	uchar addr[IPaddrlen];
 
-	rv = nil;
-
 	/*
 	 *  ignore restricted part if it exists.  it's
 	 *  meaningless on local ports.
@@ -854,8 +858,9 @@
 		if(strcmp(str, "*") == 0)
 			ipmove(c->laddr, IPnoaddr);
 		else {
-			parseip(addr, str);
-			if(ipforme(c->p->f, addr))
+			if(parseip(addr, str) == -1)
+				return Ebadip;
+			if(ipforme(c->p->f, addr) != 0 || ipismulticast(addr))
 				ipmove(c->laddr, addr);
 			else
 				return "not a local IP address";
@@ -869,9 +874,13 @@
 		return setluniqueport(c, 0);
 	}
 
-	lport = atoi(p);
+	str = p;
+	lport = strtol(str, &p, 10);
+	if(p <= str || strchr("!", *p) == nil)
+		return "bad numeric port";
+
 	if(lport <= 0)
-		setlport(c);
+		rv = setlport(c);
 	else
 		rv = setluniqueport(c, lport);
 	return rv;
@@ -886,13 +895,17 @@
 	if(p == nil)
 		return "malformed address";
 	*p++ = 0;
-	parseip(c->raddr, str);
-	c->rport = atoi(p);
-	p = strchr(p, '!');
-	if(p){
-		if(strstr(p, "!r") != nil)
-			c->restricted = 1;
-	}
+	if(parseip(c->raddr, str) == -1)
+		return Ebadip;
+
+	str = p;
+	c->rport = strtol(str, &p, 10);
+	if(p <= str || strchr("!", *p) == nil)
+		return "bad numeric port";
+
+	if(strstr(p, "!r") != nil)
+		c->restricted = 1;
+
 	return nil;
 }
 
@@ -912,7 +925,9 @@
 		if(p != nil)
 			return p;
 		setladdr(c);
-		setlport(c);
+		p = setlport(c);
+		if (p != nil)
+			return p;
 		break;
 	case 3:
 		p = setraddrport(c, argv[1]);
@@ -923,12 +938,7 @@
 			return p;
 	}
 
-	if((memcmp(c->raddr, v4prefix, IPv4off) == 0 &&
-		memcmp(c->laddr, v4prefix, IPv4off) == 0)
-		|| ipcmp(c->raddr, IPnoaddr) == 0)
-		c->ipversion = V4;
-	else
-		c->ipversion = V6;
+	c->ipversion = convipvers(c);
 
 	return nil;
 }
@@ -978,10 +988,11 @@
 	c->rport = 0;
 	switch(argc){
 	default:
-		return "bad args to announce";
+		break;
 	case 2:
 		return setladdrport(c, argv[1], 1);
 	}
+	return "bad args to announce";
 }
 
 /*
@@ -1028,10 +1039,11 @@
 {
 	switch(argc){
 	default:
-		return "bad args to bind";
+		break;
 	case 2:
 		return setladdrport(c, argv[1], 0);
 	}
+	return "bad args to bind";
 }
 
 static void
@@ -1042,7 +1054,7 @@
 	if(x->bind == nil)
 		p = Fsstdbind(c, cb->f, cb->nf);
 	else
-		p = x->bind(c, cb->f, cb->nf);
+		p = (*x->bind)(c, cb->f, cb->nf);
 	if(p != nil)
 		error(p);
 }
@@ -1065,8 +1077,8 @@
 		c->ttl = atoi(cb->f[1]);
 }
 
-static s32
-ipwrite(Chan* ch, void *v, s32 n, s64 off)
+static long
+ipwrite(Chan* ch, void *v, long n, vlong off)
 {
 	Conv *c;
 	Proto *x;
@@ -1075,6 +1087,7 @@
 	uchar ia[IPaddrlen], ma[IPaddrlen];
 	Fs *f;
 	char *a;
+	ulong offset = off;
 
 	a = v;
 	f = ipfs[ch->dev];
@@ -1099,7 +1112,8 @@
 		netlogctl(f, a, n);
 		return n;
 	case Qndb:
-		return ndbwrite(f, a, off, n);
+		return ndbwrite(f, a, offset, n);
+		break;
 	case Qctl:
 		x = f->p[PROTO(ch->qid)];
 		c = x->conv[CONV(ch->qid)];
@@ -1131,13 +1145,15 @@
 			if(cb->nf == 2){
 				if(!ipismulticast(c->raddr))
 					error("addmulti for a non multicast address");
-				parseip(ia, cb->f[1]);
+				if (parseip(ia, cb->f[1]) == -1)
+					error(Ebadip);
 				ipifcaddmulti(c, c->raddr, ia);
 			} else {
-				parseip(ma, cb->f[2]);
+				if (parseip(ia, cb->f[1]) == -1 ||
+				    parseip(ma, cb->f[2]) == -1)
+					error(Ebadip);
 				if(!ipismulticast(ma))
 					error("addmulti for a non multicast address");
-				parseip(ia, cb->f[1]);
 				ipifcaddmulti(c, ma, ia);
 			}
 		} else if(strcmp(cb->f[0], "remmulti") == 0){
@@ -1145,10 +1161,11 @@
 				error("remmulti needs interface address");
 			if(!ipismulticast(c->raddr))
 				error("remmulti for a non multicast address");
-			parseip(ia, cb->f[1]);
+			if (parseip(ia, cb->f[1]) == -1)
+				error(Ebadip);
 			ipifcremmulti(c, c->raddr, ia);
 		} else if(x->ctl != nil) {
-			p = x->ctl(c, cb->f, cb->nf);
+			p = (*x->ctl)(c, cb->f, cb->nf);
 			if(p != nil)
 				error(p);
 		} else
@@ -1160,13 +1177,12 @@
 	return n;
 }
 
-static s32
-ipbwrite(Chan* ch, Block* bp, u32 offset)
+static long
+ipbwrite(Chan* ch, Block* bp, ulong offset)
 {
 	Conv *c;
 	Proto *x;
 	Fs *f;
-	int n;
 
 	switch(TYPE(ch->qid)){
 	case Qdata:
@@ -1177,11 +1193,7 @@
 		if(c->wq == nil)
 			error(Eperm);
 
-		if(bp->next)
-			bp = concatblock(bp);
-		n = BLEN(bp);
-		qbwrite(c->wq, bp);
-		return n;
+		return qbwrite(c->wq, bp);
 	default:
 		return devbwrite(ch, bp, offset);
 	}
@@ -1198,13 +1210,13 @@
 	ipwalk,
 	ipstat,
 	ipopen,
-	devcreate,
+	ipcreate,
 	ipclose,
 	ipread,
 	ipbread,
 	ipwrite,
 	ipbwrite,
-	devremove,
+	ipremove,
 	ipwstat,
 };
 
@@ -1224,12 +1236,15 @@
 
 	p->qid.type = QTDIR;
 	p->qid.path = QID(f->np, 0, Qprotodir);
+	if(p->nc > Maskconv+1){
+		print("Fsproto: %s nc %d > %d\n", p->name, p->nc, Maskconv+1);
+		p->nc = Maskconv+1;
+	}
 	p->conv = malloc(sizeof(Conv*)*(p->nc+1));
 	if(p->conv == nil)
 		panic("Fsproto");
 
 	p->x = f->np;
-	p->nextport = 0;
 	p->nextrport = 600;
 	f->p[f->np++] = p;
 
@@ -1262,21 +1277,33 @@
 		if(c == nil){
 			c = malloc(sizeof(Conv));
 			if(c == nil)
-				error(Enomem);
-			qlock(c);
+				return nil;
+			if(waserror()){
+				qfree(c->rq);
+				qfree(c->wq);
+				qfree(c->eq);
+				qfree(c->sq);
+				free(c->ptcl);
+				free(c);
+				return nil;
+			}
 			c->p = p;
 			c->x = pp - p->conv;
 			if(p->ptclsize != 0){
 				c->ptcl = malloc(p->ptclsize);
-				if(c->ptcl == nil) {
-					free(c);
+				if(c->ptcl == nil)
 					error(Enomem);
-				}
 			}
-			*pp = c;
-			p->ac++;
 			c->eq = qopen(1024, Qmsg, 0, 0);
+			if(c->eq == nil)
+				error(Enomem);
 			(*p->create)(c);
+			if(c->rq == nil || c->wq == nil)
+				error(Enomem);
+			poperror();
+			qlock(c);
+			*pp = c;
+			p->ac++;
 			break;
 		}
 		if(canqlock(c)){
@@ -1291,8 +1318,11 @@
 		}
 	}
 	if(pp >= ep) {
-		if(p->gc != nil && (*p->gc)(p))
-			goto retry;
+		if(p->gc != nil){
+			print("Fsprotoclone: garbage collecting %s Convs\n", p->name);
+			if((*p->gc)(p))
+				goto retry;
+		}
 		return nil;
 	}
 
@@ -1307,8 +1337,9 @@
 	c->lport = 0;
 	c->rport = 0;
 	c->restricted = 0;
+	c->ignoreadvice = 0;
 	c->ttl = MAXTTL;
-	c->tos = DFLTTOS;
+	c->tos = 0;
 	qreopen(c->rq);
 	qreopen(c->wq);
 	qreopen(c->eq);
@@ -1321,7 +1352,7 @@
 Fsconnected(Conv* c, char* msg)
 {
 	if(msg != nil && *msg != '\0')
-		kstrcpy(c->cerr, msg, sizeof(c->cerr));
+		strncpy(c->cerr, msg, ERRMAX-1);
 
 	switch(c->state){
 
@@ -1368,12 +1399,19 @@
 	for(l = &c->incall; *l; l = &(*l)->next)
 		i++;
 	if(i >= Maxincall) {
+		static int beenhere;
+
 		qunlock(c);
+		if (!beenhere) {
+			beenhere = 1;
+			print("Fsnewcall: incall queue full (%d) on port %d\n",
+				i, c->lport);
+		}
 		return nil;
 	}
 
 	/* find a free conversation */
-	nc = Fsprotoclone(c->p, network);
+	nc = Fsprotoclone(c->p, c->owner);
 	if(nc == nil) {
 		qunlock(c);
 		return nil;
@@ -1394,12 +1432,12 @@
 	return nc;
 }
 
-static long
+long
 ndbwrite(Fs *f, char *a, ulong off, int n)
 {
 	if(off > strlen(f->ndb))
 		error(Eio);
-	if(off+n >= sizeof(f->ndb)-1)
+	if(off+n >= sizeof(f->ndb))
 		error(Eio);
 	memmove(f->ndb+off, a, n);
 	f->ndb[off+n] = 0;
@@ -1411,7 +1449,7 @@
 ulong
 scalednconv(void)
 {
-	if(conf.npage*BY2PG >= 128*MB)
+	if(cpuserver && conf.npage*BY2PG >= 128*MB)
 		return Nchans*4;
 	return Nchans;
 }
diff -u a/os/ip//esp.c b/os/ip//esp.c
--- a/os/ip//esp.c
+++ b/os/ip//esp.c
@@ -1,3 +1,11 @@
+/*
+ * Encapsulating Security Payload (ESP) for IPsec over IPv4, RFC 1827,
+ * extended to IPv6.
+ * RFC 2104 defines the HMAC computation.
+ *	Currently only tunnel mode is implemented.
+ * TODO: verify the AES algorithms;
+ *	implement transport mode (host-to-host).
+ */
 #include	"u.h"
 #include	"../port/lib.h"
 #include	"mem.h"
@@ -6,47 +14,79 @@
 #include	"../port/error.h"
 
 #include	"ip.h"
+#include	"ipv6.h"
+#include	<libsec.h>
 
-#include	"libsec.h"
+#define BITS2BYTES(bi) (((bi) + BI2BY - 1) / BI2BY)
+#define BYTES2BITS(by)  ((by) * BI2BY)
 
+typedef struct Algorithm Algorithm;
+typedef struct Esp4hdr Esp4hdr;
+typedef struct Esp6hdr Esp6hdr;
+typedef struct Espcb Espcb;
 typedef struct Esphdr Esphdr;
+typedef struct Esppriv Esppriv;
 typedef struct Esptail Esptail;
 typedef struct Userhdr Userhdr;
-typedef struct Esppriv Esppriv;
-typedef struct Espcb Espcb;
-typedef struct Algorithm Algorithm;
-typedef struct Esprc4 Esprc4;
 
-#define DPRINT if(0)print
+enum {
+	Encrypt,
+	Decrypt,
 
-enum
-{
-	IP_ESPPROTO	= 50,
-	EsphdrSize	= 28,	// includes IP header
-	IphdrSize	= 20,	// options have been striped
-	EsptailSize	= 2,	// does not include pad or auth data
-	UserhdrSize	= 4,	// user visable header size - if enabled
+	IP_ESPPROTO	= 50,	/* IP v4 and v6 protocol number */
+	Esp4hdrlen	= IP4HDR + 8,
+	Esp6hdrlen	= IP6HDR + 8,
+
+	Esptaillen	= 2,	/* does not include pad or auth data */
+	Userhdrlen	= 4,	/* user-visible header size - if enabled */
+
+	Desblk	 = BITS2BYTES(64),
+	Des3keysz = BITS2BYTES(192),
+
+	Aesblk	 = BITS2BYTES(128),
+	Aeskeysz = BITS2BYTES(128),
 };
 
 struct Esphdr
 {
-	/* ip header */
+	uchar	espspi[4];	/* Security parameter index */
+	uchar	espseq[4];	/* Sequence number */
+	uchar	payload[];
+};
+
+/*
+ * tunnel-mode (network-to-network, etc.) layout is:
+ * new IP hdrs | ESP hdr |
+ *	 enc { orig IP hdrs | TCP/UDP hdr | user data | ESP trailer } | ESP ICV
+ *
+ * transport-mode (host-to-host) layout would be:
+ *	orig IP hdrs | ESP hdr |
+ *			enc { TCP/UDP hdr | user data | ESP trailer } | ESP ICV
+ */
+struct Esp4hdr
+{
+	/* ipv4 header */
 	uchar	vihl;		/* Version and header length */
 	uchar	tos;		/* Type of service */
 	uchar	length[2];	/* packet length */
 	uchar	id[2];		/* Identification */
 	uchar	frag[2];	/* Fragment information */
-	uchar	Unused;	
+	uchar	Unused;
 	uchar	espproto;	/* Protocol */
 	uchar	espplen[2];	/* Header plus data length */
 	uchar	espsrc[4];	/* Ip source */
 	uchar	espdst[4];	/* Ip destination */
 
-	/* esp header */
-	uchar	espspi[4];	/* Security parameter index */
-	uchar	espseq[4];	/* Sequence number */
+	Esphdr;
 };
 
+/* tunnel-mode layout */
+struct Esp6hdr
+{
+	IPV6HDR;
+	Esphdr;
+};
+
 struct Esptail
 {
 	uchar	pad;
@@ -53,16 +93,28 @@
 	uchar	nexthdr;
 };
 
+/* IP-version-dependent data */
+typedef struct Versdep Versdep;
+struct Versdep
+{
+	ulong	version;
+	ulong	iphdrlen;
+	ulong	hdrlen;		/* iphdrlen + esp hdr len */
+	ulong	spi;
+	uchar	laddr[IPaddrlen];
+	uchar	raddr[IPaddrlen];
+};
+
 /* header as seen by the user */
 struct Userhdr
 {
-	uchar	nexthdr;	// next protocol
+	uchar	nexthdr;	/* next protocol */
 	uchar	unused[3];
 };
 
 struct Esppriv
 {
-	ulong	in;
+	uvlong	in;
 	ulong	inerrors;
 };
 
@@ -72,77 +124,68 @@
 struct Espcb
 {
 	int	incoming;
-	int	header;		// user user level header
+	int	header;		/* user-level header */
 	ulong	spi;
-	ulong	seq;		// last seq sent
-	ulong	window;		// for replay attacks
+	ulong	seq;		/* last seq sent */
+	ulong	window;		/* for replay attacks */
+
 	char	*espalg;
-	void	*espstate;	// other state for esp
-	int	espivlen;	// in bytes
+	void	*espstate;	/* other state for esp */
+	int	espivlen;	/* in bytes */
 	int	espblklen;
 	int	(*cipher)(Espcb*, uchar *buf, int len);
+
 	char	*ahalg;
-	void	*ahstate;	// other state for esp
-	int	ahlen;		// auth data length in bytes
+	void	*ahstate;	/* other state for ah (auth) */
+	int	ahlen;		/* auth data length in bytes */
 	int	ahblklen;
 	int	(*auth)(Espcb*, uchar *buf, int len, uchar *hash);
+	DigestState *ds;
 };
 
 struct Algorithm
 {
 	char 	*name;
-	int	keylen;		// in bits
-	void	(*init)(Espcb*, char* name, uchar *key, int keylen);
+	int	keylen;		/* in bits */
+	void	(*init)(Espcb*, char* name, uchar *key, unsigned keylen);
 };
 
-
-enum {
-	RC4forward	= 10*1024*1024,	// maximum skip forward
-	RC4back = 100*1024,		// maximum look back
-};
-
-struct Esprc4
-{
-	ulong cseq;	// current byte sequence number
-	RC4state current;
-
-	int ovalid;	// old is valid
-	ulong lgseq; // last good sequence
-	ulong oseq;	// old byte sequence number
-	RC4state old;
-};
-
 static	Conv* convlookup(Proto *esp, ulong spi);
 static	char *setalg(Espcb *ecb, char **f, int n, Algorithm *alg);
-static	void nullespinit(Espcb*, char*, uchar *key, int keylen);
-static	void nullahinit(Espcb*, char*, uchar *key, int keylen);
-static	void shaahinit(Espcb*, char*, uchar *key, int keylen);
-static	void md5ahinit(Espcb*, char*, uchar *key, int keylen);
-static	void desespinit(Espcb *ecb, char *name, uchar *k, int n);
-static	void rc4espinit(Espcb *ecb, char *name, uchar *k, int n);
 static	void espkick(void *x);
 
+static	void nullespinit(Espcb*, char*, uchar *key, unsigned keylen);
+static	void des3espinit(Espcb*, char*, uchar *key, unsigned keylen);
+static	void aescbcespinit(Espcb*, char*, uchar *key, unsigned keylen);
+static	void aesctrespinit(Espcb*, char*, uchar *key, unsigned keylen);
+static	void desespinit(Espcb *ecb, char *name, uchar *k, unsigned n);
+
+static	void nullahinit(Espcb*, char*, uchar *key, unsigned keylen);
+static	void shaahinit(Espcb*, char*, uchar *key, unsigned keylen);
+static	void md5ahinit(Espcb*, char*, uchar *key, unsigned keylen);
+
 static Algorithm espalg[] =
 {
-	"null",			0,	nullespinit,
-	"des_56_cbc",		64,	desespinit,
-	"rc4_128",		128,	rc4espinit,
-	nil,			0,	nil,
+	"null",		0,	nullespinit,
+	"des3_cbc",	192,	des3espinit,	/* new rfc2451, des-ede3 */
+	"aes_128_cbc",	128,	aescbcespinit,	/* new rfc3602 */
+	"aes_ctr",	128,	aesctrespinit,	/* new rfc3686 */
+	"des_56_cbc",	64,	desespinit,	/* rfc2405, deprecated */
+	nil,		0,	nil,
 };
 
 static Algorithm ahalg[] =
 {
-	"null",			0,	nullahinit,
-	"hmac_sha1_96",		128,	shaahinit,
-	"hmac_md5_96",		128,	md5ahinit,
-	nil,			0,	nil,
+	"null",		0,	nullahinit,
+	"hmac_sha1_96",	128,	shaahinit,	/* rfc2404 */
+	"hmac_md5_96",	128,	md5ahinit,	/* rfc2403 */
+	nil,		0,	nil,
 };
 
 static char*
 espconnect(Conv *c, char **argv, int argc)
 {
-	char *p, *pp;
-	char *e = nil;
+	char *p, *pp, *e = nil;
 	ulong spi;
 	Espcb *ecb = (Espcb*)c->ptcl;
 
@@ -157,7 +200,10 @@
 			break;
 		}
 		*p++ = 0;
-		parseip(c->raddr, argv[1]);
+		if (parseip(c->raddr, argv[1]) == -1) {
+			e = Ebadip;
+			break;
+		}
 		findlocalip(c->p->f, c->laddr, c->raddr);
 		ecb->incoming = 0;
 		ecb->seq = 0;
@@ -215,26 +261,86 @@
 	ipmove(c->raddr, IPnoaddr);
 
 	ecb = (Espcb*)c->ptcl;
-	free(ecb->espstate);
-	free(ecb->ahstate);
+	secfree(ecb->espstate);
+	secfree(ecb->ahstate);
 	memset(ecb, 0, sizeof(Espcb));
 }
 
+static int
+pktipvers(Fs *f, Block **bpp)	/* IP version (V4 or V6) of the packet in *bpp; 0 if it cannot be pulled up */
+{
+	if (*bpp == nil || BLEN(*bpp) == 0) {
+		/* get enough to identify the IP version; pullupblock's result replaces the caller's *bpp */
+		*bpp = pullupblock(*bpp, IP4HDR);
+		if(*bpp == nil) {
+			netlog(f, Logesp, "esp: short packet\n");
+			return 0;	/* NOTE(review): callers pass this straight to getverslens, whose default case panics (presumably 0 is neither V4 nor V6) */
+		}
+	}
+	return (((Esp4hdr*)(*bpp)->rp)->vihl & 0xf0) == IP_VER4? V4: V6;	/* any version nibble other than IP_VER4 is reported as V6 */
+}
+
 static void
+getverslens(int version, Versdep *vp)	/* record version and its version-dependent header lengths in *vp */
+{
+	vp->version = version;
+	switch(vp->version) {
+	case V4:
+		vp->iphdrlen = IP4HDR;	/* espkick starts the auth hash at bp->rp + iphdrlen */
+		vp->hdrlen   = Esp4hdrlen;	/* espkick/espiput run the cipher from bp->rp + hdrlen */
+		break;
+	case V6:
+		vp->iphdrlen = IP6HDR;
+		vp->hdrlen   = Esp6hdrlen;
+		break;
+	default:	/* any other version is fatal; vp's lengths are left unset */
+		panic("esp: getverslens version %d wrong", version);
+	}
+}
+
+static void
+getpktspiaddrs(uchar *pkt, Versdep *vp)	/* fill vp->raddr, vp->laddr and vp->spi from the IP+ESP header at pkt */
+{
+	Esp4hdr *eh4;
+	Esp6hdr *eh6;
+
+	switch(vp->version) {	/* reads vp->version, so it must be set before this call */
+	case V4:
+		eh4 = (Esp4hdr*)pkt;
+		v4tov6(vp->raddr, eh4->espsrc);	/* packet source is stored as raddr, in v6 form */
+		v4tov6(vp->laddr, eh4->espdst);	/* packet destination is stored as laddr, in v6 form */
+		vp->spi = nhgetl(eh4->espspi);
+		break;
+	case V6:
+		eh6 = (Esp6hdr*)pkt;
+		ipmove(vp->raddr, eh6->src);	/* same mapping as V4: src -> raddr */
+		ipmove(vp->laddr, eh6->dst);	/* dst -> laddr */
+		vp->spi = nhgetl(eh6->espspi);
+		break;
+	default:	/* any other version is fatal */
+		panic("esp: getpktspiaddrs vp->version %ld wrong", vp->version);
+	}
+}
+
+/*
+ * encapsulate next IP packet on x's write queue in IP/ESP packet
+ * and initiate output of the result.
+ */
+static void
 espkick(void *x)
 {
+	int nexthdr, payload, pad, align;
+	uchar *auth;
+	Block *bp;
 	Conv *c = x;
-	Esphdr *eh;
+	Esp4hdr *eh4;
+	Esp6hdr *eh6;
+	Espcb *ecb;
 	Esptail *et;
 	Userhdr *uh;
-	Espcb *ecb;
-	Block *bp;
-	int nexthdr;
-	int payload;
-	int pad;
-	int align;
-	uchar *auth;
+	Versdep vers;
 
+	getverslens(convipvers(c), &vers);
 	bp = qget(c->wq);
 	if(bp == nil)
 		return;
@@ -244,7 +350,7 @@
 
 	if(ecb->header) {
 		/* make sure the message has a User header */
-		bp = pullupblock(bp, UserhdrSize);
+		bp = pullupblock(bp, Userhdrlen);
 		if(bp == nil) {
 			qunlock(c);
 			return;
@@ -251,15 +357,16 @@
 		}
 		uh = (Userhdr*)bp->rp;
 		nexthdr = uh->nexthdr;
-		bp->rp += UserhdrSize;
+		bp->rp += Userhdrlen;
 	} else {
-		nexthdr = 0;  // what should this be?
+		nexthdr = 0;	/* what should this be? */
 	}
 
 	payload = BLEN(bp) + ecb->espivlen;
 
 	/* Make space to fit ip header */
-	bp = padblock(bp, EsphdrSize + ecb->espivlen);
+	bp = padblock(bp, vers.hdrlen + ecb->espivlen);
+	getpktspiaddrs(bp->rp, &vers);
 
 	align = 4;
 	if(ecb->espblklen > align)
@@ -266,7 +373,7 @@
 		align = ecb->espblklen;
 	if(align % ecb->ahblklen != 0)
 		panic("espkick: ahblklen is important after all");
-	pad = (align-1) - (payload + EsptailSize-1)%align;
+	pad = (align-1) - (payload + Esptaillen-1)%align;
 
 	/*
 	 * Make space for tail
@@ -273,70 +380,88 @@
 	 * this is done by calling padblock with a negative size
 	 * Padblock does not change bp->wp!
 	 */
-	bp = padblock(bp, -(pad+EsptailSize+ecb->ahlen));
-	bp->wp += pad+EsptailSize+ecb->ahlen;
+	bp = padblock(bp, -(pad+Esptaillen+ecb->ahlen));
+	bp->wp += pad+Esptaillen+ecb->ahlen;
 
-	eh = (Esphdr *)(bp->rp);
-	et = (Esptail*)(bp->rp + EsphdrSize + payload + pad);
+	et = (Esptail*)(bp->rp + vers.hdrlen + payload + pad);
 
-	// fill in tail
+	/* fill in tail */
 	et->pad = pad;
 	et->nexthdr = nexthdr;
 
-	ecb->cipher(ecb, bp->rp+EsphdrSize, payload+pad+EsptailSize);
-	auth = bp->rp + EsphdrSize + payload + pad + EsptailSize;
+	/* encrypt the payload */
+	ecb->cipher(ecb, bp->rp + vers.hdrlen, payload + pad + Esptaillen);
+	auth = bp->rp + vers.hdrlen + payload + pad + Esptaillen;
 
-	// fill in head
-	eh->vihl = IP_VER4;
-	hnputl(eh->espspi, ecb->spi);
-	hnputl(eh->espseq, ++ecb->seq);
-	v6tov4(eh->espsrc, c->laddr);
-	v6tov4(eh->espdst, c->raddr);
-	eh->espproto = IP_ESPPROTO;
-	eh->frag[0] = 0;
-	eh->frag[1] = 0;
+	/* fill in head; construct a new IP header and an ESP header */
+	if (vers.version == V4) {
+		eh4 = (Esp4hdr *)bp->rp;
+		eh4->vihl = IP_VER4;
+		v6tov4(eh4->espsrc, c->laddr);
+		v6tov4(eh4->espdst, c->raddr);
+		eh4->espproto = IP_ESPPROTO;
+		eh4->frag[0] = 0;
+		eh4->frag[1] = 0;
 
-	ecb->auth(ecb, bp->rp+IphdrSize, (EsphdrSize-IphdrSize)+payload+pad+EsptailSize, auth);
+		hnputl(eh4->espspi, ecb->spi);
+		hnputl(eh4->espseq, ++ecb->seq);
+	} else {
+		eh6 = (Esp6hdr *)bp->rp;
+		eh6->vcf[0] = IP_VER6;
+		ipmove(eh6->src, c->laddr);
+		ipmove(eh6->dst, c->raddr);
+		eh6->proto = IP_ESPPROTO;
 
+		hnputl(eh6->espspi, ecb->spi);
+		hnputl(eh6->espseq, ++ecb->seq);
+	}
+
+	/* compute secure hash */
+	ecb->auth(ecb, bp->rp + vers.iphdrlen, (vers.hdrlen - vers.iphdrlen) +
+		payload + pad + Esptaillen, auth);
+
 	qunlock(c);
-	//print("esp: pass down: %uld\n", BLEN(bp));
-	ipoput4(c->p->f, bp, 0, c->ttl, c->tos, c);
+	/* print("esp: pass down: %uld\n", BLEN(bp)); */
+	if (vers.version == V4)
+		ipoput4(c->p->f, bp, 0, c->ttl, c->tos, c);
+	else
+		ipoput6(c->p->f, bp, 0, c->ttl, c->tos, c);
 }
 
+/*
+ * decapsulate IP packet from IP/ESP packet in bp and
+ * pass the result up the spi's Conv's read queue.
+ */
 void
 espiput(Proto *esp, Ipifc*, Block *bp)
 {
-	Esphdr *eh;
-	Esptail *et;
-	Userhdr *uh;
+	int payload, nexthdr;
+	uchar *auth, *espspi;
 	Conv *c;
 	Espcb *ecb;
-	uchar raddr[IPaddrlen], laddr[IPaddrlen];
+	Esptail *et;
 	Fs *f;
-	uchar *auth;
-	ulong spi;
-	int payload, nexthdr;
+	Userhdr *uh;
+	Versdep vers;
 
 	f = esp->f;
 
-	bp = pullupblock(bp, EsphdrSize+EsptailSize);
+	getverslens(pktipvers(f, &bp), &vers);
+
+	bp = pullupblock(bp, vers.hdrlen + Esptaillen);
 	if(bp == nil) {
 		netlog(f, Logesp, "esp: short packet\n");
 		return;
 	}
+	getpktspiaddrs(bp->rp, &vers);
 
-	eh = (Esphdr*)(bp->rp);
-	spi = nhgetl(eh->espspi);
-	v4tov6(raddr, eh->espsrc);
-	v4tov6(laddr, eh->espdst);
-
 	qlock(esp);
 	/* Look for a conversation structure for this port */
-	c = convlookup(esp, spi);
+	c = convlookup(esp, vers.spi);
 	if(c == nil) {
 		qunlock(esp);
-		netlog(f, Logesp, "esp: no conv %I -> %I!%d\n", raddr,
-			laddr, spi);
+		netlog(f, Logesp, "esp: no conv %I -> %I!%lud\n", vers.raddr,
+			vers.laddr, vers.spi);
 		icmpnoconv(f, bp);
 		freeblist(bp);
 		return;
@@ -346,76 +471,83 @@
 	qunlock(esp);
 
 	ecb = c->ptcl;
-	// too hard to do decryption/authentication on block lists
-	if(bp->next)
+	/* too hard to do decryption/authentication on block lists */
+	if(bp->next != nil)
 		bp = concatblock(bp);
 
-	if(BLEN(bp) < EsphdrSize + ecb->espivlen + EsptailSize + ecb->ahlen) {
+	if(BLEN(bp) < vers.hdrlen + ecb->espivlen + Esptaillen + ecb->ahlen) {
 		qunlock(c);
-		netlog(f, Logesp, "esp: short block %I -> %I!%d\n", raddr,
-			laddr, spi);
+		netlog(f, Logesp, "esp: short block %I -> %I!%lud\n", vers.raddr,
+			vers.laddr, vers.spi);
 		freeb(bp);
 		return;
 	}
 
-	eh = (Esphdr*)(bp->rp);
 	auth = bp->wp - ecb->ahlen;
-	if(!ecb->auth(ecb, eh->espspi, auth-eh->espspi, auth)) {
+	espspi = vers.version == V4?	((Esp4hdr*)bp->rp)->espspi:
+					((Esp6hdr*)bp->rp)->espspi;
+
+	/* compute secure hash and authenticate */
+	if(!ecb->auth(ecb, espspi, auth - espspi, auth)) {
 		qunlock(c);
-print("esp: bad auth %I -> %I!%ld\n", raddr, laddr, spi);
-		netlog(f, Logesp, "esp: bad auth %I -> %I!%d\n", raddr,
-			laddr, spi);
+print("esp: bad auth %I -> %I!%ld\n", vers.raddr, vers.laddr, vers.spi);
+		netlog(f, Logesp, "esp: bad auth %I -> %I!%lud\n", vers.raddr,
+			vers.laddr, vers.spi);
 		freeb(bp);
 		return;
 	}
 
-	payload = BLEN(bp)-EsphdrSize-ecb->ahlen;
-	if(payload<=0 || payload%4 != 0 || payload%ecb->espblklen!=0) {
+	payload = BLEN(bp) - vers.hdrlen - ecb->ahlen;
+	if(payload <= 0 || payload % 4 != 0 || payload % ecb->espblklen != 0) {
 		qunlock(c);
-		netlog(f, Logesp, "esp: bad length %I -> %I!%d payload=%d BLEN=%d\n", raddr,
-			laddr, spi, payload, BLEN(bp));
+		netlog(f, Logesp, "esp: bad length %I -> %I!%lud payload=%d BLEN=%zd\n",
+			vers.raddr, vers.laddr, vers.spi, payload, BLEN(bp));
 		freeb(bp);
 		return;
 	}
-	if(!ecb->cipher(ecb, bp->rp+EsphdrSize, payload)) {
+
+	/* decrypt payload */
+	if(!ecb->cipher(ecb, bp->rp + vers.hdrlen, payload)) {
 		qunlock(c);
-print("esp: cipher failed %I -> %I!%ld: %r\n", raddr, laddr, spi);
-		netlog(f, Logesp, "esp: cipher failed %I -> %I!%d: %r\n", raddr,
-			laddr, spi);
+print("esp: cipher failed %I -> %I!%ld: %s\n", vers.raddr, vers.laddr, vers.spi, up->errstr);
+		netlog(f, Logesp, "esp: cipher failed %I -> %I!%lud: %s\n",
+			vers.raddr, vers.laddr, vers.spi, up->errstr);
 		freeb(bp);
 		return;
 	}
 
-	payload -= EsptailSize;
-	et = (Esptail*)(bp->rp + EsphdrSize + payload);
+	payload -= Esptaillen;
+	et = (Esptail*)(bp->rp + vers.hdrlen + payload);
 	payload -= et->pad + ecb->espivlen;
 	nexthdr = et->nexthdr;
 	if(payload <= 0) {
 		qunlock(c);
-		netlog(f, Logesp, "esp: short packet after decrypt %I -> %I!%d\n", raddr,
-			laddr, spi);
+		netlog(f, Logesp, "esp: short packet after decrypt %I -> %I!%lud\n",
+			vers.raddr, vers.laddr, vers.spi);
 		freeb(bp);
 		return;
 	}
 
-	// trim packet
-	bp->rp += EsphdrSize + ecb->espivlen;
+	/* trim packet */
+	bp->rp += vers.hdrlen + ecb->espivlen; /* toss original IP & ESP hdrs */
 	bp->wp = bp->rp + payload;
 	if(ecb->header) {
-		// assume UserhdrSize < EsphdrSize
-		bp->rp -= UserhdrSize;
+		/* assume Userhdrlen < Esp4hdrlen < Esp6hdrlen */
+		bp->rp -= Userhdrlen;
 		uh = (Userhdr*)bp->rp;
-		memset(uh, 0, UserhdrSize);
+		memset(uh, 0, Userhdrlen);
 		uh->nexthdr = nexthdr;
 	}
 
+	/* ingress filtering here? */
+
 	if(qfull(c->rq)){
-		netlog(f, Logesp, "esp: qfull %I -> %I.%uld\n", raddr,
-			laddr, spi);
+		netlog(f, Logesp, "esp: qfull %I -> %I.%uld\n", vers.raddr,
+			vers.laddr, vers.spi);
 		freeblist(bp);
 	}else {
-//print("esp: pass up: %uld\n", BLEN(bp));
-		qpass(c->rq, bp);
+//		print("esp: pass up: %uld\n", BLEN(bp));
+		qpass(c->rq, bp);	/* pass packet up the read queue */
 	}
 
 	qunlock(c);
@@ -440,19 +572,19 @@
 	return e;
 }
 
+/* called from icmp(v6) for unreachable hosts, time exceeded, etc. */
 void
 espadvise(Proto *esp, Block *bp, char *msg)
 {
-	Esphdr *h;
 	Conv *c;
-	ulong spi;
+	Versdep vers;
 
-	h = (Esphdr*)(bp->rp);
+	getverslens(pktipvers(esp->f, &bp), &vers);
+	getpktspiaddrs(bp->rp, &vers);
 
-	spi = nhgets(h->espspi);
 	qlock(esp);
-	c = convlookup(esp, spi);
-	if(c != nil) {
+	c = convlookup(esp, vers.spi);
+	if(c != nil && !c->ignoreadvice) {
 		qhangup(c->rq, msg);
 		qhangup(c->wq, msg);
 	}
@@ -466,7 +598,7 @@
 	Esppriv *upriv;
 
 	upriv = esp->priv;
-	return snprint(buf, len, "%lud %lud\n",
+	return snprint(buf, len, "%llud %lud\n",
 		upriv->in,
 		upriv->inerrors);
 }
@@ -520,10 +652,10 @@
 setalg(Espcb *ecb, char **f, int n, Algorithm *alg)
 {
 	uchar *key;
-	int i, nbyte, nchar;
-	int c;
+	int c, nbyte, nchar;
+	uint i;
 
-	if(n < 2)
+	if(n < 2 || n > 3)
 		return "bad format";
 	for(; alg->name; alg++)
 		if(strcmp(f[1], alg->name) == 0)
@@ -531,10 +663,14 @@
 	if(alg->name == nil)
 		return "unknown algorithm";
 
-	if(n != 3)
-		return "bad format";
 	nbyte = (alg->keylen + 7) >> 3;
-	nchar = strlen(f[2]);
+	if (n == 2)
+		nchar = 0;
+	else
+		nchar = strlen(f[2]);
+	if(nchar != 2 * nbyte)			/* TODO: maybe < is ok */
+		return "key not required length";
+	/* convert hex digits from ascii, in place */
 	for(i=0; i<nchar; i++) {
 		c = f[2][i];
 		if(c >= '0' && c <= '9')
@@ -544,21 +680,27 @@
 		else if(c >= 'A' && c <= 'F')
 			f[2][i] -= 'A'-10;
 		else
-			return "bad character in key";
+			return "non-hex character in key";
 	}
-	key = smalloc(nbyte);
-	for(i=0; i<nchar && i*2<nbyte; i++) {
+	/* collapse hex digits into complete bytes in reverse order in key */
+	key = secalloc(nbyte);
+	for(i = 0; i < nchar && i/2 < nbyte; i++) {
 		c = f[2][nchar-i-1];
 		if(i&1)
 			c <<= 4;
-		key[i>>1] |= c;
+		key[i/2] |= c;
 	}
-
+	memset(f[2], 0, nchar);
 	alg->init(ecb, alg->name, key, alg->keylen);
-	free(key);
+	secfree(key);
 	return nil;
 }
 
+
+/*
+ * null encryption
+ */
+
 static int
 nullcipher(Espcb*, uchar*, int)
 {
@@ -566,7 +708,7 @@
 }
 
 static void
-nullespinit(Espcb *ecb, char *name, uchar*, int)
+nullespinit(Espcb *ecb, char *name, uchar*, unsigned)
 {
 	ecb->espalg = name;
 	ecb->espblklen = 1;
@@ -581,7 +723,7 @@
 }
 
 static void
-nullahinit(Espcb *ecb, char *name, uchar*, int)
+nullahinit(Espcb *ecb, char *name, uchar*, unsigned)
 {
 	ecb->ahalg = name;
 	ecb->ahblklen = 1;
@@ -589,26 +731,28 @@
 	ecb->auth = nullauth;
 }
 
-void
+
+/*
+ * sha1
+ */
+
+static void
 seanq_hmac_sha1(uchar hash[SHA1dlen], uchar *t, long tlen, uchar *key, long klen)
 {
-	uchar ipad[65], opad[65];
 	int i;
+	uchar ipad[Hmacblksz+1], opad[Hmacblksz+1], innerhash[SHA1dlen];
 	DigestState *digest;
-	uchar innerhash[SHA1dlen];
 
-	for(i=0; i<64; i++){
-		ipad[i] = 0x36;
-		opad[i] = 0x5c;
-	}
-	ipad[64] = opad[64] = 0;
-	for(i=0; i<klen; i++){
+	memset(ipad, 0x36, Hmacblksz);
+	memset(opad, 0x5c, Hmacblksz);
+	ipad[Hmacblksz] = opad[Hmacblksz] = 0;
+	for(i = 0; i < klen; i++){
 		ipad[i] ^= key[i];
 		opad[i] ^= key[i];
 	}
-	digest = sha1(ipad, 64, nil, nil);
+	digest = sha1(ipad, Hmacblksz, nil, nil);
 	sha1(t, tlen, innerhash, digest);
-	digest = sha1(opad, 64, nil, nil);
+	digest = sha1(opad, Hmacblksz, nil, nil);
 	sha1(innerhash, SHA1dlen, hash, digest);
 }
 
@@ -615,11 +759,11 @@
 static int
 shaauth(Espcb *ecb, uchar *t, int tlen, uchar *auth)
 {
-	uchar hash[SHA1dlen];
 	int r;
+	uchar hash[SHA1dlen];
 
 	memset(hash, 0, SHA1dlen);
-	seanq_hmac_sha1(hash, t, tlen, (uchar*)ecb->ahstate, 16);
+	seanq_hmac_sha1(hash, t, tlen, (uchar*)ecb->ahstate, BITS2BYTES(128));
 	r = memcmp(auth, hash, ecb->ahlen) == 0;
 	memmove(auth, hash, ecb->ahlen);
 	return r;
@@ -626,40 +770,162 @@
 }
 
 static void
-shaahinit(Espcb *ecb, char *name, uchar *key, int klen)
+shaahinit(Espcb *ecb, char *name, uchar *key, unsigned klen)
 {
 	if(klen != 128)
 		panic("shaahinit: bad keylen");
-	klen >>= 8;	// convert to bytes
+	klen /= BI2BY;
 
 	ecb->ahalg = name;
 	ecb->ahblklen = 1;
-	ecb->ahlen = 12;
+	ecb->ahlen = BITS2BYTES(96);
 	ecb->auth = shaauth;
-	ecb->ahstate = smalloc(klen);
+	ecb->ahstate = secalloc(klen);
 	memmove(ecb->ahstate, key, klen);
 }
 
-void
+
+/*
+ * aes
+ */
+static int
+aescbccipher(Espcb *ecb, uchar *p, int n)	/* 128-bit blocks; CBC in place over p[0..n), first block is the IV; always returns 1 */
+{
+	uchar tmp[AESbsize], q[AESbsize];
+	uchar *pp, *tp, *ip, *eip, *ep;
+	AESstate *ds = ecb->espstate;
+
+	ep = p + n;	/* loops below step by AESbsize; assumes n is a multiple of it -- TODO confirm callers guarantee this */
+	if(ecb->incoming) {
+		memmove(ds->ivec, p, AESbsize);	/* decrypt: take the IV from the first block of the packet */
+		p += AESbsize;
+		while(p < ep){
+			memmove(tmp, p, AESbsize);	/* save ciphertext; it becomes the next chaining value */
+			aes_decrypt(ds->dkey, ds->rounds, p, q);
+			memmove(p, q, AESbsize);
+			tp = tmp;
+			ip = ds->ivec;
+			for(eip = ip + AESbsize; ip < eip; ){
+				*p++ ^= *ip;	/* plaintext = decrypted block xor previous chaining value */
+				*ip++ = *tp++;	/* ivec = this block's ciphertext */
+			}
+		}
+	} else {
+		memmove(p, ds->ivec, AESbsize);	/* encrypt: emit the current ivec as the first block */
+		for(p += AESbsize; p < ep; p += AESbsize){
+			pp = p;
+			ip = ds->ivec;
+			for(eip = ip + AESbsize; ip < eip; )
+				*pp++ ^= *ip++;	/* xor plaintext with chaining value before encrypting */
+			aes_encrypt(ds->ekey, ds->rounds, p, q);
+			memmove(ds->ivec, q, AESbsize);	/* ciphertext is the next chaining value (and carries over to the next packet) */
+			memmove(p, q, AESbsize);
+		}
+	}
+	return 1;
+}
+
+static void
+aescbcespinit(Espcb *ecb, char *name, uchar *k, unsigned n)	/* set up ecb for AES-CBC; n is the key length in bits */
+{
+	uchar key[Aeskeysz], ivec[Aeskeysz];
+
+	n = BITS2BYTES(n);	/* from here on n counts bytes */
+	if(n > Aeskeysz)
+		n = Aeskeysz;	/* never copy more than key[] holds */
+	memset(key, 0, sizeof(key));
+	memmove(key, k, n);
+	prng(ivec, Aeskeysz);	/* initial ivec comes from prng */
+	ecb->espalg = name;
+	ecb->espblklen = Aesblk;
+	ecb->espivlen = Aesblk;
+	ecb->cipher = aescbccipher;
+	ecb->espstate = secalloc(sizeof(AESstate));	/* released with secfree when the conversation is closed */
+	setupAESstate(ecb->espstate, key, n /* keybytes */, ivec);
+	memset(ivec, 0, sizeof(ivec));	/* scrub the local copies of ivec and key */
+	memset(key, 0, sizeof(key));
+}
+
+static int
+aesctrcipher(Espcb *ecb, uchar *p, int n)	/* 128-bit blocks; NOTE(review): body is line-for-line identical to aescbccipher, i.e. CBC chaining, not counter mode */
+{
+	uchar tmp[AESbsize], q[AESbsize];
+	uchar *pp, *tp, *ip, *eip, *ep;
+	AESstate *ds = ecb->espstate;
+
+	ep = p + n;	/* loops below step by AESbsize; assumes n is a multiple of it -- TODO confirm callers guarantee this */
+	if(ecb->incoming) {
+		memmove(ds->ivec, p, AESbsize);	/* decrypt: take the IV from the first block of the packet */
+		p += AESbsize;
+		while(p < ep){
+			memmove(tmp, p, AESbsize);	/* save ciphertext; it becomes the next chaining value */
+			aes_decrypt(ds->dkey, ds->rounds, p, q);
+			memmove(p, q, AESbsize);
+			tp = tmp;
+			ip = ds->ivec;
+			for(eip = ip + AESbsize; ip < eip; ){
+				*p++ ^= *ip;	/* plaintext = decrypted block xor previous chaining value */
+				*ip++ = *tp++;	/* ivec = this block's ciphertext */
+			}
+		}
+	} else {
+		memmove(p, ds->ivec, AESbsize);	/* encrypt: emit the current ivec as the first block */
+		for(p += AESbsize; p < ep; p += AESbsize){
+			pp = p;
+			ip = ds->ivec;
+			for(eip = ip + AESbsize; ip < eip; )
+				*pp++ ^= *ip++;	/* xor plaintext with chaining value before encrypting */
+			aes_encrypt(ds->ekey, ds->rounds, p, q);
+			memmove(ds->ivec, q, AESbsize);	/* ciphertext is the next chaining value */
+			memmove(p, q, AESbsize);
+		}
+	}
+	return 1;	/* never reports failure */
+}
+
+static void
+aesctrespinit(Espcb *ecb, char *name, uchar *k, unsigned n)	/* set up ecb for the "aes_ctr" cipher; n is the key length in bits */
+{
+	uchar key[Aesblk], ivec[Aesblk];	/* NOTE(review): key is sized Aesblk but the copy below is clamped to Aeskeysz; safe only if Aeskeysz <= Aesblk -- TODO confirm */
+
+	n = BITS2BYTES(n);	/* from here on n counts bytes */
+	if(n > Aeskeysz)
+		n = Aeskeysz;
+	memset(key, 0, sizeof(key));
+	memmove(key, k, n);
+	prng(ivec, Aesblk);	/* initial ivec comes from prng */
+	ecb->espalg = name;
+	ecb->espblklen = Aesblk;
+	ecb->espivlen = Aesblk;
+	ecb->cipher = aesctrcipher;
+	ecb->espstate = secalloc(sizeof(AESstate));	/* released with secfree when the conversation is closed */
+	setupAESstate(ecb->espstate, key, n /* keybytes */, ivec);
+	memset(ivec, 0, sizeof(ivec));	/* scrub the local copies of ivec and key */
+	memset(key, 0, sizeof(key));
+}
+
+
+/*
+ * md5
+ */
+
+static void
 seanq_hmac_md5(uchar hash[MD5dlen], uchar *t, long tlen, uchar *key, long klen)
 {
-	uchar ipad[65], opad[65];
 	int i;
+	uchar ipad[Hmacblksz+1], opad[Hmacblksz+1], innerhash[MD5dlen];
 	DigestState *digest;
-	uchar innerhash[MD5dlen];
 
-	for(i=0; i<64; i++){
-		ipad[i] = 0x36;
-		opad[i] = 0x5c;
-	}
-	ipad[64] = opad[64] = 0;
-	for(i=0; i<klen; i++){
+	memset(ipad, 0x36, Hmacblksz);
+	memset(opad, 0x5c, Hmacblksz);
+	ipad[Hmacblksz] = opad[Hmacblksz] = 0;
+	for(i = 0; i < klen; i++){
 		ipad[i] ^= key[i];
 		opad[i] ^= key[i];
 	}
-	digest = md5(ipad, 64, nil, nil);
+	digest = md5(ipad, Hmacblksz, nil, nil);
 	md5(t, tlen, innerhash, digest);
-	digest = md5(opad, 64, nil, nil);
+	digest = md5(opad, Hmacblksz, nil, nil);
 	md5(innerhash, MD5dlen, hash, digest);
 }
 
@@ -670,7 +936,7 @@
 	int r;
 
 	memset(hash, 0, MD5dlen);
-	seanq_hmac_md5(hash, t, tlen, (uchar*)ecb->ahstate, 16);
+	seanq_hmac_md5(hash, t, tlen, (uchar*)ecb->ahstate, BITS2BYTES(128));
 	r = memcmp(auth, hash, ecb->ahlen) == 0;
 	memmove(auth, hash, ecb->ahlen);
 	return r;
@@ -677,168 +943,102 @@
 }
 
 static void
-md5ahinit(Espcb *ecb, char *name, uchar *key, int klen)
+md5ahinit(Espcb *ecb, char *name, uchar *key, unsigned klen)
 {
 	if(klen != 128)
 		panic("md5ahinit: bad keylen");
-	klen >>= 3;	// convert to bytes
-
-
+	klen = BITS2BYTES(klen);
 	ecb->ahalg = name;
 	ecb->ahblklen = 1;
-	ecb->ahlen = 12;
+	ecb->ahlen = BITS2BYTES(96);
 	ecb->auth = md5auth;
-	ecb->ahstate = smalloc(klen);
+	ecb->ahstate = secalloc(klen);
 	memmove(ecb->ahstate, key, klen);
 }
 
+
+/*
+ * des, single and triple
+ */
+
 static int
 descipher(Espcb *ecb, uchar *p, int n)
 {
-	uchar tmp[8];
-	uchar *pp, *tp, *ip, *eip, *ep;
 	DESstate *ds = ecb->espstate;
 
-	ep = p + n;
 	if(ecb->incoming) {
-		memmove(ds->ivec, p, 8);
-		p += 8;
-		while(p < ep){
-			memmove(tmp, p, 8);
-			block_cipher(ds->expanded, p, 1);
-			tp = tmp;
-			ip = ds->ivec;
-			for(eip = ip+8; ip < eip; ){
-				*p++ ^= *ip;
-				*ip++ = *tp++;
-			}
-		}
+		memmove(ds->ivec, p, Desblk);
+		desCBCdecrypt(p + Desblk, n - Desblk, ds);
 	} else {
-		memmove(p, ds->ivec, 8);
-		for(p += 8; p < ep; p += 8){
-			pp = p;
-			ip = ds->ivec;
-			for(eip = ip+8; ip < eip; )
-				*pp++ ^= *ip++;
-			block_cipher(ds->expanded, p, 0);
-			memmove(ds->ivec, p, 8);
-		}
+		memmove(p, ds->ivec, Desblk);
+		desCBCencrypt(p + Desblk, n - Desblk, ds);
 	}
 	return 1;
 }
-	
+
+static int
+des3cipher(Espcb *ecb, uchar *p, int n)	/* triple-DES CBC in place over p[0..n); first Desblk bytes hold the IV; always returns 1 */
+{
+	DES3state *ds = ecb->espstate;
+
+	if(ecb->incoming) {
+		memmove(ds->ivec, p, Desblk);	/* decrypt: load the IV carried at the front of the packet */
+		des3CBCdecrypt(p + Desblk, n - Desblk, ds);	/* then decrypt everything after the IV */
+	} else {
+		memmove(p, ds->ivec, Desblk);	/* encrypt: emit the current ivec as the packet's IV */
+		des3CBCencrypt(p + Desblk, n - Desblk, ds);	/* then encrypt everything after the IV */
+	}
+	return 1;
+}
+
 static void
-desespinit(Espcb *ecb, char *name, uchar *k, int n)
+desespinit(Espcb *ecb, char *name, uchar *k, unsigned n)
 {
-	uchar key[8];
-	uchar ivec[8];
-	int i;
-	
-	// bits to bytes
-	n = (n+7)>>3;
-	if(n > 8)
-		n = 8;
+	uchar key[Desblk], ivec[Desblk];
+
+	n = BITS2BYTES(n);
+	if(n > Desblk)
+		n = Desblk;
 	memset(key, 0, sizeof(key));
 	memmove(key, k, n);
-	for(i=0; i<8; i++)
-		ivec[i] = nrand(256);
+	prng(ivec, Desblk);
 	ecb->espalg = name;
-	ecb->espblklen = 8;
-	ecb->espivlen = 8;
+	ecb->espblklen = Desblk;
+	ecb->espivlen = Desblk;
+
 	ecb->cipher = descipher;
-	ecb->espstate = smalloc(sizeof(DESstate));
+	ecb->espstate = secalloc(sizeof(DESstate));
 	setupDESstate(ecb->espstate, key, ivec);
+	memset(ivec, 0, sizeof(ivec));
+	memset(key, 0, sizeof(key));
 }
 
-static int
-rc4cipher(Espcb *ecb, uchar *p, int n)
+static void
+des3espinit(Espcb *ecb, char *name, uchar *k, unsigned n)
 {
-	Esprc4 *esprc4;
-	RC4state tmpstate;
-	ulong seq;
-	long d, dd;
+	uchar key[3][Desblk], ivec[Desblk];
 
-	if(n < 4)
-		return 0;
+	n = BITS2BYTES(n);
+	if(n > Des3keysz)
+		n = Des3keysz;
+	memset(key, 0, sizeof(key));
+	memmove(key, k, n);
+	prng(ivec, Desblk);
+	ecb->espalg = name;
+	ecb->espblklen = Desblk;
+	ecb->espivlen = Desblk;
 
-	esprc4 = ecb->espstate;
-	if(ecb->incoming) {
-		seq = nhgetl(p);
-		p += 4;
-		n -= 4;
-		d = seq-esprc4->cseq;
-		if(d == 0) {
-			rc4(&esprc4->current, p, n);
-			esprc4->cseq += n;
-			if(esprc4->ovalid) {
-				dd = esprc4->cseq - esprc4->lgseq;
-				if(dd > RC4back)
-					esprc4->ovalid = 0;
-			}
-		} else if(d > 0) {
-print("missing packet: %uld %ld\n", seq, d);
-			// this link is hosed
-			if(d > RC4forward) {
-				strcpy(up->errstr, "rc4cipher: skipped too much");
-				return 0;
-			}
-			esprc4->lgseq = seq;
-			if(!esprc4->ovalid) {
-				esprc4->ovalid = 1;
-				esprc4->oseq = esprc4->cseq;
-				memmove(&esprc4->old, &esprc4->current, sizeof(RC4state));
-			}
-			rc4skip(&esprc4->current, d);
-			rc4(&esprc4->current, p, n);
-			esprc4->cseq = seq+n;
-		} else {
-print("reordered packet: %uld %ld\n", seq, d);
-			dd = seq - esprc4->oseq;
-			if(!esprc4->ovalid || -d > RC4back || dd < 0) {
-				strcpy(up->errstr, "rc4cipher: too far back");
-				return 0;
-			}
-			memmove(&tmpstate, &esprc4->old, sizeof(RC4state));
-			rc4skip(&tmpstate, dd);
-			rc4(&tmpstate, p, n);
-			return 1;
-		}
-
-		// move old state up
-		if(esprc4->ovalid) {
-			dd = esprc4->cseq - RC4back - esprc4->oseq;
-			if(dd > 0) {
-				rc4skip(&esprc4->old, dd);
-				esprc4->oseq += dd;
-			}
-		}
-	} else {
-		hnputl(p, esprc4->cseq);
-		p += 4;
-		n -= 4;
-		rc4(&esprc4->current, p, n);
-		esprc4->cseq += n;
-	}
-	return 1;
+	ecb->cipher = des3cipher;
+	ecb->espstate = secalloc(sizeof(DES3state));
+	setupDES3state(ecb->espstate, key, ivec);
+	memset(ivec, 0, sizeof(ivec));
+	memset(key, 0, sizeof(key));
 }
 
-static void
-rc4espinit(Espcb *ecb, char *name, uchar *k, int n)
-{	
-	Esprc4 *esprc4;
 
-	// bits to bytes
-	n = (n+7)>>3;
-	esprc4 = smalloc(sizeof(Esprc4));
-	memset(esprc4, 0, sizeof(Esprc4));
-	setupRC4state(&esprc4->current, k, n);
-	ecb->espalg = name;
-	ecb->espblklen = 4;
-	ecb->espivlen = 4;
-	ecb->cipher = rc4cipher;
-	ecb->espstate = esprc4;
-}
-	
+/*
+ * interfacing to devip
+ */
 void
 espinit(Fs *fs)
 {
diff -u a/os/ip//ethermedium.c b/os/ip//ethermedium.c
--- a/os/ip//ethermedium.c
+++ b/os/ip//ethermedium.c
@@ -5,9 +5,9 @@
 #include "fns.h"
 #include "../port/error.h"
 
+#include "../port/netif.h"
 #include "ip.h"
 #include "ipv6.h"
-#include "kernel.h"
 
 typedef struct Etherhdr Etherhdr;
 struct Etherhdr
@@ -18,10 +18,10 @@
 };
 
 static uchar ipbroadcast[IPaddrlen] = {
-	0xff,0xff,0xff,0xff,  
-	0xff,0xff,0xff,0xff,  
-	0xff,0xff,0xff,0xff,  
 	0xff,0xff,0xff,0xff,
+	0xff,0xff,0xff,0xff,
+	0xff,0xff,0xff,0xff,
+	0xff,0xff,0xff,0xff,
 };
 
 static uchar etherbroadcast[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
@@ -33,12 +33,12 @@
 static void	etherbwrite(Ipifc *ifc, Block *bp, int version, uchar *ip);
 static void	etheraddmulti(Ipifc *ifc, uchar *a, uchar *ia);
 static void	etherremmulti(Ipifc *ifc, uchar *a, uchar *ia);
+static void	etherareg(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *ip);
 static Block*	multicastarp(Fs *f, Arpent *a, Medium*, uchar *mac);
 static void	sendarp(Ipifc *ifc, Arpent *a);
-static void	sendgarp(Ipifc *ifc, uchar*);
+static void	sendndp(Ipifc *ifc, Arpent *a);
 static int	multicastea(uchar *ea, uchar *ip);
 static void	recvarpproc(void*);
-static void	resolveaddr6(Ipifc *ifc, Arpent *a);
 static void	etherpref2addr(uchar *pref, uchar *ea);
 
 Medium ethermedium =
@@ -53,8 +53,7 @@
 .bwrite=	etherbwrite,
 .addmulti=	etheraddmulti,
 .remmulti=	etherremmulti,
-.ares=		arpenter,
-.areg=		sendgarp,
+.areg=		etherareg,
 .pref2addr=	etherpref2addr,
 };
 
@@ -70,8 +69,7 @@
 .bwrite=	etherbwrite,
 .addmulti=	etheraddmulti,
 .remmulti=	etherremmulti,
-.ares=		arpenter,
-.areg=		sendgarp,
+.areg=		etherareg,
 .pref2addr=	etherpref2addr,
 };
 
@@ -94,9 +92,6 @@
  */
 enum
 {
-	ETARP		= 0x0806,
-	ETIP4		= 0x0800,
-	ETIP6		= 0x86DD,
 	ARPREQUEST	= 1,
 	ARPREPLY	= 2,
 };
@@ -127,128 +122,92 @@
 static void
 etherbind(Ipifc *ifc, int argc, char **argv)
 {
-	Chan *mchan4, *cchan4, *achan, *mchan6, *cchan6;
-	char addr[Maxpath];	//char addr[2*KNAMELEN];
-	char dir[Maxpath];	//char dir[2*KNAMELEN];
-	char *buf;
-	int fd, cfd, n;
-	char *ptr;
+	char addr[Maxpath], dir[Maxpath];
 	Etherrock *er;
+	Chan *c;
+	int n;
 
 	if(argc < 2)
 		error(Ebadarg);
 
-	mchan4 = cchan4 = achan = mchan6 = cchan6 = nil;
-	buf = nil;
+	/*
+	 *  get mac address
+	 */
+	snprint(addr, sizeof(addr), "%s/addr", argv[2]);
+	c = namec(addr, Aopen, OREAD, 0);
 	if(waserror()){
-		if(mchan4 != nil)
-			cclose(mchan4);
-		if(cchan4 != nil)
-			cclose(cchan4);
-		if(achan != nil)
-			cclose(achan);
-		if(mchan6 != nil)
-			cclose(mchan6);
-		if(cchan6 != nil)
-			cclose(cchan6);
-		if(buf != nil)
-			free(buf);
-		nexterror(); 
+		cclose(c);
+		nexterror();
 	}
+	n = devtab[c->type]->read(c, addr, sizeof(addr)-1, 0);
+	if(n < 0)
+		error(Eio);
+	addr[n] = 0;
+	if(parsemac(ifc->mac, addr, sizeof(ifc->mac)) != 6)
+		error("could not find mac address");
+	cclose(c);
+	poperror();
 
+	er = smalloc(sizeof(*er));
+	er->read4p = er->read6p = er->arpp = (void*)-1;
+	er->mchan4 = er->cchan4 = er->mchan6 = er->cchan6 = er->achan = nil;
+	er->f = ifc->conv->p->f;
+
+	if(waserror()){
+		if(er->mchan4 != nil)
+			cclose(er->mchan4);
+		if(er->cchan4 != nil)
+			cclose(er->cchan4);
+		if(er->mchan6 != nil)
+			cclose(er->mchan6);
+		if(er->cchan6 != nil)
+			cclose(er->cchan6);
+		if(er->achan != nil)
+			cclose(er->achan);
+		free(er);
+		nexterror();
+	}
+
 	/*
-	 *  open ip converstation
+	 *  open ipv4 conversation
 	 *
 	 *  the dial will fail if the type is already open on
 	 *  this device.
 	 */
-	snprint(addr, sizeof(addr), "%s!0x800", argv[2]);
-	fd = kdial(addr, nil, dir, &cfd);
-	if(fd < 0)
-		errorf("dial 0x800 failed: %s", up->env->errstr);
-	mchan4 = commonfdtochan(fd, ORDWR, 0, 1);
-	cchan4 = commonfdtochan(cfd, ORDWR, 0, 1);
-	kclose(fd);
-	kclose(cfd);
+	snprint(addr, sizeof(addr), "%s!0x800", argv[2]);	/* ETIP4 */
+	er->mchan4 = chandial(addr, nil, dir, &er->cchan4);
 
 	/*
 	 *  make it non-blocking
 	 */
-	devtab[cchan4->type]->write(cchan4, nbmsg, strlen(nbmsg), 0);
+	devtab[er->cchan4->type]->write(er->cchan4, nbmsg, strlen(nbmsg), 0);
 
 	/*
-	 *  get mac address and speed
-	 */
-	snprint(addr, sizeof(addr), "%s/stats", dir);
-	fd = kopen(addr, OREAD);
-	if(fd < 0)
-		errorf("can't open ether stats: %s", up->env->errstr);
-
-	buf = smalloc(512);
-	n = kread(fd, buf, 511);
-	kclose(fd);
-	if(n <= 0)
-		error(Eio);
-	buf[n] = 0;
-
-	ptr = strstr(buf, "addr: ");
-	if(!ptr)
-		error(Eio);
-	ptr += 6;
-	parsemac(ifc->mac, ptr, 6);
-
-	ptr = strstr(buf, "mbps: ");
-	if(ptr){
-		ptr += 6;
-		ifc->mbps = atoi(ptr);
-	} else
-		ifc->mbps = 100;
-
-	/*
- 	 *  open arp conversation
-	 */
-	snprint(addr, sizeof(addr), "%s!0x806", argv[2]);
-	fd = kdial(addr, nil, nil, nil);
-	if(fd < 0)
-		errorf("dial 0x806 failed: %s", up->env->errstr);
-	achan = commonfdtochan(fd, ORDWR, 0, 1);
-	kclose(fd);
-
-	/*
-	 *  open ip conversation
+	 *  open ipv6 conversation
 	 *
 	 *  the dial will fail if the type is already open on
 	 *  this device.
 	 */
-	snprint(addr, sizeof(addr), "%s!0x86DD", argv[2]);
-	fd = kdial(addr, nil, dir, &cfd);
-	if(fd < 0)
-		errorf("dial 0x86DD failed: %s", up->env->errstr);
-	mchan6 = commonfdtochan(fd, ORDWR, 0, 1);
-	cchan6 = commonfdtochan(cfd, ORDWR, 0, 1);
-	kclose(fd);
-	kclose(cfd);
+	snprint(addr, sizeof(addr), "%s!0x86DD", argv[2]);	/* ETIP6 */
+	er->mchan6 = chandial(addr, nil, dir, &er->cchan6);
 
 	/*
 	 *  make it non-blocking
 	 */
-	devtab[cchan6->type]->write(cchan6, nbmsg, strlen(nbmsg), 0);
+	devtab[er->cchan6->type]->write(er->cchan6, nbmsg, strlen(nbmsg), 0);
 
-	er = smalloc(sizeof(*er));
-	er->mchan4 = mchan4;
-	er->cchan4 = cchan4;
-	er->achan = achan;
-	er->mchan6 = mchan6;
-	er->cchan6 = cchan6;
-	er->f = ifc->conv->p->f;
-	ifc->arg = er;
-
-	free(buf);
+	/*
+ 	 *  open arp conversation
+	 */
+	snprint(addr, sizeof(addr), "%s!0x806", argv[2]);	/* ETARP */
+	er->achan = chandial(addr, nil, nil, nil);
 	poperror();
 
-	kproc("etherread4", etherread4, ifc, 0);
-	kproc("recvarpproc", recvarpproc, ifc, 0);
-	kproc("etherread6", etherread6, ifc, 0);
+	ifc->arg = er;
+
+	kproc("etherread4", etherread4, ifc);
+	kproc("etherread6", etherread6, ifc);
+	kproc("recvarpproc", recvarpproc, ifc);
 }
 
 /*
@@ -259,21 +218,35 @@
 {
 	Etherrock *er = ifc->arg;
 
-	if(er->read4p)
+	while(waserror())
+		;
+
+	/* wait for readers to start */
+	while(er->arpp == (void*)-1 || er->read4p == (void*)-1 || er->read6p == (void*)-1)
+		tsleep(&up->sleep, return0, 0, 300);
+
+	if(er->read4p != nil)
 		postnote(er->read4p, 1, "unbind", 0);
-	if(er->read6p)
+	if(er->read6p != nil)
 		postnote(er->read6p, 1, "unbind", 0);
-	if(er->arpp)
+	if(er->arpp != nil)
 		postnote(er->arpp, 1, "unbind", 0);
 
+	poperror();
+
+	wunlock(ifc);
+	while(waserror())
+		;
+
 	/* wait for readers to die */
-	while(er->arpp != 0 || er->read4p != 0 || er->read6p != 0)
+	while(er->arpp != nil || er->read4p != nil || er->read6p != nil)
 		tsleep(&up->sleep, return0, 0, 300);
 
+	poperror();
+	wlock(ifc);
+
 	if(er->mchan4 != nil)
 		cclose(er->mchan4);
-	if(er->achan != nil)
-		cclose(er->achan);
 	if(er->cchan4 != nil)
 		cclose(er->cchan4);
 	if(er->mchan6 != nil)
@@ -280,6 +253,8 @@
 		cclose(er->mchan6);
 	if(er->cchan6 != nil)