code: 9ferno

Download patch

ref: c11957a7915d355d80ff7e6a21721ba05441ca29
parent: d8b894afc0ff3c81fc750826ac0016250e4f81e4
author: 9ferno <gophone2015@gmail.com>
date: Fri Aug 20 18:39:52 EDT 2021

replacing ip with 9front ip

diff: cannot open b/os/ip.original//null: file does not exist: 'b/os/ip.original//null'
--- a/os/ip/arp.c
+++ b/os/ip/arp.c
@@ -47,7 +47,8 @@
 
 #define haship(s) ((s)[IPaddrlen-1]%NHASH)
 
-extern int 	ReTransTimer = RETRANS_TIMER;
+int 	ReTransTimer = RETRANS_TIMER;
+
 static void 	rxmitproc(void *v);
 
 void
@@ -57,145 +58,121 @@
 	f->arp->f = f;
 	f->arp->rxmt = nil;
 	f->arp->dropf = f->arp->dropl = nil;
-	kproc("rxmitproc", rxmitproc, f->arp, 0);
+	kproc("rxmitproc", rxmitproc, f->arp);
 }
 
-/*
- *  create a new arp entry for an ip address.
- */
-static Arpent*
-newarp6(Arp *arp, uchar *ip, Ipifc *ifc, int addrxt)
+static void
+freeblistchain(Block *bp)
 {
-	uint t;
-	Block *next, *xp;
-	Arpent *a, *e, *f, **l;
-	Medium *m = ifc->m;
-	int empty;
+	Block *next;
 
-	/* find oldest entry */
-	e = &arp->cache[NCACHE];
-	a = arp->cache;
-	t = a->utime;
-	for(f = a; f < e; f++){
-		if(f->utime < t){
-			t = f->utime;
-			a = f;
-		}
+	while(bp != nil){
+		next = bp->list;
+		freeblist(bp);
+		bp = next;
 	}
+}
 
-	/* dump waiting packets */
-	xp = a->hold;
-	a->hold = nil;
+/* take out of re-transmit chain */
+static Arpent**
+rxmtunchain(Arp *arp, Arpent *a)
+{
+	Arpent **l;
 
-	if(isv4(a->ip)){
-		while(xp){
-			next = xp->list;
-			freeblist(xp);
-			xp = next;
+	for(l = &arp->rxmt; *l != nil; l = &((*l)->nextrxt)){
+		if(*l == a){
+			*l = a->nextrxt;
+			break;
 		}
 	}
-	else {	// queue icmp unreachable for rxmitproc later on, w/o arp lock
-		if(xp){
-			if(arp->dropl == nil) 
-				arp->dropf = xp;
-			else
-				arp->dropl->list = xp;
+	a->nextrxt = nil;
+	return l;
+}
 
-			for(next = xp->list; next; next = next->list)
-				xp = next;
-			arp->dropl = xp;
-			wakeup(&arp->rxmtq);
-		}
-	}
+static void
+cleanarpent(Arp *arp, Arpent *a)
+{
+	Arpent **l;
+	Block *bp;
 
 	/* take out of current chain */
-	l = &arp->hash[haship(a->ip)];
-	for(f = *l; f; f = f->hash){
-		if(f == a){
+	for(l = &arp->hash[haship(a->ip)]; *l != nil; l = &((*l)->hash)){
+		if(*l == a){
 			*l = a->hash;
 			break;
 		}
-		l = &f->hash;
 	}
+	a->hash = nil;
 
-	/* insert into new chain */
-	l = &arp->hash[haship(ip)];
-	a->hash = *l;
-	*l = a;
+	/* dump waiting packets */
+	bp = a->hold;
+	a->hold = nil;
+	if(isv4(a->ip))
+		freeblistchain(bp);
+	else {
+		rxmtunchain(arp, a);
 
-	memmove(a->ip, ip, sizeof(a->ip));
-	a->utime = NOW;
-	a->ctime = 0;
-	a->type = m;
+		/* queue icmp unreachable for rxmitproc later on, w/o arp lock */
+		if(bp != nil){
+			if(arp->dropf == nil)
+				arp->dropf = bp;
+			else
+				arp->dropl->list = bp;
+			arp->dropl = a->last;
 
-	a->rtime = NOW + ReTransTimer;
-	a->rxtsrem = MAX_MULTICAST_SOLICIT;
-	a->ifc = ifc;
-	a->ifcid = ifc->ifcid;
-
-	/* put to the end of re-transmit chain; addrxt is 0 when isv4(a->ip) */
-	if(!ipismulticast(a->ip) && addrxt){
-		l = &arp->rxmt;
-		empty = (*l==nil);
-
-		for(f = *l; f; f = f->nextrxt){
-			if(f == a){
-				*l = a->nextrxt;
-				break;
-			}
-			l = &f->nextrxt;
+			if(bp == arp->dropf)
+				wakeup(&arp->rxmtq);
 		}
-		for(f = *l; f; f = f->nextrxt){
-			l = &f->nextrxt;
-		}
-		*l = a;
-		if(empty) 
-			wakeup(&arp->rxmtq);
 	}
+	a->last = nil;
 
-	a->nextrxt = nil;
+	a->ifc = nil;
+	a->ifcid = 0;
 
-	return a;
-}
+	a->state = 0;
+	a->rxtsrem = 0;
 
-/* called with arp qlocked */
+	a->utime = 0;
+	a->ctime = 0;
 
-void
-cleanarpent(Arp *arp, Arpent *a)
+	memset(a->ip, 0, sizeof(a->ip));
+	memset(a->mac, 0, sizeof(a->mac));
+}
+
+/*
+ *  create a new arp entry for an ip address on ifc.
+ */
+static Arpent*
+newarpent(Arp *arp, uchar *ip, Ipifc *ifc)
 {
-	Arpent *f, **l;
+	Arpent *a, *e, *f, **l;
+	ulong t;
 
-	a->utime = 0;
-	a->ctime = 0;
-	a->type = 0;
-	a->state = 0;
-	
-	/* take out of current chain */
-	l = &arp->hash[haship(a->ip)];
-	for(f = *l; f; f = f->hash){
-		if(f == a){
-			*l = a->hash;
-			break;
+	/* find oldest entry */
+	e = &arp->cache[NCACHE];
+	a = arp->cache;
+	t = a->utime;
+	for(f = a; f < e; f++){
+		if(f->utime < t){
+			t = f->utime;
+			a = f;
 		}
-		l = &f->hash;
 	}
+	cleanarpent(arp, a);
 
-	/* take out of re-transmit chain */
-	l = &arp->rxmt;
-	for(f = *l; f; f = f->nextrxt){
-		if(f == a){
-			*l = a->nextrxt;
-			break;
-		}
-		l = &f->nextrxt;
-	}
-	a->nextrxt = nil;
-	a->hash = nil;
-	a->hold = nil;
-	a->last = nil;
-	a->ifc = nil;
+	ipmove(a->ip, ip);
+	a->ifc = ifc;
+	a->ifcid = ifc->ifcid;
+
+	/* insert into new chain */
+	l = &arp->hash[haship(ip)];
+	a->hash = *l;
+	*l = a;
+
+	return a;
 }
 
+
 /*
  *  fill in the media address if we have it.  Otherwise return an
  *  Arpent that represents the state of the address resolution FSM
@@ -207,7 +184,6 @@
 {
 	int hash;
 	Arpent *a;
-	Medium *type = ifc->m;
 	uchar v6ip[IPaddrlen];
 
 	if(version == V4){
@@ -217,30 +193,28 @@
 
 	qlock(arp);
 	hash = haship(ip);
-	for(a = arp->hash[hash]; a; a = a->hash){
-		if(memcmp(ip, a->ip, sizeof(a->ip)) == 0)
-		if(type == a->type)
+	for(a = arp->hash[hash]; a != nil; a = a->hash){
+		if(a->ifc == ifc && a->ifcid == ifc->ifcid && ipcmp(ip, a->ip) == 0)
 			break;
 	}
-
 	if(a == nil){
-		a = newarp6(arp, ip, ifc, (version != V4));
+		a = newarpent(arp, ip, ifc);
 		a->state = AWAIT;
 	}
 	a->utime = NOW;
 	if(a->state == AWAIT){
 		if(bp != nil){
-			if(a->hold)
-				a->last->list = bp;
-			else
+			bp->list = nil; 
+			if(a->hold == nil)
 				a->hold = bp;
+			else
+				a->last->list = bp;
 			a->last = bp;
-			bp->list = nil; 
 		}
 		return a;		/* return with arp qlocked */
 	}
 
-	memmove(mac, a->mac, a->type->maclen);
+	memmove(mac, a->mac, ifc->m->maclen);
 
 	/* remove old entries */
 	if(NOW - a->ctime > 15*60*1000)
@@ -269,132 +243,82 @@
 arpresolve(Arp *arp, Arpent *a, Medium *type, uchar *mac)
 {
 	Block *bp;
-	Arpent *f, **l;
 
-	if(!isv4(a->ip)){
-		l = &arp->rxmt;
-		for(f = *l; f; f = f->nextrxt){
-			if(f == a){
-				*l = a->nextrxt;
-				break;
-			}
-			l = &f->nextrxt;
-		}
-	}
-
 	memmove(a->mac, mac, type->maclen);
-	a->type = type;
+	if(a->state == AWAIT && !isv4(a->ip)){
+		rxmtunchain(arp, a);
+		a->rxtsrem = 0;
+	}
 	a->state = AOK;
-	a->utime = NOW;
+	a->ctime = a->utime = NOW;
 	bp = a->hold;
-	a->hold = nil;
+	a->hold = a->last = nil;
 	qunlock(arp);
 
 	return bp;
 }
 
-void
-arpenter(Fs *fs, int version, uchar *ip, uchar *mac, int n, int refresh)
+int
+arpenter(Fs *fs, int version, uchar *ip, uchar *mac, int n, uchar *ia, Ipifc *ifc, int refresh)
 {
-	Arp *arp;
-	Route *r;
-	Arpent *a, *f, **l;
-	Ipifc *ifc;
-	Medium *type;
-	Block *bp, *next;
 	uchar v6ip[IPaddrlen];
+	Block *bp, *next;
+	Arpent *a;
+	Route *r;
+	Arp *arp;
 
-	arp = fs->arp;
+	if(ifc->m == nil || ifc->m->maclen != n || ifc->m->maclen == 0)
+		return -1;
 
-	if(n != 6){
-//		print("arp: len = %d\n", n);
-		return;
-	}
-
 	switch(version){
 	case V4:
-		r = v4lookup(fs, ip, nil);
+		r = v4lookup(fs, ip, ia, nil);
 		v4tov6(v6ip, ip);
 		ip = v6ip;
 		break;
 	case V6:
-		r = v6lookup(fs, ip, nil);
+		r = v6lookup(fs, ip, ia, nil);
 		break;
 	default:
 		panic("arpenter: version %d", version);
-		return;	/* to supress warnings */
+		return -1;	/* to suppress warnings */
 	}
 
-	if(r == nil){
-//		print("arp: no route for entry\n");
-		return;
-	}
+	if(r == nil || r->ifc != ifc || (r->type & (Rbcast|Rmulti)) != 0)
+		return -1;
 
-	ifc = r->ifc;
-	type = ifc->m;
-
+	arp = fs->arp;
 	qlock(arp);
-	for(a = arp->hash[haship(ip)]; a; a = a->hash){
-		if(a->type != type || (a->state != AWAIT && a->state != AOK))
+	for(a = arp->hash[haship(ip)]; a != nil; a = a->hash){
+		if(a->ifc != ifc || a->ifcid != ifc->ifcid)
 			continue;
-
 		if(ipcmp(a->ip, ip) == 0){
-			a->state = AOK;
-			memmove(a->mac, mac, type->maclen);
-
-			if(version == V6){
-				/* take out of re-transmit chain */
-				l = &arp->rxmt;
-				for(f = *l; f; f = f->nextrxt){
-					if(f == a){
-						*l = a->nextrxt;
-						break;
-					}
-					l = &f->nextrxt;
-				}
-			}
-
-			a->ifc = ifc;
-			a->ifcid = ifc->ifcid;
-			bp = a->hold;
-			a->hold = nil;
 			if(version == V4)
 				ip += IPv4off;
-			a->utime = NOW;
-			a->ctime = a->utime;
-			qunlock(arp);
-
-			while(bp){
+			bp = arpresolve(arp, a, ifc->m, mac);	/* unlocks arp */
+			for(; bp != nil; bp = next){
 				next = bp->list;
-				if(ifc != nil){
-					if(waserror()){
-						runlock(ifc);
-						nexterror();
-					}
-					rlock(ifc);
-					if(ifc->m != nil)
-						ifc->m->bwrite(ifc, bp, version, ip);
-					else
-						freeb(bp);
-					runlock(ifc);
-					poperror();
-				} else
-					freeb(bp);
-				bp = next;
+				bp->list = nil;
+				if(waserror()){
+					freeblistchain(next);
+					break;
+				}
+				ipifcoput(ifc, bp, version, ip);
+				poperror();
 			}
-			return;
+			return 1;
 		}
 	}
 
 	if(refresh == 0){
-		a = newarp6(arp, ip, ifc, 0);
+		a = newarpent(arp, ip, ifc);
 		a->state = AOK;
-		a->type = type;
-		a->ctime = NOW;
-		memmove(a->mac, mac, type->maclen);
+		a->ctime = a->utime = NOW;
+		memmove(a->mac, mac, n);
 	}
-
 	qunlock(arp);
+
+	return refresh == 0;
 }
 
 int
@@ -401,13 +325,12 @@
 arpwrite(Fs *fs, char *s, int len)
 {
 	int n;
-	Route *r;
 	Arp *arp;
-	Block *bp;
-	Arpent *a, *fl, **l;
+	Arpent *a, *x;
 	Medium *m;
-	char *f[4], buf[256];
-	uchar ip[IPaddrlen], mac[MAClen];
+	Ipifc *ifc;
+	char *f[5], buf[256];
+	uchar ip[IPaddrlen], ia[IPaddrlen], mac[MAClen];
 
 	arp = fs->arp;
 
@@ -420,7 +343,7 @@
 	if(len > 0 && buf[len-1] == '\n')
 		buf[len-1] = 0;
 
-	n = getfields(buf, f, 4, 1, " ");
+	n = getfields(buf, f, nelem(f), 1, " ");
 	if(strcmp(f[0], "flush") == 0){
 		qlock(arp);
 		for(a = arp->cache; a < &arp->cache[NCACHE]; a++){
@@ -427,19 +350,20 @@
 			memset(a->ip, 0, sizeof(a->ip));
 			memset(a->mac, 0, sizeof(a->mac));
 			a->hash = nil;
+			a->nextrxt = nil;
+			a->ifc = nil;
+			a->ifcid = 0;
 			a->state = 0;
+			a->rxtsrem = 0;
+			a->ctime = 0;
 			a->utime = 0;
-			while(a->hold != nil){
-				bp = a->hold->list;
-				freeblist(a->hold);
-				a->hold = bp;
-			}
+			freeblistchain(a->hold);
+			a->hold = a->last = nil;
 		}
 		memset(arp->hash, 0, sizeof(arp->hash));
-// clear all pkts on these lists (rxmt, dropf/l)
+		freeblistchain(arp->dropf);
+		arp->dropf = arp->dropl = nil;
 		arp->rxmt = nil;
-		arp->dropf = nil;
-		arp->dropl = nil;
 		qunlock(arp);
 	} else if(strcmp(f[0], "add") == 0){
 		switch(n){
@@ -446,64 +370,53 @@
 		default:
 			error(Ebadarg);
 		case 3:
-			parseip(ip, f[1]);
-			if(isv4(ip))
-				r = v4lookup(fs, ip+IPv4off, nil);
-			else
-				r = v6lookup(fs, ip, nil);
-			if(r == nil)
-				error("Destination unreachable");
-			m = r->ifc->m;
-			n = parsemac(mac, f[2], m->maclen);
+			if(parseip(ip, f[1]) == -1)
+				error(Ebadip);
+			if((n = parsemac(mac, f[2], sizeof(mac))) <= 0)
+				error(Ebadarp);
+			findlocalip(fs, ia, ip);
 			break;
 		case 4:
 			m = ipfindmedium(f[1]);
-			if(m == nil)
+			if(m == nil || m->maclen == 0)
 				error(Ebadarp);
-			parseip(ip, f[2]);
-			n = parsemac(mac, f[3], m->maclen);
+			if(parseip(ip, f[2]) == -1)
+				error(Ebadip);
+			if((n = parsemac(mac, f[3], sizeof(mac))) != m->maclen)
+				error(Ebadarp);
+			findlocalip(fs, ia, ip);
 			break;
+		case 5:
+			m = ipfindmedium(f[1]);
+			if(m == nil || m->maclen == 0)
+				error(Ebadarp);
+			if(parseip(ip, f[2]) == -1)
+				error(Ebadip);
+			if((n = parsemac(mac, f[3], sizeof(mac))) != m->maclen)
+				error(Ebadarp);
+			if(parseip(ia, f[4]) == -1)
+				error(Ebadip);
+			break;
 		}
-
-		if(m->ares == nil)
-			error(Ebadarp);
-
-		m->ares(fs, V6, ip, mac, n, 0);
+		if((ifc = findipifc(fs, ia, ia, Runi)) == nil)
+			error("no interface");
+		rlock(ifc);
+		if(!ipv6local(ifc, ia, 0, ip) || arpenter(fs, V6, ip, mac, n, ia, ifc, 0) < 0){
+			runlock(ifc);
+			error("destination unreachable");
+		}
+		runlock(ifc);
 	} else if(strcmp(f[0], "del") == 0){
-		if(n != 2)
+		if (n != 2)
 			error(Ebadarg);
-
-		parseip(ip, f[1]);
+		if (parseip(ip, f[1]) == -1)
+			error(Ebadip);
 		qlock(arp);
-
-		l = &arp->hash[haship(ip)];
-		for(a = *l; a; a = a->hash){
-			if(memcmp(ip, a->ip, sizeof(a->ip)) == 0){
-				*l = a->hash;
-				break;
-			}
-			l = &a->hash;
+		for(a = arp->hash[haship(ip)]; a != nil; a = x){
+			x = a->hash;
+			if(ipcmp(ip, a->ip) == 0)
+				cleanarpent(arp, a);
 		}
-	
-		if(a){
-			/* take out of re-transmit chain */
-			l = &arp->rxmt;
-			for(fl = *l; fl; fl = fl->nextrxt){
-				if(fl == a){
-					*l = a->nextrxt;
-					break;
-				}
-				l = &fl->nextrxt;
-			}
-
-			a->nextrxt = nil;
-			a->hash = nil;
-			a->hold = nil;
-			a->last = nil;
-			a->ifc = nil;
-			memset(a->ip, 0, sizeof(a->ip));
-			memset(a->mac, 0, sizeof(a->mac));
-		}
 		qunlock(arp);
 	} else
 		error(Ebadarp);
@@ -511,13 +424,6 @@
 	return len;
 }
 
-enum
-{
-	Alinelen=	90,
-};
-
-char *aformat = "%-6.6s %-8.8s %-40.40I %-32.32s\n";
-
 static void
 convmac(char *p, uchar *mac, int n)
 {
@@ -526,136 +432,136 @@
 }
 
 int
-arpread(Arp *arp, char *p, ulong offset, int len)
+arpread(Arp *arp, char *s, ulong offset, int len)
 {
+	char mac[2*MAClen+1], *state, *mname, *p;
+	uchar ip[IPaddrlen], ia[IPaddrlen];
+	Ipifc *ifc;
 	Arpent *a;
-	int n;
-	char mac[2*MAClen+1];
+	long n, o;
 
-	if(offset % Alinelen)
-		return 0;
-
-	offset = offset/Alinelen;
-	len = len/Alinelen;
-
-	n = 0;
+	p = s;
+	o = -offset;
 	for(a = arp->cache; len > 0 && a < &arp->cache[NCACHE]; a++){
-		if(a->state == 0)
+		if(a->state == 0 || (ifc = a->ifc) == nil)
 			continue;
-		if(offset > 0){
-			offset--;
+
+		rlock(ifc);
+		qlock(arp);
+		state = arpstate[a->state];
+		ipmove(ip, a->ip);
+		if(ifc->m == nil || a->ifcid != ifc->ifcid || !ipv6local(ifc, ia, 0, ip)){
+			qunlock(arp);
+			runlock(ifc);
 			continue;
 		}
-		len--;
-		qlock(arp);
-		convmac(mac, a->mac, a->type->maclen);
-		n += sprint(p+n, aformat, a->type->name, arpstate[a->state], a->ip, mac);
+		mname = ifc->m->name;
+		convmac(mac, a->mac, ifc->m->maclen);
 		qunlock(arp);
+		runlock(ifc);
+
+		n = snprint(up->genbuf, sizeof up->genbuf,
+			"%-6.6s %-4.4s %-40.40I %-16.16s %I\n",
+			mname, state, ip, mac, ia);
+		o += n;
+		if(o <= 0)
+			continue;
+		if(n > len)
+			break;
+		memmove(p, up->genbuf, n);
+		len -= n;
+		p += n;
 	}
 
-	return n;
+	return p - s;
 }
 
-extern int
-rxmitsols(Arp *arp)
+void
+ndpsendsol(Fs *f, Ipifc *ifc, Arpent *a)
 {
-	uint sflag;
-	Block *next, *xp;
-	Arpent *a, *b, **l;
-	Fs *f;
-	uchar ipsrc[IPaddrlen];
-	Ipifc *ifc = nil;
-	long nrxt;
+	uchar targ[IPaddrlen], src[IPaddrlen];
+	Arpent **l;
 
-	qlock(arp);
-	f = arp->f;
+	a->ctime = NOW;
+	if(a->rxtsrem == 0)
+		a->rxtsrem = MAX_MULTICAST_SOLICIT;
+	else
+		a->rxtsrem--;
 
-	a = arp->rxmt;
-	if(a==nil){
-		nrxt = 0;
-		goto dodrops; 		//return nrxt;
-	}
-	nrxt = a->rtime - NOW;
-	if(nrxt > 3*ReTransTimer/4) 
-		goto dodrops; 		//return nrxt;
+	/* put on end of re-transmit chain */
+	for(l = rxmtunchain(f->arp, a); *l != nil; l = &(*l)->nextrxt)
+		;
+	*l = a;
 
-	for(; a; a = a->nextrxt){
-		ifc = a->ifc;
-		assert(ifc != nil);
-		if((a->rxtsrem <= 0) || !(canrlock(ifc)) || (a->ifcid != ifc->ifcid)){
-			xp = a->hold;
-			a->hold = nil;
+	if(l == &f->arp->rxmt)
+		wakeup(&f->arp->rxmtq);
 
-			if(xp){
-				if(arp->dropl == nil) 
-					arp->dropf = xp;
-				else
-					arp->dropl->list = xp;
-			}
+	/* try to use source address of original packet */
+	ipmove(targ, a->ip);
+	if(a->last != nil){
+		ipmove(src, ((Ip6hdr*)a->last->rp)->src);
+		arprelease(f->arp, a);
 
-			cleanarpent(arp, a);
-		}
-		else
-			break;
+		if(iplocalonifc(ifc, src) != nil || ipproxyifc(f, ifc, src))
+			goto send;
+	} else {
+		arprelease(f->arp, a);
 	}
-	if(a == nil)
-		goto dodrops;
+	if(!ipv6local(ifc, src, 0, targ))
+		return;
+send:
+	if(!waserror()){
+		icmpns(f, src, SRC_UNI, targ, TARG_MULTI, ifc->mac);
+		poperror();
+	}
+}
 
+static void
+rxmitsols(Arp *arp)
+{
+	Block *next, *bp;
+	Arpent *a;
+	Ipifc *ifc;
+	Route *r;
 
-	qunlock(arp);	/* for icmpns */
-	if((sflag = ipv6anylocal(ifc, ipsrc)) != SRC_UNSPEC) 
-		icmpns(f, ipsrc, sflag, a->ip, TARG_MULTI, ifc->mac); 
-
-	runlock(ifc);
-	qlock(arp);	
-
-	/* put to the end of re-transmit chain */
-	l = &arp->rxmt;
-	for(b = *l; b; b = b->nextrxt){
-		if(b == a){
-			*l = a->nextrxt;
-			break;
+	qlock(arp);
+	while((a = arp->rxmt) != nil && NOW - a->ctime > 3*ReTransTimer/4){
+		if(a->rxtsrem > 0 && (ifc = a->ifc) != nil && canrlock(ifc)){
+			if(a->ifcid == ifc->ifcid){
+				ndpsendsol(arp->f, ifc, a);	/* unlocks arp */
+				runlock(ifc);
+				qlock(arp);
+				continue;
+			}
+			runlock(ifc);
 		}
-		l = &b->nextrxt;
+		cleanarpent(arp, a);
 	}
-	for(b = *l; b; b = b->nextrxt){
-		l = &b->nextrxt;
-	}
-	*l = a;
-	a->rxtsrem--;
-	a->nextrxt = nil;
-	a->rtime = NOW + ReTransTimer;
-
-	a = arp->rxmt;
-	if(a==nil)
-		nrxt = 0;
-	else 
-		nrxt = a->rtime - NOW;
-
-dodrops:
-	xp = arp->dropf;
-	arp->dropf = nil;
-	arp->dropl = nil;
+	bp = arp->dropf;
+	arp->dropf = arp->dropl = nil;
 	qunlock(arp);
 
-	for(; xp; xp = next){
-		next = xp->list;
-		icmphostunr(f, ifc, xp, icmp6_adr_unreach, 1);
+	for(; bp != nil; bp = next){
+		next = bp->list;
+		bp->list = nil;
+		r = v6lookup(arp->f, ((Ip6hdr*)bp->rp)->src, ((Ip6hdr*)bp->rp)->dst, nil);
+		if(r != nil && (ifc = r->ifc) != nil && canrlock(ifc)){
+			if(!waserror()){
+				icmphostunr6(arp->f, ifc, bp, Icmp6_adr_unreach, (r->type & Runi) != 0);
+				poperror();
+			}
+			runlock(ifc);
+		}
+		freeblist(bp);
 	}
-
-	return nrxt;
-
 }
 
 static int
 rxready(void *v)
 {
-	Arp *arp = (Arp *) v;
-	int x;
+	Arp *arp = (Arp *)v;
 
-	x = ((arp->rxmt != nil) || (arp->dropf != nil));
-
-	return x;
+	return arp->rxmt != nil || arp->dropf != nil;
 }
 
 static void
@@ -662,20 +568,15 @@
 rxmitproc(void *v)
 {
 	Arp *arp = v;
-	long wakeupat;
 
 	arp->rxmitp = up;
-	//print("arp rxmitproc started\n");
 	if(waserror()){
-		arp->rxmitp = 0;
+		arp->rxmitp = nil;
 		pexit("hangup", 1);
 	}
 	for(;;){
-		wakeupat = rxmitsols(arp);
-		if(wakeupat == 0) 
-			sleep(&arp->rxmtq, rxready, v); 
-		else if(wakeupat > ReTransTimer/4) 
-			tsleep(&arp->rxmtq, return0, 0, wakeupat); 
+		sleep(&arp->rxmtq, rxready, v);
+		rxmitsols(arp);
+		tsleep(&arp->rxmtq, return0, nil, ReTransTimer/4);
 	}
 }
-
--- a/os/ip/bootp.c
+++ /dev/null
@@ -1,231 +1,0 @@
-#include "u.h"
-#include "../port/lib.h"
-#include "mem.h"
-#include "dat.h"
-#include "fns.h"
-#include "../port/error.h"
-#include "kernel.h"
-#include "ip.h"
-
-static	ulong	fsip;
-static	ulong	auip;
-static	ulong	gwip;
-static	ulong	ipmask;
-static	ulong	ipaddr;
-
-enum
-{
-	Bootrequest = 1,
-	Bootreply   = 2,
-};
-
-typedef struct Bootp
-{
-	/* udp.c oldheader */
-	uchar	raddr[IPaddrlen];
-	uchar	laddr[IPaddrlen];
-	uchar	rport[2];
-	uchar	lport[2];
-	/* bootp itself */
-	uchar	op;		/* opcode */
-	uchar	htype;		/* hardware type */
-	uchar	hlen;		/* hardware address len */
-	uchar	hops;		/* hops */
-	uchar	xid[4];		/* a random number */
-	uchar	secs[2];	/* elapsed snce client started booting */
-	uchar	pad[2];
-	uchar	ciaddr[4];	/* client IP address (client tells server) */
-	uchar	yiaddr[4];	/* client IP address (server tells client) */
-	uchar	siaddr[4];	/* server IP address */
-	uchar	giaddr[4];	/* gateway IP address */
-	uchar	chaddr[16];	/* client hardware address */
-	uchar	sname[64];	/* server host name (optional) */
-	uchar	file[128];	/* boot file name */
-	uchar	vend[128];	/* vendor-specific goo */
-} Bootp;
-
-/*
- * bootp returns:
- *
- * "fsip d.d.d.d
- * auip d.d.d.d
- * gwip d.d.d.d
- * ipmask d.d.d.d
- * ipaddr d.d.d.d"
- *
- * where d.d.d.d is the IP address in dotted decimal notation, and each
- * address is followed by a newline.
- */
-
-static	Bootp	req;
-static	Proc*	rcvprocp;
-static	int	recv;
-static	int	done;
-static	Rendez	bootpr;
-static	char	rcvbuf[512+2*IPaddrlen+2*2];
-
-static void
-rcvbootp(void *a)
-{
-	int n, fd;
-	Bootp *rp;
-	char *field[4];
-	uchar ip[IPaddrlen];
-
-	if(waserror())
-		pexit("", 0);
-	rcvprocp = up;	/* store for postnote below */
-	fd = (int)a;	/* at compilation: warning: ../ip/bootp.c:78 conversion of pointer to shorter integer */
-	while(done == 0) {
-		n = kread(fd, rcvbuf, sizeof(rcvbuf));
-		if(n <= 0)
-			break;
-		rp = (Bootp*)rcvbuf;
-		/* currently ignore udp's header */
-		if(memcmp(req.chaddr, rp->chaddr, 6) == 0
-		&& rp->htype == 1 && rp->hlen == 6
-		&& getfields((char*)rp->vend+4, field, 4, 1, " ") == 4
-		&& strncmp((char*)rp->vend, "p9  ", 4) == 0){
-			if(ipaddr == 0)
-				ipaddr = nhgetl(rp->yiaddr);
-			if(ipmask == 0)
-				ipmask = parseip(ip, field[0]);
-			if(fsip == 0)
-				fsip = parseip(ip, field[1]);
-			if(auip == 0)
-				auip = parseip(ip, field[2]);
-			if(gwip == 0)
-				gwip = parseip(ip, field[3]);
-			break;
-		}
-	}
-	poperror();
-	rcvprocp = nil;
-
-	recv = 1;
-	wakeup(&bootpr);
-	pexit("", 0);
-}
-
-static char*
-rbootp(Ipifc *ifc)
-{
-	int cfd, dfd, tries, n;
-	char ia[5+3*24], im[16], *av[3];
-	uchar nipaddr[4], ngwip[4], nipmask[4];
-	char dir[Maxpath];
-
-	av[1] = "0.0.0.0";
-	av[2] = "0.0.0.0";
-	ipifcadd(ifc, av, 3, 0, nil);
-
-	cfd = kannounce("udp!*!68", dir);
-	if(cfd < 0)
-		return "bootp announce failed";
-	strcat(dir, "/data");
-	if(kwrite(cfd, "headers", 7) < 0){
-		kclose(cfd);
-		return "bootp ctl headers failed";
-	}
-	kwrite(cfd, "oldheaders", 10);
-	dfd = kopen(dir, ORDWR);
-	if(dfd < 0){
-		kclose(cfd);
-		return "bootp open data failed";
-	}
-	kclose(cfd);
-	
-
-	/* create request */
-	memset(&req, 0, sizeof(req));
-	ipmove(req.raddr, IPv4bcast);
-	hnputs(req.rport, 67);
-	req.op = Bootrequest;
-	req.htype = 1;			/* ethernet (all we know) */
-	req.hlen = 6;			/* ethernet (all we know) */
-
-	/* Hardware MAC address */
-	memmove(req.chaddr, ifc->mac, 6);
-	/* Fill in the local IP address if we know it */
-	ipv4local(ifc, req.ciaddr);
-	memset(req.file, 0, sizeof(req.file));
-	strcpy((char*)req.vend, "p9  ");
-
-	done = 0;
-	recv = 0;
-
-	kproc("rcvbootp", rcvbootp, (void*)dfd, KPDUPFDG);
-
-	/*
-	 * broadcast bootp's till we get a reply,
-	 * or fixed number of tries
-	 */
-	tries = 0;
-	while(recv == 0) {
-		if(kwrite(dfd, &req, sizeof(req)) < 0)
-			print("bootp: write: %s\n", commonerror());
-
-		tsleep(&bootpr, return0, 0, 1000);
-		if(++tries > 10) {
-			print("bootp: timed out\n");
-			break;
-		}
-	}
-	kclose(dfd);
-	done = 1;
-	if(rcvprocp != nil){
-		postnote(rcvprocp, 1, "timeout", 0);
-		rcvprocp = nil;
-	}
-
-	av[1] = "0.0.0.0";
-	av[2] = "0.0.0.0";
-	ipifcrem(ifc, av, 3);
-
-	hnputl(nipaddr, ipaddr);
-	sprint(ia, "%V", nipaddr);
-	hnputl(nipmask, ipmask);
-	sprint(im, "%V", nipmask);
-	av[1] = ia;
-	av[2] = im;
-	ipifcadd(ifc, av, 3, 0, nil);
-
-	if(gwip != 0) {
-		hnputl(ngwip, gwip);
-		n = snprint(ia, sizeof(ia), "add 0.0.0.0 0.0.0.0 %V", ngwip);
-		routewrite(ifc->conv->p->f, nil, ia, n);
-	}
-	return nil;
-}
-
-static int
-rbootpread(char *bp, ulong offset, int len)
-{
-	int n;
-	char *buf;
-	uchar a[4];
-
-	buf = smalloc(READSTR);
-	if(waserror()){
-		free(buf);
-		nexterror();
-	}
-	hnputl(a, fsip);
-	n = snprint(buf, READSTR, "fsip %15V\n", a);
-	hnputl(a, auip);
-	n += snprint(buf + n, READSTR-n, "auip %15V\n", a);
-	hnputl(a, gwip);
-	n += snprint(buf + n, READSTR-n, "gwip %15V\n", a);
-	hnputl(a, ipmask);
-	n += snprint(buf + n, READSTR-n, "ipmask %15V\n", a);
-	hnputl(a, ipaddr);
-	snprint(buf + n, READSTR-n, "ipaddr %15V\n", a);
-
-	len = readstr(offset, bp, len, buf);
-	poperror();
-	free(buf);
-	return len;
-}
-
-char*	(*bootp)(Ipifc*) = rbootp;
-int	(*bootpread)(char*, ulong, int) = rbootpread;
--- a/os/ip/compress.c
+++ /dev/null
@@ -1,520 +1,0 @@
-#include	"u.h"
-#include	"../port/lib.h"
-#include	"mem.h"
-#include	"dat.h"
-#include	"fns.h"
-#include	"../port/error.h"
-
-#include	"ip.h"
-#include	"ppp.h"
-
-typedef struct Iphdr	Iphdr;
-typedef struct Tcphdr	Tcphdr;
-typedef struct Ilhdr	Ilhdr;
-typedef struct Hdr	Hdr;
-typedef struct Tcpc	Tcpc;
-
-struct Iphdr
-{
-	uchar	vihl;		/* Version and header length */
-	uchar	tos;		/* Type of service */
-	uchar	length[2];	/* packet length */
-	uchar	id[2];		/* Identification */
-	uchar	frag[2];	/* Fragment information */
-	uchar	ttl;		/* Time to live */
-	uchar	proto;		/* Protocol */
-	uchar	cksum[2];	/* Header checksum */
-	ulong	src;		/* Ip source (byte ordering unimportant) */
-	ulong	dst;		/* Ip destination (byte ordering unimportant) */
-};
-
-struct Tcphdr
-{
-	ulong	ports;		/* defined as a ulong to make comparisons easier */
-	uchar	seq[4];
-	uchar	ack[4];
-	uchar	flag[2];
-	uchar	win[2];
-	uchar	cksum[2];
-	uchar	urg[2];
-};
-
-struct Ilhdr
-{
-	uchar	sum[2];	/* Checksum including header */
-	uchar	len[2];	/* Packet length */
-	uchar	type;		/* Packet type */
-	uchar	spec;		/* Special */
-	uchar	src[2];	/* Src port */
-	uchar	dst[2];	/* Dst port */
-	uchar	id[4];	/* Sequence id */
-	uchar	ack[4];	/* Acked sequence */
-};
-
-enum
-{
-	URG		= 0x20,		/* Data marked urgent */
-	ACK		= 0x10,		/* Aknowledge is valid */
-	PSH		= 0x08,		/* Whole data pipe is pushed */
-	RST		= 0x04,		/* Reset connection */
-	SYN		= 0x02,		/* Pkt. is synchronise */
-	FIN		= 0x01,		/* Start close down */
-
-	IP_DF		= 0x4000,	/* Don't fragment */
-
-	IP_TCPPROTO	= 6,
-	IP_ILPROTO	= 40,
-	IL_IPHDR	= 20,
-};
-
-struct Hdr
-{
-	uchar	buf[128];
-	Iphdr	*ip;
-	Tcphdr	*tcp;
-	int	len;
-};
-
-struct Tcpc
-{
-	uchar	lastrecv;
-	uchar	lastxmit;
-	uchar	basexmit;
-	uchar	err;
-	uchar	compressid;
-	Hdr	t[MAX_STATES];
-	Hdr	r[MAX_STATES];
-};
-
-enum
-{	/* flag bits for what changed in a packet */
-	NEW_U=(1<<0),	/* tcp only */
-	NEW_W=(1<<1),	/* tcp only */
-	NEW_A=(1<<2),	/* il tcp */
-	NEW_S=(1<<3),	/* tcp only */
-	NEW_P=(1<<4),	/* tcp only */
-	NEW_I=(1<<5),	/* il tcp */
-	NEW_C=(1<<6),	/* il tcp */
-	NEW_T=(1<<7),	/* il only */
-	TCP_PUSH_BIT	= 0x10,
-};
-
-/* reserved, special-case values of above for tcp */
-#define SPECIAL_I (NEW_S|NEW_W|NEW_U)		/* echoed interactive traffic */
-#define SPECIAL_D (NEW_S|NEW_A|NEW_W|NEW_U)	/* unidirectional data */
-#define SPECIALS_MASK (NEW_S|NEW_A|NEW_W|NEW_U)
-
-int
-encode(void *p, ulong n)
-{
-	uchar	*cp;
-
-	cp = p;
-	if(n >= 256 || n == 0) {
-		*cp++ = 0;
-		cp[0] = n >> 8;
-		cp[1] = n;
-		return 3;
-	} else 
-		*cp = n;
-	return 1;
-}
-
-#define DECODEL(f) { \
-	if (*cp == 0) {\
-		hnputl(f, nhgetl(f) + ((cp[1] << 8) | cp[2])); \
-		cp += 3; \
-	} else { \
-		hnputl(f, nhgetl(f) + (ulong)*cp++); \
-	} \
-}
-#define DECODES(f) { \
-	if (*cp == 0) {\
-		hnputs(f, nhgets(f) + ((cp[1] << 8) | cp[2])); \
-		cp += 3; \
-	} else { \
-		hnputs(f, nhgets(f) + (ulong)*cp++); \
-	} \
-}
-
-ushort
-tcpcompress(Tcpc *comp, Block *b, Fs *)
-{
-	Iphdr	*ip;		/* current packet */
-	Tcphdr	*tcp;		/* current pkt */
-	ulong 	iplen, tcplen, hlen;	/* header length in bytes */
-	ulong 	deltaS, deltaA;	/* general purpose temporaries */
-	ulong 	changes;	/* change mask */
-	uchar	new_seq[16];	/* changes from last to current */
-	uchar	*cp;
-	Hdr	*h;		/* last packet */
-	int 	i, j;
-
-	/*
-	 * Bail if this is not a compressible TCP/IP packet
-	 */
-	ip = (Iphdr*)b->rp;
-	iplen = (ip->vihl & 0xf) << 2;
-	tcp = (Tcphdr*)(b->rp + iplen);
-	tcplen = (tcp->flag[0] & 0xf0) >> 2;
-	hlen = iplen + tcplen;
-	if((tcp->flag[1] & (SYN|FIN|RST|ACK)) != ACK)
-		return Pip;	/* connection control */
-
-	/*
-	 * Packet is compressible, look for a connection
-	 */
-	changes = 0;
-	cp = new_seq;
-	j = comp->lastxmit;
-	h = &comp->t[j];
-	if(ip->src != h->ip->src || ip->dst != h->ip->dst
-	|| tcp->ports != h->tcp->ports) {
-		for(i = 0; i < MAX_STATES; ++i) {
-			j = (comp->basexmit + i) % MAX_STATES;
-			h = &comp->t[j];
-			if(ip->src == h->ip->src && ip->dst == h->ip->dst
-			&& tcp->ports == h->tcp->ports)
-				goto found;
-		}
-
-		/* no connection, reuse the oldest */
-		if(i == MAX_STATES) {
-			j = comp->basexmit;
-			j = (j + MAX_STATES - 1) % MAX_STATES;
-			comp->basexmit = j;
-			h = &comp->t[j];
-			goto raise;
-		}
-	}
-found:
-
-	/*
-	 * Make sure that only what we expect to change changed. 
-	 */
-	if(ip->vihl  != h->ip->vihl || ip->tos   != h->ip->tos ||
-	   ip->ttl   != h->ip->ttl  || ip->proto != h->ip->proto)
-		goto raise;	/* headers changed */
-	if(iplen != sizeof(Iphdr) && memcmp(ip+1, h->ip+1, iplen - sizeof(Iphdr)))
-		goto raise;	/* ip options changed */
-	if(tcplen != sizeof(Tcphdr) && memcmp(tcp+1, h->tcp+1, tcplen - sizeof(Tcphdr)))
-		goto raise;	/* tcp options changed */
-
-	if(tcp->flag[1] & URG) {
-		cp += encode(cp, nhgets(tcp->urg));
-		changes |= NEW_U;
-	} else if(memcmp(tcp->urg, h->tcp->urg, sizeof(tcp->urg)) != 0)
-		goto raise;
-	if(deltaS = nhgets(tcp->win) - nhgets(h->tcp->win)) {
-		cp += encode(cp, deltaS);
-		changes |= NEW_W;
-	}
-	if(deltaA = nhgetl(tcp->ack) - nhgetl(h->tcp->ack)) {
-		if(deltaA > 0xffff)
-			goto raise;
-		cp += encode(cp, deltaA);
-		changes |= NEW_A;
-	}
-	if(deltaS = nhgetl(tcp->seq) - nhgetl(h->tcp->seq)) {
-		if (deltaS > 0xffff)
-			goto raise;
-		cp += encode(cp, deltaS);
-		changes |= NEW_S;
-	}
-
-	/*
-	 * Look for the special-case encodings.
-	 */
-	switch(changes) {
-	case 0:
-		/*
-		 * Nothing changed. If this packet contains data and the last
-		 * one didn't, this is probably a data packet following an
-		 * ack (normal on an interactive connection) and we send it
-		 * compressed. Otherwise it's probably a retransmit,
-		 * retransmitted ack or window probe.  Send it uncompressed
-		 * in case the other side missed the compressed version.
-		 */
-		if(nhgets(ip->length) == nhgets(h->ip->length) ||
-		   nhgets(h->ip->length) != hlen)
-			goto raise;
-		break;
-	case SPECIAL_I:
-	case SPECIAL_D:
-		/*
-		 * Actual changes match one of our special case encodings --
-		 * send packet uncompressed.
-		 */
-		goto raise;
-	case NEW_S | NEW_A:
-		if (deltaS == deltaA &&
-			deltaS == nhgets(h->ip->length) - hlen) {
-			/* special case for echoed terminal traffic */
-			changes = SPECIAL_I;
-			cp = new_seq;
-		}
-		break;
-	case NEW_S:
-		if (deltaS == nhgets(h->ip->length) - hlen) {
-			/* special case for data xfer */
-			changes = SPECIAL_D;
-			cp = new_seq;
-		}
-		break;
-	}
-	deltaS = nhgets(ip->id) - nhgets(h->ip->id);
-	if(deltaS != 1) {
-		cp += encode(cp, deltaS);
-		changes |= NEW_I;
-	}
-	if (tcp->flag[1] & PSH)
-		changes |= TCP_PUSH_BIT;
-	/*
-	 * Grab the cksum before we overwrite it below. Then update our
-	 * state with this packet's header.
-	 */
-	deltaA = nhgets(tcp->cksum);
-	memmove(h->buf, b->rp, hlen);
-	h->len = hlen;
-	h->tcp = (Tcphdr*)(h->buf + iplen);
-
-	/*
-	 * We want to use the original packet as our compressed packet. (cp -
-	 * new_seq) is the number of bytes we need for compressed sequence
-	 * numbers. In addition we need one byte for the change mask, one
-	 * for the connection id and two for the tcp checksum. So, (cp -
-	 * new_seq) + 4 bytes of header are needed. hlen is how many bytes
-	 * of the original packet to toss so subtract the two to get the new
-	 * packet size. The temporaries are gross -egs.
-	 */
-	deltaS = cp - new_seq;
-	cp = b->rp;
-	if(comp->lastxmit != j || comp->compressid == 0) {
-		comp->lastxmit = j;
-		hlen -= deltaS + 4;
-		cp += hlen;
-		*cp++ = (changes | NEW_C);
-		*cp++ = j;
-	} else {
-		hlen -= deltaS + 3;
-		cp += hlen;
-		*cp++ = changes;
-	}
-	b->rp += hlen;
-	hnputs(cp, deltaA);
-	cp += 2;
-	memmove(cp, new_seq, deltaS);
-	return Pvjctcp;
-
-raise:
-	/*
-	 * Update connection state & send uncompressed packet
-	 */
-	memmove(h->buf, b->rp, hlen);
-	h->tcp = (Tcphdr*)(h->buf + iplen);
-	h->len = hlen;
-	h->ip->proto = j;
-	comp->lastxmit = j;
-	return Pvjutcp;
-}
-
-Block*
-tcpuncompress(Tcpc *comp, Block *b, ushort type, Fs *f)
-{
-	uchar	*cp, changes;
-	int	i;
-	int	iplen, len;
-	Iphdr	*ip;
-	Tcphdr	*tcp;
-	Hdr	*h;
-
-	if(type == Pvjutcp) {
-		/*
-		 *  Locate the saved state for this connection. If the state
-		 *  index is legal, clear the 'discard' flag.
-		 */
-		ip = (Iphdr*)b->rp;
-		if(ip->proto >= MAX_STATES)
-			goto raise;
-		iplen = (ip->vihl & 0xf) << 2;
-		tcp = (Tcphdr*)(b->rp + iplen);
-		comp->lastrecv = ip->proto;
-		len = iplen + ((tcp->flag[0] & 0xf0) >> 2);
-		comp->err = 0;
-netlog(f, Logcompress, "uncompressed %d\n", comp->lastrecv);
-		/*
-		 * Restore the IP protocol field then save a copy of this
-		 * packet header. The checksum is zeroed in the copy so we
-		 * don't have to zero it each time we process a compressed
-		 * packet.
-		 */
-		ip->proto = IP_TCPPROTO;
-		h = &comp->r[comp->lastrecv];
-		memmove(h->buf, b->rp, len);
-		h->tcp = (Tcphdr*)(h->buf + iplen);
-		h->len = len;
-		h->ip->cksum[0] = h->ip->cksum[1] = 0;
-		return b;
-	}
-
-	cp = b->rp;
-	changes = *cp++;
-	if(changes & NEW_C) {
-		/*
-		 * Make sure the state index is in range, then grab the
-		 * state. If we have a good state index, clear the 'discard'
-		 * flag.
-		 */
-		if(*cp >= MAX_STATES)
-			goto raise;
-		comp->err = 0;
-		comp->lastrecv = *cp++;
-netlog(f, Logcompress, "newc %d\n", comp->lastrecv);
-	} else {
-		/*
-		 * This packet has no state index. If we've had a
-		 * line error since the last time we got an explicit state
-		 * index, we have to toss the packet.
-		 */
-		if(comp->err != 0){
-			freeblist(b);
-			return nil;
-		}
-netlog(f, Logcompress, "oldc %d\n", comp->lastrecv);
-	}
-
-	/*
-	 * Find the state then fill in the TCP checksum and PUSH bit.
-	 */
-	h = &comp->r[comp->lastrecv];
-	ip = h->ip;
-	tcp = h->tcp;
-	len = h->len;
-	memmove(tcp->cksum, cp, sizeof tcp->cksum);
-	cp += 2;
-	if(changes & TCP_PUSH_BIT)
-		tcp->flag[1] |= PSH;
-	else
-		tcp->flag[1] &= ~PSH;
-	/*
-	 * Fix up the state's ack, seq, urg and win fields based on the
-	 * changemask.
-	 */
-	switch (changes & SPECIALS_MASK) {
-	case SPECIAL_I:
-		i = nhgets(ip->length) - len;
-		hnputl(tcp->ack, nhgetl(tcp->ack) + i);
-		hnputl(tcp->seq, nhgetl(tcp->seq) + i);
-		break;
-
-	case SPECIAL_D:
-		hnputl(tcp->seq, nhgetl(tcp->seq) + nhgets(ip->length) - len);
-		break;
-
-	default:
-		if(changes & NEW_U) {
-			tcp->flag[1] |= URG;
-			if(*cp == 0){
-				hnputs(tcp->urg, nhgets(cp+1));
-				cp += 3;
-			}else
-				hnputs(tcp->urg, *cp++);
-		} else
-			tcp->flag[1] &= ~URG;
-		if(changes & NEW_W)
-			DECODES(tcp->win)
-		if(changes & NEW_A)
-			DECODEL(tcp->ack)
-		if(changes & NEW_S)
-			DECODEL(tcp->seq)
-		break;
-	}
-
-	/* Update the IP ID */
-	if(changes & NEW_I)
-		DECODES(ip->id)
-	else
-		hnputs(ip->id, nhgets(ip->id) + 1);
-
-	/*
-	 *  At this point, cp points to the first byte of data in the packet.
-	 *  Back up cp by the TCP/IP header length to make room for the
-	 *  reconstructed header.
-	 *  We assume the packet we were handed has enough space to prepend
-	 *  up to 128 bytes of header.
-	 */
-	b->rp = cp;
-	if(b->rp - b->base < len){
-		b = padblock(b, len);
-		b = pullupblock(b, blocklen(b));
-	} else
-		b->rp -= len;
-	hnputs(ip->length, BLEN(b));
-	memmove(b->rp, ip, len);
-	
-	/* recompute the ip header checksum */
-	ip = (Iphdr*)b->rp;
-	hnputs(ip->cksum, ipcsum(b->rp));
-	return b;
-
-raise:
-	netlog(f, Logcompress, "Bad Packet!\n");
-	comp->err = 1;
-	freeblist(b);
-	return nil;
-}
-
-Tcpc*
-compress_init(Tcpc *c)
-{
-	int i;
-	Hdr *h;
-
-	if(c == nil){
-		c = malloc(sizeof(Tcpc));
-		if(c == nil)
-			return nil;
-	}
-	memset(c, 0, sizeof(*c));
-	for(i = 0; i < MAX_STATES; i++){
-		h = &c->t[i];
-		h->ip = (Iphdr*)h->buf;
-		h->tcp = (Tcphdr*)(h->buf + 10);
-		h->len = 20;
-		h = &c->r[i];
-		h->ip = (Iphdr*)h->buf;
-		h->tcp = (Tcphdr*)(h->buf + 10);
-		h->len = 20;
-	}
-
-	return c;
-}
-
-ushort
-compress(Tcpc *tcp, Block *b, Fs *f)
-{
-	Iphdr		*ip;
-
-	/*
-	 * Bail if this is not a compressible IP packet
-	 */
-	ip = (Iphdr*)b->rp;
-	if((nhgets(ip->frag) & 0x3fff) != 0)
-		return Pip;
-
-	switch(ip->proto) {
-	case IP_TCPPROTO:
-		return tcpcompress(tcp, b, f);
-	default:
-		return Pip;
-	}
-}
-
-int
-compress_negotiate(Tcpc *tcp, uchar *data)
-{
-	if(data[0] != MAX_STATES - 1)
-		return -1;
-	tcp->compressid = data[1];
-	return 0;
-}
--- a/os/ip/devip.c
+++ b/os/ip/devip.c
@@ -14,7 +14,6 @@
 	Qbootp,
 	Qndb,
 	Qiproute,
-	Qiprouter,
 	Qipselftab,
 	Qlog,
 
@@ -43,11 +42,11 @@
 	Maskproto=	(1<<Logproto)-1,
 	Shiftproto=	Logtype + Logconv,
 
-	Nfs=		32,
+	Nfs=		128,
 };
-#define TYPE(x) 	( ((u32)(x).path) & Masktype )
-#define CONV(x) 	( (((u32)(x).path) >> Shiftconv) & Maskconv )
-#define PROTO(x) 	( (((u32)(x).path) >> Shiftproto) & Maskproto )
+#define TYPE(x) 	( ((ulong)(x).path) & Masktype )
+#define CONV(x) 	( (((ulong)(x).path) >> Shiftconv) & Maskconv )
+#define PROTO(x) 	( (((ulong)(x).path) >> Shiftproto) & Maskproto )
 #define QID(p, c, y) 	( ((p)<<(Shiftproto)) | ((c)<<Shiftconv) | (y) )
 
 static char network[] = "network";
@@ -58,8 +57,7 @@
 
 extern	void nullmediumlink(void);
 extern	void pktmediumlink(void);
-static	long ndbwrite(Fs*, char*, ulong, int);
-extern void    closeconv(Conv*);
+	long ndbwrite(Fs *f, char *a, ulong off, int n);
 
 static int
 ip3gen(Chan *c, int i, Dir *dp)
@@ -121,7 +119,7 @@
 		mkqid(&q, QID(PROTO(c->qid), 0, Qstats), 0, QTFILE);
 		devdir(c, q, "stats", 0, network, 0444, dp);
 		return 1;
-	}	
+	}
 	return -1;
 }
 
@@ -144,11 +142,10 @@
 		return -1;
 	case Qarp:
 		p = "arp";
+		prot = 0664;
 		break;
 	case Qbootp:
 		p = "bootp";
-		if(bootp == nil)
-			return 0;
 		break;
 	case Qndb:
 		p = "ndb";
@@ -157,14 +154,12 @@
 		break;
 	case Qiproute:
 		p = "iproute";
+		prot = 0664;
 		break;
 	case Qipselftab:
 		p = "ipselftab";
 		prot = 0444;
 		break;
-	case Qiprouter:
-		p = "iprouter";
-		break;
 	case Qlog:
 		p = "log";
 		break;
@@ -188,7 +183,7 @@
 	case Qtopdir:
 		if(s == DEVDOTDOT){
 			mkqid(&q, QID(0, 0, Qtopdir), 0, QTDIR);
-			sprint(up->genbuf, "#I%ud", c->dev);
+			snprint(up->genbuf, sizeof up->genbuf, "#I%lud", c->dev);
 			devdir(c, q, up->genbuf, 0, network, 0555, dp);
 			return 1;
 		}
@@ -206,19 +201,18 @@
 	case Qndb:
 	case Qlog:
 	case Qiproute:
-	case Qiprouter:
 	case Qipselftab:
 		return ip1gen(c, TYPE(c->qid), dp);
 	case Qprotodir:
 		if(s == DEVDOTDOT){
 			mkqid(&q, QID(0, 0, Qtopdir), 0, QTDIR);
-			sprint(up->genbuf, "#I%ud", c->dev);
+			snprint(up->genbuf, sizeof up->genbuf, "#I%lud", c->dev);
 			devdir(c, q, up->genbuf, 0, network, 0555, dp);
 			return 1;
 		}
 		if(s < f->p[PROTO(c->qid)]->ac) {
 			cv = f->p[PROTO(c->qid)]->conv[s];
-			sprint(up->genbuf, "%d", s);
+			snprint(up->genbuf, sizeof up->genbuf, "%d", s);
 			mkqid(&q, QID(PROTO(c->qid), s, Qconvdir), 0, QTDIR);
 			devdir(c, q, up->genbuf, 0, cv->owner, 0555, dp);
 			return 1;
@@ -262,45 +256,14 @@
 	fmtinstall('M', eipfmt);
 }
 
-static Fs*
-ipgetfs(int dev)
-{
-	extern void (*ipprotoinit[])(Fs*);
-	Fs *f;
-	int i;
-
-	if(dev >= Nfs)
-		return nil;
-
-	qlock(&fslock);
-	if(ipfs[dev] == nil){
-		f = smalloc(sizeof(Fs));
-		ip_init(f);
-		arpinit(f);
-		netloginit(f);
-		for(i = 0; ipprotoinit[i]; i++)
-			ipprotoinit[i](f);
-		f->dev = dev;
-		ipfs[dev] = f;
-	}
-	qunlock(&fslock);
-
-	return ipfs[dev];
-}
-
 IPaux*
 newipaux(char *owner, char *tag)
 {
 	IPaux *a;
-	int n;
 
 	a = smalloc(sizeof(*a));
 	kstrdup(&a->owner, owner);
-	memset(a->tag, ' ', sizeof(a->tag));
-	n = strlen(tag);
-	if(n > sizeof(a->tag))
-		n = sizeof(a->tag);
-	memmove(a->tag, tag, n);
+	strncpy(a->tag, tag, sizeof(a->tag));
 	return a;
 }
 
@@ -310,13 +273,29 @@
 ipattach(char* spec)
 {
 	Chan *c;
-	int dev;
+	ulong dev;
 
-	dev = atoi(spec);
+	dev = strtoul(spec, nil, 10);
 	if(dev >= Nfs)
-		error("bad specification");
+		error(Enodev);
 
-	ipgetfs(dev);
+	qlock(&fslock);
+	if(ipfs[dev] == nil){
+		extern void (*ipprotoinit[])(Fs*);
+		Fs *f;
+		int i;
+
+		f = smalloc(sizeof(Fs));
+		ip_init(f);
+		arpinit(f);
+		netloginit(f);
+		for(i = 0; ipprotoinit[i]; i++)
+			ipprotoinit[i](f);
+		f->dev = dev;
+		ipfs[dev] = f;
+	}
+	qunlock(&fslock);
+
 	c = devattach('I', spec);
 	mkqid(&c->qid, QID(0, 0, Qtopdir), 0, QTDIR);
 	c->dev = dev;
@@ -327,7 +306,7 @@
 }
 
 static Walkqid*
-ipwalk(Chan* c, Chan *nc, char **name, s32 nname)
+ipwalk(Chan* c, Chan *nc, char **name, int nname)
 {
 	IPaux *a = c->aux;
 	Walkqid* w;
@@ -338,8 +317,9 @@
 	return w;
 }
 
-static s32
-ipstat(Chan* c, uchar* db, s32 n)
+
+static int
+ipstat(Chan* c, uchar* db, int n)
 {
 	return devstat(c, db, n, nil, 0, ipgen);
 }
@@ -360,7 +340,7 @@
 };
 
 static Chan*
-ipopen(Chan* c, u32 omode)
+ipopen(Chan* c, int omode)
 {
 	Conv *cv, *nc;
 	Proto *p;
@@ -375,7 +355,7 @@
 	default:
 		break;
 	case Qndb:
-		if(omode & (OWRITE|OTRUNC) && !iseve())
+		if((omode & (OWRITE|OTRUNC)) != 0 && !iseve())
 			error(Eperm);
 		if((omode & (OWRITE|OTRUNC)) == (OWRITE|OTRUNC))
 			f->ndb[0] = 0;
@@ -383,10 +363,10 @@
 	case Qlog:
 		netlogopen(f);
 		break;
-	case Qiprouter:
-		iprouteropen(f);
-		break;
 	case Qiproute:
+	case Qarp:
+		if(omode != OREAD && !iseve())
+			error(Eperm);
 		break;
 	case Qtopdir:
 	case Qprotodir:
@@ -412,13 +392,8 @@
 	case Qclone:
 		p = f->p[PROTO(c->qid)];
 		qlock(p);
-		if(waserror()){
-			qunlock(p);
-			nexterror();
-		}
 		cv = Fsprotoclone(p, ATTACHER(c));
 		qunlock(p);
-		poperror();
 		if(cv == nil) {
 			error(Enodev);
 			break;
@@ -437,15 +412,12 @@
 			qunlock(p);
 			nexterror();
 		}
-		if((perm & (cv->perm>>6)) != perm) {
-			if(strcmp(ATTACHER(c), cv->owner) != 0)
-				error(Eperm);
-		 	if((perm & cv->perm) != perm)
-				error(Eperm); 
+		if(strcmp(ATTACHER(c), cv->owner) == 0)
+			perm <<= 6;
+		if((perm & cv->perm) != perm && !iseve())
+			error(Eperm);
 
-		}
-		cv->inuse++;
-		if(cv->inuse == 1){
+		if(++cv->inuse == 1){
 			kstrdup(&cv->owner, ATTACHER(c));
 			cv->perm = 0660;
 		}
@@ -455,24 +427,26 @@
 		break;
 	case Qlisten:
 		cv = f->p[PROTO(c->qid)]->conv[CONV(c->qid)];
-		if((perm & (cv->perm>>6)) != perm) {
-			if(strcmp(ATTACHER(c), cv->owner) != 0)
-				error(Eperm);
-		 	if((perm & cv->perm) != perm)
-				error(Eperm); 
-
+		qlock(cv);
+		if(waserror()){
+			qunlock(cv);
+			nexterror();
 		}
+		if(strcmp(ATTACHER(c), cv->owner) == 0)
+			perm <<= 6;
+		if((perm & cv->perm) != perm && !iseve())
+			error(Eperm);
 
 		if(cv->state != Announced)
 			error("not announced");
 
+		cv->inuse++;
+		qunlock(cv);
+		poperror();
 		if(waserror()){
 			closeconv(cv);
 			nexterror();
 		}
-		qlock(cv);
-		cv->inuse++;
-		qunlock(cv);
 
 		nc = nil;
 		while(nc == nil) {
@@ -494,7 +468,6 @@
 			if(nc != nil){
 				cv->incall = nc->next;
 				mkqid(&c->qid, QID(PROTO(c->qid), nc->x, Qctl), 0, QTFILE);
-				kstrdup(&cv->owner, ATTACHER(c));
 			}
 			qunlock(cv);
 
@@ -511,13 +484,25 @@
 	return c;
 }
 
-static s32
-ipwstat(Chan *c, uchar *dp, s32 n)
+static Chan*
+ipcreate(Chan*, char*, int, ulong)
 {
-	Dir *d;
+	error(Eperm);
+	return 0;
+}
+
+static void
+ipremove(Chan*)
+{
+	error(Eperm);
+}
+
+static int
+ipwstat(Chan *c, uchar *dp, int n)
+{
+	Dir *dir;
 	Conv *cv;
 	Fs *f;
-	Proto *p;
 
 	f = ipfs[c->dev];
 	switch(TYPE(c->qid)) {
@@ -529,28 +514,40 @@
 		break;
 	}
 
-	d = smalloc(sizeof(*d)+n);
+	dir = smalloc(sizeof(Dir)+n);
 	if(waserror()){
-		free(d);
+		free(dir);
 		nexterror();
 	}
-	n = convM2D(dp, n, d, (char*)&d[1]);
+	n = convM2D(dp, n, &dir[0], (char*)&dir[1]);
 	if(n == 0)
 		error(Eshortstat);
-	p = f->p[PROTO(c->qid)];
-	cv = p->conv[CONV(c->qid)];
-	if(!iseve() && strcmp(ATTACHER(c), cv->owner) != 0)
+
+	cv = f->p[PROTO(c->qid)]->conv[CONV(c->qid)];
+	qlock(cv);
+	if(waserror()){
+		qunlock(cv);
+		nexterror();
+	}
+	if(strcmp(ATTACHER(c), cv->owner) != 0 && !iseve())
 		error(Eperm);
-	if(!emptystr(d->uid))
-		kstrdup(&cv->owner, d->uid);
-	if(d->mode != ~0UL)
-		cv->perm = d->mode & 0777;
+	if(!emptystr(dir->uid)){
+		if(strcmp(dir->uid, commonuser()) != 0 && !iseve())
+			error(Eperm);
+		kstrdup(&cv->owner, dir->uid);
+	}
+	if(dir->mode != ~0UL)
+		cv->perm = dir->mode & 0666;
+	qunlock(cv);
 	poperror();
-	free(d);
+
+	free(dir);
+	poperror();
+
 	return n;
 }
 
-extern void
+void
 closeconv(Conv *cv)
 {
 	Conv *nc;
@@ -564,7 +561,7 @@
 	}
 
 	/* close all incoming calls since no listen will ever happen */
-	for(nc = cv->incall; nc; nc = cv->incall){
+	for(nc = cv->incall; nc != nil; nc = cv->incall){
 		cv->incall = nc->next;
 		closeconv(nc);
 	}
@@ -576,9 +573,9 @@
 	while((mp = cv->multi) != nil)
 		ipifcremmulti(cv, mp->ma, mp->ia);
 
-	cv->r = nil;
-	cv->rgen = 0;
-	cv->p->close(cv);
+	if(cv->p->close != nil)
+		(*cv->p->close)(cv);
+
 	cv->state = Idle;
 	qunlock(cv);
 }
@@ -596,10 +593,6 @@
 		if(c->flag & COPEN)
 			netlogclose(f);
 		break;
-	case Qiprouter:
-		if(c->flag & COPEN)
-			iprouterclose(f);
-		break;
 	case Qdata:
 	case Qctl:
 	case Qerr:
@@ -620,13 +613,13 @@
 	Statelen=	32*1024,
 };
 
-static s32
-ipread(Chan *ch, void *a, s32 n, s64 off)
+static long
+ipread(Chan *ch, void *a, long n, vlong off)
 {
 	Conv *c;
 	Proto *x;
 	char *buf, *p;
-	s32 rv;
+	long rv;
 	Fs *f;
 	ulong offset = off;
 
@@ -648,21 +641,22 @@
 		return readstr(offset, a, n, f->ndb);
 	case Qiproute:
 		return routeread(f, a, offset, n);
-	case Qiprouter:
-		return iprouterread(f, a, n);
 	case Qipselftab:
 		return ipselftabread(f, a, offset, n);
 	case Qlog:
 		return netlogread(f, a, offset, n);
 	case Qctl:
-		sprint(up->genbuf, "%ud", CONV(ch->qid));
-		return readstr(offset, p, n, up->genbuf);
+		buf = smalloc(16);
+		snprint(buf, 16, "%lud", CONV(ch->qid));
+		rv = readstr(offset, p, n, buf);
+		free(buf);
+		return rv;
 	case Qremote:
 		buf = smalloc(Statelen);
 		x = f->p[PROTO(ch->qid)];
 		c = x->conv[CONV(ch->qid)];
 		if(x->remote == nil) {
-			sprint(buf, "%I!%d\n", c->raddr, c->rport);
+			snprint(buf, Statelen, "%I!%d\n", c->raddr, c->rport);
 		} else {
 			(*x->remote)(c, buf, Statelen-2);
 		}
@@ -674,7 +668,7 @@
 		x = f->p[PROTO(ch->qid)];
 		c = x->conv[CONV(ch->qid)];
 		if(x->local == nil) {
-			sprint(buf, "%I!%d\n", c->laddr, c->lport);
+			snprint(buf, Statelen, "%I!%d\n", c->laddr, c->lport);
 		} else {
 			(*x->local)(c, buf, Statelen-2);
 		}
@@ -711,7 +705,7 @@
 }
 
 static Block*
-ipbread(Chan* ch, s32 n, u32 offset)
+ipbread(Chan* ch, long n, ulong offset)
 {
 	Conv *c;
 	Proto *x;
@@ -740,7 +734,7 @@
 /*
  *  set a local port making sure the quad of raddr,rport,laddr,lport is unique
  */
-static char*
+char*
 setluniqueport(Conv* c, int lport)
 {
 	Proto *p;
@@ -771,51 +765,63 @@
 }
 
 /*
+ * is lport in use by anyone?
+ */
+static int
+lportinuse(Proto *p, ushort lport)
+{
+	int x;
+
+	for(x = 0; x < p->nc && p->conv[x]; x++)
+		if(p->conv[x]->lport == lport)
+			return 1;
+	return 0;
+}
+
+/*
  *  pick a local port and set it
  */
-extern void
+char *
 setlport(Conv* c)
 {
 	Proto *p;
-	ushort *pp;
-	int x, found;
+	int i, port;
 
 	p = c->p;
-	if(c->restricted)
-		pp = &p->nextrport;
-	else
-		pp = &p->nextport;
 	qlock(p);
-	for(;;(*pp)++){
+	if(c->restricted){
+		/* Restricted ports cycle between 600 and 1024. */
+		for(i=0; i<1024-600; i++){
+			if(p->nextrport >= 1024 || p->nextrport < 600)
+				p->nextrport = 600;
+			port = p->nextrport++;
+			if(!lportinuse(p, port))
+				goto chosen;
+		}
+	}else{
 		/*
-		 * Fsproto initialises p->nextport to 0 and the restricted
-		 * ports (p->nextrport) to 600.
-		 * Restricted ports must lie between 600 and 1024.
-		 * For the initial condition or if the unrestricted port number
-		 * has wrapped round, select a random port between 5000 and 1<<15
-		 * to start at.
+		 * Unrestricted ports are chosen randomly
+		 * between 2^15 and 2^16.  There are at most
+		 * 4*Nchan = 4096 ports in use at any given time,
+		 * so even in the worst case, a random probe has a
+		 * 1 - 4096/2^15 = 87% chance of success.
+		 * If 64 successive probes fail, there is a bug somewhere
+		 * (or a once in 10^58 event has happened, but that's
+		 * less likely than a venti collision).
 		 */
-		if(c->restricted){
-			if(*pp >= 1024)
-				*pp = 600;
+		for(i=0; i<64; i++){
+			port = (1<<15) + nrand(1<<15);
+			if(!lportinuse(p, port))
+				goto chosen;
 		}
-		else while(*pp < 5000)
-			*pp = nrand(1<<15);
-
-		found = 0;
-		for(x = 0; x < p->nc; x++){
-			if(p->conv[x] == nil)
-				break;
-			if(p->conv[x]->lport == *pp){
-				found = 1;
-				break;
-			}
-		}
-		if(found == 0)
-			break;
 	}
-	c->lport = (*pp)++;
 	qunlock(p);
+	return "no ports available";
+
+chosen:
+	c->lport = port;
+	qunlock(p);
+	return nil;
 }
 
 /*
@@ -822,7 +828,7 @@
  *  set a local address and port from a string of the form
  *	[address!]port[!r]
  */
-static char*
+char*
 setladdrport(Conv* c, char* str, int announcing)
 {
 	char *p;
@@ -830,8 +836,6 @@
 	ushort lport;
 	uchar addr[IPaddrlen];
 
-	rv = nil;
-
 	/*
 	 *  ignore restricted part if it exists.  it's
 	 *  meaningless on local ports.
@@ -854,8 +858,9 @@
 		if(strcmp(str, "*") == 0)
 			ipmove(c->laddr, IPnoaddr);
 		else {
-			parseip(addr, str);
-			if(ipforme(c->p->f, addr))
+			if(parseip(addr, str) == -1)
+				return Ebadip;
+			if(ipforme(c->p->f, addr) != 0 || ipismulticast(addr))
 				ipmove(c->laddr, addr);
 			else
 				return "not a local IP address";
@@ -869,9 +874,13 @@
 		return setluniqueport(c, 0);
 	}
 
-	lport = atoi(p);
+	str = p;
+	lport = strtol(str, &p, 10);
+	if(p <= str || strchr("!", *p) == nil)
+		return "bad numeric port";
+
 	if(lport <= 0)
-		setlport(c);
+		rv = setlport(c);
 	else
 		rv = setluniqueport(c, lport);
 	return rv;
@@ -886,13 +895,17 @@
 	if(p == nil)
 		return "malformed address";
 	*p++ = 0;
-	parseip(c->raddr, str);
-	c->rport = atoi(p);
-	p = strchr(p, '!');
-	if(p){
-		if(strstr(p, "!r") != nil)
-			c->restricted = 1;
-	}
+	if(parseip(c->raddr, str) == -1)
+		return Ebadip;
+
+	str = p;
+	c->rport = strtol(str, &p, 10);
+	if(p <= str || strchr("!", *p) == nil)
+		return "bad numeric port";
+
+	if(strstr(p, "!r") != nil)
+		c->restricted = 1;
+
 	return nil;
 }
 
@@ -912,7 +925,9 @@
 		if(p != nil)
 			return p;
 		setladdr(c);
-		setlport(c);
+		p = setlport(c);
+		if (p != nil)
+			return p;
 		break;
 	case 3:
 		p = setraddrport(c, argv[1]);
@@ -923,12 +938,7 @@
 			return p;
 	}
 
-	if((memcmp(c->raddr, v4prefix, IPv4off) == 0 &&
-		memcmp(c->laddr, v4prefix, IPv4off) == 0)
-		|| ipcmp(c->raddr, IPnoaddr) == 0)
-		c->ipversion = V4;
-	else
-		c->ipversion = V6;
+	c->ipversion = convipvers(c);
 
 	return nil;
 }
@@ -978,10 +988,11 @@
 	c->rport = 0;
 	switch(argc){
 	default:
-		return "bad args to announce";
+		break;
 	case 2:
 		return setladdrport(c, argv[1], 1);
 	}
+	return "bad args to announce";
 }
 
 /*
@@ -1028,10 +1039,11 @@
 {
 	switch(argc){
 	default:
-		return "bad args to bind";
+		break;
 	case 2:
 		return setladdrport(c, argv[1], 0);
 	}
+	return "bad args to bind";
 }
 
 static void
@@ -1042,7 +1054,7 @@
 	if(x->bind == nil)
 		p = Fsstdbind(c, cb->f, cb->nf);
 	else
-		p = x->bind(c, cb->f, cb->nf);
+		p = (*x->bind)(c, cb->f, cb->nf);
 	if(p != nil)
 		error(p);
 }
@@ -1065,8 +1077,8 @@
 		c->ttl = atoi(cb->f[1]);
 }
 
-static s32
-ipwrite(Chan* ch, void *v, s32 n, s64 off)
+static long
+ipwrite(Chan* ch, void *v, long n, vlong off)
 {
 	Conv *c;
 	Proto *x;
@@ -1075,6 +1087,7 @@
 	uchar ia[IPaddrlen], ma[IPaddrlen];
 	Fs *f;
 	char *a;
+	ulong offset = off;
 
 	a = v;
 	f = ipfs[ch->dev];
@@ -1099,7 +1112,8 @@
 		netlogctl(f, a, n);
 		return n;
 	case Qndb:
-		return ndbwrite(f, a, off, n);
+		return ndbwrite(f, a, offset, n);
+		break;
 	case Qctl:
 		x = f->p[PROTO(ch->qid)];
 		c = x->conv[CONV(ch->qid)];
@@ -1131,13 +1145,15 @@
 			if(cb->nf == 2){
 				if(!ipismulticast(c->raddr))
 					error("addmulti for a non multicast address");
-				parseip(ia, cb->f[1]);
+				if (parseip(ia, cb->f[1]) == -1)
+					error(Ebadip);
 				ipifcaddmulti(c, c->raddr, ia);
 			} else {
-				parseip(ma, cb->f[2]);
+				if (parseip(ia, cb->f[1]) == -1 ||
+				    parseip(ma, cb->f[2]) == -1)
+					error(Ebadip);
 				if(!ipismulticast(ma))
 					error("addmulti for a non multicast address");
-				parseip(ia, cb->f[1]);
 				ipifcaddmulti(c, ma, ia);
 			}
 		} else if(strcmp(cb->f[0], "remmulti") == 0){
@@ -1145,10 +1161,11 @@
 				error("remmulti needs interface address");
 			if(!ipismulticast(c->raddr))
 				error("remmulti for a non multicast address");
-			parseip(ia, cb->f[1]);
+			if (parseip(ia, cb->f[1]) == -1)
+				error(Ebadip);
 			ipifcremmulti(c, c->raddr, ia);
 		} else if(x->ctl != nil) {
-			p = x->ctl(c, cb->f, cb->nf);
+			p = (*x->ctl)(c, cb->f, cb->nf);
 			if(p != nil)
 				error(p);
 		} else
@@ -1160,13 +1177,12 @@
 	return n;
 }
 
-static s32
-ipbwrite(Chan* ch, Block* bp, u32 offset)
+static long
+ipbwrite(Chan* ch, Block* bp, ulong offset)
 {
 	Conv *c;
 	Proto *x;
 	Fs *f;
-	int n;
 
 	switch(TYPE(ch->qid)){
 	case Qdata:
@@ -1177,11 +1193,7 @@
 		if(c->wq == nil)
 			error(Eperm);
 
-		if(bp->next)
-			bp = concatblock(bp);
-		n = BLEN(bp);
-		qbwrite(c->wq, bp);
-		return n;
+		return qbwrite(c->wq, bp);
 	default:
 		return devbwrite(ch, bp, offset);
 	}
@@ -1198,13 +1210,13 @@
 	ipwalk,
 	ipstat,
 	ipopen,
-	devcreate,
+	ipcreate,
 	ipclose,
 	ipread,
 	ipbread,
 	ipwrite,
 	ipbwrite,
-	devremove,
+	ipremove,
 	ipwstat,
 };
 
@@ -1224,12 +1236,15 @@
 
 	p->qid.type = QTDIR;
 	p->qid.path = QID(f->np, 0, Qprotodir);
+	if(p->nc > Maskconv+1){
+		print("Fsproto: %s nc %d > %d\n", p->name, p->nc, Maskconv+1);
+		p->nc = Maskconv+1;
+	}
 	p->conv = malloc(sizeof(Conv*)*(p->nc+1));
 	if(p->conv == nil)
 		panic("Fsproto");
 
 	p->x = f->np;
-	p->nextport = 0;
 	p->nextrport = 600;
 	f->p[f->np++] = p;
 
@@ -1262,21 +1277,33 @@
 		if(c == nil){
 			c = malloc(sizeof(Conv));
 			if(c == nil)
-				error(Enomem);
-			qlock(c);
+				return nil;
+			if(waserror()){
+				qfree(c->rq);
+				qfree(c->wq);
+				qfree(c->eq);
+				qfree(c->sq);
+				free(c->ptcl);
+				free(c);
+				return nil;
+			}
 			c->p = p;
 			c->x = pp - p->conv;
 			if(p->ptclsize != 0){
 				c->ptcl = malloc(p->ptclsize);
-				if(c->ptcl == nil) {
-					free(c);
+				if(c->ptcl == nil)
 					error(Enomem);
-				}
 			}
-			*pp = c;
-			p->ac++;
 			c->eq = qopen(1024, Qmsg, 0, 0);
+			if(c->eq == nil)
+				error(Enomem);
 			(*p->create)(c);
+			if(c->rq == nil || c->wq == nil)
+				error(Enomem);
+			poperror();
+			qlock(c);
+			*pp = c;
+			p->ac++;
 			break;
 		}
 		if(canqlock(c)){
@@ -1291,8 +1318,11 @@
 		}
 	}
 	if(pp >= ep) {
-		if(p->gc != nil && (*p->gc)(p))
-			goto retry;
+		if(p->gc != nil){
+			print("Fsprotoclone: garbage collecting %s Convs\n", p->name);
+			if((*p->gc)(p))
+				goto retry;
+		}
 		return nil;
 	}
 
@@ -1307,8 +1337,9 @@
 	c->lport = 0;
 	c->rport = 0;
 	c->restricted = 0;
+	c->ignoreadvice = 0;
 	c->ttl = MAXTTL;
-	c->tos = DFLTTOS;
+	c->tos = 0;
 	qreopen(c->rq);
 	qreopen(c->wq);
 	qreopen(c->eq);
@@ -1321,7 +1352,7 @@
 Fsconnected(Conv* c, char* msg)
 {
 	if(msg != nil && *msg != '\0')
-		kstrcpy(c->cerr, msg, sizeof(c->cerr));
+		strncpy(c->cerr, msg, ERRMAX-1);
 
 	switch(c->state){
 
@@ -1368,12 +1399,19 @@
 	for(l = &c->incall; *l; l = &(*l)->next)
 		i++;
 	if(i >= Maxincall) {
+		static int beenhere;
+
 		qunlock(c);
+		if (!beenhere) {
+			beenhere = 1;
+			print("Fsnewcall: incall queue full (%d) on port %d\n",
+				i, c->lport);
+		}
 		return nil;
 	}
 
 	/* find a free conversation */
-	nc = Fsprotoclone(c->p, network);
+	nc = Fsprotoclone(c->p, c->owner);
 	if(nc == nil) {
 		qunlock(c);
 		return nil;
@@ -1394,12 +1432,12 @@
 	return nc;
 }
 
-static long
+long
 ndbwrite(Fs *f, char *a, ulong off, int n)
 {
 	if(off > strlen(f->ndb))
 		error(Eio);
-	if(off+n >= sizeof(f->ndb)-1)
+	if(off+n >= sizeof(f->ndb))
 		error(Eio);
 	memmove(f->ndb+off, a, n);
 	f->ndb[off+n] = 0;
@@ -1411,7 +1449,7 @@
 ulong
 scalednconv(void)
 {
-	if(conf.npage*BY2PG >= 128*MB)
+	if(cpuserver && conf.npage*BY2PG >= 128*MB)
 		return Nchans*4;
 	return Nchans;
 }
--- a/os/ip/dhcp.c
+++ /dev/null
@@ -1,447 +1,0 @@
-#include "u.h"
-#include "../port/lib.h"
-#include "mem.h"
-#include "dat.h"
-#include "fns.h"
-#include "../port/error.h"
-#include "kernel.h"
-#include "ip.h"
-#include "ppp.h"
-
-Ipaddr pppdns[2];
-
-static	ulong	fsip;
-static	ulong	auip;
-static	ulong	gwip;
-static	ulong	ipmask;
-static	ulong	ipaddr;
-static	ulong	dns1ip;
-static	ulong	dns2ip;
-
-int		dhcpmsgtype;
-int		debug=0;
-enum
-{
-	Bootrequest = 1,
-	Bootreply   = 2,
-};
-
-typedef struct Bootp
-{
-	/* udp.c oldheader */
-	uchar	raddr[IPaddrlen];
-	uchar	laddr[IPaddrlen];
-	uchar	rport[2];
-	uchar	lport[2];
-	/* bootp itself */
-	uchar	op;			/* opcode */
-	uchar	htype;		/* hardware type */
-	uchar	hlen;			/* hardware address len */
-	uchar	hops;		/* hops */
-	uchar	xid[4];		/* a random number */
-	uchar	secs[2];		/* elapsed snce client started booting */
-	uchar	flags[2];		/* flags */
-	uchar	ciaddr[4];		/* client IP address (client tells server) */
-	uchar	yiaddr[4];		/* client IP address (server tells client) */
-	uchar	siaddr[4];		/* server IP address */
-	uchar	giaddr[4];		/* gateway IP address */
-	uchar	chaddr[16];	/* client hardware address */
-	uchar	sname[64];	/* server host name (optional) */
-	uchar	file[128];		/* boot file name */
-	uchar	vend[128];	/* vendor-specific goo 340 */
-} Bootp;
-
-static	Bootp	req;
-static	Proc*	rcvprocp;
-static	int	recv;
-static	int	done;
-static	Rendez	bootpr;
-static	char	rcvbuf[512+2*IPaddrlen+2*2];	  /* 576 */
-static	uchar sid[4];
-static	ulong iplease;
-
-/*
- * bootp returns:
- *
- * "fsip d.d.d.d
- * auip d.d.d.d
- * gwip d.d.d.d
- * ipmask d.d.d.d
- * ipaddr d.d.d.d
- * dns1ip	d.d.d.d
- * dns2ip	d.d.d.d
- *
- * where d.d.d.d is the IP address in dotted decimal notation, and each
- * address is followed by a newline.
-	Last change:  SUN  13 Sep 2001    4:36 pm
- */
-
-/*
- * Parse the vendor specific fields according to RFC 1084.
- * We are overloading the "cookie server" to be the Inferno 
- * authentication server and the "resource location server"
- * to be the Inferno file server.
- *
- * If the vendor specific field is formatted properly, it
- * will being with the four bytes 99.130.83.99 and end with
- * an 0xFF byte.
- */
-static int
-parsevend(uchar* pvend)
-{	
-	uchar *vend=pvend;
-	int dhcpmsg=0;
-	/* The field must start with 99.130.83.99 to be compliant */
-	if ((vend[0] != 99) || (vend[1] != 130) || (vend[2] != 83) || (vend[3] != 99)){
-		print("bad bootp vendor field: %.2x%.2x%.2x%.2x", vend[0], vend[1], vend[2], vend[3]);
-		return -1;
-	}
-
-	/* Skip over the magic cookie */
-	vend += 4;
-
-	while ((vend[0] != 0) && (vend[0] != 0xFF)) {
-		int i;
-//	
-		if(debug){
-			print(">>>Opt[%d] [%d]", vend[0], vend[1]);
-			for(i=0; i<vend[1]; i++)
-				print(" %2.2x", vend[i+2]);
-			print("\n");
-		}
-//
-		switch (vend[0]) {
-		case 1:	/* Subnet mask field */
-			/* There must be only one subnet mask */
-			if (vend[1] == 4)
-				ipmask = (vend[2]<<24)|(vend[3]<<16)| (vend[4]<<8)| vend[5];
-			else{ 
-				return -1;
-			}
-			break;
-
-		case 3:	/* Gateway/router field */
-			/* We are only concerned with first address */
-			if (vend[1] >0 && vend[1]%4==0)
-				gwip = (vend[2]<<24)|(vend[3]<<16)|(vend[4]<<8)|vend[5];
-			else 
-				return -1;
-			break;
-		case 6:	/* domain name server */
-			if(vend[1]>0 && vend[1] %4==0){
-				dns1ip=(vend[2]<<24)|(vend[3]<<16)|(vend[4]<<8)|vend[5];
-				if(vend[1]>4)
-					dns2ip=(vend[6]<<24)|(vend[7]<<16)|(vend[8]<<8)|vend[9];
-			}else
-				return -1;
-			break;
-
-		case 8:	/* "Cookie server" (auth server) field */
-			/* We are only concerned with first address */
-			if (vend[1] > 0 && vend[1]%4==0)
-				auip = (vend[2]<<24)|(vend[3]<<16)|(vend[4]<<8)|vend[5];
-			else
-				return -1;
-			break;
-
-		case 11:	/* "Resource loc server" (file server) field */
-			/* We are only concerned with first address */
-			if (vend[1] > 0 && vend[1]%4==0)
-				fsip = (vend[2]<<24)| (vend[3]<<16)| (vend[4]<<8)| vend[5];
-			else
-				return -1;
-			break;
-		case 51:	/* ip lease time */
-			if(vend[1]==4){
-				iplease=(vend[2]<<24)|(vend[3]<<16)|(vend[4]<<8)|vend[5];
-			}else
-				return -1;
-			break;
-		case 53:	/* DHCP message type */
-			if(vend[1]==1)
-				dhcpmsg=vend[2];
-			else
-				return -1;
-			break;
-		case 54:	/* server identifier */
-			if(vend[1]==4){
-				memmove(sid, vend+2, 4);
-			}else
-				return -1;
-			break;
-
-		default:	/* Everything else stops us */
-			break;
-		}
-
-		/* Skip over the field */
-		vend += vend[1] + 2;
-	}
-	if(debug)
-		print(">>>Opt[%d] [%d]\n", vend[0], vend[1]);
-	return dhcpmsg;
-}
-
-static void
-dispvend(uchar* pvend)
-{	
-	uchar *vend=pvend;
-
-	//print("<<<Magic : %2.2x%2.2x%2.2x%2.2x\n", vend[0], vend[1], vend[2], vend[3]);
-	
-	vend += 4;		/* Skip over the magic cookie */
-	while ((vend[0] != 0) && (vend[0] != 0xFF)) {
-	//	int i;
-	  //	print("<<<Opt[%d] [%d]", vend[0], vend[1]);
-		//for(i=0; i<vend[1]; i++)
-		//	print(" %2.2x", vend[i+2]);
-		//print("\n");
-	
-		vend += vend[1] + 2;
-	}
-	//print("<<<Opt[ %2.2x] [%2.2x]\n", vend[0], vend[1]);
-}
-
-static void
-rcvbootp(void *a)
-{
-	int n, fd, dhcp;
-	Bootp *rp;
-
-	if(waserror())
-		pexit("", 0);
-	rcvprocp = up;	/* store for postnote below */
-	fd = (int)a;
-	while(done == 0) {
-		if(debug)
-			print("rcvbootp:looping\n");
-
-		n = kread(fd, rcvbuf, sizeof(rcvbuf));
-		if(n <= 0)
-			break;
-		rp = (Bootp*)rcvbuf;
-		if (memcmp(req.chaddr, rp->chaddr, 6) == 0 && rp->htype == 1 && rp->hlen == 6) {
-			ipaddr = (rp->yiaddr[0]<<24)| (rp->yiaddr[1]<<16)| (rp->yiaddr[2]<<8)| rp->yiaddr[3];
-			if(debug)
-				print("ipaddr = %2.2x %2.2x %2.2x %2.2x \n", rp->yiaddr[0], rp->yiaddr[1], rp->yiaddr[2], rp->yiaddr[3]);
-			//memmove(req.siaddr, rp->siaddr, 4);	/* siaddr */
-			dhcp = parsevend(rp->vend);
-	
-			if(dhcpmsgtype < dhcp){
-				dhcpmsgtype=dhcp;
-				recv = 1;
-				wakeup(&bootpr);
-				if(dhcp==0 || dhcp ==5 || dhcp == 6 )
-					break;
-			}
-		}
-	}
-	poperror();
-	rcvprocp = nil;
-
-	if(debug)
-		print("rcvbootp exit\n");
-	pexit("", 0);
-}
-
-static char*
-rbootp(Ipifc *ifc)
-{
-	int cfd, dfd, tries, n;
-	char ia[5+3*16], im[16], *av[3];
-	uchar nipaddr[4], ngwip[4], nipmask[4];
-	char dir[Maxpath];
-	static uchar vend_rfc1048[] = { 99, 130, 83, 99 };
-	uchar *vend;
-
-	/*
-	 * broadcast bootp's till we get a reply,
-	 * or fixed number of tries
-	 */
-	if(debug)
-	    print("dhcp: bootp() called\n");
-	tries = 0;
-	av[1] = "0.0.0.0";
-	av[2] = "0.0.0.0";
-	ipifcadd(ifc, av, 3, 0, nil);
-
-	cfd = kannounce("udp!*!68", dir);
-	if(cfd < 0)
-		return "dhcp announce failed";
-	strcat(dir, "/data");
-	if(kwrite(cfd, "headers", 7) < 0){
-		kclose(cfd);
-		return "dhcp ctl headers failed";
-	}
-	kwrite(cfd, "oldheaders", 10);
-	dfd = kopen(dir, ORDWR);
-	if(dfd < 0){
-		kclose(cfd);
-		return "dhcp open data failed";
-	}
-	kclose(cfd);
-	
-	while(tries<1){
-		tries++;
-		memset(sid, 0, 4);
-		iplease=0;
-		dhcpmsgtype=-2;
-/* DHCPDISCOVER*/
-		done = 0;
-		recv = 0;
-		kproc("rcvbootp", rcvbootp, (void*)dfd, KPDUPFDG);
-		/* Prepare DHCPDISCOVER */	
-		memset(&req, 0, sizeof(req));
-		ipmove(req.raddr, IPv4bcast);
-		hnputs(req.rport, 67);
-		req.op = Bootrequest;
-		req.htype = 1;			/* ethernet (all we know) */
-		req.hlen = 6;			/* ethernet (all we know) */
-		
-		memmove(req.chaddr, ifc->mac, 6);	/* Hardware MAC address */
-		//ipv4local(ifc, req.ciaddr);				/* Fill in the local IP address if we know it */
-		memset(req.file, 0, sizeof(req.file));
-		vend=req.vend;
-		memmove(vend, vend_rfc1048, 4); vend+=4;
-		*vend++=53; *vend++=1;*vend++=1;		/* dhcp msg type==3, dhcprequest */
-		
-		*vend++=61;*vend++=7;*vend++=1;
-		memmove(vend, ifc->mac, 6);vend+=6;
-		*vend=0xff;
-
-		if(debug)
-			dispvend(req.vend); 
-		for(n=0;n<4;n++){
-			if(kwrite(dfd, &req, sizeof(req))<0)	/* SEND DHCPDISCOVER */
-				print("DHCPDISCOVER: %r");
-		
-			tsleep(&bootpr, return0, 0, 1000);	/* wait DHCPOFFER */
-			if(debug)
-				print("[DHCP] DISCOVER: msgtype = %d\n", dhcpmsgtype);
-
-			if(dhcpmsgtype==2)		/* DHCPOFFER */
-				break;
-			else if(dhcpmsgtype==0)	/* bootp */
-				return nil;
-			else if(dhcpmsgtype== -2)	/* time out */
-				continue;
-			else
-				break;
-			
-		}
-		if(dhcpmsgtype!=2)
-			continue;
-
-/* DHCPREQUEST */	
-		memset(req.vend, 0, sizeof(req.vend));
-		vend=req.vend;
-		memmove(vend, vend_rfc1048, 4);vend+=4;	
-
-		*vend++=53; *vend++=1;*vend++=3;		/* dhcp msg type==3, dhcprequest */
-
-		*vend++=50;	*vend++=4;				/* requested ip address */
-		*vend++=(ipaddr >> 24)&0xff;
-		*vend++=(ipaddr >> 16)&0xff;
-		*vend++=(ipaddr >> 8) & 0xff;
-		*vend++=ipaddr & 0xff;
-
-		*vend++=51;*vend++=4;					/* lease time */
-		*vend++=(iplease>>24)&0xff; *vend++=(iplease>>16)&0xff; *vend++=(iplease>>8)&0xff; *vend++=iplease&0xff;
-
-		*vend++=54; *vend++=4;					/* server identifier */
-		memmove(vend, sid, 4);	vend+=4;
-	
-		*vend++=61;*vend++=07;*vend++=01;		/* client identifier */
-		memmove(vend, ifc->mac, 6);vend+=6;
-		*vend=0xff;
-		if(debug) 
-			dispvend(req.vend); 
-		if(kwrite(dfd, &req, sizeof(req))<0){
-			print("DHCPREQUEST: %r");
-			continue;
-		}
-		tsleep(&bootpr, return0, 0, 2000);
-		if(dhcpmsgtype==5)		/* wait for DHCPACK */
-			break;
-		else
-			continue;
-		/* CHECK ARP */
-		/* DHCPDECLINE */
-	}
-	kclose(dfd);
-	done = 1;
-	if(rcvprocp != nil){
-		postnote(rcvprocp, 1, "timeout", 0);
-		rcvprocp = nil;
-	}
-
-	av[1] = "0.0.0.0";
-	av[2] = "0.0.0.0";
-	ipifcrem(ifc, av, 3);
-
-	hnputl(nipaddr, ipaddr);
-	sprint(ia, "%V", nipaddr);
-	hnputl(nipmask, ipmask);
-	sprint(im, "%V", nipmask);
-	av[1] = ia;
-	av[2] = im;
-	ipifcadd(ifc, av, 3, 0, nil);
-
-	if(gwip != 0) {
-		hnputl(ngwip, gwip);
-		n = sprint(ia, "add 0.0.0.0 0.0.0.0 %V", ngwip);
-		routewrite(ifc->conv->p->f, nil, ia, n);
-	}
-	return nil;
-}
-
-static int
-rbootpread(char *bp, ulong offset, int len)
-{
-	int n, i;
-	char *buf;
-	uchar a[4];
-
-	if(debug)
-		print("dhcp: bootpread() \n");
-	buf = smalloc(READSTR);
-	if(waserror()){
-		free(buf);
-		nexterror();
-	}
-
-	hnputl(a, fsip);
-	n = snprint(buf, READSTR, "fsip %15V\n", a);
-	hnputl(a, auip);
-	n += snprint(buf + n, READSTR-n, "auip %15V\n", a);
-	hnputl(a, gwip);
-	n += snprint(buf + n, READSTR-n, "gwip %15V\n", a);
-	hnputl(a, ipmask);
-	n += snprint(buf + n, READSTR-n, "ipmask %15V\n", a);
-	hnputl(a, ipaddr);
-	n += snprint(buf + n, READSTR-n, "ipaddr %15V\n", a);
-	n += snprint(buf+n, READSTR-n, "expired %lud\n", iplease);
-
-	n += snprint(buf + n, READSTR-n, "dns");
-	if(dns2ip){
-		hnputl(a, dns2ip);
-		n+=snprint(buf + n, READSTR-n, " %15V", a);
-	}
-	if(dns1ip){
-		hnputl(a, dns1ip);
-		n += snprint(buf + n, READSTR-n, " %15V", a);
-	}
-
-	for(i=0; i<2; i++)
-		if(ipcmp(pppdns[i], IPnoaddr) != 0 && ipcmp(pppdns[i], v4prefix) != 0)
-			n += snprint(buf + n, READSTR-n, " %15I", pppdns[i]);
-
-	snprint(buf + n, READSTR-n, "\n");
-	len = readstr(offset, bp, len, buf);
-	poperror();
-	free(buf);
-	return len;
-}
-
-char*	(*bootp)(Ipifc*) = rbootp;
-int	(*bootpread)(char*, ulong, int) = rbootpread;
--- a/os/ip/eipconvtest.c
+++ /dev/null
@@ -1,152 +1,0 @@
-#include <u.h>
-#include <libc.h>
-
-enum
-{
-	Isprefix= 16,
-};
-
-uchar prefixvals[256] =
-{
-[0x00] 0 | Isprefix,
-[0x80] 1 | Isprefix,
-[0xC0] 2 | Isprefix,
-[0xE0] 3 | Isprefix,
-[0xF0] 4 | Isprefix,
-[0xF8] 5 | Isprefix,
-[0xFC] 6 | Isprefix,
-[0xFE] 7 | Isprefix,
-[0xFF] 8 | Isprefix,
-};
-
-uchar v4prefix[16] = {
-	0, 0, 0, 0,
-	0, 0, 0, 0,
-	0, 0, 0xff, 0xff,
-	0, 0, 0, 0
-};
-
-void
-hnputl(void *p, ulong v)
-{
-	uchar *a;
-
-	a = p;
-	a[0] = v>>24;
-	a[1] = v>>16;
-	a[2] = v>>8;
-	a[3] = v;
-}
-
-int
-eipconv(va_list *arg, Fconv *f)
-{
-	char buf[8*5];
-	static char *efmt = "%.2lux%.2lux%.2lux%.2lux%.2lux%.2lux";
-	static char *ifmt = "%d.%d.%d.%d";
-	uchar *p, ip[16];
-	ulong *lp;
-	ushort s;
-	int i, j, n, eln, eli;
-
-	switch(f->chr) {
-	case 'E':		/* Ethernet address */
-		p = va_arg(*arg, uchar*);
-		sprint(buf, efmt, p[0], p[1], p[2], p[3], p[4], p[5]);
-		break;
-	case 'I':		/* Ip address */
-		p = va_arg(*arg, uchar*);
-common:
-		if(memcmp(p, v4prefix, 12) == 0)
-			sprint(buf, ifmt, p[12], p[13], p[14], p[15]);
-		else {
-			/* find longest elision */
-			eln = eli = -1;
-			for(i = 0; i < 16; i += 2){
-				for(j = i; j < 16; j += 2)
-					if(p[j] != 0 || p[j+1] != 0)
-						break;
-				if(j > i && j - i > eln){
-					eli = i;
-					eln = j - i;
-				}
-			}
-
-			/* print with possible elision */
-			n = 0;
-			for(i = 0; i < 16; i += 2){
-				if(i == eli){
-					n += sprint(buf+n, "::");
-					i += eln;
-					if(i >= 16)
-						break;
-				} else if(i != 0)
-					n += sprint(buf+n, ":");
-				s = (p[i]<<8) + p[i+1];
-				n += sprint(buf+n, "%ux", s);
-			}
-		}
-		break;
-	case 'i':		/* v6 address as 4 longs */
-		lp = va_arg(*arg, ulong*);
-		for(i = 0; i < 4; i++)
-			hnputl(ip+4*i, *lp++);
-		p = ip;
-		goto common;
-	case 'V':		/* v4 ip address */
-		p = va_arg(*arg, uchar*);
-		sprint(buf, ifmt, p[0], p[1], p[2], p[3]);
-		break;
-	case 'M':		/* ip mask */
-		p = va_arg(*arg, uchar*);
-
-		/* look for a prefix mask */
-		for(i = 0; i < 16; i++)
-			if(p[i] != 0xff)
-				break;
-		if(i < 16){
-			if((prefixvals[p[i]] & Isprefix) == 0)
-				goto common;
-			for(j = i+1; j < 16; j++)
-				if(p[j] != 0)
-					goto common;
-			n = 8*i + (prefixvals[p[i]] & ~Isprefix);
-		} else
-			n = 8*16;
-
-		/* got one, use /xx format */
-		sprint(buf, "/%d", n);
-		break;
-	default:
-		strcpy(buf, "(eipconv)");
-	}
-	strconv(buf, f);
-	return sizeof(uchar*);
-}
-
-uchar testvec[11][16] =
-{
- { 0,0,0,0, 0,0,0,0, 0,0,0xff,0xff, 1,3,4,5, },
- { 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, },
- { 0xff,0xff,0x80,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, },
- { 0xff,0xff,0xff,0xc0, 0,0,0,0, 0,0,0,0, 0,0,0,0, },
- { 0xff,0xff,0xff,0xff, 0xe0,0,0,0, 0,0,0,0, 0,0,0,0, },
- { 0xff,0xff,0xff,0xff, 0xff,0xf0,0,0, 0,0,0,0, 0,0,0,0, },
- { 0xff,0xff,0xff,0xff, 0xff,0xff,0xf8,0, 0,0,0,0, 0,0,0,0, },
- { 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, },
- { 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, },
- { 0,0,0,0, 0,0x11,0,0, 0,0,0,0, 0,0,0,0, },
- { 0,0,0,0x11, 0,0,0,0, 0,0,0,0, 0,0,0,0x12, },
-};
-
-void
-main(void)
-{
-	int i;
-
-	fmtinstall('I', eipconv);
-	fmtinstall('M', eipconv);
-	for(i = 0; i < 11; i++)
-		print("%I\n%M\n", testvec[i], testvec[i]);
-	exits(0);
-}
--- a/os/ip/esp.c
+++ b/os/ip/esp.c
@@ -1,3 +1,11 @@
+/*
+ * Encapsulating Security Payload for IPsec for IPv4, rfc1827.
+ * extended to IPv6.
+ * rfc2104 defines hmac computation.
+ *	currently only implements tunnel mode.
+ * TODO: verify aes algorithms;
+ *	transport mode (host-to-host)
+ */
 #include	"u.h"
 #include	"../port/lib.h"
 #include	"mem.h"
@@ -6,47 +14,79 @@
 #include	"../port/error.h"
 
 #include	"ip.h"
+#include	"ipv6.h"
+#include	<libsec.h>
 
-#include	"libsec.h"
+#define BITS2BYTES(bi) (((bi) + BI2BY - 1) / BI2BY)
+#define BYTES2BITS(by)  ((by) * BI2BY)
 
+typedef struct Algorithm Algorithm;
+typedef struct Esp4hdr Esp4hdr;
+typedef struct Esp6hdr Esp6hdr;
+typedef struct Espcb Espcb;
 typedef struct Esphdr Esphdr;
+typedef struct Esppriv Esppriv;
 typedef struct Esptail Esptail;
 typedef struct Userhdr Userhdr;
-typedef struct Esppriv Esppriv;
-typedef struct Espcb Espcb;
-typedef struct Algorithm Algorithm;
-typedef struct Esprc4 Esprc4;
 
-#define DPRINT if(0)print
+enum {
+	Encrypt,
+	Decrypt,
 
-enum
-{
-	IP_ESPPROTO	= 50,
-	EsphdrSize	= 28,	// includes IP header
-	IphdrSize	= 20,	// options have been striped
-	EsptailSize	= 2,	// does not include pad or auth data
-	UserhdrSize	= 4,	// user visable header size - if enabled
+	IP_ESPPROTO	= 50,	/* IP v4 and v6 protocol number */
+	Esp4hdrlen	= IP4HDR + 8,
+	Esp6hdrlen	= IP6HDR + 8,
+
+	Esptaillen	= 2,	/* does not include pad or auth data */
+	Userhdrlen	= 4,	/* user-visible header size - if enabled */
+
+	Desblk	 = BITS2BYTES(64),
+	Des3keysz = BITS2BYTES(192),
+
+	Aesblk	 = BITS2BYTES(128),
+	Aeskeysz = BITS2BYTES(128),
 };
 
 struct Esphdr
 {
-	/* ip header */
+	uchar	espspi[4];	/* Security parameter index */
+	uchar	espseq[4];	/* Sequence number */
+	uchar	payload[];
+};
+
+/*
+ * tunnel-mode (network-to-network, etc.) layout is:
+ * new IP hdrs | ESP hdr |
+ *	 enc { orig IP hdrs | TCP/UDP hdr | user data | ESP trailer } | ESP ICV
+ *
+ * transport-mode (host-to-host) layout would be:
+ *	orig IP hdrs | ESP hdr |
+ *			enc { TCP/UDP hdr | user data | ESP trailer } | ESP ICV
+ */
+struct Esp4hdr
+{
+	/* ipv4 header */
 	uchar	vihl;		/* Version and header length */
 	uchar	tos;		/* Type of service */
 	uchar	length[2];	/* packet length */
 	uchar	id[2];		/* Identification */
 	uchar	frag[2];	/* Fragment information */
-	uchar	Unused;	
+	uchar	Unused;
 	uchar	espproto;	/* Protocol */
 	uchar	espplen[2];	/* Header plus data length */
 	uchar	espsrc[4];	/* Ip source */
 	uchar	espdst[4];	/* Ip destination */
 
-	/* esp header */
-	uchar	espspi[4];	/* Security parameter index */
-	uchar	espseq[4];	/* Sequence number */
+	Esphdr;
 };
 
+/* tunnel-mode layout */
+struct Esp6hdr
+{
+	IPV6HDR;
+	Esphdr;
+};
+
 struct Esptail
 {
 	uchar	pad;
@@ -53,16 +93,28 @@
 	uchar	nexthdr;
 };
 
+/* IP-version-dependent data */
+typedef struct Versdep Versdep;
+struct Versdep
+{
+	ulong	version;
+	ulong	iphdrlen;
+	ulong	hdrlen;		/* iphdrlen + esp hdr len */
+	ulong	spi;
+	uchar	laddr[IPaddrlen];
+	uchar	raddr[IPaddrlen];
+};
+
 /* header as seen by the user */
 struct Userhdr
 {
-	uchar	nexthdr;	// next protocol
+	uchar	nexthdr;	/* next protocol */
 	uchar	unused[3];
 };
 
 struct Esppriv
 {
-	ulong	in;
+	uvlong	in;
 	ulong	inerrors;
 };
 
@@ -72,77 +124,68 @@
 struct Espcb
 {
 	int	incoming;
-	int	header;		// user user level header
+	int	header;		/* user-level header */
 	ulong	spi;
-	ulong	seq;		// last seq sent
-	ulong	window;		// for replay attacks
+	ulong	seq;		/* last seq sent */
+	ulong	window;		/* for replay attacks */
+
 	char	*espalg;
-	void	*espstate;	// other state for esp
-	int	espivlen;	// in bytes
+	void	*espstate;	/* other state for esp */
+	int	espivlen;	/* in bytes */
 	int	espblklen;
 	int	(*cipher)(Espcb*, uchar *buf, int len);
+
 	char	*ahalg;
-	void	*ahstate;	// other state for esp
-	int	ahlen;		// auth data length in bytes
+	void	*ahstate;	/* other state for ah */
+	int	ahlen;		/* auth data length in bytes */
 	int	ahblklen;
 	int	(*auth)(Espcb*, uchar *buf, int len, uchar *hash);
+	DigestState *ds;
 };
 
 struct Algorithm
 {
 	char 	*name;
-	int	keylen;		// in bits
-	void	(*init)(Espcb*, char* name, uchar *key, int keylen);
+	int	keylen;		/* in bits */
+	void	(*init)(Espcb*, char* name, uchar *key, unsigned keylen);
 };
 
-
-enum {
-	RC4forward	= 10*1024*1024,	// maximum skip forward
-	RC4back = 100*1024,		// maximum look back
-};
-
-struct Esprc4
-{
-	ulong cseq;	// current byte sequence number
-	RC4state current;
-
-	int ovalid;	// old is valid
-	ulong lgseq; // last good sequence
-	ulong oseq;	// old byte sequence number
-	RC4state old;
-};
-
 static	Conv* convlookup(Proto *esp, ulong spi);
 static	char *setalg(Espcb *ecb, char **f, int n, Algorithm *alg);
-static	void nullespinit(Espcb*, char*, uchar *key, int keylen);
-static	void nullahinit(Espcb*, char*, uchar *key, int keylen);
-static	void shaahinit(Espcb*, char*, uchar *key, int keylen);
-static	void md5ahinit(Espcb*, char*, uchar *key, int keylen);
-static	void desespinit(Espcb *ecb, char *name, uchar *k, int n);
-static	void rc4espinit(Espcb *ecb, char *name, uchar *k, int n);
 static	void espkick(void *x);
 
+static	void nullespinit(Espcb*, char*, uchar *key, unsigned keylen);
+static	void des3espinit(Espcb*, char*, uchar *key, unsigned keylen);
+static	void aescbcespinit(Espcb*, char*, uchar *key, unsigned keylen);
+static	void aesctrespinit(Espcb*, char*, uchar *key, unsigned keylen);
+static	void desespinit(Espcb *ecb, char *name, uchar *k, unsigned n);
+
+static	void nullahinit(Espcb*, char*, uchar *key, unsigned keylen);
+static	void shaahinit(Espcb*, char*, uchar *key, unsigned keylen);
+static	void md5ahinit(Espcb*, char*, uchar *key, unsigned keylen);
+
 static Algorithm espalg[] =
 {
-	"null",			0,	nullespinit,
-	"des_56_cbc",		64,	desespinit,
-	"rc4_128",		128,	rc4espinit,
-	nil,			0,	nil,
+	"null",		0,	nullespinit,
+	"des3_cbc",	192,	des3espinit,	/* new rfc2451, des-ede3 */
+	"aes_128_cbc",	128,	aescbcespinit,	/* new rfc3602 */
+	"aes_ctr",	128,	aesctrespinit,	/* new rfc3686 */
+	"des_56_cbc",	64,	desespinit,	/* rfc2405, deprecated */
+	nil,		0,	nil,
 };
 
 static Algorithm ahalg[] =
 {
-	"null",			0,	nullahinit,
-	"hmac_sha1_96",		128,	shaahinit,
-	"hmac_md5_96",		128,	md5ahinit,
-	nil,			0,	nil,
+	"null",		0,	nullahinit,
+	"hmac_sha1_96",	128,	shaahinit,	/* rfc2404 */
+	"hmac_md5_96",	128,	md5ahinit,	/* rfc2403 */
+	nil,		0,	nil,
 };
 
 static char*
 espconnect(Conv *c, char **argv, int argc)
 {
-	char *p, *pp;
-	char *e = nil;
+	char *p, *pp, *e = nil;
 	ulong spi;
 	Espcb *ecb = (Espcb*)c->ptcl;
 
@@ -157,7 +200,10 @@
 			break;
 		}
 		*p++ = 0;
-		parseip(c->raddr, argv[1]);
+		if (parseip(c->raddr, argv[1]) == -1) {
+			e = Ebadip;
+			break;
+		}
 		findlocalip(c->p->f, c->laddr, c->raddr);
 		ecb->incoming = 0;
 		ecb->seq = 0;
@@ -215,26 +261,86 @@
 	ipmove(c->raddr, IPnoaddr);
 
 	ecb = (Espcb*)c->ptcl;
-	free(ecb->espstate);
-	free(ecb->ahstate);
+	secfree(ecb->espstate);
+	secfree(ecb->ahstate);
 	memset(ecb, 0, sizeof(Espcb));
 }
 
+static int
+pktipvers(Fs *f, Block **bpp)	/* V4 or V6 from the first header byte; 0 if the packet is too short */
+{
+	if (*bpp == nil || BLEN(*bpp) == 0) {
+		/* get enough to identify the IP version */
+		*bpp = pullupblock(*bpp, IP4HDR);
+		if(*bpp == nil) {
+			netlog(f, Logesp, "esp: short packet\n");
+			return 0;	/* NOTE(review): espiput and espadvise pass this straight to getverslens, whose default case panics */
+		}
+	}
+	return (((Esp4hdr*)(*bpp)->rp)->vihl & 0xf0) == IP_VER4? V4: V6;	/* anything that is not v4 is treated as v6 */
+}
+
 static void
+getverslens(int version, Versdep *vp)	/* set vp->version, vp->iphdrlen and vp->hdrlen for V4 or V6 */
+{
+	vp->version = version;
+	switch(vp->version) {
+	case V4:
+		vp->iphdrlen = IP4HDR;
+		vp->hdrlen   = Esp4hdrlen;	/* IP4HDR + 8 bytes of spi and sequence number */
+		break;
+	case V6:
+		vp->iphdrlen = IP6HDR;
+		vp->hdrlen   = Esp6hdrlen;	/* IP6HDR + 8 bytes of spi and sequence number */
+		break;
+	default:
+		panic("esp: getverslens version %d wrong", version);	/* also reached with the 0 that pktipvers returns for a short packet */
+	}
+}
+
+static void
+getpktspiaddrs(uchar *pkt, Versdep *vp)	/* copy addresses and spi out of the headers at pkt; vp->version must already be set */
+{
+	Esp4hdr *eh4;
+	Esp6hdr *eh6;
+
+	switch(vp->version) {
+	case V4:
+		eh4 = (Esp4hdr*)pkt;
+		v4tov6(vp->raddr, eh4->espsrc);	/* packet source is recorded as the remote address */
+		v4tov6(vp->laddr, eh4->espdst);	/* packet destination is recorded as the local address */
+		vp->spi = nhgetl(eh4->espspi);
+		break;
+	case V6:
+		eh6 = (Esp6hdr*)pkt;
+		ipmove(vp->raddr, eh6->src);	/* same source/destination mapping as the v4 case */
+		ipmove(vp->laddr, eh6->dst);
+		vp->spi = nhgetl(eh6->espspi);
+		break;
+	default:
+		panic("esp: getpktspiaddrs vp->version %ld wrong", vp->version);
+	}
+}
+
+/*
+ * encapsulate next IP packet on x's write queue in IP/ESP packet
+ * and initiate output of the result.
+ */
+static void
 espkick(void *x)
 {
+	int nexthdr, payload, pad, align;
+	uchar *auth;
+	Block *bp;
 	Conv *c = x;
-	Esphdr *eh;
+	Esp4hdr *eh4;
+	Esp6hdr *eh6;
+	Espcb *ecb;
 	Esptail *et;
 	Userhdr *uh;
-	Espcb *ecb;
-	Block *bp;
-	int nexthdr;
-	int payload;
-	int pad;
-	int align;
-	uchar *auth;
+	Versdep vers;
 
+	getverslens(convipvers(c), &vers);
 	bp = qget(c->wq);
 	if(bp == nil)
 		return;
@@ -244,7 +350,7 @@
 
 	if(ecb->header) {
 		/* make sure the message has a User header */
-		bp = pullupblock(bp, UserhdrSize);
+		bp = pullupblock(bp, Userhdrlen);
 		if(bp == nil) {
 			qunlock(c);
 			return;
@@ -251,15 +357,16 @@
 		}
 		uh = (Userhdr*)bp->rp;
 		nexthdr = uh->nexthdr;
-		bp->rp += UserhdrSize;
+		bp->rp += Userhdrlen;
 	} else {
-		nexthdr = 0;  // what should this be?
+		nexthdr = 0;	/* what should this be? */
 	}
 
 	payload = BLEN(bp) + ecb->espivlen;
 
 	/* Make space to fit ip header */
-	bp = padblock(bp, EsphdrSize + ecb->espivlen);
+	bp = padblock(bp, vers.hdrlen + ecb->espivlen);
+	getpktspiaddrs(bp->rp, &vers);
 
 	align = 4;
 	if(ecb->espblklen > align)
@@ -266,7 +373,7 @@
 		align = ecb->espblklen;
 	if(align % ecb->ahblklen != 0)
 		panic("espkick: ahblklen is important after all");
-	pad = (align-1) - (payload + EsptailSize-1)%align;
+	pad = (align-1) - (payload + Esptaillen-1)%align;
 
 	/*
 	 * Make space for tail
@@ -273,70 +380,88 @@
 	 * this is done by calling padblock with a negative size
 	 * Padblock does not change bp->wp!
 	 */
-	bp = padblock(bp, -(pad+EsptailSize+ecb->ahlen));
-	bp->wp += pad+EsptailSize+ecb->ahlen;
+	bp = padblock(bp, -(pad+Esptaillen+ecb->ahlen));
+	bp->wp += pad+Esptaillen+ecb->ahlen;
 
-	eh = (Esphdr *)(bp->rp);
-	et = (Esptail*)(bp->rp + EsphdrSize + payload + pad);
+	et = (Esptail*)(bp->rp + vers.hdrlen + payload + pad);
 
-	// fill in tail
+	/* fill in tail */
 	et->pad = pad;
 	et->nexthdr = nexthdr;
 
-	ecb->cipher(ecb, bp->rp+EsphdrSize, payload+pad+EsptailSize);
-	auth = bp->rp + EsphdrSize + payload + pad + EsptailSize;
+	/* encrypt the payload */
+	ecb->cipher(ecb, bp->rp + vers.hdrlen, payload + pad + Esptaillen);
+	auth = bp->rp + vers.hdrlen + payload + pad + Esptaillen;
 
-	// fill in head
-	eh->vihl = IP_VER4;
-	hnputl(eh->espspi, ecb->spi);
-	hnputl(eh->espseq, ++ecb->seq);
-	v6tov4(eh->espsrc, c->laddr);
-	v6tov4(eh->espdst, c->raddr);
-	eh->espproto = IP_ESPPROTO;
-	eh->frag[0] = 0;
-	eh->frag[1] = 0;
+	/* fill in head; construct a new IP header and an ESP header */
+	if (vers.version == V4) {
+		eh4 = (Esp4hdr *)bp->rp;
+		eh4->vihl = IP_VER4;
+		v6tov4(eh4->espsrc, c->laddr);
+		v6tov4(eh4->espdst, c->raddr);
+		eh4->espproto = IP_ESPPROTO;
+		eh4->frag[0] = 0;
+		eh4->frag[1] = 0;
 
-	ecb->auth(ecb, bp->rp+IphdrSize, (EsphdrSize-IphdrSize)+payload+pad+EsptailSize, auth);
+		hnputl(eh4->espspi, ecb->spi);
+		hnputl(eh4->espseq, ++ecb->seq);
+	} else {
+		eh6 = (Esp6hdr *)bp->rp;
+		eh6->vcf[0] = IP_VER6;
+		ipmove(eh6->src, c->laddr);
+		ipmove(eh6->dst, c->raddr);
+		eh6->proto = IP_ESPPROTO;
 
+		hnputl(eh6->espspi, ecb->spi);
+		hnputl(eh6->espseq, ++ecb->seq);
+	}
+
+	/* compute secure hash */
+	ecb->auth(ecb, bp->rp + vers.iphdrlen, (vers.hdrlen - vers.iphdrlen) +
+		payload + pad + Esptaillen, auth);
+
 	qunlock(c);
-	//print("esp: pass down: %uld\n", BLEN(bp));
-	ipoput4(c->p->f, bp, 0, c->ttl, c->tos, c);
+	/* print("esp: pass down: %uld\n", BLEN(bp)); */
+	if (vers.version == V4)
+		ipoput4(c->p->f, bp, 0, c->ttl, c->tos, c);
+	else
+		ipoput6(c->p->f, bp, 0, c->ttl, c->tos, c);
 }
 
+/*
+ * decapsulate IP packet from IP/ESP packet in bp and
+ * pass the result up the spi's Conv's read queue.
+ */
 void
 espiput(Proto *esp, Ipifc*, Block *bp)
 {
-	Esphdr *eh;
-	Esptail *et;
-	Userhdr *uh;
+	int payload, nexthdr;
+	uchar *auth, *espspi;
 	Conv *c;
 	Espcb *ecb;
-	uchar raddr[IPaddrlen], laddr[IPaddrlen];
+	Esptail *et;
 	Fs *f;
-	uchar *auth;
-	ulong spi;
-	int payload, nexthdr;
+	Userhdr *uh;
+	Versdep vers;
 
 	f = esp->f;
 
-	bp = pullupblock(bp, EsphdrSize+EsptailSize);
+	getverslens(pktipvers(f, &bp), &vers);
+
+	bp = pullupblock(bp, vers.hdrlen + Esptaillen);
 	if(bp == nil) {
 		netlog(f, Logesp, "esp: short packet\n");
 		return;
 	}
+	getpktspiaddrs(bp->rp, &vers);
 
-	eh = (Esphdr*)(bp->rp);
-	spi = nhgetl(eh->espspi);
-	v4tov6(raddr, eh->espsrc);
-	v4tov6(laddr, eh->espdst);
-
 	qlock(esp);
 	/* Look for a conversation structure for this port */
-	c = convlookup(esp, spi);
+	c = convlookup(esp, vers.spi);
 	if(c == nil) {
 		qunlock(esp);
-		netlog(f, Logesp, "esp: no conv %I -> %I!%d\n", raddr,
-			laddr, spi);
+		netlog(f, Logesp, "esp: no conv %I -> %I!%lud\n", vers.raddr,
+			vers.laddr, vers.spi);
 		icmpnoconv(f, bp);
 		freeblist(bp);
 		return;
@@ -346,76 +471,83 @@
 	qunlock(esp);
 
 	ecb = c->ptcl;
-	// too hard to do decryption/authentication on block lists
-	if(bp->next)
+	/* too hard to do decryption/authentication on block lists */
+	if(bp->next != nil)
 		bp = concatblock(bp);
 
-	if(BLEN(bp) < EsphdrSize + ecb->espivlen + EsptailSize + ecb->ahlen) {
+	if(BLEN(bp) < vers.hdrlen + ecb->espivlen + Esptaillen + ecb->ahlen) {
 		qunlock(c);
-		netlog(f, Logesp, "esp: short block %I -> %I!%d\n", raddr,
-			laddr, spi);
+		netlog(f, Logesp, "esp: short block %I -> %I!%lud\n", vers.raddr,
+			vers.laddr, vers.spi);
 		freeb(bp);
 		return;
 	}
 
-	eh = (Esphdr*)(bp->rp);
 	auth = bp->wp - ecb->ahlen;
-	if(!ecb->auth(ecb, eh->espspi, auth-eh->espspi, auth)) {
+	espspi = vers.version == V4?	((Esp4hdr*)bp->rp)->espspi:
+					((Esp6hdr*)bp->rp)->espspi;
+
+	/* compute secure hash and authenticate */
+	if(!ecb->auth(ecb, espspi, auth - espspi, auth)) {
 		qunlock(c);
-print("esp: bad auth %I -> %I!%ld\n", raddr, laddr, spi);
-		netlog(f, Logesp, "esp: bad auth %I -> %I!%d\n", raddr,
-			laddr, spi);
+print("esp: bad auth %I -> %I!%lud\n", vers.raddr, vers.laddr, vers.spi);
+		netlog(f, Logesp, "esp: bad auth %I -> %I!%lud\n", vers.raddr,
+			vers.laddr, vers.spi);
 		freeb(bp);
 		return;
 	}
 
-	payload = BLEN(bp)-EsphdrSize-ecb->ahlen;
-	if(payload<=0 || payload%4 != 0 || payload%ecb->espblklen!=0) {
+	payload = BLEN(bp) - vers.hdrlen - ecb->ahlen;
+	if(payload <= 0 || payload % 4 != 0 || payload % ecb->espblklen != 0) {
 		qunlock(c);
-		netlog(f, Logesp, "esp: bad length %I -> %I!%d payload=%d BLEN=%d\n", raddr,
-			laddr, spi, payload, BLEN(bp));
+		netlog(f, Logesp, "esp: bad length %I -> %I!%lud payload=%d BLEN=%zd\n",
+			vers.raddr, vers.laddr, vers.spi, payload, BLEN(bp));
 		freeb(bp);
 		return;
 	}
-	if(!ecb->cipher(ecb, bp->rp+EsphdrSize, payload)) {
+
+	/* decrypt payload */
+	if(!ecb->cipher(ecb, bp->rp + vers.hdrlen, payload)) {
 		qunlock(c);
-print("esp: cipher failed %I -> %I!%ld: %r\n", raddr, laddr, spi);
-		netlog(f, Logesp, "esp: cipher failed %I -> %I!%d: %r\n", raddr,
-			laddr, spi);
+print("esp: cipher failed %I -> %I!%lud: %s\n", vers.raddr, vers.laddr, vers.spi, up->errstr);
+		netlog(f, Logesp, "esp: cipher failed %I -> %I!%lud: %s\n",
+			vers.raddr, vers.laddr, vers.spi, up->errstr);
 		freeb(bp);
 		return;
 	}
 
-	payload -= EsptailSize;
-	et = (Esptail*)(bp->rp + EsphdrSize + payload);
+	payload -= Esptaillen;
+	et = (Esptail*)(bp->rp + vers.hdrlen + payload);
 	payload -= et->pad + ecb->espivlen;
 	nexthdr = et->nexthdr;
 	if(payload <= 0) {
 		qunlock(c);
-		netlog(f, Logesp, "esp: short packet after decrypt %I -> %I!%d\n", raddr,
-			laddr, spi);
+		netlog(f, Logesp, "esp: short packet after decrypt %I -> %I!%lud\n",
+			vers.raddr, vers.laddr, vers.spi);
 		freeb(bp);
 		return;
 	}
 
-	// trim packet
-	bp->rp += EsphdrSize + ecb->espivlen;
+	/* trim packet */
+	bp->rp += vers.hdrlen + ecb->espivlen; /* toss original IP & ESP hdrs */
 	bp->wp = bp->rp + payload;
 	if(ecb->header) {
-		// assume UserhdrSize < EsphdrSize
-		bp->rp -= UserhdrSize;
+		/* assume Userhdrlen < Esp4hdrlen < Esp6hdrlen */
+		bp->rp -= Userhdrlen;
 		uh = (Userhdr*)bp->rp;
-		memset(uh, 0, UserhdrSize);
+		memset(uh, 0, Userhdrlen);
 		uh->nexthdr = nexthdr;
 	}
 
+	/* ingress filtering here? */
+
 	if(qfull(c->rq)){
-		netlog(f, Logesp, "esp: qfull %I -> %I.%uld\n", raddr,
-			laddr, spi);
+		netlog(f, Logesp, "esp: qfull %I -> %I.%uld\n", vers.raddr,
+			vers.laddr, vers.spi);
 		freeblist(bp);
 	}else {
-//print("esp: pass up: %uld\n", BLEN(bp));
-		qpass(c->rq, bp);
+//		print("esp: pass up: %uld\n", BLEN(bp));
+		qpass(c->rq, bp);	/* pass packet up the read queue */
 	}
 
 	qunlock(c);
@@ -440,19 +572,19 @@
 	return e;
 }
 
+/* called from icmp(v6) for unreachable hosts, time exceeded, etc. */
 void
 espadvise(Proto *esp, Block *bp, char *msg)
 {
-	Esphdr *h;
 	Conv *c;
-	ulong spi;
+	Versdep vers;
 
-	h = (Esphdr*)(bp->rp);
+	getverslens(pktipvers(esp->f, &bp), &vers);
+	getpktspiaddrs(bp->rp, &vers);
 
-	spi = nhgets(h->espspi);
 	qlock(esp);
-	c = convlookup(esp, spi);
-	if(c != nil) {
+	c = convlookup(esp, vers.spi);
+	if(c != nil && !c->ignoreadvice) {
 		qhangup(c->rq, msg);
 		qhangup(c->wq, msg);
 	}
@@ -466,7 +598,7 @@
 	Esppriv *upriv;
 
 	upriv = esp->priv;
-	return snprint(buf, len, "%lud %lud\n",
+	return snprint(buf, len, "%llud %lud\n",
 		upriv->in,
 		upriv->inerrors);
 }
@@ -520,10 +652,10 @@
 setalg(Espcb *ecb, char **f, int n, Algorithm *alg)
 {
 	uchar *key;
-	int i, nbyte, nchar;
-	int c;
+	int c, nbyte, nchar;
+	uint i;
 
-	if(n < 2)
+	if(n < 2 || n > 3)
 		return "bad format";
 	for(; alg->name; alg++)
 		if(strcmp(f[1], alg->name) == 0)
@@ -531,10 +663,14 @@
 	if(alg->name == nil)
 		return "unknown algorithm";
 
-	if(n != 3)
-		return "bad format";
 	nbyte = (alg->keylen + 7) >> 3;
-	nchar = strlen(f[2]);
+	if (n == 2)
+		nchar = 0;
+	else
+		nchar = strlen(f[2]);
+	if(nchar != 2 * nbyte)			/* TODO: maybe < is ok */
+		return "key not required length";
+	/* convert hex digits from ascii, in place */
 	for(i=0; i<nchar; i++) {
 		c = f[2][i];
 		if(c >= '0' && c <= '9')
@@ -544,21 +680,27 @@
 		else if(c >= 'A' && c <= 'F')
 			f[2][i] -= 'A'-10;
 		else
-			return "bad character in key";
+			return "non-hex character in key";
 	}
-	key = smalloc(nbyte);
-	for(i=0; i<nchar && i*2<nbyte; i++) {
+	/* collapse hex digits into complete bytes in reverse order in key */
+	key = secalloc(nbyte);
+	for(i = 0; i < nchar && i/2 < nbyte; i++) {
 		c = f[2][nchar-i-1];
 		if(i&1)
 			c <<= 4;
-		key[i>>1] |= c;
+		key[i/2] |= c;
 	}
-
+	memset(f[2], 0, nchar);
 	alg->init(ecb, alg->name, key, alg->keylen);
-	free(key);
+	secfree(key);
 	return nil;
 }
 
+
+/*
+ * null encryption
+ */
+
 static int
 nullcipher(Espcb*, uchar*, int)
 {
@@ -566,7 +708,7 @@
 }
 
 static void
-nullespinit(Espcb *ecb, char *name, uchar*, int)
+nullespinit(Espcb *ecb, char *name, uchar*, unsigned)
 {
 	ecb->espalg = name;
 	ecb->espblklen = 1;
@@ -581,7 +723,7 @@
 }
 
 static void
-nullahinit(Espcb *ecb, char *name, uchar*, int)
+nullahinit(Espcb *ecb, char *name, uchar*, unsigned)
 {
 	ecb->ahalg = name;
 	ecb->ahblklen = 1;
@@ -589,26 +731,28 @@
 	ecb->auth = nullauth;
 }
 
-void
+
+/*
+ * sha1
+ */
+
+static void
 seanq_hmac_sha1(uchar hash[SHA1dlen], uchar *t, long tlen, uchar *key, long klen)
 {
-	uchar ipad[65], opad[65];
 	int i;
+	uchar ipad[Hmacblksz+1], opad[Hmacblksz+1], innerhash[SHA1dlen];
 	DigestState *digest;
-	uchar innerhash[SHA1dlen];
 
-	for(i=0; i<64; i++){
-		ipad[i] = 0x36;
-		opad[i] = 0x5c;
-	}
-	ipad[64] = opad[64] = 0;
-	for(i=0; i<klen; i++){
+	memset(ipad, 0x36, Hmacblksz);
+	memset(opad, 0x5c, Hmacblksz);
+	ipad[Hmacblksz] = opad[Hmacblksz] = 0;
+	for(i = 0; i < klen; i++){
 		ipad[i] ^= key[i];
 		opad[i] ^= key[i];
 	}
-	digest = sha1(ipad, 64, nil, nil);
+	digest = sha1(ipad, Hmacblksz, nil, nil);
 	sha1(t, tlen, innerhash, digest);
-	digest = sha1(opad, 64, nil, nil);
+	digest = sha1(opad, Hmacblksz, nil, nil);
 	sha1(innerhash, SHA1dlen, hash, digest);
 }
 
@@ -615,11 +759,11 @@
 static int
 shaauth(Espcb *ecb, uchar *t, int tlen, uchar *auth)
 {
-	uchar hash[SHA1dlen];
 	int r;
+	uchar hash[SHA1dlen];
 
 	memset(hash, 0, SHA1dlen);
-	seanq_hmac_sha1(hash, t, tlen, (uchar*)ecb->ahstate, 16);
+	seanq_hmac_sha1(hash, t, tlen, (uchar*)ecb->ahstate, BITS2BYTES(128));
 	r = memcmp(auth, hash, ecb->ahlen) == 0;
 	memmove(auth, hash, ecb->ahlen);
 	return r;
@@ -626,40 +770,162 @@
 }
 
 static void
-shaahinit(Espcb *ecb, char *name, uchar *key, int klen)
+shaahinit(Espcb *ecb, char *name, uchar *key, unsigned klen)
 {
 	if(klen != 128)
 		panic("shaahinit: bad keylen");
-	klen >>= 8;	// convert to bytes
+	klen /= BI2BY;
 
 	ecb->ahalg = name;
 	ecb->ahblklen = 1;
-	ecb->ahlen = 12;
+	ecb->ahlen = BITS2BYTES(96);
 	ecb->auth = shaauth;
-	ecb->ahstate = smalloc(klen);
+	ecb->ahstate = secalloc(klen);
 	memmove(ecb->ahstate, key, klen);
 }
 
-void
+
+/*
+ * aes
+ */
+static int
+aescbccipher(Espcb *ecb, uchar *p, int n)	/* AES with CBC chaining, 128-bit blocks; the first AESbsize bytes of p carry the IV */
+{
+	uchar tmp[AESbsize], q[AESbsize];
+	uchar *pp, *tp, *ip, *eip, *ep;
+	AESstate *ds = ecb->espstate;
+
+	ep = p + n;
+	if(ecb->incoming) {
+		memmove(ds->ivec, p, AESbsize);	/* take the IV from the packet */
+		p += AESbsize;
+		while(p < ep){
+			memmove(tmp, p, AESbsize);	/* keep the ciphertext: it is the next chaining value */
+			aes_decrypt(ds->dkey, ds->rounds, p, q);
+			memmove(p, q, AESbsize);
+			tp = tmp;
+			ip = ds->ivec;
+			for(eip = ip + AESbsize; ip < eip; ){
+				*p++ ^= *ip;	/* plaintext = decrypted block xor previous chaining value */
+				*ip++ = *tp++;	/* saved ciphertext replaces the chaining value */
+			}
+		}
+	} else {
+		memmove(p, ds->ivec, AESbsize);	/* send the current IV ahead of the ciphertext */
+		for(p += AESbsize; p < ep; p += AESbsize){
+			pp = p;
+			ip = ds->ivec;
+			for(eip = ip + AESbsize; ip < eip; )
+				*pp++ ^= *ip++;	/* xor the chaining value into the plaintext block */
+			aes_encrypt(ds->ekey, ds->rounds, p, q);
+			memmove(ds->ivec, q, AESbsize);	/* ciphertext becomes the next chaining value */
+			memmove(p, q, AESbsize);
+		}
+	}
+	return 1;	/* always reports success */
+}
+
+static void
+aescbcespinit(Espcb *ecb, char *name, uchar *k, unsigned n)	/* n is the key length in bits */
+{
+	uchar key[Aeskeysz], ivec[Aeskeysz];
+
+	n = BITS2BYTES(n);
+	if(n > Aeskeysz)
+		n = Aeskeysz;	/* use at most Aeskeysz bytes of k */
+	memset(key, 0, sizeof(key));
+	memmove(key, k, n);
+	prng(ivec, Aeskeysz);	/* presumably fills ivec with random bytes for the initial IV -- confirm against libsec */
+	ecb->espalg = name;
+	ecb->espblklen = Aesblk;
+	ecb->espivlen = Aesblk;	/* aescbccipher keeps the IV in the first block of the payload */
+	ecb->cipher = aescbccipher;
+	ecb->espstate = secalloc(sizeof(AESstate));
+	setupAESstate(ecb->espstate, key, n /* keybytes */, ivec);
+	memset(ivec, 0, sizeof(ivec));	/* clear the stack copies of IV and key */
+	memset(key, 0, sizeof(key));
+}
+
+static int
+aesctrcipher(Espcb *ecb, uchar *p, int n)	/* 128-bit blocks; NOTE(review): body is identical to aescbccipher (CBC chaining), not counter mode -- see the TODO in the file header */
+{
+	uchar tmp[AESbsize], q[AESbsize];
+	uchar *pp, *tp, *ip, *eip, *ep;
+	AESstate *ds = ecb->espstate;
+
+	ep = p + n;
+	if(ecb->incoming) {
+		memmove(ds->ivec, p, AESbsize);	/* take the IV from the packet */
+		p += AESbsize;
+		while(p < ep){
+			memmove(tmp, p, AESbsize);	/* keep the ciphertext: it is the next chaining value */
+			aes_decrypt(ds->dkey, ds->rounds, p, q);
+			memmove(p, q, AESbsize);
+			tp = tmp;
+			ip = ds->ivec;
+			for(eip = ip + AESbsize; ip < eip; ){
+				*p++ ^= *ip;	/* plaintext = decrypted block xor previous chaining value */
+				*ip++ = *tp++;	/* saved ciphertext replaces the chaining value */
+			}
+		}
+	} else {
+		memmove(p, ds->ivec, AESbsize);	/* send the current IV ahead of the ciphertext */
+		for(p += AESbsize; p < ep; p += AESbsize){
+			pp = p;
+			ip = ds->ivec;
+			for(eip = ip + AESbsize; ip < eip; )
+				*pp++ ^= *ip++;	/* xor the chaining value into the plaintext block */
+			aes_encrypt(ds->ekey, ds->rounds, p, q);
+			memmove(ds->ivec, q, AESbsize);	/* ciphertext becomes the next chaining value */
+			memmove(p, q, AESbsize);
+		}
+	}
+	return 1;	/* always reports success */
+}
+
+static void
+aesctrespinit(Espcb *ecb, char *name, uchar *k, unsigned n)	/* n is the key length in bits */
+{
+	uchar key[Aesblk], ivec[Aesblk];	/* key[] is sized by Aesblk but bounded by Aeskeysz below; both are BITS2BYTES(128) */
+
+	n = BITS2BYTES(n);
+	if(n > Aeskeysz)
+		n = Aeskeysz;	/* use at most Aeskeysz bytes of k */
+	memset(key, 0, sizeof(key));
+	memmove(key, k, n);
+	prng(ivec, Aesblk);	/* presumably fills ivec with random bytes for the initial IV -- confirm against libsec */
+	ecb->espalg = name;
+	ecb->espblklen = Aesblk;
+	ecb->espivlen = Aesblk;	/* aesctrcipher keeps the IV in the first block of the payload */
+	ecb->cipher = aesctrcipher;
+	ecb->espstate = secalloc(sizeof(AESstate));
+	setupAESstate(ecb->espstate, key, n /* keybytes */, ivec);
+	memset(ivec, 0, sizeof(ivec));	/* clear the stack copies of IV and key */
+	memset(key, 0, sizeof(key));
+}
+
+
+/*
+ * md5
+ */
+
+static void
 seanq_hmac_md5(uchar hash[MD5dlen], uchar *t, long tlen, uchar *key, long klen)
 {
-	uchar ipad[65], opad[65];
 	int i;
+	uchar ipad[Hmacblksz+1], opad[Hmacblksz+1], innerhash[MD5dlen];
 	DigestState *digest;
-	uchar innerhash[MD5dlen];
 
-	for(i=0; i<64; i++){
-		ipad[i] = 0x36;
-		opad[i] = 0x5c;
-	}
-	ipad[64] = opad[64] = 0;
-	for(i=0; i<klen; i++){
+	memset(ipad, 0x36, Hmacblksz);
+	memset(opad, 0x5c, Hmacblksz);
+	ipad[Hmacblksz] = opad[Hmacblksz] = 0;
+	for(i = 0; i < klen; i++){
 		ipad[i] ^= key[i];
 		opad[i] ^= key[i];
 	}
-	digest = md5(ipad, 64, nil, nil);
+	digest = md5(ipad, Hmacblksz, nil, nil);
 	md5(t, tlen, innerhash, digest);
-	digest = md5(opad, 64, nil, nil);
+	digest = md5(opad, Hmacblksz, nil, nil);
 	md5(innerhash, MD5dlen, hash, digest);
 }
 
@@ -670,7 +936,7 @@
 	int r;
 
 	memset(hash, 0, MD5dlen);
-	seanq_hmac_md5(hash, t, tlen, (uchar*)ecb->ahstate, 16);
+	seanq_hmac_md5(hash, t, tlen, (uchar*)ecb->ahstate, BITS2BYTES(128));
 	r = memcmp(auth, hash, ecb->ahlen) == 0;
 	memmove(auth, hash, ecb->ahlen);
 	return r;
@@ -677,168 +943,102 @@
 }
 
 static void
-md5ahinit(Espcb *ecb, char *name, uchar *key, int klen)
+md5ahinit(Espcb *ecb, char *name, uchar *key, unsigned klen)
 {
 	if(klen != 128)
 		panic("md5ahinit: bad keylen");
-	klen >>= 3;	// convert to bytes
-
-
+	klen = BITS2BYTES(klen);
 	ecb->ahalg = name;
 	ecb->ahblklen = 1;
-	ecb->ahlen = 12;
+	ecb->ahlen = BITS2BYTES(96);
 	ecb->auth = md5auth;
-	ecb->ahstate = smalloc(klen);
+	ecb->ahstate = secalloc(klen);
 	memmove(ecb->ahstate, key, klen);
 }
 
+
+/*
+ * des, single and triple
+ */
+
 static int
 descipher(Espcb *ecb, uchar *p, int n)
 {
-	uchar tmp[8];
-	uchar *pp, *tp, *ip, *eip, *ep;
 	DESstate *ds = ecb->espstate;
 
-	ep = p + n;
 	if(ecb->incoming) {
-		memmove(ds->ivec, p, 8);
-		p += 8;
-		while(p < ep){
-			memmove(tmp, p, 8);
-			block_cipher(ds->expanded, p, 1);
-			tp = tmp;
-			ip = ds->ivec;
-			for(eip = ip+8; ip < eip; ){
-				*p++ ^= *ip;
-				*ip++ = *tp++;
-			}
-		}
+		memmove(ds->ivec, p, Desblk);
+		desCBCdecrypt(p + Desblk, n - Desblk, ds);
 	} else {
-		memmove(p, ds->ivec, 8);
-		for(p += 8; p < ep; p += 8){
-			pp = p;
-			ip = ds->ivec;
-			for(eip = ip+8; ip < eip; )
-				*pp++ ^= *ip++;
-			block_cipher(ds->expanded, p, 0);
-			memmove(ds->ivec, p, 8);
-		}
+		memmove(p, ds->ivec, Desblk);
+		desCBCencrypt(p + Desblk, n - Desblk, ds);
 	}
 	return 1;
 }
-	
+
+static int
+des3cipher(Espcb *ecb, uchar *p, int n)	/* triple-DES CBC over the n bytes at p; the first Desblk bytes carry the IV */
+{
+	DES3state *ds = ecb->espstate;
+
+	if(ecb->incoming) {
+		memmove(ds->ivec, p, Desblk);	/* take the IV from the packet */
+		des3CBCdecrypt(p + Desblk, n - Desblk, ds);
+	} else {
+		memmove(p, ds->ivec, Desblk);	/* send the current IV ahead of the ciphertext */
+		des3CBCencrypt(p + Desblk, n - Desblk, ds);
+	}
+	return 1;	/* always reports success */
+}
+
 static void
-desespinit(Espcb *ecb, char *name, uchar *k, int n)
+desespinit(Espcb *ecb, char *name, uchar *k, unsigned n)
 {
-	uchar key[8];
-	uchar ivec[8];
-	int i;
-	
-	// bits to bytes
-	n = (n+7)>>3;
-	if(n > 8)
-		n = 8;
+	uchar key[Desblk], ivec[Desblk];
+
+	n = BITS2BYTES(n);
+	if(n > Desblk)
+		n = Desblk;
 	memset(key, 0, sizeof(key));
 	memmove(key, k, n);
-	for(i=0; i<8; i++)
-		ivec[i] = nrand(256);
+	prng(ivec, Desblk);
 	ecb->espalg = name;
-	ecb->espblklen = 8;
-	ecb->espivlen = 8;
+	ecb->espblklen = Desblk;
+	ecb->espivlen = Desblk;
+
 	ecb->cipher = descipher;
-	ecb->espstate = smalloc(sizeof(DESstate));
+	ecb->espstate = secalloc(sizeof(DESstate));
 	setupDESstate(ecb->espstate, key, ivec);
+	memset(ivec, 0, sizeof(ivec));
+	memset(key, 0, sizeof(key));
 }
 
-static int
-rc4cipher(Espcb *ecb, uchar *p, int n)
+static void
+des3espinit(Espcb *ecb, char *name, uchar *k, unsigned n)
 {
-	Esprc4 *esprc4;
-	RC4state tmpstate;
-	ulong seq;
-	long d, dd;
+	uchar key[3][Desblk], ivec[Desblk];
 
-	if(n < 4)
-		return 0;
+	n = BITS2BYTES(n);
+	if(n > Des3keysz)
+		n = Des3keysz;
+	memset(key, 0, sizeof(key));
+	memmove(key, k, n);
+	prng(ivec, Desblk);
+	ecb->espalg = name;
+	ecb->espblklen = Desblk;
+	ecb->espivlen = Desblk;
 
-	esprc4 = ecb->espstate;
-	if(ecb->incoming) {
-		seq = nhgetl(p);
-		p += 4;
-		n -= 4;
-		d = seq-esprc4->cseq;
-		if(d == 0) {
-			rc4(&esprc4->current, p, n);
-			esprc4->cseq += n;
-			if(esprc4->ovalid) {
-				dd = esprc4->cseq - esprc4->lgseq;
-				if(dd > RC4back)
-					esprc4->ovalid = 0;
-			}
-		} else if(d > 0) {
-print("missing packet: %uld %ld\n", seq, d);
-			// this link is hosed
-			if(d > RC4forward) {
-				strcpy(up->errstr, "rc4cipher: skipped too much");
-				return 0;
-			}
-			esprc4->lgseq = seq;
-			if(!esprc4->ovalid) {
-				esprc4->ovalid = 1;
-				esprc4->oseq = esprc4->cseq;
-				memmove(&esprc4->old, &esprc4->current, sizeof(RC4state));
-			}
-			rc4skip(&esprc4->current, d);
-			rc4(&esprc4->current, p, n);
-			esprc4->cseq = seq+n;
-		} else {
-print("reordered packet: %uld %ld\n", seq, d);
-			dd = seq - esprc4->oseq;
-			if(!esprc4->ovalid || -d > RC4back || dd < 0) {
-				strcpy(up->errstr, "rc4cipher: too far back");
-				return 0;
-			}
-			memmove(&tmpstate, &esprc4->old, sizeof(RC4state));
-			rc4skip(&tmpstate, dd);
-			rc4(&tmpstate, p, n);
-			return 1;
-		}
-
-		// move old state up
-		if(esprc4->ovalid) {
-			dd = esprc4->cseq - RC4back - esprc4->oseq;
-			if(dd > 0) {
-				rc4skip(&esprc4->old, dd);
-				esprc4->oseq += dd;
-			}
-		}
-	} else {
-		hnputl(p, esprc4->cseq);
-		p += 4;
-		n -= 4;
-		rc4(&esprc4->current, p, n);
-		esprc4->cseq += n;
-	}
-	return 1;
+	ecb->cipher = des3cipher;
+	ecb->espstate = secalloc(sizeof(DES3state));
+	setupDES3state(ecb->espstate, key, ivec);
+	memset(ivec, 0, sizeof(ivec));
+	memset(key, 0, sizeof(key));
 }
 
-static void
-rc4espinit(Espcb *ecb, char *name, uchar *k, int n)
-{	
-	Esprc4 *esprc4;
 
-	// bits to bytes
-	n = (n+7)>>3;
-	esprc4 = smalloc(sizeof(Esprc4));
-	memset(esprc4, 0, sizeof(Esprc4));
-	setupRC4state(&esprc4->current, k, n);
-	ecb->espalg = name;
-	ecb->espblklen = 4;
-	ecb->espivlen = 4;
-	ecb->cipher = rc4cipher;
-	ecb->espstate = esprc4;
-}
-	
+/*
+ * interfacing to devip
+ */
 void
 espinit(Fs *fs)
 {
--- a/os/ip/ethermedium.c
+++ b/os/ip/ethermedium.c
@@ -5,9 +5,9 @@
 #include "fns.h"
 #include "../port/error.h"
 
+#include "../port/netif.h"
 #include "ip.h"
 #include "ipv6.h"
-#include "kernel.h"
 
 typedef struct Etherhdr Etherhdr;
 struct Etherhdr
@@ -18,10 +18,10 @@
 };
 
 static uchar ipbroadcast[IPaddrlen] = {
-	0xff,0xff,0xff,0xff,  
-	0xff,0xff,0xff,0xff,  
-	0xff,0xff,0xff,0xff,  
 	0xff,0xff,0xff,0xff,
+	0xff,0xff,0xff,0xff,
+	0xff,0xff,0xff,0xff,
+	0xff,0xff,0xff,0xff,
 };
 
 static uchar etherbroadcast[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
@@ -33,12 +33,12 @@
 static void	etherbwrite(Ipifc *ifc, Block *bp, int version, uchar *ip);
 static void	etheraddmulti(Ipifc *ifc, uchar *a, uchar *ia);
 static void	etherremmulti(Ipifc *ifc, uchar *a, uchar *ia);
+static void	etherareg(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *ip);
 static Block*	multicastarp(Fs *f, Arpent *a, Medium*, uchar *mac);
 static void	sendarp(Ipifc *ifc, Arpent *a);
-static void	sendgarp(Ipifc *ifc, uchar*);
+static void	sendndp(Ipifc *ifc, Arpent *a);
 static int	multicastea(uchar *ea, uchar *ip);
 static void	recvarpproc(void*);
-static void	resolveaddr6(Ipifc *ifc, Arpent *a);
 static void	etherpref2addr(uchar *pref, uchar *ea);
 
 Medium ethermedium =
@@ -53,8 +53,7 @@
 .bwrite=	etherbwrite,
 .addmulti=	etheraddmulti,
 .remmulti=	etherremmulti,
-.ares=		arpenter,
-.areg=		sendgarp,
+.areg=		etherareg,
 .pref2addr=	etherpref2addr,
 };
 
@@ -70,8 +69,7 @@
 .bwrite=	etherbwrite,
 .addmulti=	etheraddmulti,
 .remmulti=	etherremmulti,
-.ares=		arpenter,
-.areg=		sendgarp,
+.areg=		etherareg,
 .pref2addr=	etherpref2addr,
 };
 
@@ -94,9 +92,6 @@
  */
 enum
 {
-	ETARP		= 0x0806,
-	ETIP4		= 0x0800,
-	ETIP6		= 0x86DD,
 	ARPREQUEST	= 1,
 	ARPREPLY	= 2,
 };
@@ -127,128 +122,92 @@
 static void
 etherbind(Ipifc *ifc, int argc, char **argv)
 {
-	Chan *mchan4, *cchan4, *achan, *mchan6, *cchan6;
-	char addr[Maxpath];	//char addr[2*KNAMELEN];
-	char dir[Maxpath];	//char dir[2*KNAMELEN];
-	char *buf;
-	int fd, cfd, n;
-	char *ptr;
+	char addr[Maxpath], dir[Maxpath];
 	Etherrock *er;
+	Chan *c;
+	int n;
 
 	if(argc < 2)
 		error(Ebadarg);
 
-	mchan4 = cchan4 = achan = mchan6 = cchan6 = nil;
-	buf = nil;
+	/*
+	 *  get mac address
+	 */
+	snprint(addr, sizeof(addr), "%s/addr", argv[2]);
+	c = namec(addr, Aopen, OREAD, 0);
 	if(waserror()){
-		if(mchan4 != nil)
-			cclose(mchan4);
-		if(cchan4 != nil)
-			cclose(cchan4);
-		if(achan != nil)
-			cclose(achan);
-		if(mchan6 != nil)
-			cclose(mchan6);
-		if(cchan6 != nil)
-			cclose(cchan6);
-		if(buf != nil)
-			free(buf);
-		nexterror(); 
+		cclose(c);
+		nexterror();
 	}
+	n = devtab[c->type]->read(c, addr, sizeof(addr)-1, 0);
+	if(n < 0)
+		error(Eio);
+	addr[n] = 0;
+	if(parsemac(ifc->mac, addr, sizeof(ifc->mac)) != 6)
+		error("could not find mac address");
+	cclose(c);
+	poperror();
 
+	er = smalloc(sizeof(*er));
+	er->read4p = er->read6p = er->arpp = (void*)-1;
+	er->mchan4 = er->cchan4 = er->mchan6 = er->cchan6 = er->achan = nil;
+	er->f = ifc->conv->p->f;
+
+	if(waserror()){
+		if(er->mchan4 != nil)
+			cclose(er->mchan4);
+		if(er->cchan4 != nil)
+			cclose(er->cchan4);
+		if(er->mchan6 != nil)
+			cclose(er->mchan6);
+		if(er->cchan6 != nil)
+			cclose(er->cchan6);
+		if(er->achan != nil)
+			cclose(er->achan);
+		free(er);
+		nexterror();
+	}
+
 	/*
-	 *  open ip converstation
+	 *  open ipv4 conversation
 	 *
 	 *  the dial will fail if the type is already open on
 	 *  this device.
 	 */
-	snprint(addr, sizeof(addr), "%s!0x800", argv[2]);
-	fd = kdial(addr, nil, dir, &cfd);
-	if(fd < 0)
-		errorf("dial 0x800 failed: %s", up->env->errstr);
-	mchan4 = commonfdtochan(fd, ORDWR, 0, 1);
-	cchan4 = commonfdtochan(cfd, ORDWR, 0, 1);
-	kclose(fd);
-	kclose(cfd);
+	snprint(addr, sizeof(addr), "%s!0x800", argv[2]);	/* ETIP4 */
+	er->mchan4 = chandial(addr, nil, dir, &er->cchan4);
 
 	/*
 	 *  make it non-blocking
 	 */
-	devtab[cchan4->type]->write(cchan4, nbmsg, strlen(nbmsg), 0);
+	devtab[er->cchan4->type]->write(er->cchan4, nbmsg, strlen(nbmsg), 0);
 
 	/*
-	 *  get mac address and speed
-	 */
-	snprint(addr, sizeof(addr), "%s/stats", dir);
-	fd = kopen(addr, OREAD);
-	if(fd < 0)
-		errorf("can't open ether stats: %s", up->env->errstr);
-
-	buf = smalloc(512);
-	n = kread(fd, buf, 511);
-	kclose(fd);
-	if(n <= 0)
-		error(Eio);
-	buf[n] = 0;
-
-	ptr = strstr(buf, "addr: ");
-	if(!ptr)
-		error(Eio);
-	ptr += 6;
-	parsemac(ifc->mac, ptr, 6);
-
-	ptr = strstr(buf, "mbps: ");
-	if(ptr){
-		ptr += 6;
-		ifc->mbps = atoi(ptr);
-	} else
-		ifc->mbps = 100;
-
-	/*
- 	 *  open arp conversation
-	 */
-	snprint(addr, sizeof(addr), "%s!0x806", argv[2]);
-	fd = kdial(addr, nil, nil, nil);
-	if(fd < 0)
-		errorf("dial 0x806 failed: %s", up->env->errstr);
-	achan = commonfdtochan(fd, ORDWR, 0, 1);
-	kclose(fd);
-
-	/*
-	 *  open ip conversation
+	 *  open ipv6 conversation
 	 *
 	 *  the dial will fail if the type is already open on
 	 *  this device.
 	 */
-	snprint(addr, sizeof(addr), "%s!0x86DD", argv[2]);
-	fd = kdial(addr, nil, dir, &cfd);
-	if(fd < 0)
-		errorf("dial 0x86DD failed: %s", up->env->errstr);
-	mchan6 = commonfdtochan(fd, ORDWR, 0, 1);
-	cchan6 = commonfdtochan(cfd, ORDWR, 0, 1);
-	kclose(fd);
-	kclose(cfd);
+	snprint(addr, sizeof(addr), "%s!0x86DD", argv[2]);	/* ETIP6 */
+	er->mchan6 = chandial(addr, nil, dir, &er->cchan6);
 
 	/*
 	 *  make it non-blocking
 	 */
-	devtab[cchan6->type]->write(cchan6, nbmsg, strlen(nbmsg), 0);
+	devtab[er->cchan6->type]->write(er->cchan6, nbmsg, strlen(nbmsg), 0);
 
-	er = smalloc(sizeof(*er));
-	er->mchan4 = mchan4;
-	er->cchan4 = cchan4;
-	er->achan = achan;
-	er->mchan6 = mchan6;
-	er->cchan6 = cchan6;
-	er->f = ifc->conv->p->f;
-	ifc->arg = er;
-
-	free(buf);
+	/*
+ 	 *  open arp conversation
+	 */
+	snprint(addr, sizeof(addr), "%s!0x806", argv[2]);	/* ETARP */
+	er->achan = chandial(addr, nil, nil, nil);
 	poperror();
 
-	kproc("etherread4", etherread4, ifc, 0);
-	kproc("recvarpproc", recvarpproc, ifc, 0);
-	kproc("etherread6", etherread6, ifc, 0);
+	ifc->arg = er;
+
+	kproc("etherread4", etherread4, ifc);
+	kproc("etherread6", etherread6, ifc);
+	kproc("recvarpproc", recvarpproc, ifc);
 }
 
 /*
@@ -259,21 +218,35 @@
 {
 	Etherrock *er = ifc->arg;
 
-	if(er->read4p)
+	while(waserror())
+		;
+
+	/* wait for readers to start */
+	while(er->arpp == (void*)-1 || er->read4p == (void*)-1 || er->read6p == (void*)-1)
+		tsleep(&up->sleep, return0, 0, 300);
+
+	if(er->read4p != nil)
 		postnote(er->read4p, 1, "unbind", 0);
-	if(er->read6p)
+	if(er->read6p != nil)
 		postnote(er->read6p, 1, "unbind", 0);
-	if(er->arpp)
+	if(er->arpp != nil)
 		postnote(er->arpp, 1, "unbind", 0);
 
+	poperror();
+
+	wunlock(ifc);
+	while(waserror())
+		;
+
 	/* wait for readers to die */
-	while(er->arpp != 0 || er->read4p != 0 || er->read6p != 0)
+	while(er->arpp != nil || er->read4p != nil || er->read6p != nil)
 		tsleep(&up->sleep, return0, 0, 300);
 
+	poperror();
+	wlock(ifc);
+
 	if(er->mchan4 != nil)
 		cclose(er->mchan4);
-	if(er->achan != nil)
-		cclose(er->achan);
 	if(er->cchan4 != nil)
 		cclose(er->cchan4);
 	if(er->mchan6 != nil)
@@ -280,6 +253,8 @@
 		cclose(er->mchan6);
 	if(er->cchan6 != nil)
 		cclose(er->cchan6);
+	if(er->achan != nil)
+		cclose(er->achan);
 
 	free(er);
 }
@@ -297,16 +272,16 @@
 
 	/* get mac address of destination */
 	a = arpget(er->f->arp, bp, version, ifc, ip, mac);
-	if(a){
+	if(a != nil){
 		/* check for broadcast or multicast */
 		bp = multicastarp(er->f, a, ifc->m, mac);
-		if(bp==nil){
+		if(bp == nil){
 			switch(version){
 			case V4:
 				sendarp(ifc, a);
 				break;
-			case V6: 
-				resolveaddr6(ifc, a);
+			case V6:
+				sendndp(ifc, a);
 				break;
 			default:
 				panic("etherbwrite: version %d", version);
@@ -317,8 +292,6 @@
 
 	/* make it a single block with space for the ether header */
 	bp = padblock(bp, ifc->m->hsize);
-	if(bp->next)
-		bp = concatblock(bp);
 	if(BLEN(bp) < ifc->mintu)
 		bp = adjustblock(bp, ifc->mintu);
 	eh = (Etherhdr*)bp->rp;
@@ -358,29 +331,28 @@
 	ifc = a;
 	er = ifc->arg;
 	er->read4p = up;	/* hide identity under a rock for unbind */
-	if(waserror()){
-		er->read4p = 0;
-		pexit("hangup", 1);
-	}
+	if(!waserror())
 	for(;;){
 		bp = devtab[er->mchan4->type]->bread(er->mchan4, ifc->maxtu, 0);
-		if(!canrlock(ifc)){
-			freeb(bp);
-			continue;
-		}
+		if(bp == nil)
+			break;
+		rlock(ifc);
 		if(waserror()){
 			runlock(ifc);
 			nexterror();
 		}
 		ifc->in++;
-		bp->rp += ifc->m->hsize;
-		if(ifc->lifc == nil)
+		if(ifc->lifc == nil || BLEN(bp) <= ifc->m->hsize)
 			freeb(bp);
-		else
+		else {
+			bp->rp += ifc->m->hsize;
 			ipiput4(er->f, ifc, bp);
+		}
 		runlock(ifc);
 		poperror();
 	}
+	er->read4p = nil;
+	pexit("hangup", 1);
 }
 
 
@@ -397,29 +369,28 @@
 	ifc = a;
 	er = ifc->arg;
 	er->read6p = up;	/* hide identity under a rock for unbind */
-	if(waserror()){
-		er->read6p = 0;
-		pexit("hangup", 1);
-	}
+	if(!waserror())
 	for(;;){
 		bp = devtab[er->mchan6->type]->bread(er->mchan6, ifc->maxtu, 0);
-		if(!canrlock(ifc)){
-			freeb(bp);
-			continue;
-		}
+		if(bp == nil)
+			break;
+		rlock(ifc);
 		if(waserror()){
 			runlock(ifc);
 			nexterror();
 		}
 		ifc->in++;
-		bp->rp += ifc->m->hsize;
-		if(ifc->lifc == nil)
+		if(ifc->lifc == nil || BLEN(bp) <= ifc->m->hsize)
 			freeb(bp);
-		else
+		else {
+			bp->rp += ifc->m->hsize;
 			ipiput6(er->f, ifc, bp);
+		}
 		runlock(ifc);
 		poperror();
 	}
+	er->read6p = nil;
+	pexit("hangup", 1);
 }
 
 static void
@@ -477,6 +448,7 @@
 	Block *bp;
 	Etherarp *e;
 	Etherrock *er = ifc->arg;
+	uchar targ[IPv4addrlen], src[IPv4addrlen];
 
 	/* don't do anything if it's been less than a second since the last */
 	if(NOW - a->ctime < 1000){
@@ -484,6 +456,9 @@
 		return;
 	}
 
+	/* try to keep it around for a second more */
+	a->ctime = NOW;
+
 	/* remove all but the last message */
 	while((bp = a->hold) != nil){
 		if(bp == a->last)
@@ -492,18 +467,20 @@
 		freeblist(bp);
 	}
 
-	/* try to keep it around for a second more */
-	a->ctime = NOW;
+	memmove(targ, a->ip+IPv4off, IPv4addrlen);
 	arprelease(er->f->arp, a);
 
+	if(!ipv4local(ifc, src, 0, targ))
+		return;
+
 	n = sizeof(Etherarp);
-	if(n < a->type->mintu)
-		n = a->type->mintu;
+	if(n < ifc->m->mintu)
+		n = ifc->m->mintu;
 	bp = allocb(n);
 	memset(bp->rp, 0, n);
 	e = (Etherarp*)bp->rp;
-	memmove(e->tpa, a->ip+IPv4off, sizeof(e->tpa));
-	ipv4local(ifc, e->spa);
+	memmove(e->tpa, targ, sizeof(e->tpa));
+	memmove(e->spa, src, sizeof(e->spa));
 	memmove(e->sha, ifc->mac, sizeof(e->sha));
 	memset(e->d, 0xff, sizeof(e->d));		/* ethernet broadcast */
 	memmove(e->s, ifc->mac, sizeof(e->s));
@@ -516,18 +493,14 @@
 	hnputs(e->op, ARPREQUEST);
 	bp->wp += n;
 
-	n = devtab[er->achan->type]->bwrite(er->achan, bp, 0);
-	if(n < 0)
-		print("arp: send: %r\n");
+	devtab[er->achan->type]->bwrite(er->achan, bp, 0);
 }
 
 static void
-resolveaddr6(Ipifc *ifc, Arpent *a)
+sendndp(Ipifc *ifc, Arpent *a)
 {
-	int sflag;
 	Block *bp;
 	Etherrock *er = ifc->arg;
-	uchar ipsrc[IPaddrlen];
 
 	/* don't do anything if it's been less than a second since the last */
 	if(NOW - a->ctime < ReTransTimer){
@@ -543,19 +516,7 @@
 		freeblist(bp);
 	}
 
-	/* try to keep it around for a second more */
-	a->ctime = NOW;
-	a->rtime = NOW + ReTransTimer;
-	if(a->rxtsrem <= 0) {
-		arprelease(er->f->arp, a);
-		return;
-	}
-
-	a->rxtsrem--;
-	arprelease(er->f->arp, a);
-
-	if(sflag = ipv6anylocal(ifc, ipsrc)) 
-		icmpns(er->f, ipsrc, sflag, a->ip, TARG_MULTI, ifc->mac);
+	ndpsendsol(er->f, ifc, a);	/* unlocks arp */
 }
 
 /*
@@ -569,10 +530,6 @@
 	Etherarp *e;
 	Etherrock *er = ifc->arg;
 
-	/* don't arp for our initial non address */
-	if(ipcmp(ip, IPnoaddr) == 0)
-		return;
-
 	n = sizeof(Etherarp);
 	if(n < ifc->m->mintu)
 		n = ifc->m->mintu;
@@ -593,15 +550,13 @@
 	hnputs(e->op, ARPREQUEST);
 	bp->wp += n;
 
-	n = devtab[er->achan->type]->bwrite(er->achan, bp, 0);
-	if(n < 0)
-		print("garp: send: %r\n");
+	devtab[er->achan->type]->bwrite(er->achan, bp, 0);
 }
 
 static void
 recvarp(Ipifc *ifc)
 {
-	int n;
+	int n, forme;
 	Block *ebp, *rbp;
 	Etherarp *e, *r;
 	uchar ip[IPaddrlen];
@@ -609,11 +564,11 @@
 	Etherrock *er = ifc->arg;
 
 	ebp = devtab[er->achan->type]->bread(er->achan, ifc->maxtu, 0);
-	if(ebp == nil) {
-		print("arp: rcv: %r\n");
+	if(ebp == nil)
 		return;
-	}
 
+	rlock(ifc);
+
 	e = (Etherarp*)ebp->rp;
 	switch(nhgets(e->op)) {
 	default:
@@ -620,9 +575,13 @@
 		break;
 
 	case ARPREPLY:
+		/* make sure not to enter multicast/broadcast addresses */
+		if(e->sha[0] & 1)
+			break;
+
 		/* check for machine using my ip address */
 		v4tov6(ip, e->spa);
-		if(iplocalonifc(ifc, ip) || ipproxyifc(er->f, ifc, ip)){
+		if(iplocalonifc(ifc, ip) != nil || ipproxyifc(er->f, ifc, ip)){
 			if(memcmp(e->sha, ifc->mac, sizeof(e->sha)) != 0){
 				print("arprep: 0x%E/0x%E also has ip addr %V\n",
 					e->s, e->sha, e->spa);
@@ -630,46 +589,47 @@
 			}
 		}
 
-		/* make sure we're not entering broadcast addresses */
-		if(ipcmp(ip, ipbroadcast) == 0 ||
-			!memcmp(e->sha, etherbroadcast, sizeof(e->sha))){
-			print("arprep: 0x%E/0x%E cannot register broadcast address %I\n",
-				e->s, e->sha, e->spa);
-			break;
-		}
-
-		arpenter(er->f, V4, e->spa, e->sha, sizeof(e->sha), 0);
+		/* refresh what we know about sender */
+		arpenter(er->f, V4, e->spa, e->sha, sizeof(e->sha), e->tpa, ifc, 1);
 		break;
 
 	case ARPREQUEST:
+		/* don't reply to multicast/broadcast addresses */
+		if(e->sha[0] & 1)
+			break;
+
 		/* don't answer arps till we know who we are */
-		if(ifc->lifc == 0)
+		if(ifc->lifc == nil)
 			break;
 
 		/* check for machine using my ip or ether address */
 		v4tov6(ip, e->spa);
-		if(iplocalonifc(ifc, ip) || ipproxyifc(er->f, ifc, ip)){
+		if(iplocalonifc(ifc, ip) != nil || ipproxyifc(er->f, ifc, ip)){
 			if(memcmp(e->sha, ifc->mac, sizeof(e->sha)) != 0){
-				if (memcmp(eprinted, e->spa, sizeof(e->spa))){
+				if(memcmp(eprinted, e->spa, sizeof(e->spa)) != 0){
 					/* print only once */
-					print("arpreq: 0x%E also has ip addr %V\n", e->sha, e->spa);
+					print("arpreq: 0x%E also has ip addr %V\n",
+						e->sha, e->spa);
 					memmove(eprinted, e->spa, sizeof(e->spa));
 				}
+				break;
 			}
 		} else {
 			if(memcmp(e->sha, ifc->mac, sizeof(e->sha)) == 0){
-				print("arpreq: %V also has ether addr %E\n", e->spa, e->sha);
+				print("arpreq: %V also has ether addr %E\n",
+					e->spa, e->sha);
 				break;
 			}
 		}
 
-		/* refresh what we know about sender */
-		arpenter(er->f, V4, e->spa, e->sha, sizeof(e->sha), 1);
-
-		/* answer only requests for our address or systems we're proxying for */
+		/*
+		 * when request is for our address or systems we're proxying for,
+		 * enter senders address into arp table and reply, otherwise just
+		 * refresh the senders address.
+		 */
 		v4tov6(ip, e->tpa);
-		if(!iplocalonifc(ifc, ip))
-		if(!ipproxyifc(er->f, ifc, ip))
+		forme = iplocalonifc(ifc, ip) != nil || ipproxyifc(er->f, ifc, ip);
+		if(arpenter(er->f, V4, e->spa, e->sha, sizeof(e->sha), e->tpa, ifc, !forme) < 0 || !forme)
 			break;
 
 		n = sizeof(Etherarp);
@@ -692,10 +652,14 @@
 		memmove(r->s, ifc->mac, sizeof(r->s));
 		rbp->wp += n;
 
-		n = devtab[er->achan->type]->bwrite(er->achan, rbp, 0);
-		if(n < 0)
-			print("arp: write: %r\n");
+		runlock(ifc);
+		freeb(ebp);
+
+		devtab[er->achan->type]->bwrite(er->achan, rbp, 0);
+		return;
 	}
+
+	runlock(ifc);
 	freeb(ebp);
 }
 
@@ -707,7 +671,7 @@
 
 	er->arpp = up;
 	if(waserror()){
-		er->arpp = 0;
+		er->arpp = nil;
 		pexit("hangup", 1);
 	}
 	for(;;)
@@ -749,14 +713,9 @@
 multicastarp(Fs *f, Arpent *a, Medium *medium, uchar *mac)
 {
 	/* is it broadcast? */
-	switch(ipforme(f, a->ip)){
-	case Runi:
-		return nil;
-	case Rbcast:
-		memset(mac, 0xff, 6);
+	if(ipforme(f, a->ip) == Rbcast){
+		memset(mac, 0xff, medium->maclen);
 		return arpresolve(f->arp, a, medium, mac);
-	default:
-		break;
 	}
 
 	/* if multicast, fill in mac */
@@ -778,11 +737,11 @@
 }
 
 
-static void 
+static void
 etherpref2addr(uchar *pref, uchar *ea)
 {
-	pref[8]  = ea[0] | 0x2;
-	pref[9]  = ea[1];
+	pref[8] = ea[0] ^ 0x2;
+	pref[9] = ea[1];
 	pref[10] = ea[2];
 	pref[11] = 0xFF;
 	pref[12] = 0xFE;
@@ -789,4 +748,41 @@
 	pref[13] = ea[3];
 	pref[14] = ea[4];
 	pref[15] = ea[5];
+}
+
+static void
+etherareg(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *ip)	/* Medium .areg hook: announce address ip on ifc */
+{
+	static char tdad[] = "dad6";	/* tag passed to addroute/remroute for the temporary route below */
+	uchar a[IPaddrlen];
+
+	if(ipcmp(ip, IPnoaddr) == 0 || ipcmp(ip, v4prefix) == 0)	/* nothing to announce for an unset address */
+		return;
+
+	if(isv4(ip)){	/* v4 addresses are announced by sendgarp */
+		sendgarp(ifc, ip);
+		return;
+	}
+
+	if((lifc->type&Rv4) != 0)	/* v6 ip on a v4-typed lifc: nothing to do */
+		return;
+
+	if(!lifc->tentative){	/* not tentative: send icmpna to v6allnodesL */
+		icmpna(f, lifc->local, v6allnodesL, ip, ifc->mac, 1<<5);	/* NOTE(review): meaning of flag 1<<5 is defined by icmpna; confirm there */
+		return;
+	}
+
+	if(ipcmp(lifc->local, ip) != 0)	/* tentative: only probe the lifc's own local address */
+		return;
+
+	/* temporarily add route for duplicate address detection */
+	ipv62smcast(a, ip);
+	addroute(f, a, IPallbits, v6Unspecified, IPallbits, ip, Rmulti, ifc, tdad);
+	if(waserror()){	/* make sure the temporary route is removed if icmpns raises an error */
+		remroute(f, a, IPallbits, v6Unspecified, IPallbits, ip, Rmulti, ifc, tdad);
+		nexterror();
+	}
+	icmpns(f, 0, SRC_UNSPEC, ip, TARG_MULTI, ifc->mac);
+	poperror();
+	remroute(f, a, IPallbits, v6Unspecified, IPallbits, ip, Rmulti, ifc, tdad);
 }
--- a/os/ip/gre.c
+++ b/os/ip/gre.c
@@ -1,3 +1,6 @@
+/*
+ * Generic Routing Encapsulation over IPv4, rfc1702
+ */
 #include "u.h"
 #include "../port/lib.h"
 #include "mem.h"
@@ -7,10 +10,7 @@
 
 #include "ip.h"
 
-#define DPRINT if(0)print
-
-enum
-{
+enum {
 	GRE_IPONLY	= 12,		/* size of ip header */
 	GRE_IPPLUSGRE	= 12,		/* minimum size of GRE header */
 	IP_GREPROTO	= 47,
@@ -18,10 +18,33 @@
 	GRErxms		= 200,
 	GREtickms	= 100,
 	GREmaxxmit	= 10,
+
+	K		= 1024,
+	GREqlen		= 256 * K,
+
+	GRE_cksum	= 0x8000,
+	GRE_routing	= 0x4000,
+	GRE_key		= 0x2000,
+	GRE_seq		= 0x1000,
+
+	Nring		= 1 << 10,	/* power of two, please */
+	Ringmask	= Nring - 1,
+
+	GREctlraw	= 0,
+	GREctlcooked,
+	GREctlretunnel,
+	GREctlreport,
+	GREctldlsuspend,
+	GREctlulsuspend,
+	GREctldlresume,
+	GREctlulresume,
+	GREctlforward,
+	GREctlulkey,
+	Ncmds,
 };
 
-typedef struct GREhdr
-{
+typedef struct GREhdr GREhdr;
+struct GREhdr{
 	/* ip header */
 	uchar	vihl;		/* Version and header length */
 	uchar	tos;		/* Type of service */
@@ -28,7 +51,7 @@
 	uchar	len[2];		/* packet length (including headers) */
 	uchar	id[2];		/* Identification */
 	uchar	frag[2];	/* Fragment information */
-	uchar	Unused;	
+	uchar	ttl;
 	uchar	proto;		/* Protocol */
 	uchar	cksum[2];	/* checksum */
 	uchar	src[4];		/* Ip source */
@@ -37,21 +60,115 @@
 	/* gre header */
 	uchar	flags[2];
 	uchar	eproto[2];	/* encapsulation protocol */
-} GREhdr;
+};
 
 typedef struct GREpriv GREpriv;
-struct GREpriv
-{
-	int		raw;			/* Raw GRE mode */
-
+struct GREpriv{
 	/* non-MIB stats */
-	ulong		csumerr;		/* checksum errors */
-	ulong		lenerr;			/* short packet */
+	uvlong	lenerr;			/* short packet */
 };
 
+typedef struct Bring	Bring;
+struct Bring{
+	Block	*ring[Nring];
+	long	produced;
+	long	consumed;
+};
+
+typedef struct GREconv	GREconv;
+struct GREconv{
+	int	raw;
+
+	/* Retunnelling information.  v4 only */
+	uchar	north[4];			/* HA */
+	uchar	south[4];			/* Base station */
+	uchar	hoa[4];				/* Home address */
+	uchar	coa[4];				/* Careof address */
+	ulong	seq;				/* Current sequence # */
+	int	dlsusp;				/* Downlink suspended? */
+	int	ulsusp;				/* Uplink suspended? */
+	ulong	ulkey;				/* GRE key */
+
+	QLock	lock;				/* Lock for rings */
+	Bring	dlpending;			/* Ring of pending packets */
+	Bring	dlbuffered;			/* Received while suspended */
+	Bring	ulbuffered;			/* Received while suspended */
+};
+
+typedef struct Metablock Metablock;
+struct Metablock{
+	uchar	*rp;
+	ulong	seq;
+};
+
+static char *grectlcooked(Conv *, int, char **);
+static char *grectldlresume(Conv *, int, char **);
+static char *grectldlsuspend(Conv *, int, char **);
+static char *grectlforward(Conv *, int, char **);
+static char *grectlraw(Conv *, int, char **);
+static char *grectlreport(Conv *, int, char **);
+static char *grectlretunnel(Conv *, int, char **);
+static char *grectlulkey(Conv *, int, char **);
+static char *grectlulresume(Conv *, int, char **);
+static char *grectlulsuspend(Conv *, int, char **);
+
+static struct{
+	char	*cmd;
+	int	argc;
+	char	*(*f)(Conv *, int, char **);
+} grectls[Ncmds] = {
+[GREctlraw]	=	{	"raw",		1,	grectlraw,	},
+[GREctlcooked]	=	{	"cooked",	1,	grectlcooked,	},
+[GREctlretunnel]=	{	"retunnel",	5,	grectlretunnel,	},
+[GREctlreport]	=	{	"report",	2,	grectlreport,	},
+[GREctldlsuspend]=	{	"dlsuspend",	1,	grectldlsuspend,},
+[GREctlulsuspend]=	{	"ulsuspend",	1,	grectlulsuspend,},
+[GREctldlresume]=	{	"dlresume",	1,	grectldlresume,	},
+[GREctlulresume]=	{	"ulresume",	1,	grectlulresume,	},
+[GREctlforward]	=	{	"forward",	2,	grectlforward,	},
+[GREctlulkey]	=	{	"ulkey",	2,	grectlulkey,	},
+};
+
+static uchar nulladdr[4];
+static char *sessend = "session end";
+
 static void grekick(void *x, Block *bp);
+static char *gresetup(Conv *, char *, char *, char *);
 
-static char*
+uvlong grepdin, grepdout, grebdin, grebdout;
+uvlong grepuin, grepuout, grebuin, grebuout;
+
+static Block *
+getring(Bring *r)	/* remove and return the oldest block held in r, or nil when r is empty */
+{
+	Block *bp;
+
+	if(r->consumed == r->produced)	/* counters equal: nothing queued */
+		return nil;
+
+	bp = r->ring[r->consumed & Ringmask];	/* counters are never wrapped; the mask picks the slot */
+	r->ring[r->consumed & Ringmask] = nil;
+	r->consumed++;
+	return bp;
+}
+
+static void
+addring(Bring *r, Block *bp)	/* append bp to r; if r already holds Nring blocks the oldest one is freed */
+{
+	Block *tbp;
+
+	if(r->produced - r->consumed > Ringmask){
+		/* Full! */
+		tbp = r->ring[r->produced & Ringmask];	/* when full, the next write slot is also the oldest entry */
+		assert(tbp);
+		freeb(tbp);
+		r->consumed++;
+	}
+	r->ring[r->produced & Ringmask] = bp;
+	r->produced++;
+}
+
+static char *
 greconnect(Conv *c, char **argv, int argc)
 {
 	Proto *p;
@@ -91,7 +208,7 @@
 static void
 grecreate(Conv *c)
 {
-	c->rq = qopen(64*1024, Qmsg, 0, c);
+	c->rq = qopen(GREqlen, Qmsg, 0, c);
 	c->wq = qbypass(grekick, c);
 }
 
@@ -98,44 +215,88 @@
 static int
 grestate(Conv *c, char *state, int n)
 {
-	USED(c);
-	return snprint(state, n, "%s", "Datagram");
+	GREconv *grec;
+	char *ep, *p;
+
+	grec = c->ptcl;
+	p    = state;
+	ep   = p + n;
+	p    = seprint(p, ep, "%s%s%s%shoa %V north %V south %V seq %ulx "
+	 "pending %uld  %uld buffered dl %uld %uld ul %uld %uld ulkey %.8ulx\n",
+			c->inuse? "Open ": "Closed ",
+			grec->raw? "raw ": "",
+			grec->dlsusp? "DL suspended ": "",
+			grec->ulsusp? "UL suspended ": "",
+			grec->hoa, grec->north, grec->south, grec->seq,
+			grec->dlpending.consumed, grec->dlpending.produced,
+			grec->dlbuffered.consumed, grec->dlbuffered.produced,
+			grec->ulbuffered.consumed, grec->ulbuffered.produced,
+			grec->ulkey);
+	return p - state;
 }
 
 static char*
 greannounce(Conv*, char**, int)
 {
-	return "pktifc does not support announce";
+	return "gre does not support announce";
 }
 
 static void
 greclose(Conv *c)
 {
-	qclose(c->rq);
-	qclose(c->wq);
-	qclose(c->eq);
+	GREconv *grec;
+	Block *bp;
+
+	grec = c->ptcl;
+
+	/* Make sure we don't forward any more packets */
+	memset(grec->hoa, 0, sizeof grec->hoa);
+	memset(grec->north, 0, sizeof grec->north);
+	memset(grec->south, 0, sizeof grec->south);
+
+	qlock(&grec->lock);
+	while((bp = getring(&grec->dlpending)) != nil)
+		freeb(bp);
+
+	while((bp = getring(&grec->dlbuffered)) != nil)
+		freeb(bp);
+
+	while((bp = getring(&grec->ulbuffered)) != nil)
+		freeb(bp);
+
+	grec->dlpending.produced = grec->dlpending.consumed = 0;
+	grec->dlbuffered.produced = grec->dlbuffered.consumed = 0;
+	grec->ulbuffered.produced = grec->ulbuffered.consumed = 0;
+	qunlock(&grec->lock);
+
+	grec->raw = 0;
+	grec->seq = 0;
+	grec->dlsusp = grec->ulsusp = 1;
+
+	qhangup(c->rq, sessend);
+	qhangup(c->wq, sessend);
+	qhangup(c->eq, sessend);
 	ipmove(c->laddr, IPnoaddr);
 	ipmove(c->raddr, IPnoaddr);
-	c->lport = 0;
-	c->rport = 0;
+	c->lport = c->rport = 0;
 }
 
-int drop;
-
 static void
 grekick(void *x, Block *bp)
 {
-	Conv *c = x;
-	GREhdr *ghp;
+	Conv *c;
+	GREconv *grec;
+	GREhdr *gre;
 	uchar laddr[IPaddrlen], raddr[IPaddrlen];
 
 	if(bp == nil)
 		return;
 
+	c    = x;
+	grec = c->ptcl;
+
 	/* Make space to fit ip header (gre header already there) */
 	bp = padblock(bp, GRE_IPONLY);
-	if(bp == nil)
-		return;
 
 	/* make sure the message has a GRE header */
 	bp = pullupblock(bp, GRE_IPONLY+GRE_IPPLUSGRE);
@@ -142,90 +303,344 @@
 	if(bp == nil)
 		return;
 
-	ghp = (GREhdr *)(bp->rp);
-	ghp->vihl = IP_VER4;
+	gre = (GREhdr *)bp->rp;
+	gre->vihl = IP_VER4;
 
-	if(!((GREpriv*)c->p->priv)->raw){
-		v4tov6(raddr, ghp->dst);
+	if(grec->raw == 0){
+		v4tov6(raddr, gre->dst);
 		if(ipcmp(raddr, v4prefix) == 0)
-			memmove(ghp->dst, c->raddr + IPv4off, IPv4addrlen);
-		v4tov6(laddr, ghp->src);
+			memmove(gre->dst, c->raddr + IPv4off, IPv4addrlen);
+		v4tov6(laddr, gre->src);
 		if(ipcmp(laddr, v4prefix) == 0){
 			if(ipcmp(c->laddr, IPnoaddr) == 0)
-				findlocalip(c->p->f, c->laddr, raddr); /* pick interface closest to dest */
-			memmove(ghp->src, c->laddr + IPv4off, IPv4addrlen);
+				/* pick interface closest to dest */
+				findlocalip(c->p->f, c->laddr, raddr);
+			memmove(gre->src, c->laddr + IPv4off, sizeof gre->src);
 		}
-		hnputs(ghp->eproto, c->rport);
+		hnputs(gre->eproto, c->rport);
 	}
 
-	ghp->proto = IP_GREPROTO;
-	ghp->frag[0] = 0;
-	ghp->frag[1] = 0;
+	gre->proto = IP_GREPROTO;
+	gre->frag[0] = gre->frag[1] = 0;
 
+	grepdout++;
+	grebdout += BLEN(bp);
 	ipoput4(c->p->f, bp, 0, c->ttl, c->tos, nil);
 }
 
 static void
-greiput(Proto *gre, Ipifc*, Block *bp)
+gredownlink(Conv *c, Block *bp)
 {
-	int len;
-	GREhdr *ghp;
-	Conv *c, **p;
-	ushort eproto;
+	Metablock *m;
+	GREconv *grec;
+	GREhdr *gre;
+	int hdrlen, suspended, extra;
+	ushort flags;
+	ulong seq;
+
+	gre = (GREhdr *)bp->rp;
+	if(gre->ttl == 1){
+		freeb(bp);
+		return;
+	}
+
+	/*
+	 * We've received a packet with a GRE header and we need to
+	 * re-adjust the packet header to strip all unwanted parts
+	 * but leave room for only a sequence number.
+	 */
+	grec   = c->ptcl;
+	flags  = nhgets(gre->flags);
+	hdrlen = 0;
+	if(flags & GRE_cksum)
+		hdrlen += 2;
+	if(flags & GRE_routing){
+		print("%V routing info present.  Discarding packet", gre->src);
+		freeb(bp);
+		return;
+	}
+	if(flags & (GRE_cksum|GRE_routing))
+		hdrlen += 2;			/* Offset field */
+	if(flags & GRE_key)
+		hdrlen += 4;
+	if(flags & GRE_seq)
+		hdrlen += 4;
+
+	/*
+	 * The outgoing packet only has the sequence number set.  Make room
+	 * for the sequence number.
+	 */
+	if(hdrlen != sizeof(ulong)){
+		extra = hdrlen - sizeof(ulong);
+		if(extra < 0 && bp->rp - bp->base < -extra){
+			print("gredownlink: cannot add sequence number\n");
+			freeb(bp);
+			return;
+		}
+		memmove(bp->rp + extra, bp->rp, sizeof(GREhdr));
+		bp->rp += extra;
+		assert(BLEN(bp) >= sizeof(GREhdr) + sizeof(ulong));
+		gre = (GREhdr *)bp->rp;
+	}
+	seq = grec->seq++;
+	hnputs(gre->flags, GRE_seq);
+	hnputl(bp->rp + sizeof(GREhdr), seq);
+
+	/*
+	 * Keep rp and seq at the base.  ipoput4 consumes rp for
+	 * refragmentation.
+	 */
+	assert(bp->rp - bp->base >= sizeof(Metablock));
+	m = (Metablock *)bp->base;
+	m->rp  = bp->rp;
+	m->seq = seq;
+
+	/*
+	 * Here we make a decision what we're doing with the packet.  We're
+	 * doing this w/o holding a lock which means that later on in the
+	 * process we may discover we've done the wrong thing.  I don't want
+	 * to call ipoput with the lock held.
+	 */
+restart:
+	suspended = grec->dlsusp;
+	if(suspended){
+		if(!canqlock(&grec->lock)){
+			/*
+			 * just give up.  too bad, we lose a packet.  this
+			 * is just too hard and my brain already hurts.
+			 */
+			freeb(bp);
+			return;
+		}
+
+		if(!grec->dlsusp){
+			/*
+			 * suspend race.  We though we were suspended, but
+			 * we really weren't.
+			 */
+			qunlock(&grec->lock);
+			goto restart;
+		}
+
+		/* Undo the incorrect ref count addition */
+		addring(&grec->dlbuffered, bp);
+		qunlock(&grec->lock);
+		return;
+	}
+
+	/*
+	 * When we get here, we're not suspended.  Proceed to send the
+	 * packet.
+	 */
+	memmove(gre->src, grec->coa, sizeof gre->dst);
+	memmove(gre->dst, grec->south, sizeof gre->dst);
+
+	ipoput4(c->p->f, copyblock(bp, BLEN(bp)), 0, gre->ttl - 1, gre->tos, nil);
+	grepdout++;
+	grebdout += BLEN(bp);
+
+	/*
+	 * Now make sure we didn't do the wrong thing.
+	 */
+	if(!canqlock(&grec->lock)){
+		freeb(bp);		/* The packet just goes away */
+		return;
+	}
+
+	/* We did the right thing */
+	addring(&grec->dlpending, bp);
+	qunlock(&grec->lock);
+}
+
+/* retunnel an uplink packet north, inserting the GRE key if one is set; always consumes bp */
+static void
+greuplink(Conv *c, Block *bp)
+{
+	GREconv *grec;
+	GREhdr *gre;
+	ushort flags;
+
+	gre = (GREhdr *)bp->rp;
+	if(gre->ttl == 1){
+		freeb(bp);	/* caller never frees bp after this call, so drop it here rather than leak it */
+		return;
+	}
+
+	grec = c->ptcl;
+	memmove(gre->src, grec->coa, sizeof gre->src);
+	memmove(gre->dst, grec->north, sizeof gre->dst);
+
+	/* Add a key, if needed. */
+	if(grec->ulkey){
+		flags = nhgets(gre->flags);
+		if(flags & (GRE_cksum|GRE_routing)){
+			print("%V routing info present.  Discarding packet\n",
+				gre->src);
+			freeb(bp);
+			return;
+		}
+
+		if((flags & GRE_key) == 0){
+			/* Make room for the key */
+			if(bp->rp - bp->base < sizeof(ulong)){
+				print("%V can't add key\n", gre->src);
+				freeb(bp);
+				return;
+			}
+
+			bp->rp -= 4;
+			memmove(bp->rp, bp->rp + 4, sizeof(GREhdr));
+			gre = (GREhdr *)bp->rp;
+			hnputs(gre->flags, flags | GRE_key);
+		}
+
+		/* Add the key */
+		hnputl(bp->rp + sizeof(GREhdr), grec->ulkey);
+	}
+
+	if(!canqlock(&grec->lock)){
+		freeb(bp);
+		return;
+	}
+
+	if(grec->ulsusp)
+		addring(&grec->ulbuffered, bp);
+	else{
+		grepuout++;
+		grebuout += BLEN(bp);	/* count before sending: bp is handed off to ipoput4 below */
+		ipoput4(c->p->f, bp, 0, gre->ttl - 1, gre->tos, nil);
+	}
+	qunlock(&grec->lock);
+}
+
+static void
+greiput(Proto *proto, Ipifc *, Block *bp)
+{
+	int len, hdrlen;
+	ushort eproto, flags;
 	uchar raddr[IPaddrlen];
+	Conv *c, **p;
+	GREconv *grec;
+	GREhdr *gre;
 	GREpriv *gpriv;
+	Ip4hdr *ip;
 
-	gpriv = gre->priv;
-	ghp = (GREhdr*)(bp->rp);
+	/*
+	 * We don't want to deal with block lists.  Ever.  The problem is
+	 * that when the block is forwarded, devether.c puts the block into
+	 * a queue that also uses ->next.  Just do not use ->next here!
+	 */
+	if(bp->next != nil)
+		bp = pullupblock(bp, blocklen(bp));
 
-	v4tov6(raddr, ghp->src);
-	eproto = nhgets(ghp->eproto);
-	qlock(gre);
+	gre = (GREhdr *)bp->rp;
+	if(BLEN(bp) < sizeof(GREhdr) || gre->proto != IP_GREPROTO){
+		freeb(bp);
+		return;
+	}
 
-	/* Look for a conversation structure for this port and address */
-	c = nil;
-	for(p = gre->conv; *p; p++) {
+	v4tov6(raddr, gre->src);
+	eproto = nhgets(gre->eproto);
+	flags  = nhgets(gre->flags);
+	hdrlen = sizeof(GREhdr);
+
+	if(flags & GRE_cksum)
+		hdrlen += 2;
+	if(flags & GRE_routing){
+		print("%I routing info present.  Discarding packet\n", raddr);
+		freeb(bp);
+		return;
+	}
+	if(flags & (GRE_cksum|GRE_routing))
+		hdrlen += 2;			/* Offset field */
+	if(flags & GRE_key)
+		hdrlen += 4;
+	if(flags & GRE_seq)
+		hdrlen += 4;
+
+	qlock(proto);
+
+	if(eproto != 0x880B && BLEN(bp) - hdrlen >= sizeof(Ip4hdr)){
+		ip = (Ip4hdr *)(bp->rp + hdrlen);
+
+		/*
+		 * Look for a conversation structure for this port and address, or
+		 * match the retunnel part, or match on the raw flag.
+		 */
+		for(p = proto->conv; *p; p++) {
+			c = *p;
+
+			if(c->inuse == 0)
+				continue;
+
+			/*
+			 * Do not stop this session - blocking here
+			 * implies that etherread is blocked.
+			 */
+			grec = c->ptcl;
+			if(memcmp(ip->dst, grec->hoa, sizeof ip->dst) == 0){
+				grepdin++;
+				grebdin += BLEN(bp);
+				gredownlink(c, bp);
+				qunlock(proto);
+				return;
+			}
+
+			if(memcmp(ip->src, grec->hoa, sizeof ip->src) == 0){
+				grepuin++;
+				grebuin += BLEN(bp);
+				greuplink(c, bp);
+				qunlock(proto);
+				return;
+			}
+		}
+	}
+
+
+	/*
+	 * when we get here, none of the forwarding tunnels matched.  now
+	 * try to match on raw and conversational sessions.
+	 */
+	for(c = nil, p = proto->conv; *p; p++) {
 		c = *p;
+
 		if(c->inuse == 0)
 			continue;
-		if(c->rport == eproto && 
-			(gpriv->raw || ipcmp(c->raddr, raddr) == 0))
+
+		/*
+		 * Do not stop this session - blocking here
+		 * implies that etherread is blocked.
+		 */
+		grec = c->ptcl;
+		if(c->rport == eproto &&
+		    (grec->raw || ipcmp(c->raddr, raddr) == 0))
 			break;
 	}
 
-	if(*p == nil) {
-		qunlock(gre);
-		freeblist(bp);
+	qunlock(proto);
+
+	if(*p == nil){
+		freeb(bp);
 		return;
 	}
 
-	qunlock(gre);
-
 	/*
 	 * Trim the packet down to data size
 	 */
-	len = nhgets(ghp->len) - GRE_IPONLY;
+	len = nhgets(gre->len) - GRE_IPONLY;
 	if(len < GRE_IPPLUSGRE){
-		freeblist(bp);
+		freeb(bp);
 		return;
 	}
+
 	bp = trimblock(bp, GRE_IPONLY, len);
 	if(bp == nil){
+		gpriv = proto->priv;
 		gpriv->lenerr++;
 		return;
 	}
 
-	/*
-	 *  Can't delimit packet so pull it all into one block.
-	 */
-	if(qlen(c->rq) > 64*1024)
-		freeblist(bp);
-	else{
-		bp = concatblock(bp);
-		if(bp == 0)
-			panic("greiput");
-		qpass(c->rq, bp);
-	}
+	qpass(c->rq, bp);
 }
 
 int
@@ -234,29 +649,258 @@
 	GREpriv *gpriv;
 
 	gpriv = gre->priv;
+	return snprint(buf, len,
+		"gre: %llud %llud %llud %llud %llud %llud %llud %llud, lenerrs %llud\n",
+		grepdin, grepdout, grepuin, grepuout,
+		grebdin, grebdout, grebuin, grebuout, gpriv->lenerr);
+}
 
-	return snprint(buf, len, "gre: len %lud\n", gpriv->lenerr);
+static char *
+grectlraw(Conv *c, int, char **)	/* ctl handler: put this conversation in raw mode */
+{
+	GREconv *grec;
+
+	grec = c->ptcl;
+	grec->raw = 1;	/* with raw set, the receive-side rport match no longer compares raddr */
+	return nil;
 }
 
-char*
-grectl(Conv *c, char **f, int n)
+static char *
+grectlcooked(Conv *c, int, char **)	/* ctl handler: clear this conversation's raw flag */
 {
-	GREpriv *gpriv;
+	GREconv *grec;
 
-	gpriv = c->p->priv;
-	if(n == 1){
-		if(strcmp(f[0], "raw") == 0){
-			gpriv->raw = 1;
-			return nil;
-		}
-		else if(strcmp(f[0], "cooked") == 0){
-			gpriv->raw = 0;
-			return nil;
-		}
+	grec = c->ptcl;
+	grec->raw = 0;	/* the flag now lives in the per-conversation GREconv, not in GREpriv */
+	return nil;
+}
+
+static char *
+grectlretunnel(Conv *c, int, char **argv)	/* ctl handler: set hoa, north, south, coa from argv[1..4] */
+{
+	GREconv *grec;
+	uchar ipaddr[4];
+
+	grec = c->ptcl;
+	if(memcmp(grec->hoa, nulladdr, sizeof grec->hoa))	/* a non-null hoa means a tunnel is already configured */
+		return "tunnel already set up";
+
+	v4parseip(ipaddr, argv[1]);	/* NOTE(review): hoa is presumably the "home address" -- confirm */
+	if(memcmp(ipaddr, nulladdr, sizeof ipaddr) == 0)
+		return "bad hoa";
+	memmove(grec->hoa, ipaddr, sizeof grec->hoa);
+	v4parseip(ipaddr, argv[2]);	/* only hoa is checked against nulladdr; the next three are stored as parsed */
+	memmove(grec->north, ipaddr, sizeof grec->north);
+	v4parseip(ipaddr, argv[3]);
+	memmove(grec->south, ipaddr, sizeof grec->south);
+	v4parseip(ipaddr, argv[4]);
+	memmove(grec->coa, ipaddr, sizeof grec->coa);
+	grec->ulsusp = 1;	/* a fresh tunnel starts with the uplink flag set and the downlink flag clear */
+	grec->dlsusp = 0;
+
+	return nil;
+}
+
+static char *
+grectlreport(Conv *c, int, char **argv)	/* ctl handler: release dlpending blocks older than seq argv[1] */
+{
+	ulong seq;
+	Block *bp;
+	Bring *r;
+	GREconv *grec;
+	Metablock *m;
+
+	grec = c->ptcl;
+	seq  = strtoul(argv[1], nil, 0);
+
+	qlock(&grec->lock);
+	r = &grec->dlpending;
+	while(r->produced - r->consumed > 0){	/* ring is non-empty */
+		bp = r->ring[r->consumed & Ringmask];
+
+		assert(bp && bp->rp - bp->base >= sizeof(Metablock));
+		m = (Metablock *)bp->base;	/* the Metablock sits at the start of the buffer, before rp */
+		if((long)(seq - m->seq) <= 0)	/* signed difference: stop at the first block whose seq is not before seq */
+			break;
+
+		r->ring[r->consumed & Ringmask] = nil;
+		r->consumed++;
+
+		freeb(bp);
 	}
-	return "unknown control request";
+	qunlock(&grec->lock);
+	return nil;
 }
 
+static char *
+grectldlsuspend(Conv *c, int, char **)
+{
+	GREconv *gc;
+
+	/* set the conversation's dlsusp flag; error string if it is already set */
+	gc = c->ptcl;
+	if(gc->dlsusp != 0)
+		return "already suspended";
+	gc->dlsusp = 1;
+	return nil;
+}
+
+static char *
+grectlulsuspend(Conv *c, int, char **)
+{
+	GREconv *gc;
+
+	/* set the conversation's ulsusp flag; error string if it is already set */
+	gc = c->ptcl;
+	if(gc->ulsusp != 0)
+		return "already suspended";
+	gc->ulsusp = 1;
+	return nil;
+}
+
+static char *
+grectldlresume(Conv *c, int, char **)	/* ctl handler: send everything in dlbuffered, then clear dlsusp */
+{
+	GREconv *grec;
+	GREhdr *gre;
+	Block *bp;
+
+	grec = c->ptcl;
+
+	qlock(&grec->lock);
+	if(!grec->dlsusp){
+		qunlock(&grec->lock);
+		return "not suspended";
+	}
+	/* drain dlbuffered: a copy of each block is sent, the original moves to dlpending */
+	while((bp = getring(&grec->dlbuffered)) != nil){
+		gre = (GREhdr *)bp->rp;
+		qunlock(&grec->lock);
+		/* the lock is released around ipoput4 and retaken before the rings are touched again */
+		ipoput4(c->p->f, copyblock(bp, BLEN(bp)), 0, gre->ttl - 1, gre->tos, nil);
+
+		qlock(&grec->lock);
+		addring(&grec->dlpending, bp);
+	}
+	grec->dlsusp = 0;
+	qunlock(&grec->lock);
+	return nil;
+}
+
+static char *
+grectlulresume(Conv *c, int, char **)	/* ctl handler: send everything in ulbuffered, then clear ulsusp */
+{
+	GREconv *grec;
+	GREhdr *gre;
+	Block *bp;
+
+	grec = c->ptcl;
+
+	qlock(&grec->lock);	/* unlike the downlink resume, ulsusp is not checked first */
+	while((bp = getring(&grec->ulbuffered)) != nil){
+		gre = (GREhdr *)bp->rp;
+		/* the block itself is passed to ipoput4; no copy is kept; lock is dropped around the call */
+		qunlock(&grec->lock);
+		ipoput4(c->p->f, bp, 0, gre->ttl - 1, gre->tos, nil);
+		qlock(&grec->lock);
+	}
+	grec->ulsusp = 0;
+	qunlock(&grec->lock);
+	return nil;
+}
+
+static char *
+grectlforward(Conv *c, int, char **argv)	/* ctl handler: point south/north at argv[1] and flush all rings there */
+{
+	Block *bp;
+	GREconv *grec;
+	GREhdr *gre;
+	Metablock *m;
+
+	grec = c->ptcl;
+	/* NOTE(review): south/north are overwritten before the dlsusp check, so they change even on "not suspended" */
+	v4parseip(grec->south, argv[1]);
+	memmove(grec->north, grec->south, sizeof grec->north);
+
+	qlock(&grec->lock);
+	if(!grec->dlsusp){
+		qunlock(&grec->lock);
+		return "not suspended";
+	}
+	grec->dlsusp = 0;
+	grec->ulsusp = 0;
+	/* first the dlpending blocks: restore rp from the Metablock at bp->base, then re-address and send */
+	while((bp = getring(&grec->dlpending)) != nil){
+
+		assert(bp->rp - bp->base >= sizeof(Metablock));
+		m = (Metablock *)bp->base;
+		assert(m->rp >= bp->base && m->rp < bp->lim);
+
+		bp->rp = m->rp;
+
+		gre = (GREhdr *)bp->rp;
+		memmove(gre->src, grec->coa, sizeof gre->src);	/* size by the field being written */
+		memmove(gre->dst, grec->south, sizeof gre->dst);
+
+		qunlock(&grec->lock);	/* lock is dropped around every ipoput4 call */
+		ipoput4(c->p->f, bp, 0, gre->ttl - 1, gre->tos, nil);
+		qlock(&grec->lock);
+	}
+	/* then the blocks still waiting in dlbuffered, re-addressed coa -> south */
+	while((bp = getring(&grec->dlbuffered)) != nil){
+		gre = (GREhdr *)bp->rp;
+		memmove(gre->src, grec->coa, sizeof gre->src);
+		memmove(gre->dst, grec->south, sizeof gre->dst);
+
+		qunlock(&grec->lock);
+		ipoput4(c->p->f, bp, 0, gre->ttl - 1, gre->tos, nil);
+		qlock(&grec->lock);
+	}
+	/* finally the ulbuffered blocks, which get the same coa -> south addressing */
+	while((bp = getring(&grec->ulbuffered)) != nil){
+		gre = (GREhdr *)bp->rp;
+
+		memmove(gre->src, grec->coa, sizeof gre->src);
+		memmove(gre->dst, grec->south, sizeof gre->dst);
+
+		qunlock(&grec->lock);
+		ipoput4(c->p->f, bp, 0, gre->ttl - 1, gre->tos, nil);
+		qlock(&grec->lock);
+	}
+	qunlock(&grec->lock);
+	return nil;
+}
+
+static char *
+grectlulkey(Conv *c, int, char **argv)
+{
+	GREconv *gc = c->ptcl;
+
+	/* store argv[1], parsed by strtoul with base 0, as the conversation's ulkey */
+	gc->ulkey = strtoul(argv[1], nil, 0);
+	return nil;
+}
+
+char *
+grectl(Conv *c, char **f, int n)
+{
+	int k;
+
+	/* look f[0] up in grectls[]; a table argc of 0 accepts any argument count */
+	if(n < 1)
+		return "too few arguments";
+
+	for(k = 0; k < Ncmds; k++){
+		if(strcmp(f[0], grectls[k].cmd) != 0)
+			continue;
+		if(grectls[k].argc != 0 && grectls[k].argc != n)
+			return "incorrect number of arguments";
+		return grectls[k].f(c, n, f);
+	}
+
+	return "no such command";
+}
+
 void
 greinit(Fs *fs)
 {
@@ -276,7 +920,7 @@
 	gre->stats = grestats;
 	gre->ipproto = IP_GREPROTO;
 	gre->nc = 64;
-	gre->ptclsize = 0;
+	gre->ptclsize = sizeof(GREconv);
 
 	Fsproto(fs, gre);
 }
--- a/os/ip/icmp.c
+++ b/os/ip/icmp.c
@@ -44,11 +44,6 @@
 	Maxtype		= 18,
 };
 
-enum
-{
-	MinAdvise	= 24,	/* minimum needed for us to advise another protocol */ 
-};
-
 char *icmpnames[Maxtype+1] =
 {
 [EchoReply]		"EchoReply",
@@ -70,6 +65,8 @@
 	IP_ICMPPROTO	= 1,
 	ICMP_IPSIZE	= 20,
 	ICMP_HDRSIZE	= 8,
+
+	MinAdvise	= ICMP_IPSIZE+4,	/* minimum needed for us to advise another protocol */ 
 };
 
 enum
@@ -113,7 +110,7 @@
 	c->wq = qbypass(icmpkick, c);
 }
 
-extern char*
+char*
 icmpconnect(Conv *c, char **argv, int argc)
 {
 	char *e;
@@ -126,11 +123,11 @@
 	return nil;
 }
 
-extern int
+int
 icmpstate(Conv *c, char *state, int n)
 {
 	USED(c);
-	return snprint(state, n, "%s qin %d qout %d",
+	return snprint(state, n, "%s qin %d qout %d\n",
 		"Datagram",
 		c->rq ? qlen(c->rq) : 0,
 		c->wq ? qlen(c->wq) : 0
@@ -137,7 +134,7 @@
 	);
 }
 
-extern char*
+char*
 icmpannounce(Conv *c, char **argv, int argc)
 {
 	char *e;
@@ -150,7 +147,7 @@
 	return nil;
 }
 
-extern void
+void
 icmpclose(Conv *c)
 {
 	qclose(c->rq);
@@ -169,8 +166,7 @@
 
 	if(bp == nil)
 		return;
-
-	if(blocklen(bp) < ICMP_IPSIZE + ICMP_HDRSIZE){
+	if(BLEN(bp) < ICMP_IPSIZE + ICMP_HDRSIZE){
 		freeblist(bp);
 		return;
 	}
@@ -190,21 +186,50 @@
 	ipoput4(c->p->f, bp, 0, c->ttl, c->tos, nil);
 }
 
-extern void
-icmpttlexceeded(Fs *f, uchar *ia, Block *bp)
+static int
+ip4reply(Fs *f, uchar ip4[4])	/* non-zero if an ICMP message may be sent back to ip4 */
 {
+	uchar addr[IPaddrlen];
+	int i;
+
+	v4tov6(addr, ip4);
+	if(ipismulticast(addr))
+		return 0;	/* never reply to a multicast address */
+	i = ipforme(f, addr);
+	return i == 0 || i == Runi;	/* accept only an ipforme result of 0 or exactly Runi */
+}
+
+static int
+ip4me(Fs *f, uchar ip4[4])
+{
+	uchar v6[IPaddrlen];
+
+	/* true only when ip4 is not multicast and ipforme reports exactly Runi */
+	v4tov6(v6, ip4);
+
+	return !ipismulticast(v6) && ipforme(f, v6) == Runi;
+}
+
+void
+icmpttlexceeded(Fs *f, Ipifc *ifc, Block *bp)
+{
 	Block	*nbp;
 	Icmp	*p, *np;
+	uchar	ia[IPv4addrlen];
 
 	p = (Icmp *)bp->rp;
+	if(!ip4reply(f, p->src) || !ipv4local(ifc, ia, 0, p->src))
+		return;
 
-	netlog(f, Logicmp, "sending icmpttlexceeded -> %V\n", p->src);
+	netlog(f, Logicmp, "sending icmpttlexceeded %V -> src %V dst %V\n",
+		ia, p->src, p->dst);
+
 	nbp = allocb(ICMP_IPSIZE + ICMP_HDRSIZE + ICMP_IPSIZE + 8);
 	nbp->wp += ICMP_IPSIZE + ICMP_HDRSIZE + ICMP_IPSIZE + 8;
 	np = (Icmp *)nbp->rp;
 	np->vihl = IP_VER4;
+	memmove(np->src, ia, sizeof(np->src));
 	memmove(np->dst, p->src, sizeof(np->dst));
-	v6tov4(np->src, ia);
 	memmove(np->data, bp->rp, ICMP_IPSIZE + 8);
 	np->type = TimeExceed;
 	np->code = 0;
@@ -214,7 +239,6 @@
 	memset(np->cksum, 0, sizeof(np->cksum));
 	hnputs(np->cksum, ptclcsum(nbp, ICMP_IPSIZE, blocklen(nbp) - ICMP_IPSIZE));
 	ipoput4(f, nbp, 0, MAXTTL, DFLTTOS, nil);
-
 }
 
 static void
@@ -222,20 +246,10 @@
 {
 	Block	*nbp;
 	Icmp	*p, *np;
-	int	i;
-	uchar	addr[IPaddrlen];
 
 	p = (Icmp *)bp->rp;
-
-	/* only do this for unicast sources and destinations */
-	v4tov6(addr, p->dst);
-	i = ipforme(f, addr);
-	if((i&Runi) == 0)
+	if(!ip4me(f, p->dst) || !ip4reply(f, p->src))
 		return;
-	v4tov6(addr, p->src);
-	i = ipforme(f, addr);
-	if(i != 0 && (i&Runi) == 0)
-		return;
 
 	netlog(f, Logicmp, "sending icmpnoconv -> %V\n", p->src);
 	nbp = allocb(ICMP_IPSIZE + ICMP_HDRSIZE + ICMP_IPSIZE + 8);
@@ -255,13 +269,13 @@
 	ipoput4(f, nbp, 0, MAXTTL, DFLTTOS, nil);
 }
 
-extern void
+void
 icmpnoconv(Fs *f, Block *bp)
 {
 	icmpunreachable(f, bp, 3, 0);
 }
 
-extern void
+void
 icmpcantfrag(Fs *f, Block *bp, int mtu)
 {
 	icmpunreachable(f, bp, 4, mtu);
@@ -270,35 +284,34 @@
 static void
 goticmpkt(Proto *icmp, Block *bp)
 {
+	ushort	recid;
+	uchar	dst[IPaddrlen], src[IPaddrlen];	/* v6-mapped copies of the packet's dst and src */
 	Conv	**c, *s;
 	Icmp	*p;
-	uchar	dst[IPaddrlen];
-	ushort	recid;
 
 	p = (Icmp *) bp->rp;
-	v4tov6(dst, p->src);
+	v4tov6(dst, p->dst);
+	v4tov6(src, p->src);
 	recid = nhgets(p->icmpid);
 
-	for(c = icmp->conv; *c; c++) {
-		s = *c;
+	for(c = icmp->conv; (s = *c) != nil; c++){	/* no early return: all conversations are visited */
 		if(s->lport == recid)
-		if(ipcmp(s->raddr, dst) == 0){
-			bp = concatblock(bp);
-			if(bp != nil)
-				qpass(s->rq, bp);
-			return;
-		}
+		if(ipcmp(s->laddr, dst) == 0 || ipcmp(s->raddr, src) == 0)
+			qpass(s->rq, copyblock(bp, blocklen(bp)));	/* each match gets its own copy; bp is freed below */
 	}
 	freeblist(bp);
 }
 
 static Block *
-mkechoreply(Block *bp)
+mkechoreply(Block *bp, Fs *f)
 {
 	Icmp	*q;
 	uchar	ip[4];
 
 	q = (Icmp *)bp->rp;
+	if(!ip4me(f, q->dst) || !ip4reply(f, q->src))
+		return nil;
+
 	q->vihl = IP_VER4;
 	memmove(ip, q->src, sizeof(q->dst));
 	memmove(q->src, q->dst, sizeof(q->src));
@@ -318,12 +331,22 @@
 [3]	"port unreachable",
 [4]	"fragmentation needed and DF set",
 [5]	"source route failed",
+[6]	"destination network unknown",
+[7]	"destination host unknown",
+[8]	"source host isolated",
+[9]	"network administratively prohibited",
+[10]	"host administratively prohibited",
+[11]	"network unreachable for tos",
+[12]	"host unreachable for tos",
+[13]	"communication administratively prohibited",
+[14]	"host precedence violation",
+[15]	"precedence cutoff in effect",
 };
 
 static void
 icmpiput(Proto *icmp, Ipifc*, Block *bp)
 {
-	int	n, iplen;
+	int	n;
 	Icmp	*p;
 	Block	*r;
 	Proto	*pr;
@@ -332,12 +355,10 @@
 	Icmppriv *ipriv;
 
 	ipriv = icmp->priv;
-	
 	ipriv->stats[InMsgs]++;
 
-	p = (Icmp *)bp->rp;
-	netlog(icmp->f, Logicmp, "icmpiput %d %d\n", p->type, p->code);
-	n = blocklen(bp);
+	bp = concatblock(bp);
+	n = BLEN(bp);
 	if(n < ICMP_IPSIZE+ICMP_HDRSIZE){
 		ipriv->stats[InErrors]++;
 		ipriv->stats[HlenErrs]++;
@@ -344,69 +365,57 @@
 		netlog(icmp->f, Logicmp, "icmp hlen %d\n", n);
 		goto raise;
 	}
-	iplen = nhgets(p->length);
-	if(iplen > n || (iplen % 1)){
-		ipriv->stats[LenErrs]++;
+	if(ptclcsum(bp, ICMP_IPSIZE, n - ICMP_IPSIZE)){
 		ipriv->stats[InErrors]++;
-		netlog(icmp->f, Logicmp, "icmp length %d\n", iplen);
-		goto raise;
-	}
-	if(ptclcsum(bp, ICMP_IPSIZE, iplen - ICMP_IPSIZE)){
-		ipriv->stats[InErrors]++;
 		ipriv->stats[CsumErrs]++;
 		netlog(icmp->f, Logicmp, "icmp checksum error\n");
 		goto raise;
 	}
+	p = (Icmp *)bp->rp;
+	netlog(icmp->f, Logicmp, "icmpiput %s (%d) %d\n",
+		(p->type < nelem(icmpnames)? icmpnames[p->type]: ""),
+		p->type, p->code);
 	if(p->type <= Maxtype)
 		ipriv->in[p->type]++;
 
 	switch(p->type) {
 	case EchoRequest:
-		if (iplen < n)
-			bp = trimblock(bp, 0, iplen);
-		r = mkechoreply(bp);
+		r = mkechoreply(bp, icmp->f);
+		if(r == nil)
+			goto raise;
 		ipriv->out[EchoReply]++;
 		ipoput4(icmp->f, r, 0, MAXTTL, DFLTTOS, nil);
 		break;
 	case Unreachable:
-		if(p->code > 5)
-			msg = unreachcode[1];
-		else
+		if(p->code >= nelem(unreachcode)) {
+			snprint(m2, sizeof m2, "unreachable %V -> %V code %d",
+				p->src, p->dst, p->code);
+			msg = m2;
+		} else
 			msg = unreachcode[p->code];
 
+	Advise:
 		bp->rp += ICMP_IPSIZE+ICMP_HDRSIZE;
-		if(blocklen(bp) < MinAdvise){
+		if(BLEN(bp) < MinAdvise){
 			ipriv->stats[LenErrs]++;
 			goto raise;
 		}
 		p = (Icmp *)bp->rp;
-		pr = Fsrcvpcolx(icmp->f, p->proto);
-		if(pr != nil && pr->advise != nil) {
-			(*pr->advise)(pr, bp, msg);
-			return;
+		if((nhgets(p->frag) & IP_FO) == 0){
+			pr = Fsrcvpcolx(icmp->f, p->proto);
+			if(pr != nil && pr->advise != nil) {
+				(*pr->advise)(pr, bp, msg);
+				return;
+			}
 		}
-
 		bp->rp -= ICMP_IPSIZE+ICMP_HDRSIZE;
 		goticmpkt(icmp, bp);
 		break;
 	case TimeExceed:
 		if(p->code == 0){
-			sprint(m2, "ttl exceeded at %V", p->src);
-
-			bp->rp += ICMP_IPSIZE+ICMP_HDRSIZE;
-			if(blocklen(bp) < MinAdvise){
-				ipriv->stats[LenErrs]++;
-				goto raise;
-			}
-			p = (Icmp *)bp->rp;
-			pr = Fsrcvpcolx(icmp->f, p->proto);
-			if(pr != nil && pr->advise != nil) {
-				(*pr->advise)(pr, bp, m2);
-				return;
-			}
-			bp->rp -= ICMP_IPSIZE+ICMP_HDRSIZE;
+			snprint(msg = m2, sizeof m2, "ttl exceeded at %V", p->src);
+			goto Advise;
 		}
-
 		goticmpkt(icmp, bp);
 		break;
 	default:
@@ -419,22 +428,25 @@
 	freeblist(bp);
 }
 
-void
+static void
 icmpadvise(Proto *icmp, Block *bp, char *msg)
 {
+	ushort	recid;
+	uchar	dst[IPaddrlen], src[IPaddrlen];
 	Conv	**c, *s;
 	Icmp	*p;
-	uchar	dst[IPaddrlen];
-	ushort	recid;
 
 	p = (Icmp *) bp->rp;
 	v4tov6(dst, p->dst);
+	v4tov6(src, p->src);
 	recid = nhgets(p->icmpid);
 
-	for(c = icmp->conv; *c; c++) {
-		s = *c;
+	for(c = icmp->conv; (s = *c) != nil; c++){
 		if(s->lport == recid)
+		if(ipcmp(s->laddr, src) == 0)
 		if(ipcmp(s->raddr, dst) == 0){
+			if(s->ignoreadvice)
+				break;
 			qhangup(s->rq, msg);
 			qhangup(s->wq, msg);
 			break;
@@ -443,7 +455,7 @@
 	freeblist(bp);
 }
 
-int
+static int
 icmpstats(Proto *icmp, char *buf, int len)
 {
 	Icmppriv *priv;
@@ -456,7 +468,7 @@
 	for(i = 0; i < Nstats; i++)
 		p = seprint(p, e, "%s: %lud\n", statnames[i], priv->stats[i]);
 	for(i = 0; i <= Maxtype; i++){
-		if(icmpnames[i])
+		if(icmpnames[i] != nil)
 			p = seprint(p, e, "%s: %lud %lud\n", icmpnames[i], priv->in[i], priv->out[i]);
 		else
 			p = seprint(p, e, "%d: %lud %lud\n", i, priv->in[i], priv->out[i]);
@@ -463,13 +475,7 @@
 	}
 	return p - buf;
 }
-
-int
-icmpgc(Proto *icmp)
-{
-	return natgc(icmp->ipproto);
-}
-
+	
 void
 icmpinit(Fs *fs)
 {
@@ -487,7 +493,7 @@
 	icmp->stats = icmpstats;
 	icmp->ctl = nil;
 	icmp->advise = icmpadvise;
-	icmp->gc = icmpgc;
+	icmp->gc = nil;
 	icmp->ipproto = IP_ICMPPROTO;
 	icmp->nc = 128;
 	icmp->ptclsize = 0;
--- a/os/ip/icmp6.c
+++ b/os/ip/icmp6.c
@@ -1,3 +1,6 @@
+/*
+ * Internet Control Message Protocol for IPv6
+ */
 #include "u.h"
 #include "../port/lib.h"
 #include "mem.h"
@@ -7,41 +10,36 @@
 #include "ip.h"
 #include "ipv6.h"
 
-typedef struct ICMPpkt ICMPpkt;
-typedef struct IPICMP IPICMP;
-typedef struct Ndpkt Ndpkt;
-typedef struct NdiscC NdiscC;
+enum
+{
+	InMsgs6,
+	InErrors6,
+	OutMsgs6,
+	CsumErrs6,
+	LenErrs6,
+	HlenErrs6,
+	HoplimErrs6,
+	IcmpCodeErrs6,
+	TargetErrs6,
+	OptlenErrs6,
+	AddrmxpErrs6,
+	RouterAddrErrs6,
 
-struct ICMPpkt {
-	uchar	type;
-	uchar	code;
-	uchar	cksum[2];
-	uchar	icmpid[2];
-	uchar	seq[2];
+	Nstats6,
 };
 
-struct IPICMP {
-	Ip6hdr;
-	ICMPpkt;
+enum {
+	ICMP_USEAD6	= 40,
 };
 
-struct NdiscC
-{
-	IPICMP;
-	uchar target[IPaddrlen];
+enum {
+	Oflag	= 1<<5,
+	Sflag	= 1<<6,
+	Rflag	= 1<<7,
 };
 
-struct Ndpkt
-{
-	NdiscC;
-	uchar otype;
-	uchar olen;	// length in units of 8 octets(incl type, code),
-				// 1 for IEEE 802 addresses
-	uchar lnaddr[6];	// link-layer address
-};
-
-enum {	
-	// ICMPv6 types
+enum {
+	/* ICMPv6 types */
 	EchoReply	= 0,
 	UnreachableV6	= 1,
 	PacketTooBigV6	= 2,
@@ -69,6 +67,65 @@
 	Maxtype6	= 137,
 };
 
+enum {
+	MinAdvise	= IP6HDR+4,	/* minimum needed for us to advise another protocol */ 
+};
+
+/* on-the-wire packet formats */
+typedef struct IPICMP IPICMP;
+typedef struct Ndpkt Ndpkt;
+typedef struct NdiscC NdiscC;
+
+/* we do this to avoid possible struct padding  */
+#define ICMPHDR \
+	IPV6HDR; \
+	uchar	type; \
+	uchar	code; \
+	uchar	cksum[2]; \
+	uchar	icmpid[2]; \
+	uchar	seq[2]
+
+struct IPICMP {		/* ICMPHDR fields (IPV6HDR, type, code, cksum, icmpid, seq), then payload */
+	ICMPHDR;
+	uchar	payload[];
+};
+
+#define IPICMPSZ offsetof(IPICMP, payload[0])	/* size of the fixed part, excluding payload */
+
+struct NdiscC {		/* ICMPHDR fields plus a target address, without the link-layer option */
+	ICMPHDR;
+	uchar	target[IPaddrlen];
+	uchar	payload[];
+};
+
+#define NDISCSZ offsetof(NdiscC, payload[0])	/* size of the fixed part, excluding payload */
+
+struct Ndpkt {		/* NdiscC layout followed by one link-layer address option */
+	ICMPHDR;
+	uchar	target[IPaddrlen];
+	uchar	otype;
+	uchar	olen;		/* length in units of 8 octets(incl type, code),
+				 * 1 for IEEE 802 addresses */
+	uchar	lnaddr[6];	/* link-layer address */
+	uchar	payload[];
+};
+
+#define NDPKTSZ offsetof(Ndpkt, payload[0])	/* size of the fixed part, including the option */
+
+typedef struct Icmppriv6
+{
+	ulong	stats[Nstats6];	/* counters indexed by the InMsgs6 ... RouterAddrErrs6 enum */
+
+	/* message counts */
+	ulong	in[Maxtype6+1];	/* indexed by ICMPv6 message type */
+	ulong	out[Maxtype6+1];
+} Icmppriv6;
+
+typedef struct Icmpcb6
+{
+	uchar	headers;	/* set to 6 by the "headers" ctl request */
+} Icmpcb6;
+
 char *icmpnames6[Maxtype6+1] =
 {
 [EchoReply]		"EchoReply",
@@ -95,24 +152,6 @@
 [RedirectV6]		"RedirectV6",
 };
 
-enum
-{
-	InMsgs6,
-	InErrors6,
-	OutMsgs6,
-	CsumErrs6,
-	LenErrs6,
-	HlenErrs6,
-	HoplimErrs6,
-	IcmpCodeErrs6,
-	TargetErrs6,
-	OptlenErrs6,
-	AddrmxpErrs6,
-	RouterAddrErrs6,
-
-	Nstats6,
-};
-
 static char *statnames6[Nstats6] =
 {
 [InMsgs6]	"InMsgs",
@@ -129,49 +168,18 @@
 [RouterAddrErrs6]	"RouterAddrErrs",
 };
 
-typedef struct Icmppriv6
-{
-	ulong	stats[Nstats6];
-
-	/* message counts */
-	ulong	in[Maxtype6+1];
-	ulong	out[Maxtype6+1];
-} Icmppriv6;
-
-typedef struct Icmpcb6 
-{
-	QLock;
-	uchar headers;
-} Icmpcb6;
-
 static char *unreachcode[] =
 {
-[icmp6_no_route]	"no route to destination",
-[icmp6_ad_prohib]	"comm with destination administratively prohibited",
-[icmp6_unassigned]	"icmp unreachable: unassigned error code (2)",
-[icmp6_adr_unreach]	"address unreachable",
-[icmp6_port_unreach]	"port unreachable",
-[icmp6_unkn_code]	"icmp unreachable: unknown code",
+[Icmp6_no_route]	"no route to destination",
+[Icmp6_ad_prohib]	"comm with destination administratively prohibited",
+[Icmp6_out_src_scope]	"beyond scope of source address",
+[Icmp6_adr_unreach]	"address unreachable",
+[Icmp6_port_unreach]	"port unreachable",
+[Icmp6_gress_src_fail]	"source address failed ingress/egress policy",
+[Icmp6_rej_route]	"reject route to destination",
+[Icmp6_unknown]		"icmp unreachable: unknown code",	/* NOTE(review): presumably the fallback entry -- confirm at the use site */
 };
 
-enum {
-	ICMP_USEAD6	= 40,
-};
-
-enum {
-	Oflag	= 1<<5,
-	Sflag	= 1<<6,
-	Rflag	= 1<<7,
-};
-
-enum {
-	slladd	= 1,
-	tlladd	= 2,
-	prfinfo	= 3,
-	redhdr	= 4,
-	mtuopt	= 5,
-};
-
 static void icmpkick6(void *x, Block *bp);
 
 static void
@@ -185,13 +193,14 @@
 set_cksum(Block *bp)
 {
 	IPICMP *p = (IPICMP *)(bp->rp);
+	int n = blocklen(bp);
 
-	hnputl(p->vcf, 0);  // borrow IP header as pseudoheader
-	hnputs(p->ploadlen, blocklen(bp)-IPV6HDR_LEN);
+	hnputl(p->vcf, 0);  	/* borrow IP header as pseudoheader */
+	hnputs(p->ploadlen, n - IP6HDR);
 	p->proto = 0;
-	p->ttl = ICMPv6;	// ttl gets set later
+	p->ttl = ICMPv6;	/* ttl gets set later */
 	hnputs(p->cksum, 0);
-	hnputs(p->cksum, ptclcsum(bp, 0, blocklen(bp)));
+	hnputs(p->cksum, ptclcsum(bp, 0, n));
 	p->proto = ICMPv6;
 }
 
@@ -198,7 +207,8 @@
 static Block *
 newIPICMP(int packetlen)
 {
-	Block	*nbp;
+	Block *nbp;
+
 	nbp = allocb(packetlen);
 	nbp->wp += packetlen;
 	memset(nbp->rp, 0, packetlen);
@@ -205,20 +215,22 @@
 	return nbp;
 }
 
-void
+static void
 icmpadvise6(Proto *icmp, Block *bp, char *msg)
 {
-	Conv	**c, *s;
-	IPICMP	*p;
-	ushort	recid;
+	ushort recid;
+	Conv **c, *s;
+	IPICMP *p;
 
-	p = (IPICMP *) bp->rp;
+	p = (IPICMP *)bp->rp;
 	recid = nhgets(p->icmpid);
 
-	for(c = icmp->conv; *c; c++) {
-		s = *c;
+	for(c = icmp->conv; (s = *c) != nil; c++){
 		if(s->lport == recid)
+		if(ipcmp(s->laddr, p->src) == 0)
 		if(ipcmp(s->raddr, p->dst) == 0){
+			if(s->ignoreadvice)
+				break;
 			qhangup(s->rq, msg);
 			qhangup(s->wq, msg);
 			break;
@@ -230,9 +242,9 @@
 static void
 icmpkick6(void *x, Block *bp)
 {
+	uchar laddr[IPaddrlen], raddr[IPaddrlen];
 	Conv *c = x;
 	IPICMP *p;
-	uchar laddr[IPaddrlen], raddr[IPaddrlen];
 	Icmppriv6 *ipriv = c->p->priv;
 	Icmpcb6 *icb = (Icmpcb6*)c->ptcl;
 
@@ -249,10 +261,10 @@
 		bp->rp += IPaddrlen;
 		ipmove(raddr, bp->rp);
 		bp->rp += IPaddrlen;
-		bp = padblock(bp, sizeof(Ip6hdr));
+		bp = padblock(bp, IP6HDR);
 	}
 
-	if(blocklen(bp) < sizeof(IPICMP)){
+	if(BLEN(bp) < IPICMPSZ){
 		freeblist(bp);
 		return;
 	}
@@ -268,23 +280,20 @@
 
 	set_cksum(bp);
 	p->vcf[0] = 0x06 << 4;
-	if(p->type <= Maxtype6)	
+	if(p->type <= Maxtype6)
 		ipriv->out[p->type]++;
 	ipoput6(c->p->f, bp, 0, c->ttl, c->tos, nil);
 }
 
-char*
+static char*
 icmpctl6(Conv *c, char **argv, int argc)
 {
 	Icmpcb6 *icb;
 
 	icb = (Icmpcb6*) c->ptcl;
-
-	if(argc==1) {
-		if(strcmp(argv[0], "headers")==0) {
-			icb->headers = 6;
-			return nil;
-		}
+	if(argc==1 && strcmp(argv[0], "headers")==0) {	/* the only request understood: a lone "headers" */
+		icb->headers = 6;
+		return nil;
 	}
 	return "unknown control request";
 }
@@ -292,41 +301,39 @@
 static void
 goticmpkt6(Proto *icmp, Block *bp, int muxkey)
 {
-	Conv	**c, *s;
-	IPICMP	*p = (IPICMP *)bp->rp;
-	ushort	recid; 
-	uchar 	*addr;
+	ushort recid;
+	uchar *addr;
+	Conv **c, *s;
+	IPICMP *p = (IPICMP *)bp->rp;
 
 	if(muxkey == 0) {
 		recid = nhgets(p->icmpid);
 		addr = p->src;
-	}
-	else {
+	} else {	/* non-zero muxkey: match on muxkey and the packet's dst instead */
 		recid = muxkey;
 		addr = p->dst;
 	}
-
-	for(c = icmp->conv; *c; c++){
-		s = *c;
-		if(s->lport == recid && ipcmp(s->raddr, addr) == 0){
-			bp = concatblock(bp);
-			if(bp != nil)
-				qpass(s->rq, bp);
-			return;
-		}
+	for(c = icmp->conv; (s = *c) != nil; c++){	/* no early return: all conversations are visited */
+		if(s->lport == recid)
+		if(ipcmp(s->laddr, p->dst) == 0 || ipcmp(s->raddr, addr) == 0)
+			qpass(s->rq, copyblock(bp, blocklen(bp)));	/* each match gets its own copy; bp is freed below */
 	}
-
 	freeblist(bp);
 }
 
 static Block *
-mkechoreply6(Block *bp)
+mkechoreply6(Block *bp, Ipifc *ifc)
 {
+	uchar addr[IPaddrlen];
 	IPICMP *p = (IPICMP *)(bp->rp);
-	uchar	addr[IPaddrlen];
 
+	if(isv6mcast(p->src))
+		return nil;
 	ipmove(addr, p->src);
-	ipmove(p->src, p->dst);
+	if(!isv6mcast(p->dst))
+		ipmove(p->src, p->dst);
+	else if (!ipv6local(ifc, p->src, 0, addr))
+		return nil;
 	ipmove(p->dst, addr);
 	p->type = EchoReplyV6;
 	set_cksum(bp);
@@ -335,49 +342,43 @@
 
 /*
  * sends out an ICMPv6 neighbor solicitation
- * 	suni == SRC_UNSPEC or SRC_UNI, 
+ * 	suni == SRC_UNSPEC or SRC_UNI,
  *	tuni == TARG_MULTI => multicast for address resolution,
  * 	and tuni == TARG_UNI => neighbor reachability.
  */
-
-extern void
+void
 icmpns(Fs *f, uchar* src, int suni, uchar* targ, int tuni, uchar* mac)
 {
-	Block	*nbp;
+	Block *nbp;
 	Ndpkt *np;
 	Proto *icmp = f->t2p[ICMPv6];
 	Icmppriv6 *ipriv = icmp->priv;
 
-
-	nbp = newIPICMP(sizeof(Ndpkt));
+	nbp = newIPICMP(NDPKTSZ);
 	np = (Ndpkt*) nbp->rp;
 
+	if(suni == SRC_UNSPEC)
+		ipmove(np->src, v6Unspecified);
+	else
+		ipmove(np->src, src);
 
-	if(suni == SRC_UNSPEC) 
-		memmove(np->src, v6Unspecified, IPaddrlen);
-	else 
-		memmove(np->src, src, IPaddrlen);
-
 	if(tuni == TARG_UNI)
-		memmove(np->dst, targ, IPaddrlen);
+		ipmove(np->dst, targ);
 	else
 		ipv62smcast(np->dst, targ);
 
 	np->type = NbrSolicit;
 	np->code = 0;
-	memmove(np->target, targ, IPaddrlen);
+	ipmove(np->target, targ);
 	if(suni != SRC_UNSPEC) {
-		np->otype = SRC_LLADDRESS;
-		np->olen = 1;	/* 1+1+6 = 8 = 1 8-octet */
+		np->otype = SRC_LLADDR;
+		np->olen = 1;		/* 1+1+6 = 8 = 1 8-octet */
 		memmove(np->lnaddr, mac, sizeof(np->lnaddr));
-	}
-	else {
-		int r = sizeof(Ndpkt)-sizeof(NdiscC);
-		nbp->wp -= r;
-	}
+	} else
+		nbp->wp -= NDPKTSZ - NDISCSZ;
 
 	set_cksum(nbp);
-	np = (Ndpkt*) nbp->rp;
+	np = (Ndpkt*)nbp->rp;
 	np->ttl = HOP_LIMIT;
 	np->vcf[0] = 0x06 << 4;
 	ipriv->out[NbrSolicit]++;
@@ -388,122 +389,101 @@
 /*
  * sends out an ICMPv6 neighbor advertisement. pktflags == RSO flags.
  */
-extern void
+void
 icmpna(Fs *f, uchar* src, uchar* dst, uchar* targ, uchar* mac, uchar flags)
 {
-	Block	*nbp;
+	Block *nbp;
 	Ndpkt *np;
 	Proto *icmp = f->t2p[ICMPv6];
 	Icmppriv6 *ipriv = icmp->priv;
 
-	nbp = newIPICMP(sizeof(Ndpkt));
-	np = (Ndpkt*) nbp->rp;
+	nbp = newIPICMP(NDPKTSZ);	/* sized to include the link-layer address option */
+	np = (Ndpkt*)nbp->rp;
 
-	memmove(np->src, src, IPaddrlen);
-	memmove(np->dst, dst, IPaddrlen);
+	ipmove(np->src, src);
+	ipmove(np->dst, dst);
 
 	np->type = NbrAdvert;
 	np->code = 0;
 	np->icmpid[0] = flags;
-	memmove(np->target, targ, IPaddrlen);
+	ipmove(np->target, targ);
 
-	np->otype = TARGET_LLADDRESS;
-	np->olen = 1;	
+	np->otype = TARGET_LLADDR;
+	np->olen = 1;	/* one 8-octet unit: otype + olen + 6-byte lnaddr */
 	memmove(np->lnaddr, mac, sizeof(np->lnaddr));
 
 	set_cksum(nbp);
-	np = (Ndpkt*) nbp->rp;
+	np = (Ndpkt*)nbp->rp;
 	np->ttl = HOP_LIMIT;
 	np->vcf[0] = 0x06 << 4;
 	ipriv->out[NbrAdvert]++;
-	netlog(f, Logicmp, "sending neighbor advertisement %I\n", src);
+	netlog(f, Logicmp, "sending neighbor advertisement %I\n", targ);	/* logs the advertised target, not src */
 	ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil);
 }
 
-extern void
-icmphostunr(Fs *f, Ipifc *ifc, Block *bp, int code, int free)
+void
+icmphostunr6(Fs *f, Ipifc *ifc, Block *bp, int code, int tome)	/* build an UnreachableV6 reply to bp's sender */
 {
+	int osz = BLEN(bp);	/* bp is only read; the new version no longer frees it */
+	int sz = MIN(IPICMPSZ + osz, v6MINTU);
 	Block *nbp;
 	IPICMP *np;
-	Ip6hdr	*p;
-	int osz = BLEN(bp);
-	int sz = MIN(sizeof(IPICMP) + osz, v6MINTU);
-	Proto	*icmp = f->t2p[ICMPv6];
+	Ip6hdr *p;
+	Proto *icmp = f->t2p[ICMPv6];
 	Icmppriv6 *ipriv = icmp->priv;
+	uchar ia[IPaddrlen];
 
-	p = (Ip6hdr *) bp->rp;
+	p = (Ip6hdr *)bp->rp;
+	if(isv6mcast(p->dst) || isv6mcast(p->src) || !ipv6local(ifc, ia, 0, p->src))
+		return;	/* no reply for multicast src/dst, or when ipv6local fails (presumably: no usable local address) */
 
-	if(isv6mcast(p->src)) 
-		goto clean;
+	netlog(f, Logicmp, "send icmphostunr %I -> src %I dst %I\n",
+		ia, p->src, p->dst);
 
 	nbp = newIPICMP(sz);
-	np = (IPICMP *) nbp->rp;
-
-	rlock(ifc);
-	if(ipv6anylocal(ifc, np->src)) {
-		netlog(f, Logicmp, "send icmphostunr -> s%I d%I\n", p->src, p->dst);
-	}
-	else {
-		netlog(f, Logicmp, "icmphostunr fail -> s%I d%I\n", p->src, p->dst);
-		freeblist(nbp);
-		if(free) 
-			goto clean;
-		else
-			return;
-	}
-
-	memmove(np->dst, p->src, IPaddrlen);
+	np = (IPICMP *)nbp->rp;
+	ipmove(np->src, ia);
+	ipmove(np->dst, p->src);
 	np->type = UnreachableV6;
 	np->code = code;
-	memmove(nbp->rp + sizeof(IPICMP), bp->rp, sz - sizeof(IPICMP));
+	memmove(nbp->rp + IPICMPSZ, bp->rp, sz - IPICMPSZ);	/* quote as much of the offending packet as fits in sz */
 	set_cksum(nbp);
 	np->ttl = HOP_LIMIT;
 	np->vcf[0] = 0x06 << 4;
 	ipriv->out[UnreachableV6]++;
 
-	if(free)
+	if(tome)	/* tome: hand the reply to ipiput6 instead of transmitting it */
 		ipiput6(f, ifc, nbp);
-	else {
+	else 
 		ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil);
-		return;
-	}
-
-clean:
-	runlock(ifc);
-	freeblist(bp);
 }
 
-extern void
+void
 icmpttlexceeded6(Fs *f, Ipifc *ifc, Block *bp)
 {
+	int osz = BLEN(bp);
+	int sz = MIN(IPICMPSZ + osz, v6MINTU);
 	Block *nbp;
 	IPICMP *np;
-	Ip6hdr	*p;
-	int osz = BLEN(bp);
-	int sz = MIN(sizeof(IPICMP) + osz, v6MINTU);
-	Proto	*icmp = f->t2p[ICMPv6];
+	Ip6hdr *p;
+	Proto *icmp = f->t2p[ICMPv6];
 	Icmppriv6 *ipriv = icmp->priv;
+	uchar ia[IPaddrlen];
 
-	p = (Ip6hdr *) bp->rp;
-
-	if(isv6mcast(p->src)) 
+	p = (Ip6hdr *)bp->rp;
+	if(isv6mcast(p->dst) || isv6mcast(p->src) || !ipv6local(ifc, ia, 0, p->src))
 		return;
 
+	netlog(f, Logicmp, "send icmpttlexceeded6 %I -> src %I dst %I\n",
+		ia, p->src, p->dst);
+
 	nbp = newIPICMP(sz);
 	np = (IPICMP *) nbp->rp;
-
-	if(ipv6anylocal(ifc, np->src)) {
-		netlog(f, Logicmp, "send icmpttlexceeded6 -> s%I d%I\n", p->src, p->dst);
-	}
-	else {
-		netlog(f, Logicmp, "icmpttlexceeded6 fail -> s%I d%I\n", p->src, p->dst);
-		return;
-	}
-
-	memmove(np->dst, p->src, IPaddrlen);
+	ipmove(np->src, ia);
+	ipmove(np->dst, p->src);
 	np->type = TimeExceedV6;
 	np->code = 0;
-	memmove(nbp->rp + sizeof(IPICMP), bp->rp, sz - sizeof(IPICMP));
+	memmove(nbp->rp + IPICMPSZ, bp->rp, sz - IPICMPSZ);
 	set_cksum(nbp);
 	np->ttl = HOP_LIMIT;
 	np->vcf[0] = 0x06 << 4;
@@ -511,38 +491,33 @@
 	ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil);
 }
 
-extern void
+void
 icmppkttoobig6(Fs *f, Ipifc *ifc, Block *bp)
 {
+	int osz = BLEN(bp);
+	int sz = MIN(IPICMPSZ + osz, v6MINTU);
 	Block *nbp;
 	IPICMP *np;
-	Ip6hdr	*p;
-	int osz = BLEN(bp);
-	int sz = MIN(sizeof(IPICMP) + osz, v6MINTU);
-	Proto	*icmp = f->t2p[ICMPv6];
+	Ip6hdr *p;
+	Proto *icmp = f->t2p[ICMPv6];
 	Icmppriv6 *ipriv = icmp->priv;
+	uchar ia[IPaddrlen];
 
-	p = (Ip6hdr *) bp->rp;
-
-	if(isv6mcast(p->src)) 
+	p = (Ip6hdr *)bp->rp;
+	if(isv6mcast(p->dst) || isv6mcast(p->src) || !ipv6local(ifc, ia, 0, p->src))
 		return;
 
-	nbp = newIPICMP(sz);
-	np = (IPICMP *) nbp->rp;
+	netlog(f, Logicmp, "send icmppkttoobig6 %I -> src %I dst %I\n",
+		ia, p->src, p->dst);
 
-	if(ipv6anylocal(ifc, np->src)) {
-		netlog(f, Logicmp, "send icmppkttoobig6 -> s%I d%I\n", p->src, p->dst);
-	}
-	else {
-		netlog(f, Logicmp, "icmppkttoobig6 fail -> s%I d%I\n", p->src, p->dst);
-		return;
-	}
-
-	memmove(np->dst, p->src, IPaddrlen);
+	nbp = newIPICMP(sz);
+	np = (IPICMP *)nbp->rp;
+	ipmove(np->src, ia);
+	ipmove(np->dst, p->src);
 	np->type = PacketTooBigV6;
 	np->code = 0;
 	hnputl(np->icmpid, ifc->maxtu - ifc->m->hsize);
-	memmove(nbp->rp + sizeof(IPICMP), bp->rp, sz - sizeof(IPICMP));
+	memmove(nbp->rp + IPICMPSZ, bp->rp, sz - IPICMPSZ);
 	set_cksum(nbp);
 	np->ttl = HOP_LIMIT;
 	np->vcf[0] = 0x06 << 4;
@@ -554,31 +529,23 @@
  * RFC 2461, pages 39-40, pages 57-58.
  */
 static int
-valid(Proto *icmp, Ipifc *ifc, Block *bp, Icmppriv6 *ipriv) {
-	int 	sz, osz, unsp, n, ttl, iplen;
-	int 	pktsz = BLEN(bp);
-	uchar	*packet = bp->rp;
-	IPICMP	*p = (IPICMP *) packet;
-	Ndpkt	*np;
+valid(Proto *icmp, Ipifc *, Block *bp, Icmppriv6 *ipriv)
+{
+	int sz, osz, unsp, ttl;
+	int pktsz = BLEN(bp);
+	uchar *packet = bp->rp;
+	IPICMP *p = (IPICMP *) packet;
+	Ndpkt *np;
 
-	USED(ifc);
-	n = blocklen(bp);
-	if(n < sizeof(IPICMP)) {
+	if(pktsz < IPICMPSZ) {
 		ipriv->stats[HlenErrs6]++;
-		netlog(icmp->f, Logicmp, "icmp hlen %d\n", n);
+		netlog(icmp->f, Logicmp, "icmp hlen %d\n", pktsz);
 		goto err;
 	}
 
-	iplen = nhgets(p->ploadlen);
-	if(iplen > n-IPV6HDR_LEN || (iplen % 1)) {
-		ipriv->stats[LenErrs6]++;
-		netlog(icmp->f, Logicmp, "icmp length %d\n", iplen);
-		goto err;
-	}
-
-	// Rather than construct explicit pseudoheader, overwrite IPv6 header
+	/* Rather than construct explicit pseudoheader, overwrite IPv6 header */
 	if(p->proto != ICMPv6) {
-		// This code assumes no extension headers!!!
+		/* This code assumes no extension headers!!! */
 		netlog(icmp->f, Logicmp, "icmp error: extension header\n");
 		goto err;
 	}
@@ -586,7 +553,7 @@
 	ttl = p->ttl;
 	p->ttl = p->proto;
 	p->proto = 0;
-	if(ptclcsum(bp, 0, iplen + IPV6HDR_LEN)) {
+	if(ptclcsum(bp, 0, pktsz)) {
 		ipriv->stats[CsumErrs6]++;
 		netlog(icmp->f, Logicmp, "icmp checksum error\n");
 		goto err;
@@ -595,19 +562,16 @@
 	p->ttl = ttl;
 
 	/* additional tests for some pkt types */
-	if( (p->type == NbrSolicit) ||
-		(p->type == NbrAdvert) ||
-		(p->type == RouterAdvert) ||
-		(p->type == RouterSolicit) ||
-		(p->type == RedirectV6) ) {
-
+	if (p->type == NbrSolicit   || p->type == NbrAdvert ||
+	    p->type == RouterAdvert || p->type == RouterSolicit ||
+	    p->type == RedirectV6) {
 		if(p->ttl != HOP_LIMIT) {
-			ipriv->stats[HoplimErrs6]++; 
-			goto err; 
+			ipriv->stats[HoplimErrs6]++;
+			goto err;
 		}
 		if(p->code != 0) {
-			ipriv->stats[IcmpCodeErrs6]++; 
-			goto err; 
+			ipriv->stats[IcmpCodeErrs6]++;
+			goto err;
 		}
 
 		switch (p->type) {
@@ -615,82 +579,78 @@
 		case NbrAdvert:
 			np = (Ndpkt*) p;
 			if(isv6mcast(np->target)) {
-				ipriv->stats[TargetErrs6]++; 
-				goto err; 
+				ipriv->stats[TargetErrs6]++;
+				goto err;
 			}
-			if(optexsts(np) && (np->olen == 0)) {
-				ipriv->stats[OptlenErrs6]++; 
-				goto err; 
+			if(optexsts(np) && np->olen == 0) {
+				ipriv->stats[OptlenErrs6]++;
+				goto err;
 			}
-		
-			if(p->type == NbrSolicit) {
-				if(ipcmp(np->src, v6Unspecified) == 0) { 
-					if(!issmcast(np->dst) || optexsts(np))  {
-						ipriv->stats[AddrmxpErrs6]++; 
-						goto err;
-					}
+
+			if (p->type == NbrSolicit &&
+			    ipcmp(np->src, v6Unspecified) == 0)
+				if(!issmcast(np->dst) || optexsts(np)) {
+					ipriv->stats[AddrmxpErrs6]++;
+					goto err;
 				}
-			}
-		
-			if(p->type == NbrAdvert) {
-				if((isv6mcast(np->dst))&&(nhgets(np->icmpid) & Sflag)){
-					ipriv->stats[AddrmxpErrs6]++; 
-					goto err; 
+
+			if(p->type == NbrAdvert)
+				if(isv6mcast(np->dst) &&
+				    (nhgets(np->icmpid) & Sflag)){
+					ipriv->stats[AddrmxpErrs6]++;
+					goto err;
 				}
-			}
 			break;
-	
+
 		case RouterAdvert:
-			if(pktsz - sizeof(Ip6hdr) < 16) {
-				ipriv->stats[HlenErrs6]++; 
-				goto err; 
+			if(pktsz - IP6HDR < 16) {
+				ipriv->stats[HlenErrs6]++;
+				goto err;
 			}
 			if(!islinklocal(p->src)) {
-				ipriv->stats[RouterAddrErrs6]++; 
-				goto err; 
+				ipriv->stats[RouterAddrErrs6]++;
+				goto err;
 			}
-			sz = sizeof(IPICMP) + 8;
-			while ((sz+1) < pktsz) {
-				osz = *(packet+sz+1);
+			sz = IPICMPSZ + 8;
+			while (sz+8 <= pktsz) {
+				osz = packet[sz+1];
 				if(osz <= 0) {
-					ipriv->stats[OptlenErrs6]++; 
-					goto err; 
-				}	
+					ipriv->stats[OptlenErrs6]++;
+					goto err;
+				}
 				sz += 8*osz;
 			}
 			break;
-	
+
 		case RouterSolicit:
-			if(pktsz - sizeof(Ip6hdr) < 8) {
-				ipriv->stats[HlenErrs6]++; 
-				goto err; 
+			if(pktsz - IP6HDR < 8) {
+				ipriv->stats[HlenErrs6]++;
+				goto err;
 			}
 			unsp = (ipcmp(p->src, v6Unspecified) == 0);
-			sz = sizeof(IPICMP) + 8;
-			while ((sz+1) < pktsz) {
-				osz = *(packet+sz+1);
-				if((osz <= 0) ||
-					(unsp && (*(packet+sz) == slladd)) ) {
-					ipriv->stats[OptlenErrs6]++; 
-					goto err; 
+			sz = IPICMPSZ + 8;
+			while (sz+8 <= pktsz) {
+				osz = packet[sz+1];
+				if(osz <= 0 ||
+				    (unsp && packet[sz] == SRC_LLADDR)) {
+					ipriv->stats[OptlenErrs6]++;
+					goto err;
 				}
 				sz += 8*osz;
 			}
 			break;
-	
+
 		case RedirectV6:
-			//to be filled in
+			/* to be filled in */
 			break;
-	
+
 		default:
 			goto err;
 		}
 	}
-
 	return 1;
-
 err:
-	ipriv->stats[InErrors6]++; 
+	ipriv->stats[InErrors6]++;
 	return 0;
 }
 
@@ -700,169 +660,162 @@
 	Iplifc *lifc;
 	int t;
 
-	rlock(ifc);
-	if(ipproxyifc(f, ifc, target)) {
-		runlock(ifc);
-		return t_uniproxy;
-	}
-
-	for(lifc = ifc->lifc; lifc; lifc = lifc->next) {
-		if(ipcmp(lifc->local, target) == 0) {
-			t = (lifc->tentative) ? t_unitent : t_unirany; 
-			runlock(ifc);
-			return t;
-		}
-	}
-
-	runlock(ifc);
-	return 0;
+	if((lifc = iplocalonifc(ifc, target)) != nil)
+		t = lifc->tentative? Tunitent: Tunirany;
+	else if(ipproxyifc(f, ifc, target))
+		t = Tuniproxy;
+	else
+		t = 0;
+	return t;
 }
 
 static void
-icmpiput6(Proto *icmp, Ipifc *ipifc, Block *bp)
+icmpiput6(Proto *icmp, Ipifc *ifc, Block *bp)	/* rcv hook: validate, count and dispatch one ICMPv6 packet */
 {
-	uchar	*packet = bp->rp;
-	IPICMP	*p = (IPICMP *)packet;
-	Icmppriv6 *ipriv = icmp->priv;
-	Block	*r;
-	Proto	*pr;
-	char	*msg, m2[128];
-	Ndpkt* np;
+	char *msg, m2[128];
 	uchar pktflags;
-	uchar lsrc[IPaddrlen];
-	int refresh = 1;
+	uchar ia[IPaddrlen];
+	Block *r;
+	IPICMP *p;
+	Icmppriv6 *ipriv = icmp->priv;
 	Iplifc *lifc;
+	Ndpkt* np;
+	Proto *pr;
 
-	if(!valid(icmp, ipifc, bp, ipriv)) 
-		goto raise;
+	bp = concatblock(bp);	/* presumably yields one contiguous block so p can address the whole packet -- TODO confirm */
+	p = (IPICMP*)bp->rp;
 
-	if(p->type <= Maxtype6)
-		ipriv->in[p->type]++;
-	else
+	if(!valid(icmp, ifc, bp, ipriv) || p->type > Maxtype6)	/* the type test keeps the in[] index below in range */
 		goto raise;
 
+	ipriv->in[p->type]++;
+
 	switch(p->type) {
 	case EchoRequestV6:
-		r = mkechoreply6(bp);
+		r = mkechoreply6(bp, ifc);
+		if(r == nil)	/* no reply was built: drop the request */
+			goto raise;
 		ipriv->out[EchoReply]++;
 		ipoput6(icmp->f, r, 0, MAXTTL, DFLTTOS, nil);
 		break;
 
 	case UnreachableV6:
-		if(p->code > 4)
-			msg = unreachcode[icmp6_unkn_code];
+		if(p->code >= nelem(unreachcode))
+			msg = unreachcode[Icmp6_unknown];
 		else
 			msg = unreachcode[p->code];
-
-		bp->rp += sizeof(IPICMP);
-		if(blocklen(bp) < 8){
+	Advise:	/* shared tail for UnreachableV6, TimeExceedV6 and PacketTooBigV6; msg is already set */
+		bp->rp += IPICMPSZ;	/* skip the first IPICMPSZ bytes; what follows is parsed as an IPv6 header below */
+		if(BLEN(bp) < MinAdvise){
 			ipriv->stats[LenErrs6]++;
 			goto raise;
 		}
 		p = (IPICMP *)bp->rp;
-		pr = Fsrcvpcolx(icmp->f, p->proto);
-		if(pr != nil && pr->advise != nil) {
-			(*pr->advise)(pr, bp, msg);
-			return;
-		}
 
-		bp->rp -= sizeof(IPICMP);
-		goticmpkt6(icmp, bp, 0);
-		break;
-
-	case TimeExceedV6:
-		if(p->code == 0){
-			sprint(m2, "ttl exceeded at %I", p->src);
-
-			bp->rp += sizeof(IPICMP);
-			if(blocklen(bp) < 8){
-				ipriv->stats[LenErrs6]++;
-				goto raise;
+		/* get rid of fragment header if this is the first fragment */
+		if(p->proto == FH && BLEN(bp) >= MinAdvise+IP6FHDR && MinAdvise > IP6HDR){
+			Fraghdr6 *fh = (Fraghdr6*)(bp->rp + IP6HDR);
+			if((nhgets(fh->offsetRM) & ~7) == 0){	/* first fragment */
+				p->proto = fh->nexthdr;
+				/* copy down payload over fragment header */
+				bp->rp += IP6HDR;
+				bp->wp -= IP6FHDR;
+				memmove(bp->rp, bp->rp+IP6FHDR, BLEN(bp));
+				hnputs(p->ploadlen, BLEN(bp));
+				bp->rp -= IP6HDR;
 			}
-			p = (IPICMP *)bp->rp;
+		}
+		if(p->proto != FH){	/* a packet still carrying a fragment header is not offered to any protocol */
 			pr = Fsrcvpcolx(icmp->f, p->proto);
 			if(pr != nil && pr->advise != nil) {
-				(*pr->advise)(pr, bp, m2);
+				(*pr->advise)(pr, bp, msg);
 				return;
 			}
-			bp->rp -= sizeof(IPICMP);
 		}
+		bp->rp -= IPICMPSZ;	/* no advise handler took it: restore rp and deliver via goticmpkt6 */
+		goticmpkt6(icmp, bp, 0);
+		break;
 
+	case TimeExceedV6:
+		if(p->code == 0){
+			snprint(msg = m2, sizeof m2, "ttl exceeded at %I", p->src);
+			goto Advise;
+		}
+		if(p->code == 1){
+			snprint(msg = m2, sizeof m2, "frag time exceeded at %I", p->src);
+			goto Advise;
+		}
 		goticmpkt6(icmp, bp, 0);
 		break;
 
+	case PacketTooBigV6:
+		snprint(msg = m2, sizeof(m2), "packet too big for %lud mtu at %I",
+			(ulong)nhgetl(p->icmpid), p->src);
+		goto Advise;
+
 	case RouterAdvert:
 	case RouterSolicit:
-		/* using lsrc as a temp, munge hdr for goticmp6 
-		memmove(lsrc, p->src, IPaddrlen);
-		memmove(p->src, p->dst, IPaddrlen);
-		memmove(p->dst, lsrc, IPaddrlen); */
-
 		goticmpkt6(icmp, bp, p->type);
 		break;
 
 	case NbrSolicit:
-		np = (Ndpkt*) p;
+		np = (Ndpkt*)p;
 		pktflags = 0;
-		switch (targettype(icmp->f, ipifc, np->target)) {
-		case t_unirany:
+		if(ifc->sendra6)	/* set Rflag in the reply flags when sendra6 is set on the interface */
+			pktflags |= Rflag;
+		switch (targettype(icmp->f, ifc, np->target)) {
+		case Tunirany:
 			pktflags |= Oflag;
 			/* fall through */
 
-		case t_uniproxy: 
-			if(ipcmp(np->src, v6Unspecified) != 0) {
-				arpenter(icmp->f, V6, np->src, np->lnaddr, 8*np->olen-2, 0);
+		case Tuniproxy:
+			if(ipv6local(ifc, ia, 0, np->src)) {
+				if(arpenter(icmp->f, V6, np->src, np->lnaddr, 8*np->olen-2, ia, ifc, 0) < 0)
+					break;	/* arpenter failed: no advertisement is sent */
 				pktflags |= Sflag;
-			}
-			if(ipv6local(ipifc, lsrc)) {
-				icmpna(icmp->f, lsrc, 
-				   (ipcmp(np->src, v6Unspecified)==0)?v6allnodesL:np->src,
-				   np->target, ipifc->mac, pktflags); 
-			}
-			else
-				freeblist(bp);
+			} else
+				ipmove(ia, np->target);	/* ipv6local() returned 0: fall back to the target address for ia */
+			icmpna(icmp->f, ia, (pktflags & Sflag)? np->src: v6allnodesL,
+				np->target, ifc->mac, pktflags);
 			break;
-
-		case t_unitent:
-			/* not clear what needs to be done. send up
-			 * an icmp mesg saying don't use this address? */
-
-		default:
-			freeblist(bp);
+		case Tunitent:
+			/*
+			 * not clear what needs to be done. send up
+			 * an icmp mesg saying don't use this address?
+			 */
+			break;
 		}
-
+		freeblist(bp);	/* every NbrSolicit path leaves the switch and releases the packet here */
 		break;
 
 	case NbrAdvert:
-		np = (Ndpkt*) p;
+		np = (Ndpkt*)p;
 
-		/* if the target address matches one of the local interface 
-		 * address and the local interface address has tentative bit set, 
-		 * then insert into ARP table. this is so the duplication address 
-		 * detection part of ipconfig can discover duplication through 
-		 * the arp table
+		/*
+		 * if the target address matches one of the local interface
+		 * addresses and the local interface address has tentative bit
+		 * set, insert into ARP table. this is so the duplicate address
+		 * detection part of ipconfig can discover duplication through
+		 * the arp table.
 		 */
-		lifc = iplocalonifc(ipifc, np->target);
-		if(lifc && lifc->tentative)
-			refresh = 0;
-		arpenter(icmp->f, V6, np->target, np->lnaddr, 8*np->olen-2, refresh);
+		lifc = iplocalonifc(ifc, np->target);
+		if(lifc != nil && lifc->tentative)
+			arpenter(icmp->f, V6, np->target, np->lnaddr, 8*np->olen-2, np->target, ifc, 0);
+		else if(ipv6local(ifc, ia, 0, np->target))
+			arpenter(icmp->f, V6, np->target, np->lnaddr, 8*np->olen-2, ia, ifc, 1);
 		freeblist(bp);
 		break;
 
-	case PacketTooBigV6:
-
 	default:
 		goticmpkt6(icmp, bp, 0);
 		break;
 	}
 	return;
-
 raise:
 	freeblist(bp);
-
 }
 
-int
+static int
 icmpstats6(Proto *icmp6, char *buf, int len)
 {
 	Icmppriv6 *priv;
@@ -874,23 +827,28 @@
 	e = p+len;
 	for(i = 0; i < Nstats6; i++)
 		p = seprint(p, e, "%s: %lud\n", statnames6[i], priv->stats[i]);
-	for(i = 0; i <= Maxtype6; i++){
+	for(i = 0; i <= Maxtype6; i++)
 		if(icmpnames6[i])
-			p = seprint(p, e, "%s: %lud %lud\n", icmpnames6[i], priv->in[i], priv->out[i]);
-/*		else
-			p = seprint(p, e, "%d: %lud %lud\n", i, priv->in[i], priv->out[i]);
-*/
-	}
+			p = seprint(p, e, "%s: %lud %lud\n", icmpnames6[i],
+				priv->in[i], priv->out[i]);
 	return p - buf;
 }
 
 
-// need to import from icmp.c
+/* import from icmp.c */
 extern int	icmpstate(Conv *c, char *state, int n);
 extern char*	icmpannounce(Conv *c, char **argv, int argc);
 extern char*	icmpconnect(Conv *c, char **argv, int argc);
 extern void	icmpclose(Conv *c);
 
+static void
+icmpclose6(Conv *c)	/* installed as icmp6->close by icmp6init */
+{
+	Icmpcb6 *icb = (Icmpcb6*)c->ptcl;	/* the conversation's ptcl area viewed as Icmpcb6 */
+	icb->headers = 0;	/* clear the headers setting before the conversation is closed */
+	icmpclose(c);	/* the rest of the close is done by icmpclose, imported from icmp.c */
+}
+
 void
 icmp6init(Fs *fs)
 {
@@ -902,7 +860,7 @@
 	icmp6->announce = icmpannounce;
 	icmp6->state = icmpstate;
 	icmp6->create = icmpcreate6;
-	icmp6->close = icmpclose;
+	icmp6->close = icmpclose6;
 	icmp6->rcv = icmpiput6;
 	icmp6->stats = icmpstats6;
 	icmp6->ctl = icmpctl6;
@@ -914,4 +872,3 @@
 
 	Fsproto(fs, icmp6);
 }
-
--- a/os/ip/igmp.c
+++ b/os/ip/igmp.c
@@ -1,3 +1,7 @@
+/*
+ * igmp - internet group management protocol
+ * unfinished.
+ */
 #include "u.h"
 #include "../port/lib.h"
 #include "mem.h"
@@ -40,8 +44,12 @@
 	uchar	unused;
 	uchar	igmpcksum[2];		/* checksum of igmp portion */
 	uchar	group[IPaddrlen];	/* multicast group */
+
+	uchar	payload[];
 };
 
+#define IGMPPKTSZ offsetof(IGMPpkt, payload[0])
+
 /*
  *  lists for group reports
  */
@@ -49,7 +57,7 @@
 struct IGMPrep
 {
 	IGMPrep		*next;
-	Media		*m;
+	Medium		*m;
 	int		ticks;
 	Multicast	*multi;
 };
@@ -76,19 +84,17 @@
 } stats;
 
 void
-igmpsendreport(Media *m, uchar *addr)
+igmpsendreport(Medium *m, uchar *addr)
 {
 	IGMPpkt *p;
 	Block *bp;
 
 	bp = allocb(sizeof(IGMPpkt));
-	if(bp == nil)
-		return;
 	p = (IGMPpkt*)bp->wp;
 	p->vihl = IP_VER4;
-	bp->wp += sizeof(IGMPpkt);
-	memset(bp->rp, 0, sizeof(IGMPpkt));
-	hnputl(p->src, Mediagetaddr(m));
+	bp->wp += IGMPPKTSZ;
+	memset(bp->rp, 0, IGMPPKTSZ);
+	hnputl(p->src, Mediumgetaddr(m));
 	hnputl(p->dst, Ipallsys);
 	p->vertype = (1<<4) | IGMPreport;
 	p->proto = IP_IGMPPROTO;
@@ -166,7 +172,7 @@
 }
 
 void
-igmpiput(Media *m, Ipifc *, Block *bp)
+igmpiput(Medium *m, Ipifc *, Block *bp)
 {
 	int n;
 	IGMPpkt *ghp;
@@ -206,7 +212,7 @@
 		if(rp != nil)
 			break;	/* already reporting */
 
-		mp = Mediacopymulti(m);
+		mp = Mediumcopymulti(m);
 		if(mp == nil)
 			break;
 
@@ -285,7 +291,7 @@
 	igmp.ptclsize = 0;
 
 	igmpreportfn = igmpsendreport;
-	kproc("igmpproc", igmpproc, 0, 0);
+	kproc("igmpproc", igmpproc, 0);
 
 	Fsproto(fs, &igmp);
 }
--- a/os/ip/ihbootp.c
+++ /dev/null
@@ -1,323 +1,0 @@
-#include "u.h"
-#include "../port/lib.h"
-#include "mem.h"
-#include "dat.h"
-#include "fns.h"
-#include "../port/error.h"
-#include "kernel.h"
-#include "ip.h"
-
-static	ulong	fsip;
-static	ulong	auip;
-static	ulong	gwip;
-static	ulong	ipmask;
-static	ulong	ipaddr;
-static	ulong	dnsip;
-
-enum
-{
-	Bootrequest = 1,
-	Bootreply   = 2,
-};
-
-typedef struct Bootp
-{
-	/* udp.c oldheader */
-	uchar	raddr[IPaddrlen];
-	uchar	laddr[IPaddrlen];
-	uchar	rport[2];
-	uchar	lport[2];
-	/* bootp itself */
-	uchar	op;		/* opcode */
-	uchar	htype;		/* hardware type */
-	uchar	hlen;		/* hardware address len */
-	uchar	hops;		/* hops */
-	uchar	xid[4];		/* a random number */
-	uchar	secs[2];	/* elapsed snce client started booting */
-	uchar	pad[2];
-	uchar	ciaddr[4];	/* client IP address (client tells server) */
-	uchar	yiaddr[4];	/* client IP address (server tells client) */
-	uchar	siaddr[4];	/* server IP address */
-	uchar	giaddr[4];	/* gateway IP address */
-	uchar	chaddr[16];	/* client hardware address */
-	uchar	sname[64];	/* server host name (optional) */
-	uchar	file[128];	/* boot file name */
-	uchar	vend[128];	/* vendor-specific goo */
-} Bootp;
-
-/*
- * bootp returns:
- *
- * "fsip d.d.d.d
- * auip d.d.d.d
- * gwip d.d.d.d
- * ipmask d.d.d.d
- * ipaddr d.d.d.d
- * dnsip d.d.d.d"
- *
- * where d.d.d.d is the IP address in dotted decimal notation, and each
- * address is followed by a newline.
- */
-
-static	Bootp	req;
-static	Proc*	rcvprocp;
-static	int	recv;
-static	int	done;
-static	Rendez	bootpr;
-static	char	rcvbuf[512];
-static	int	bootpdebug;
-
-/*
- * Parse the vendor specific fields according to RFC 1084.
- * We are overloading the "cookie server" to be the Inferno 
- * authentication server and the "resource location server"
- * to be the Inferno file server.
- *
- * If the vendor specific field is formatted properly, it
- * will begin with the four bytes 99.130.83.99 and end with
- * an 0xFF byte.
- */
-static void
-parsevend(uchar* vend)
-{
-	/* The field must start with 99.130.83.99 to be compliant */
-	if ((vend[0] != 99) || (vend[1] != 130) ||
-	    (vend[2] != 83) || (vend[3] != 99)){
-		if(bootpdebug)
-			print("bad bootp vendor field: %.2x%.2x%.2x%.2x", vend[0], vend[1], vend[2], vend[3]);
-		return;
-	}
-
-	/* Skip over the magic cookie */
-	vend += 4;
-
-	while ((vend[0] != 0) && (vend[0] != 0xFF)) {
-		if(bootpdebug){
-			int i;
-			print("vend %d [%d]", vend[0], vend[1]);
-			for(i=0; i<vend[1]; i++)
-				print(" %2.2x", vend[i]);
-			print("\n");
-		}
-		switch (vend[0]) {
-		case 1:	/* Subnet mask field */
-			/* There must be only one subnet mask */
-			if (vend[1] != 4)
-				return;
-
-			ipmask = (vend[2]<<24)|
-				 (vend[3]<<16)|
-				 (vend[4]<<8)|
-				  vend[5];
-			break;
-
-		case 3:	/* Gateway/router field */
-			/* We are only concerned with first address */
-			if (vend[1] < 4)
-				break;
-
-			gwip =	(vend[2]<<24)|
-				(vend[3]<<16)|
-				(vend[4]<<8)|
-				 vend[5];
-			break;
-
-		case 6:	/* DNS server */
-			/* We are only concerned with first address */
-			if (vend[1] < 4)
-				break;
-
-			dnsip =	(vend[2]<<24)|
-				(vend[3]<<16)|
-				(vend[4]<<8)|
-				 vend[5];
-			break;
-
-		case 8:	/* "Cookie server" (auth server) field */
-			/* We are only concerned with first address */
-			if (vend[1] < 4)
-				break;
-
-			auip =	(vend[2]<<24)|
-				(vend[3]<<16)|
-				(vend[4]<<8)|
-				 vend[5];
-			break;
-
-		case 11:	/* "Resource loc server" (file server) field */
-			/* We are only concerned with first address */
-			if (vend[1] < 4)
-				break;
-
-			fsip =	(vend[2]<<24)|
-				(vend[3]<<16)|
-				(vend[4]<<8)|
-				 vend[5];
-			break;
-
-		default:	/* Ignore everything else */
-			break;
-		}
-
-		/* Skip over the field */
-		vend += vend[1] + 2;
-	}
-}
-
-static void
-rcvbootp(void *a)
-{
-	int n, fd;
-	Bootp *rp;
-
-	if(waserror())
-		pexit("", 0);
-	rcvprocp = up;	/* store for postnote below */
-	fd = (int)a;
-	while(done == 0) {
-		n = kread(fd, rcvbuf, sizeof(rcvbuf));
-		if(n <= 0)
-			break;
-		rp = (Bootp*)rcvbuf;
-		if (memcmp(req.chaddr, rp->chaddr, 6) == 0 &&
-		   rp->htype == 1 && rp->hlen == 6) {
-			ipaddr = (rp->yiaddr[0]<<24)|
-				 (rp->yiaddr[1]<<16)|
-				 (rp->yiaddr[2]<<8)|
-				  rp->yiaddr[3];
-			parsevend(rp->vend);
-			break;
-		}
-	}
-	poperror();
-	rcvprocp = nil;
-
-	recv = 1;
-	wakeup(&bootpr);
-	pexit("", 0);
-}
-
-static char*
-rbootp(Ipifc *ifc)
-{
-	int cfd, dfd, tries, n;
-	char ia[5+3*16], im[16], *av[3];
-	uchar nipaddr[4], ngwip[4], nipmask[4];
-	char dir[Maxpath];
-	static uchar vend_rfc1048[] = { 99, 130, 83, 99 };
-
-	av[1] = "0.0.0.0";
-	av[2] = "0.0.0.0";
-	ipifcadd(ifc, av, 3, 0, nil);
-
-	cfd = kannounce("udp!*!68", dir);
-	if(cfd < 0)
-		return "bootp announce failed";
-	strcat(dir, "/data");
-	if(kwrite(cfd, "headers", 7) < 0){
-		kclose(cfd);
-		return "bootp ctl headers failed";
-	}
-	kwrite(cfd, "oldheaders", 10);
-	dfd = kopen(dir, ORDWR);
-	if(dfd < 0){
-		kclose(cfd);
-		return "bootp open data failed";
-	}
-	kclose(cfd);
-
-	/* create request */
-	memset(&req, 0, sizeof(req));
-	ipmove(req.raddr, IPv4bcast);
-	hnputs(req.rport, 67);
-	req.op = Bootrequest;
-	req.htype = 1;			/* ethernet (all we know) */
-	req.hlen = 6;			/* ethernet (all we know) */
-
-	/* Hardware MAC address */
-	memmove(req.chaddr, ifc->mac, 6);
-	/* Fill in the local IP address if we know it */
-	ipv4local(ifc, req.ciaddr);
-	memset(req.file, 0, sizeof(req.file));
-	memmove(req.vend, vend_rfc1048, 4);
-
-	done = 0;
-	recv = 0;
-
-	kproc("rcvbootp", rcvbootp, (void*)dfd, KPDUPFDG);
-
-	/*
-	 * broadcast bootp's till we get a reply,
-	 * or fixed number of tries
-	 */
-	tries = 0;
-	while(recv == 0) {
-		if(kwrite(dfd, &req, sizeof(req)) < 0)
-			print("bootp: write: %r");
-
-		tsleep(&bootpr, return0, 0, 1000);
-		if(++tries > 10) {
-			print("bootp: timed out\n");
-			break;
-		}
-	}
-	kclose(dfd);
-	done = 1;
-	if(rcvprocp != nil){
-		postnote(rcvprocp, 1, "timeout", 0);
-		rcvprocp = nil;
-	}
-
-	av[1] = "0.0.0.0";
-	av[2] = "0.0.0.0";
-	ipifcrem(ifc, av, 3);
-
-	hnputl(nipaddr, ipaddr);
-	sprint(ia, "%V", nipaddr);
-	hnputl(nipmask, ipmask);
-	sprint(im, "%V", nipmask);
-	av[1] = ia;
-	av[2] = im;
-	ipifcadd(ifc, av, 3, 0, nil);
-
-	if(gwip != 0) {
-		hnputl(ngwip, gwip);
-		n = sprint(ia, "add 0.0.0.0 0.0.0.0 %V", ngwip);
-		routewrite(ifc->conv->p->f, nil, ia, n);
-	}
-	return nil;
-}
-
-static int
-rbootpread(char *bp, ulong offset, int len)
-{
-	int n;
-	char *buf;
-	uchar a[4];
-
-	buf = smalloc(READSTR);
-	if(waserror()){
-		free(buf);
-		nexterror();
-	}
-	hnputl(a, fsip);
-	n = snprint(buf, READSTR, "fsip %15V\n", a);
-	hnputl(a, auip);
-	n += snprint(buf + n, READSTR-n, "auip %15V\n", a);
-	hnputl(a, gwip);
-	n += snprint(buf + n, READSTR-n, "gwip %15V\n", a);
-	hnputl(a, ipmask);
-	n += snprint(buf + n, READSTR-n, "ipmask %15V\n", a);
-	hnputl(a, ipaddr);
-	n += snprint(buf + n, READSTR-n, "ipaddr %15V\n", a);
-	hnputl(a, dnsip);
-	snprint(buf + n, READSTR-n, "dnsip %15V\n", a);
-
-	len = readstr(offset, bp, len, buf);
-	poperror();
-	free(buf);
-	return len;
-}
-
-char*	(*bootp)(Ipifc*) = rbootp;
-int	(*bootpread)(char*, ulong, int) = rbootpread;
--- a/os/ip/il.c
+++ b/os/ip/il.c
@@ -189,7 +189,7 @@
 {
 	Ipht	ht;
 
-	ulong	stats[Nstats];
+	uvlong	stats[Nstats];
 
 	ulong	csumerr;		/* checksum errors */
 	ulong	hlenerr;		/* header length error */
@@ -208,7 +208,7 @@
 
 
 void	ilrcvmsg(Conv*, Block*);
-void	ilsendctl(Conv*, Ilhdr*, int, ulong, ulong, int);
+int	ilsendctl(Conv*, Ilhdr*, int, ulong, ulong, int);
 void	ilackq(Ilcb*, Block*);
 void	ilprocess(Conv*, Ilhdr*, Block*);
 void	ilpullup(Conv*);
@@ -251,6 +251,9 @@
 	e = Fsstdconnect(c, argv, argc);
 	if(e != nil)
 		return e;
+	if(c->ipversion != V4)
+		return "only IP version 4 supported";
+		
 	return ilstart(c, IL_CONNECT, fast);
 }
 
@@ -260,7 +263,7 @@
 	Ilcb *ic;
 
 	ic = (Ilcb*)(c->ptcl);
-	return snprint(state, n, "%s qin %d qout %d del %5.5d Br %5.5d md %5.5d una %5.5lud rex %5.5d rxq %5.5d max %5.5d",
+	return snprint(state, n, "%s qin %d qout %d del %5.5d Br %5.5d md %5.5d una %5.5lud rex %5.5d rxq %5.5d max %5.5d\n",
 		ilstates[ic->state],
 		c->rq ? qlen(c->rq) : 0,
 		c->wq ? qlen(c->wq) : 0,
@@ -434,7 +437,7 @@
 	p = buf;
 	e = p+len;
 	for(i = 0; i < Nstats; i++)
-		p = seprint(p, e, "%s: %lud\n", statnames[i], priv->stats[i]);
+		p = seprint(p, e, "%s: %llud\n", statnames[i], priv->stats[i]);
 	return p - buf;
 }
 
@@ -548,6 +551,9 @@
 
 	ih = (Ilhdr *)bp->rp;
 	plen = blocklen(bp);
+	if(plen > 0 && (ih->vihl&0xF0)!=IP_VER4)
+		goto raise;	/* ignore non V4 packets */
+
 	if(plen < IL_IPSIZE+IL_HDRSIZE){
 		netlog(il->f, Logil, "il: hlenerr\n");
 		ipriv->stats[HlenErrs]++;
@@ -572,7 +578,7 @@
 		else
 			st = iltype[ih->iltype];
 		ipriv->stats[CsumErrs]++;
-		netlog(il->f, Logil, "il: cksum %ux %ux, pkt(%s id %lud ack %lud %I/%d->%d)\n",
+		netlog(il->f, Logil, "il: cksum %ux %s, pkt(%ux id %ud ack %I/%d->%d)\n",
 			csum, st, nhgetl(ih->ilid), nhgetl(ih->ilack), raddr, sp, dp);
 		goto raise;
 	}
@@ -595,7 +601,7 @@
 			else
 				st = iltype[ih->iltype];
 			ilreject(il->f, ih);		/* no channel and not sync */
-			netlog(il->f, Logil, "il: no channel, pkt(%s id %lud ack %lud %I/%ud->%ud)\n",
+			netlog(il->f, Logil, "il: no channel, pkt(%s id %ud ack %ud %I/%ud->%ud)\n",
 				st, nhgetl(ih->ilid), nhgetl(ih->ilack), raddr, sp, dp); 
 			goto raise;
 		}
@@ -829,7 +835,7 @@
 
 	c = ic->conv;
 	id = nhgetl(h->ilid);
-	netlog(c->p->f, Logil, "il: rexmit %d %ud: %d %d: %i %d/%d\n", id, ic->recvd,
+	netlog(c->p->f, Logil, "il: rexmit %lud %lud: %d %lud: %I %d/%d\n", id, ic->recvd,
 		ic->rexmit, ic->timeout,
 		c->raddr, c->lport, c->rport);
 
@@ -852,7 +858,7 @@
 	ic = (Ilcb*)s->ptcl;
 
 	USED(ic);
-	netlog(s->p->f, Logilmsg, "%11s rcv %d/%d snt %d/%d pkt(%s id %d ack %d %d->%d) ",
+	netlog(s->p->f, Logilmsg, "%11s rcv %lud/%lud snt %lud/%lud pkt(%s id %d ack %ud %ud->%ud) ",
 		ilstates[ic->state],  ic->rstart, ic->recvd, ic->start, 
 		ic->next, iltype[h->iltype], nhgetl(h->ilid), 
 		nhgetl(h->ilack), nhgets(h->ilsrc), nhgets(h->ildst));
@@ -859,7 +865,7 @@
 
 	_ilprocess(s, h, bp);
 
-	netlog(s->p->f, Logilmsg, "%11s rcv %d snt %d\n", ilstates[ic->state], ic->recvd, ic->next);
+	netlog(s->p->f, Logilmsg, "%11s rcv %lud snt %lud\n", ilstates[ic->state], ic->recvd, ic->next);
 }
 
 void
@@ -917,17 +923,12 @@
 		bp->list = nil;
 		dlen = nhgets(oh->illen)-IL_HDRSIZE;
 		bp = trimblock(bp, IL_IPSIZE+IL_HDRSIZE, dlen);
+			
 		/*
 		 * Upper levels don't know about multiple-block
 		 * messages so copy all into one (yick).
 		 */
-		bp = concatblock(bp);
-		if(bp == 0)
-			panic("ilpullup");
-		bp = packblock(bp);
-		if(bp == 0)
-			panic("ilpullup2");
-		qpass(s->rq, bp);
+		qpass(s->rq, packblock(concatblock(bp)));
 	}
 	qunlock(&ic->outo);
 }
@@ -948,7 +949,7 @@
 	id = nhgetl(h->ilid);
 	/* Window checks */
 	if(id <= ic->recvd || id > ic->recvd+ic->window) {
-		netlog(s->p->f, Logil, "il: message outside window %ud <%ud-%ud>: %i %d/%d\n",
+		netlog(s->p->f, Logil, "il: message outside window %lud <%lud-%lud>: %I %d/%d\n",
 			id, ic->recvd, ic->recvd+ic->window, s->raddr, s->lport, s->rport);
 		freeblist(bp);
 		return;
@@ -983,7 +984,7 @@
 	qunlock(&ic->outo);
 }
 
-void
+int
 ilsendctl(Conv *ipc, Ilhdr *inih, int type, ulong id, ulong ack, int ilspec)
 {
 	Ilhdr *ih;
@@ -1034,7 +1035,7 @@
 		hnputs(ih->ilsum, ptclcsum(bp, IL_IPSIZE, IL_HDRSIZE));
 
 if(ipc==nil)
-	panic("ipc is nil caller is %.8lux", getcallerpc(&ipc));
+	panic("ipc is nil caller is %#p", getcallerpc(&ipc));
 if(ipc->p==nil)
 	panic("ipc->p is nil");
 
@@ -1042,7 +1043,7 @@
 		iltype[ih->iltype], nhgetl(ih->ilid), nhgetl(ih->ilack), 
 		nhgets(ih->ilsrc), nhgets(ih->ildst));
 
-	ipoput4(ipc->p->f, bp, 0, ttl, tos, ipc);
+	return ipoput4(ipc->p->f, bp, 0, ttl, tos, ipc);
 }
 
 void
@@ -1145,6 +1146,8 @@
 
 	il = x;
 
+	while(waserror())
+		;
 loop:
 	tsleep(&up->sleep, return0, 0, Iltickms);
 	for(s = il->conv; s && *s; s++) {
@@ -1248,7 +1251,7 @@
 		qlock(&ipriv->apl);
 		if(ipriv->ackprocstarted == 0){
 			sprint(kpname, "#I%dilack", c->p->f->dev);
-			kproc(kpname, ilackproc, c->p, 0);
+			kproc(kpname, ilackproc, c->p);
 			ipriv->ackprocstarted = 1;
 		}
 		qunlock(&ipriv->apl);
@@ -1280,7 +1283,8 @@
 	case IL_CONNECT:
 		ic->state = Ilsyncer;
 		iphtadd(&ipriv->ht, c);
-		ilsendctl(c, nil, Ilsync, ic->start, ic->recvd, 0);
+		if(ilsendctl(c, nil, Ilsync, ic->start, ic->recvd, 0) < 0)
+			ilhangup(c, "no route");
 		break;
 	}
 
@@ -1332,6 +1336,8 @@
 		if(s->lport == psource)
 		if(ipcmp(s->laddr, source) == 0)
 		if(ipcmp(s->raddr, dest) == 0){
+			if(s->ignoreadvice)
+				break;
 			qunlock(il);
 			ic = (Ilcb*)s->ptcl;
 			switch(ic->state){
@@ -1380,12 +1386,6 @@
 	}
 }
 
-int
-ilgc(Proto *il)
-{
-	return natgc(il->ipproto);
-}
-
 void
 ilinit(Fs *f)
 {
@@ -1406,7 +1406,7 @@
 	il->advise = iladvise;
 	il->stats = ilxstats;
 	il->inuse = ilinuse;
-	il->gc = ilgc;
+	il->gc = nil;
 	il->ipproto = IP_ILPROTO;
 	il->nc = scalednconv();
 	il->ptclsize = sizeof(Ilcb);
--- a/os/ip/ip.c
+++ b/os/ip/ip.c
@@ -7,94 +7,6 @@
 
 #include	"ip.h"
 
-typedef struct IP		IP;
-typedef struct Fragment4	Fragment4;
-typedef struct Fragment6	Fragment6;
-typedef struct Ipfrag		Ipfrag;
-
-enum
-{
-	IP4HDR		= 20,		/* sizeof(Ip4hdr) */
-	IP6HDR		= 40,		/* sizeof(Ip6hdr) */
-	IP_HLEN4	= 0x05,		/* Header length in words */
-	IP_DF		= 0x4000,	/* Don't fragment */
-	IP_MF		= 0x2000,	/* More fragments */
-	IP6FHDR		= 8, 		/* sizeof(Fraghdr6) */
-	IP_MAX		= 64*1024,	/* Maximum Internet packet size */
-};
-
-#define BLKIPVER(xp)	(((Ip4hdr*)((xp)->rp))->vihl&0xF0)
-
-/* MIB II counters */
-enum
-{
-	Forwarding,
-	DefaultTTL,
-	InReceives,
-	InHdrErrors,
-	InAddrErrors,
-	ForwDatagrams,
-	InUnknownProtos,
-	InDiscards,
-	InDelivers,
-	OutRequests,
-	OutDiscards,
-	OutNoRoutes,
-	ReasmTimeout,
-	ReasmReqds,
-	ReasmOKs,
-	ReasmFails,
-	FragOKs,
-	FragFails,
-	FragCreates,
-
-	Nstats,
-};
-
-struct Fragment4
-{
-	Block*	blist;
-	Fragment4*	next;
-	ulong 	src;
-	ulong 	dst;
-	ushort	id;
-	ulong 	age;
-};
-
-struct Fragment6
-{
-	Block*	blist;
-	Fragment6*	next;
-	uchar 	src[IPaddrlen];
-	uchar 	dst[IPaddrlen];
-	uint	id;
-	ulong 	age;
-};
-
-struct Ipfrag
-{
-	ushort	foff;
-	ushort	flen;
-};
-
-/* an instance of IP */
-struct IP
-{
-	ulong		stats[Nstats];
-
-	QLock		fraglock4;
-	Fragment4*	flisthead4;
-	Fragment4*	fragfree4;
-	Ref		id4;
-
-	QLock		fraglock6;
-	Fragment6*	flisthead6;
-	Fragment6*	fragfree6;
-	Ref		id6;
-
-	int		iprouting;	/* true if we route like a gateway */
-};
-
 static char *statnames[] =
 {
 [Forwarding]	"Forwarding",
@@ -118,45 +30,11 @@
 [FragCreates]	"FragCreates",
 };
 
-#define BLKIP(xp)	((Ip4hdr*)((xp)->rp))
-/*
- * This sleazy macro relies on the media header size being
- * larger than sizeof(Ipfrag). ipreassemble checks this is true
- */
-#define BKFG(xp)	((Ipfrag*)((xp)->base))
+static Block*		ip4reassemble(IP*, int, Block*);
+static void		ipfragfree4(IP*, Fragment4*);
+static Fragment4*	ipfragallo4(IP*);
 
-ushort		ipcsum(uchar*);
-Block*		ip4reassemble(IP*, int, Block*, Ip4hdr*);
-void		ipfragfree4(IP*, Fragment4*);
-Fragment4*	ipfragallo4(IP*);
-
-
-void
-ip_init_6(Fs *f)
-{
-	V6params *v6p;
-
-	v6p = smalloc(sizeof(V6params));
-	
-	v6p->rp.mflag		= 0;		// default not managed
-	v6p->rp.oflag		= 0;
-	v6p->rp.maxraint	= 600000;	// millisecs
-	v6p->rp.minraint	= 200000;
-	v6p->rp.linkmtu		= 0;		// no mtu sent
-	v6p->rp.reachtime	= 0;
-	v6p->rp.rxmitra		= 0;
-	v6p->rp.ttl		= MAXTTL;
-	v6p->rp.routerlt	= 3*(v6p->rp.maxraint);	
-
-	v6p->hp.rxmithost	= 1000;		// v6 RETRANS_TIMER
-
-	v6p->cdrouter 		= -1;
-
-	f->v6p			= v6p;
-
-}
-
-void
+static void
 initfrag(IP *ip, int size)
 {
 	Fragment4 *fq4, *eq4;
@@ -189,6 +67,7 @@
 	IP *ip;
 
 	ip = smalloc(sizeof(IP));
+	ip->stats[DefaultTTL] = MAXTTL;
 	initfrag(ip, 100);
 	f->ip = ip;
 
@@ -202,11 +81,11 @@
 	if(f->ip->iprouting==0)
 		f->ip->stats[Forwarding] = 2;
 	else
-		f->ip->stats[Forwarding] = 1;	
+		f->ip->stats[Forwarding] = 1;
 }
 
 int
-ipoput4(Fs *f, Block *bp, int gating, int ttl, int tos, Conv *c)
+ipoput4(Fs *f, Block *bp, int gating, int ttl, int tos, Routehint *rh)
 {
 	Ipifc *ifc;
 	uchar *gate;
@@ -213,66 +92,41 @@
 	ulong fragoff;
 	Block *xp, *nb;
 	Ip4hdr *eh, *feh;
-	int lid, len, seglen, chunk, dlen, blklen, offset, medialen;
-	Route *r, *sr;
+	int lid, len, seglen, chunk, hlen, dlen, blklen, offset, medialen;
+	Route *r;
 	IP *ip;
 	int rv = 0;
 
 	ip = f->ip;
-
-	/* Fill out the ip header */
-	eh = (Ip4hdr*)(bp->rp);
-
 	ip->stats[OutRequests]++;
 
-	/* Number of uchars in data and ip header to write */
+	/* Fill out the ip header */
+	eh = (Ip4hdr*)bp->rp;
+	assert(BLEN(bp) >= IP4HDR);
 	len = blocklen(bp);
-
-	if(gating){
-		chunk = nhgets(eh->length);
-		if(chunk > len){
-			ip->stats[OutDiscards]++;
-			netlog(f, Logip, "short gated packet\n");
-			goto free;
-		}
-		if(chunk < len)
-			len = chunk;
-	}
 	if(len >= IP_MAX){
 		ip->stats[OutDiscards]++;
-		netlog(f, Logip, "exceeded ip max size %V\n", eh->dst);
+		netlog(f, Logip, "%V -> %V: exceeded ip max size: %d\n", eh->src, eh->dst, len);
 		goto free;
 	}
 
-	r = v4lookup(f, eh->dst, c);
-	if(r == nil){
+	r = v4lookup(f, eh->dst, eh->src, rh);
+	if(r == nil || (ifc = r->ifc) == nil){
 		ip->stats[OutNoRoutes]++;
-		netlog(f, Logip, "no interface %V\n", eh->dst);
+		netlog(f, Logip, "%V -> %V: no interface\n", eh->src, eh->dst);
 		rv = -1;
 		goto free;
 	}
 
-	ifc = r->ifc;
-	if(r->type & (Rifc|Runi))
+	if(r->type & (Rifc|Runi|Rbcast|Rmulti))
 		gate = eh->dst;
 	else
-	if(r->type & (Rbcast|Rmulti)) {
-		gate = eh->dst;
-		sr = v4lookup(f, eh->src, nil);
-		if(sr != nil && (sr->type & Runi))
-			ifc = sr->ifc;
-	}
-	else
 		gate = r->v4.gate;
 
-	if(!gating)
-		eh->vihl = IP_VER4|IP_HLEN4;
-	eh->ttl = ttl;
-	if(!gating)
-		eh->tos = tos;
-
-	if(!canrlock(ifc))
+	if(!canrlock(ifc)){
+		ip->stats[OutDiscards]++;
 		goto free;
+	}
 	if(waserror()){
 		runlock(ifc);
 		nexterror();
@@ -280,17 +134,18 @@
 	if(ifc->m == nil)
 		goto raise;
 
-	/* Output NAT */
-	if(nato(bp, ifc, f) != 0)
-		goto raise;
+	if(!gating){
+		eh->vihl = IP_VER4|IP_HLEN4;
+		eh->tos = tos;
+	}
+	eh->ttl = ttl;
 
 	/* If we dont need to fragment just send it */
 	medialen = ifc->maxtu - ifc->m->hsize;
 	if(len <= medialen) {
-		if(!gating)
-			hnputs(eh->id, incref(&ip->id4));
 		hnputs(eh->length, len);
 		if(!gating){
+			hnputs(eh->id, incref(&ip->id4));
 			eh->frag[0] = 0;
 			eh->frag[1] = 0;
 		}
@@ -297,31 +152,31 @@
 		eh->cksum[0] = 0;
 		eh->cksum[1] = 0;
 		hnputs(eh->cksum, ipcsum(&eh->vihl));
-		ifc->m->bwrite(ifc, bp, V4, gate);
+
+		ipifcoput(ifc, bp, V4, gate);
 		runlock(ifc);
 		poperror();
 		return 0;
 	}
 
-if((eh->frag[0] & (IP_DF>>8)) && !gating) print("%V: DF set\n", eh->dst);
-
 	if(eh->frag[0] & (IP_DF>>8)){
 		ip->stats[FragFails]++;
 		ip->stats[OutDiscards]++;
 		icmpcantfrag(f, bp, medialen);
-		netlog(f, Logip, "%V: eh->frag[0] & (IP_DF>>8)\n", eh->dst);
+		netlog(f, Logip, "%V -> %V: can't fragment with DF flag set\n", eh->src, eh->dst);
 		goto raise;
 	}
 
-	seglen = (medialen - IP4HDR) & ~7;
+	hlen = (eh->vihl & 0xF)<<2;
+	seglen = (medialen - hlen) & ~7;
 	if(seglen < 8){
 		ip->stats[FragFails]++;
 		ip->stats[OutDiscards]++;
-		netlog(f, Logip, "%V seglen < 8\n", eh->dst);
+		netlog(f, Logip, "%V -> %V: can't fragment with seglen < 8\n", eh->src, eh->dst);
 		goto raise;
 	}
 
-	dlen = len - IP4HDR;
+	dlen = len - hlen;
 	xp = bp;
 	if(gating)
 		lid = nhgets(eh->id);
@@ -328,8 +183,8 @@
 	else
 		lid = incref(&ip->id4);
 
-	offset = IP4HDR;
-	while(xp != nil && offset && offset >= BLEN(xp)) {
+	offset = hlen;
+	while(offset && offset >= BLEN(xp)) {
 		offset -= BLEN(xp);
 		xp = xp->next;
 	}
@@ -341,30 +196,30 @@
 		fragoff = 0;
 	dlen += fragoff;
 	for(; fragoff < dlen; fragoff += seglen) {
-		nb = allocb(IP4HDR+seglen);
-		feh = (Ip4hdr*)(nb->rp);
+		nb = allocb(hlen+seglen);
+		feh = (Ip4hdr*)nb->rp;
 
-		memmove(nb->wp, eh, IP4HDR);
-		nb->wp += IP4HDR;
+		memmove(nb->wp, eh, hlen);
+		nb->wp += hlen;
 
 		if((fragoff + seglen) >= dlen) {
 			seglen = dlen - fragoff;
 			hnputs(feh->frag, fragoff>>3);
 		}
-		else	
+		else
 			hnputs(feh->frag, (fragoff>>3)|IP_MF);
 
-		hnputs(feh->length, seglen + IP4HDR);
+		hnputs(feh->length, seglen + hlen);
 		hnputs(feh->id, lid);
 
 		/* Copy up the data area */
 		chunk = seglen;
 		while(chunk) {
-			if(!xp) {
+			if(xp == nil) {
 				ip->stats[OutDiscards]++;
 				ip->stats[FragFails]++;
 				freeblist(nb);
-				netlog(f, Logip, "!xp: chunk %d\n", chunk);
+				netlog(f, Logip, "xp == nil: chunk %d\n", chunk);
 				goto raise;
 			}
 			blklen = chunk;
@@ -376,12 +231,13 @@
 			chunk -= blklen;
 			if(xp->rp == xp->wp)
 				xp = xp->next;
-		} 
+		}
 
 		feh->cksum[0] = 0;
 		feh->cksum[1] = 0;
 		hnputs(feh->cksum, ipcsum(&feh->vihl));
-		ifc->m->bwrite(ifc, nb, V4, gate);
+
+		ipifcoput(ifc, nb, V4, gate);
 		ip->stats[FragCreates]++;
 	}
 	ip->stats[FragOKs]++;
@@ -396,17 +252,14 @@
 void
 ipiput4(Fs *f, Ipifc *ifc, Block *bp)
 {
-	int hl;
-	int hop, tos, proto, olen;
+	int hl, len, hop, tos;
+	uchar v6dst[IPaddrlen];
+	ushort frag;
 	Ip4hdr *h;
 	Proto *p;
-	ushort frag;
-	int notforme;
-	uchar *dp, v6dst[IPaddrlen];
 	IP *ip;
-	Route *r;
 
-	if(BLKIPVER(bp) != IP_VER4) {
+	if((bp->rp[0]&0xF0) != IP_VER4) {
 		ipiput6(f, ifc, bp);
 		return;
 	}
@@ -430,58 +283,45 @@
 			return;
 	}
 
-	h = (Ip4hdr*)(bp->rp);
-
-	/* Input NAT */
-	nati(bp, ifc);
-
-	/* dump anything that whose header doesn't checksum */
+	h = (Ip4hdr*)bp->rp;
+	hl = (h->vihl & 0xF)<<2;
+	if(hl < IP4HDR || hl > BLEN(bp)) {
+		ip->stats[InHdrErrors]++;
+		netlog(f, Logip, "%V -> %V: bad ip header length: %d\n", h->src, h->dst, hl);
+		goto drop;
+	}
 	if((bp->flag & Bipck) == 0 && ipcsum(&h->vihl)) {
 		ip->stats[InHdrErrors]++;
-		netlog(f, Logip, "ip: checksum error %V\n", h->src);
-		freeblist(bp);
+		netlog(f, Logip, "%V -> %V: bad ip header checksum\n", h->src, h->dst);
+		goto drop;
+	}
+	len = nhgets(h->length);
+	if(len < hl || (bp = trimblock(bp, 0, len)) == nil){
+		ip->stats[InHdrErrors]++;
+		netlog(f, Logip, "%V -> %V: bogus packet length: %d\n", h->src, h->dst, len);
+		if(bp != nil)
+			goto drop;
 		return;
 	}
-	v4tov6(v6dst, h->dst);
-	notforme = ipforme(f, v6dst) == 0;
+	h = (Ip4hdr*)bp->rp;
 
-	/* Check header length and version */
-	if((h->vihl&0x0F) != IP_HLEN4) {
-		hl = (h->vihl&0xF)<<2;
-		if(hl < (IP_HLEN4<<2)) {
-			ip->stats[InHdrErrors]++;
-			netlog(f, Logip, "ip: %V bad hivl %ux\n", h->src, h->vihl);
-			freeblist(bp);
-			return;
-		}
-	  /* If this is not routed strip off the options */
-		if(notforme == 0) {
-			olen = nhgets(h->length);
-			dp = bp->rp + (hl - (IP_HLEN4<<2));
-			memmove(dp, h, IP_HLEN4<<2);
-			bp->rp = dp;
-			h = (Ip4hdr*)(bp->rp);
-			h->vihl = (IP_VER4|IP_HLEN4);
-			hnputs(h->length, olen-hl+(IP_HLEN4<<2));
-		}
-	}
-
 	/* route */
-	if(notforme) {
-		Conv conv;
+	v4tov6(v6dst, h->dst);
+	if(!ipforme(f, v6dst)) {
+		Route *r;
+		Routehint rh;
+		Ipifc *nifc;
 
-		if(!ip->iprouting){
-			freeb(bp);
-			return;
-		}
+		if(!ip->iprouting)
+			goto drop;
 
 		/* don't forward to source's network */
-		conv.r = nil;
-		r = v4lookup(f, h->dst, &conv);
-		if(r == nil || r->ifc == ifc){
+		rh.r = nil;
+		r = v4lookup(f, h->dst, h->src, &rh);
+		if(r == nil || (nifc = r->ifc) == nil
+		|| (nifc == ifc && !ifc->reflect)){
 			ip->stats[OutDiscards]++;
-			freeblist(bp);
-			return;
+			goto drop;
 		}
 
 		/* don't forward if packet has timed out */
@@ -488,23 +328,18 @@
 		hop = h->ttl;
 		if(hop < 1) {
 			ip->stats[InHdrErrors]++;
-			icmpttlexceeded(f, ifc->lifc->local, bp);
-			freeblist(bp);
-			return;
+			icmpttlexceeded(f, ifc, bp);
+			goto drop;
 		}
 
 		/* reassemble if the interface expects it */
-if(r->ifc == nil) panic("nil route rfc");
-		if(r->ifc->reassemble){
+		if(nifc->reassemble){
 			frag = nhgets(h->frag);
-			if(frag) {
-				h->tos = 0;
-				if(frag & IP_MF)
-					h->tos = 1;
-				bp = ip4reassemble(ip, frag, bp, h);
+			if(frag & (IP_MF|IP_FO)) {
+				bp = ip4reassemble(ip, frag, bp);
 				if(bp == nil)
 					return;
-				h = (Ip4hdr*)(bp->rp);
+				h = (Ip4hdr*)bp->rp;
 			}
 		}
 
@@ -511,27 +346,30 @@
 		ip->stats[ForwDatagrams]++;
 		tos = h->tos;
 		hop = h->ttl;
-		ipoput4(f, bp, 1, hop - 1, tos, &conv);
+		ipoput4(f, bp, 1, hop - 1, tos, &rh);
 		return;
 	}
 
+	/* If this is not routed strip off the options */
+	if(hl > IP4HDR) {
+		hl -= IP4HDR;
+		len -= hl;
+		bp->rp += hl;
+		memmove(bp->rp, h, IP4HDR);
+		h = (Ip4hdr*)bp->rp;
+		h->vihl = IP_VER4|IP_HLEN4;
+		hnputs(h->length, len);
+	}
+
 	frag = nhgets(h->frag);
-	if(frag) {
-		h->tos = 0;
-		if(frag & IP_MF)
-			h->tos = 1;
-		bp = ip4reassemble(ip, frag, bp, h);
+	if(frag & (IP_MF|IP_FO)) {
+		bp = ip4reassemble(ip, frag, bp);
 		if(bp == nil)
 			return;
-		h = (Ip4hdr*)(bp->rp);
+		h = (Ip4hdr*)bp->rp;
 	}
 
-	/* don't let any frag info go up the stack */
-	h->frag[0] = 0;
-	h->frag[1] = 0;
-
-	proto = h->proto;
-	p = Fsrcvpcol(f, proto);
+	p = Fsrcvpcol(f, h->proto);
 	if(p != nil && p->rcv != nil) {
 		ip->stats[InDelivers]++;
 		(*p->rcv)(p, ifc, bp);
@@ -539,6 +377,7 @@
 	}
 	ip->stats[InDiscards]++;
 	ip->stats[InUnknownProtos]++;
+drop:
 	freeblist(bp);
 }
 
@@ -550,45 +389,43 @@
 	int i;
 
 	ip = f->ip;
-	ip->stats[DefaultTTL] = MAXTTL;
-
 	p = buf;
 	e = p+len;
-	for(i = 0; i < Nstats; i++)
-		p = seprint(p, e, "%s: %lud\n", statnames[i], ip->stats[i]);
+	for(i = 0; i < Nipstats; i++)
+		p = seprint(p, e, "%s: %llud\n", statnames[i], ip->stats[i]);
 	return p - buf;
 }
 
-Block*
-ip4reassemble(IP *ip, int offset, Block *bp, Ip4hdr *ih)
+static Block*
+ip4reassemble(IP *ip, int offset, Block *bp)
 {
-	int fend;
+	int ovlap, fragsize, len;
+	ulong src, dst;
 	ushort id;
+	Block *bl, **l, *prev;
 	Fragment4 *f, *fnext;
-	ulong src, dst;
-	Block *bl, **l, *last, *prev;
-	int ovlap, len, fragsize, pktposn;
+	Ipfrag *fp, *fq;
+	Ip4hdr *ih;
 
+	/*
+	 *  block lists are too hard, concatblock into a single block
+	 */
+	bp = concatblock(bp);
+
+	ih = (Ip4hdr*)bp->rp;
 	src = nhgetl(ih->src);
 	dst = nhgetl(ih->dst);
 	id = nhgets(ih->id);
+	fragsize = BLEN(bp) - ((ih->vihl&0xF)<<2);
 
-	/*
-	 *  block lists are too hard, pullupblock into a single block
-	 */
-	if(bp->next){
-		bp = pullupblock(bp, blocklen(bp));
-		ih = (Ip4hdr*)(bp->rp);
-	}
-
 	qlock(&ip->fraglock4);
 
 	/*
 	 *  find a reassembly queue for this fragment
 	 */
-	for(f = ip->flisthead4; f; f = fnext){
+	for(f = ip->flisthead4; f != nil; f = fnext){
 		fnext = f->next;	/* because ipfragfree4 changes the list */
-		if(f->src == src && f->dst == dst && f->id == id)
+		if(f->id == id && f->src == src && f->dst == dst)
 			break;
 		if(f->age < NOW){
 			ip->stats[ReasmTimeout]++;
@@ -601,22 +438,23 @@
 	 *  and get rid of any fragments that might go
 	 *  with it.
 	 */
-	if(!ih->tos && (offset & ~(IP_MF|IP_DF)) == 0) {
+	if((offset & (IP_MF|IP_FO)) == 0) {
 		if(f != nil) {
-			ipfragfree4(ip, f);
 			ip->stats[ReasmFails]++;
+			ipfragfree4(ip, f);
 		}
 		qunlock(&ip->fraglock4);
 		return bp;
 	}
 
-	if(bp->base+sizeof(Ipfrag) >= bp->rp){
-		bp = padblock(bp, sizeof(Ipfrag));
-		bp->rp += sizeof(Ipfrag);
+	if(bp->base+IPFRAGSZ > bp->rp){
+		bp = padblock(bp, IPFRAGSZ);
+		bp->rp += IPFRAGSZ;
 	}
 
-	BKFG(bp)->foff = offset<<3;
-	BKFG(bp)->flen = nhgets(ih->length)-IP4HDR;
+	fp = (Ipfrag*)bp->base;
+	fp->foff = (offset & IP_FO)<<3;
+	fp->flen = fragsize;
 
 	/* First fragment allocates a reassembly queue */
 	if(f == nil) {
@@ -627,8 +465,9 @@
 
 		f->blist = bp;
 
-		qunlock(&ip->fraglock4);
 		ip->stats[ReasmReqds]++;
+		qunlock(&ip->fraglock4);
+
 		return nil;
 	}
 
@@ -638,7 +477,7 @@
 	prev = nil;
 	l = &f->blist;
 	bl = f->blist;
-	while(bl != nil && BKFG(bp)->foff > BKFG(bl)->foff) {
+	while(bl != nil && fp->foff > ((Ipfrag*)bl->base)->foff) {
 		prev = bl;
 		l = &bl->next;
 		bl = bl->next;
@@ -645,15 +484,16 @@
 	}
 
 	/* Check overlap of a previous fragment - trim away as necessary */
-	if(prev) {
-		ovlap = BKFG(prev)->foff + BKFG(prev)->flen - BKFG(bp)->foff;
+	if(prev != nil) {
+		fq = (Ipfrag*)prev->base;
+		ovlap = fq->foff + fq->flen - fp->foff;
 		if(ovlap > 0) {
-			if(ovlap >= BKFG(bp)->flen) {
-				freeblist(bp);
+			if(ovlap >= fp->flen) {
 				qunlock(&ip->fraglock4);
+				freeb(bp);
 				return nil;
 			}
-			BKFG(prev)->flen -= ovlap;
+			fq->flen -= ovlap;
 		}
 	}
 
@@ -662,26 +502,26 @@
 	*l = bp;
 
 	/* Check to see if succeeding segments overlap */
-	if(bp->next) {
+	if(bp->next != nil) {
 		l = &bp->next;
-		fend = BKFG(bp)->foff + BKFG(bp)->flen;
+		offset = fp->foff + fp->flen;
 		/* Take completely covered segments out */
-		while(*l) {
-			ovlap = fend - BKFG(*l)->foff;
+		while((bl = *l) != nil) {
+			fq = (Ipfrag*)bl->base;
+			ovlap = offset - fq->foff;
 			if(ovlap <= 0)
 				break;
-			if(ovlap < BKFG(*l)->flen) {
-				BKFG(*l)->flen -= ovlap;
-				BKFG(*l)->foff += ovlap;
-				/* move up ih hdrs */
-				memmove((*l)->rp + ovlap, (*l)->rp, IP4HDR);
-				(*l)->rp += ovlap;
+			if(ovlap < fq->flen) {
+				/* move up ip header */
+				memmove(bl->rp + ovlap, bl->rp, BLEN(bl) - fq->flen);
+				bl->rp += ovlap;
+				fq->flen -= ovlap;
+				fq->foff += ovlap;
 				break;
 			}
-			last = (*l)->next;
-			(*l)->next = nil;
-			freeblist(*l);
-			*l = last;
+			*l = bl->next;
+			bl->next = nil;
+			freeb(bl);
 		}
 	}
 
@@ -689,35 +529,50 @@
 	 *  look for a complete packet.  if we get to a fragment
 	 *  without IP_MF set, we're done.
 	 */
-	pktposn = 0;
-	for(bl = f->blist; bl; bl = bl->next) {
-		if(BKFG(bl)->foff != pktposn)
+	offset = 0;
+	for(bl = f->blist; bl != nil; bl = bl->next, offset += fp->flen) {
+		fp = (Ipfrag*)bl->base;
+		if(fp->foff != offset)
 			break;
-		if((BLKIP(bl)->frag[0]&(IP_MF>>8)) == 0) {
-			bl = f->blist;
-			len = nhgets(BLKIP(bl)->length);
-			bl->wp = bl->rp + len;
 
-			/* Pullup all the fragment headers and
-			 * return a complete packet
-			 */
-			for(bl = bl->next; bl; bl = bl->next) {
-				fragsize = BKFG(bl)->flen;
-				len += fragsize;
-				bl->rp += IP4HDR;
-				bl->wp = bl->rp + fragsize;
-			}
+		ih = (Ip4hdr*)bl->rp;
+		if(ih->frag[0]&(IP_MF>>8))
+			continue;
 
-			bl = f->blist;
-			f->blist = nil;
+		bl = f->blist;
+		len = BLEN(bl);
+
+		/*
+		 * Pullup all the fragment headers and
+		 * return a complete packet
+		 */
+		for(bl = bl->next; bl != nil && len < IP_MAX; bl = bl->next) {
+			fq = (Ipfrag*)bl->base;
+			fragsize = fq->flen;
+			bl->rp = bl->wp - fragsize;
+			len += fragsize;
+		}
+
+		if(len >= IP_MAX){
 			ipfragfree4(ip, f);
-			ih = BLKIP(bl);
-			hnputs(ih->length, len);
+			ip->stats[ReasmFails]++;
 			qunlock(&ip->fraglock4);
-			ip->stats[ReasmOKs]++;
-			return bl;		
+			return nil;
 		}
-		pktposn += BKFG(bl)->flen;
+
+		bl = f->blist;
+		f->blist = nil;
+		ipfragfree4(ip, f);
+
+		ih = (Ip4hdr*)bl->rp;
+		ih->frag[0] = 0;
+		ih->frag[1] = 0;
+		hnputs(ih->length, len);
+
+		ip->stats[ReasmOKs]++;
+		qunlock(&ip->fraglock4);
+
+		return bl;
 	}
 	qunlock(&ip->fraglock4);
 	return nil;
@@ -726,20 +581,20 @@
 /*
  * ipfragfree4 - Free a list of fragments - assume hold fraglock4
  */
-void
+static void
 ipfragfree4(IP *ip, Fragment4 *frag)
 {
 	Fragment4 *fl, **l;
 
-	if(frag->blist)
+	if(frag->blist != nil)
 		freeblist(frag->blist);
-
-	frag->src = 0;
-	frag->id = 0;
 	frag->blist = nil;
+	frag->id = 0;
+	frag->src = 0;
+	frag->dst = 0;
 
 	l = &ip->flisthead4;
-	for(fl = *l; fl; fl = fl->next) {
+	for(fl = *l; fl != nil; fl = fl->next) {
 		if(fl == frag) {
 			*l = frag->next;
 			break;
@@ -755,7 +610,7 @@
 /*
  * ipfragallo4 - allocate a reassembly queue - assume hold fraglock4
  */
-Fragment4 *
+static Fragment4*
 ipfragallo4(IP *ip)
 {
 	Fragment4 *f;
@@ -762,7 +617,7 @@
 
 	while(ip->fragfree4 == nil) {
 		/* free last entry on fraglist */
-		for(f = ip->flisthead4; f->next; f = f->next)
+		for(f = ip->flisthead4; f->next != nil; f = f->next)
 			;
 		ipfragfree4(ip, f);
 	}
--- a/os/ip/ip.h
+++ b/os/ip/ip.h
@@ -1,35 +1,33 @@
 typedef struct	Conv	Conv;
+typedef struct	Fragment4 Fragment4;
+typedef struct	Fragment6 Fragment6;
 typedef struct	Fs	Fs;
 typedef union	Hwaddr	Hwaddr;
 typedef struct	IP	IP;
 typedef struct	IPaux	IPaux;
+typedef struct	Ip4hdr	Ip4hdr;
+typedef struct	Ipfrag	Ipfrag;
 typedef struct	Ipself	Ipself;
 typedef struct	Ipselftab	Ipselftab;
 typedef struct	Iplink	Iplink;
 typedef struct	Iplifc	Iplifc;
 typedef struct	Ipmulti	Ipmulti;
-typedef struct	IProuter IProuter;
 typedef struct	Ipifc	Ipifc;
 typedef struct	Iphash	Iphash;
 typedef struct	Ipht	Ipht;
 typedef struct	Netlog	Netlog;
-typedef struct	Ifclog	Ifclog;
 typedef struct	Medium	Medium;
 typedef struct	Proto	Proto;
 typedef struct	Arpent	Arpent;
 typedef struct	Arp Arp;
 typedef struct	Route	Route;
+typedef struct	Routehint Routehint;
 
 typedef struct	Routerparams	Routerparams;
 typedef struct 	Hostparams	Hostparams;
-typedef struct 	V6router	V6router;
-typedef struct	V6params	V6params;
+typedef struct	v6params	v6params;
 
-typedef struct Ip4hdr     Ip4hdr;
-typedef struct Nat	Nat;
-
 #pragma incomplete Arp
-#pragma	incomplete Ifclog
 #pragma incomplete Ipself
 #pragma incomplete Ipselftab
 #pragma incomplete IP
@@ -39,10 +37,9 @@
 {
 	Addrlen=	64,
 	Maxproto=	20,
-	Nhash=		64,
-	Maxincall=	5,
-	Nchans=		16383,
-	MAClen=		16,		/* longest mac address */
+	Maxincall=	10,
+	Nchans=		1024,
+	MAClen=		8,		/* longest mac address */
 
 	MAXTTL=		255,
 	DFLTTOS=	0,
@@ -57,6 +54,12 @@
 	V6=		6,
 	IP_VER4= 	0x40,
 	IP_VER6=	0x60,
+	IP_HLEN4=	5,		/* v4: Header length in words */
+	IP_DF=		0x4000,		/* v4: Don't fragment */
+	IP_MF=		0x2000,		/* v4: More fragments */
+	IP_FO=		0x1fff,		/* v4: Fragment offset */
+	IP4HDR=		IP_HLEN4<<2,	/* sizeof(Ip4hdr) */
+	IP_MAX=		64*1024,	/* Max. Internet packet size, v4 & v6 */
 
 	/* 2^Lroot trees in the root table */
 	Lroot=		10,
@@ -73,6 +76,79 @@
 	Connected=	4,
 };
 
+/* MIB II counters */
+enum
+{
+	Forwarding,
+	DefaultTTL,
+	InReceives,
+	InHdrErrors,
+	InAddrErrors,
+	ForwDatagrams,
+	InUnknownProtos,
+	InDiscards,
+	InDelivers,
+	OutRequests,
+	OutDiscards,
+	OutNoRoutes,
+	ReasmTimeout,
+	ReasmReqds,
+	ReasmOKs,
+	ReasmFails,
+	FragOKs,
+	FragFails,
+	FragCreates,
+
+	Nipstats,
+};
+
+struct Fragment4
+{
+	Block*	blist;
+	Fragment4*	next;
+	ulong 	src;
+	ulong 	dst;
+	ushort	id;
+	ulong 	age;
+};
+
+struct Fragment6
+{
+	Block*	blist;
+	Fragment6*	next;
+	uchar 	src[IPaddrlen];
+	uchar 	dst[IPaddrlen];
+	uint	id;
+	ulong 	age;
+};
+
+struct Ipfrag
+{
+	ushort	foff;
+	ushort	flen;
+	uchar	payload[];
+};
+
+#define IPFRAGSZ offsetof(Ipfrag, payload[0])
+
+/* an instance of IP */
+struct IP
+{
+	uvlong		stats[Nipstats];
+
+	QLock		fraglock4;
+	Fragment4*	flisthead4;
+	Fragment4*	fragfree4;
+	Ref		id4;
+
+	QLock		fraglock6;
+	Fragment6*	flisthead6;
+	Fragment6*	fragfree6;
+	Ref		id6;
+
+	int		iprouting;	/* true if we route like a gateway */
+};
+
 /* on the wire packet header */
 struct Ip4hdr
 {
@@ -86,9 +162,14 @@
 	uchar	cksum[2];	/* Header checksum */
 	uchar	src[4];		/* IP source */
 	uchar	dst[4];		/* IP destination */
-	uchar	data[1];	/* start of data */
 };
 
+struct Routehint
+{
+	Route	*r;			/* last route used */
+	ulong	rgen;			/* routetable generation for *r */
+};
+
 /*
  *  one per conversation directory
  */
@@ -100,9 +181,9 @@
 	Proto*	p;
 
 	int	restricted;		/* remote port is restricted */
+	int	ignoreadvice;		/* don't terminate connection on icmp errors */
 	uint	ttl;			/* max time to live */
 	uint	tos;			/* type of service */
-	int	ignoreadvice;		/* don't terminate connection on icmp errors */
 
 	uchar	ipversion;
 	uchar	laddr[IPaddrlen];	/* local IP address */
@@ -139,8 +220,7 @@
 
 	void*	ptcl;			/* protocol specific stuff */
 
-	Route	*r;			/* last route used */
-	ulong	rgen;			/* routetable generation for *r */
+	Routehint;
 };
 
 struct Medium
@@ -161,18 +241,8 @@
 	/* process packets written to 'data' */
 	void	(*pktin)(Fs *f, Ipifc *ifc, Block *bp);
 
-	/* routes for router boards */
-	void	(*addroute)(Ipifc *ifc, int, uchar*, uchar*, uchar*, int);
-	void	(*remroute)(Ipifc *ifc, int, uchar*, uchar*);
-	void	(*flushroutes)(Ipifc *ifc);
-
-	/* for routing multicast groups */
-	void	(*joinmulti)(Ipifc *ifc, uchar *a, uchar *ia);
-	void	(*leavemulti)(Ipifc *ifc, uchar *a, uchar *ia);
-
 	/* address resolution */
-	void	(*ares)(Fs*, int, uchar*, uchar*, int, int);	/* resolve */
-	void	(*areg)(Ipifc*, uchar*);			/* register */
+	void	(*areg)(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *ip);
 
 	/* v6 address generation */
 	void	(*pref2addr)(uchar *pref, uchar *ea);
@@ -187,12 +257,13 @@
 	uchar	mask[IPaddrlen];
 	uchar	remote[IPaddrlen];
 	uchar	net[IPaddrlen];
+	uchar	type;		/* route type */
 	uchar	tentative;	/* =1 => v6 dup disc on, =0 => confirmed unique */
 	uchar	onlink;		/* =1 => onlink, =0 offlink. */
 	uchar	autoflag;	/* v6 autonomous flag */
-	long 	validlt;	/* v6 valid lifetime */
-	long 	preflt;		/* v6 preferred lifetime */
-	long	origint;	/* time when addr was added */
+	ulong 	validlt;	/* v6 valid lifetime */
+	ulong 	preflt;		/* v6 preferred lifetime */
+	ulong	origint;	/* time when addr was added */
 	Iplink	*link;		/* addresses linked to this lifc */
 	Iplifc	*next;
 };
@@ -203,25 +274,25 @@
 	Ipself	*self;
 	Iplifc	*lifc;
 	Iplink	*selflink;	/* next link for this local address */
-	Iplink	*lifclink;	/* next link for this ifc */
-	ulong	expire;
+	Iplink	*lifclink;	/* next link for this lifc */
 	Iplink	*next;		/* free list */
+	ulong	expire;
 	int	ref;
 };
 
-/* rfc 2461, pp.40--43. */
+/* rfc 2461, pp.40—43. */
 
 /* default values, one per stack */
 struct Routerparams {
-	int	mflag;
-	int	oflag;
-	int 	maxraint;
-	int	minraint;
-	int	linkmtu;
-	int	reachtime;
-	int	rxmitra;
-	int	ttl;
-	int	routerlt;	
+	int	mflag;		/* flag: managed address configuration */
+	int	oflag;		/* flag: other stateful configuration */
+	int 	maxraint;	/* max. router adv interval (ms) */
+	int	minraint;	/* min. router adv interval (ms) */
+	int	linkmtu;	/* mtu options */
+	int	reachtime;	/* reachable time */
+	int	rxmitra;	/* retransmit interval */
+	int	ttl;		/* cur hop count limit */
+	int	routerlt;	/* router lifetime */
 };
 
 struct Hostparams {
@@ -231,22 +302,18 @@
 struct Ipifc
 {
 	RWlock;
-	
+
 	Conv	*conv;		/* link to its conversation structure */
 	char	dev[64];	/* device we're attached to */
 	Medium	*m;		/* Media pointer */
 	int	maxtu;		/* Maximum transfer unit */
 	int	mintu;		/* Minumum tranfer unit */
-	int	mbps;		/* megabits per second */
 	void	*arg;		/* medium specific */
-	int	reassemble;	/* reassemble IP packets before forwarding */
 
-	/* these are used so that we can unbind on the fly */
-	Lock	idlock;
+	uchar	reflect;	/* allow forwarded packets to go out the same interface */
+	uchar	reassemble;	/* reassemble IP packets before forwarding to this interface */
+	
 	uchar	ifcid;		/* incremented each 'bind/unbind/add/remove' */
-	int	ref;		/* number of proc's using this ipifc */
-	Rendez	wait;		/* where unbinder waits for ref == 0 */
-	int	unbinding;
 
 	uchar	mac[MAClen];	/* MAC address */
 
@@ -255,10 +322,16 @@
 	ulong	in, out;	/* message statistics */
 	ulong	inerr, outerr;	/* ... */
 
-	uchar	sendra6;	/* == 1 => send router advs on this ifc	*/
-	uchar	recvra6;	/* == 1 => recv router advs on this ifc */
-	Routerparams rp;	/* router parameters as in RFC 2461, pp.40--43. 
+	uchar	sendra6;	/* flag: send router advs on this ifc */
+	uchar	recvra6;	/* flag: recv router advs on this ifc */
+	Routerparams rp;	/* router parameters as in RFC 2461, pp.40—43.
 					used only if node is router */
+
+	int	speed;		/* link speed in bits per second */
+	int	delay;		/* burst delay in ms */
+	int	burst;		/* burst delay in bytes */
+	int	load;		/* bytes in flight */
+	ulong	ticks;
 };
 
 /*
@@ -330,20 +403,11 @@
 	int		nc;		/* number of conversations */
 	int		ac;
 	Qid		qid;		/* qid for protocol directory */
-	ushort		nextport;
 	ushort		nextrport;
 
 	void		*priv;
 };
 
-/*
- *  Stream for sending packets to user level
- */
-struct IProuter {
-	QLock;
-	int	opens;
-	Queue	*q;
-};
 
 /*
  *  one per IP protocol stack
@@ -362,8 +426,7 @@
 	IP	*ip;
 	Ipselftab	*self;
 	Arp	*arp;
-	V6params	*v6p;
-	IProuter iprouter;
+	v6params	*v6p;
 
 	Route	*v4root[1<<Lroot];	/* v4 routing forest */
 	Route	*v6root[1<<Lroot];	/* v6 routing forest */
@@ -370,7 +433,6 @@
 	Route	*queue;			/* used as temp when reinjecting routes */
 
 	Netlog	*alog;
-	Ifclog	*ilog;
 
 	char	ndb[1024];		/* an ndb entry for this interface */
 	int	ndbvers;
@@ -377,23 +439,10 @@
 	long	ndbmtime;
 };
 
-/* one per default router known to host */
-struct V6router {
-	uchar	inuse;
-	Ipifc	*ifc;
-	int	ifcid;
-	uchar	routeraddr[IPaddrlen];
-	long	ltorigin;
-	Routerparams	rp;
-};
-
-struct V6params
+struct v6params
 {
 	Routerparams	rp;		/* v6 params, one copy per node now */
 	Hostparams	hp;
-	V6router	v6rlist[3];	/* max 3 default routers, currently */
-	int		cdrouter;	/* uses only v6rlist[cdrouter] if   */ 
-					/* cdrouter >= 0. */
 };
 
 
@@ -410,8 +459,7 @@
 char*	Fsstdbind(Conv*, char**, int);
 ulong	scalednconv(void);
 void	closeconv(Conv*);
-
-/* 
+/*
  *  logging
  */
 enum
@@ -434,7 +482,6 @@
 	Logrudpmsg=	1<<16,
 	Logesp=		1<<17,
 	Logtcpwin=	1<<18,
-	Lognat=		1<<19,
 };
 
 void	netloginit(Fs*);
@@ -449,17 +496,17 @@
 void	ifclogopen(Fs*, Chan*);
 void	ifclogclose(Fs*, Chan*);
 
+#pragma varargck argpos netlog	3
+
 /*
  *  iproute.c
  */
 typedef	struct RouteTree RouteTree;
-typedef struct Routewalk Routewalk;
 typedef struct V4route V4route;
 typedef struct V6route V6route;
 
 enum
 {
-
 	/* type bits */
 	Rv4=		(1<<0),		/* this is a version 4 route */
 	Rifc=		(1<<1),		/* this route is a directly connected interface */
@@ -468,27 +515,18 @@
 	Rbcast=		(1<<4),		/* a broadcast self address */
 	Rmulti=		(1<<5),		/* a multicast self address */
 	Rproxy=		(1<<6),		/* this route should be proxied */
+	Rsrc=		(1<<7),		/* source specific route */
 };
 
-struct Routewalk
-{
-	int	o;
-	int	h;
-	char*	p;
-	char*	e;
-	void*	state;
-	void	(*walk)(Route*, Routewalk*);
-};
-
 struct	RouteTree
 {
-	Route*	right;
-	Route*	left;
-	Route*	mid;
+	Route	*mid;
+	Route	*left;
+	Route	*right;
+	Ipifc	*ifc;
+	uchar	ifcid;		/* must match ifc->ifcid */
 	uchar	depth;
 	uchar	type;
-	uchar	ifcid;		/* must match ifc->id */
-	Ipifc	*ifc;
 	char	tag[4];
 	int	ref;
 };
@@ -497,6 +535,10 @@
 {
 	ulong	address;
 	ulong	endaddress;
+
+	ulong	source;
+	ulong	endsource;
+
 	uchar	gate[IPv4addrlen];
 };
 
@@ -504,6 +546,10 @@
 {
 	ulong	address[IPllen];
 	ulong	endaddress[IPllen];
+
+	ulong	source[IPllen];
+	ulong	endsource[IPllen];
+
 	uchar	gate[IPaddrlen];
 };
 
@@ -516,17 +562,16 @@
 		V4route v4;
 	};
 };
-extern void	v4addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type);
-extern void	v6addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type);
-extern void	v4delroute(Fs *f, uchar *a, uchar *mask, int dolock);
-extern void	v6delroute(Fs *f, uchar *a, uchar *mask, int dolock);
-extern Route*	v4lookup(Fs *f, uchar *a, Conv *c);
-extern Route*	v6lookup(Fs *f, uchar *a, Conv *c);
+
+extern void	addroute(Fs *f, uchar *a, uchar *mask, uchar *s, uchar *smask, uchar *gate, int type, Ipifc *ifc, char *tag);
+extern void	remroute(Fs *f, uchar *a, uchar *mask, uchar *s, uchar *smask, uchar *gate, int type, Ipifc *ifc, char *tag);
+extern Route*	v4lookup(Fs *f, uchar *a, uchar *s, Routehint *h);
+extern Route*	v6lookup(Fs *f, uchar *a, uchar *s, Routehint *h);
+extern Route*	v4source(Fs *f, uchar *a, uchar *s);
+extern Route*	v6source(Fs *f, uchar *a, uchar *s);
 extern long	routeread(Fs *f, char*, ulong, int);
 extern long	routewrite(Fs *f, Chan*, char*, int);
-extern void	routetype(int, char*);
-extern void	ipwalkroutes(Fs*, Routewalk*);
-extern void	convroute(Route*, uchar*, uchar*, uchar*, char*, int*);
+extern void	routetype(int type, char p[8]);
 
 /*
  *  devip.c
@@ -543,7 +588,6 @@
 };
 
 extern IPaux*	newipaux(char*, char*);
-extern void	setlport(Conv*);
 
 /*
  *  arp.c
@@ -552,18 +596,16 @@
 {
 	uchar	ip[IPaddrlen];
 	uchar	mac[MAClen];
-	Medium	*type;			/* media type */
-	Arpent*	hash;
-	Block*	hold;
-	Block*	last;
-	uint	ctime;			/* time entry was created or refreshed */
-	uint	utime;			/* time entry was last used */
-	uchar	state;
+	Arpent	*hash;
 	Arpent	*nextrxt;		/* re-transmit chain */
-	uint	rtime;			/* time for next retransmission */
-	uchar	rxtsrem;
+	Block	*hold;
+	Block	*last;
 	Ipifc	*ifc;
 	uchar	ifcid;			/* must match ifc->id */
+	uchar	state;
+	uchar	rxtsrem;		/* re-transmissions remaining */
+	ulong	ctime;			/* time entry was created or refreshed */
+	ulong	utime;			/* time entry was last used */
 };
 
 extern void	arpinit(Fs*);
@@ -572,15 +614,17 @@
 extern Arpent*	arpget(Arp*, Block *bp, int version, Ipifc *ifc, uchar *ip, uchar *h);
 extern void	arprelease(Arp*, Arpent *a);
 extern Block*	arpresolve(Arp*, Arpent *a, Medium *type, uchar *mac);
-extern void	arpenter(Fs*, int version, uchar *ip, uchar *mac, int len, int norefresh);
+extern int	arpenter(Fs*, int version, uchar *ip, uchar *mac, int n, uchar *ia, Ipifc *ifc, int refresh);
+extern void	ndpsendsol(Fs*, Ipifc*, Arpent*);
 
 /*
  * ipaux.c
  */
 
-extern int	myetheraddr(uchar*, char*);
-extern ulong	parseip(uchar*, char*);
-extern ulong	parseipmask(uchar*, char*);
+extern int	parseether(uchar*, char*);
+extern vlong	parseip(uchar*, char*);
+extern vlong	parseipmask(uchar*, char*, int);
+extern vlong	parseipandmask(uchar*, uchar*, char*, char*);
 extern char*	v4parseip(uchar*, char*);
 extern void	maskip(uchar *from, uchar *mask, uchar *to);
 extern int	parsemac(uchar *to, char *from, int len);
@@ -589,12 +633,10 @@
 extern void	v4tov6(uchar *v6, uchar *v4);
 extern int	v6tov4(uchar *v4, uchar *v6);
 extern int	eipfmt(Fmt*);
+extern int	convipvers(Conv *c);
 
 #define	ipmove(x, y) memmove(x, y, IPaddrlen)
 #define	ipcmp(x, y) ( (x)[IPaddrlen-1] != (y)[IPaddrlen-1] || memcmp(x, y, IPaddrlen) )
- 
-#define	ip4move(x, y) memmove(x, y, IPv4addrlen)
-#define	ip4cmp(x, y) ( (x)[IPv4addrlen-1] != (y)[IPv4addrlen-1] || memcmp(x, y, IPv4addrlen) )
 
 extern uchar IPv4bcast[IPaddrlen];
 extern uchar IPv4bcastobs[IPaddrlen];
@@ -612,7 +654,6 @@
 extern Medium	ethermedium;
 extern Medium	nullmedium;
 extern Medium	pktmedium;
-extern Medium	tripmedium;
 
 /*
  *  ipifc.c
@@ -619,33 +660,24 @@
  */
 extern Medium*	ipfindmedium(char *name);
 extern void	addipmedium(Medium *med);
+extern void	ipifcoput(Ipifc *ifc, Block *bp, int version, uchar *ip);
 extern int	ipforme(Fs*, uchar *addr);
-extern int	iptentative(Fs*, uchar *addr);
-extern int	ipisbm(uchar *);
-extern int	ipismulticast(uchar *);
-extern Ipifc*	findipifc(Fs*, uchar *remote, int type);
-extern void	findprimaryip(Fs*, uchar*);
+extern int	ipismulticast(uchar *ip);
+extern Ipifc*	findipifc(Fs*, uchar *local, uchar *remote, int type);
+extern Ipifc*	findipifcstr(Fs *f, char *s);
 extern void	findlocalip(Fs*, uchar *local, uchar *remote);
-extern int	ipv4local(Ipifc *ifc, uchar *addr);
-extern int	ipv6local(Ipifc *ifc, uchar *addr);
-extern int	ipv6anylocal(Ipifc *ifc, uchar *addr);
+extern int	ipv4local(Ipifc *ifc, uchar *local, int prefixlen, uchar *remote);
+extern int	ipv6local(Ipifc *ifc, uchar *local, int prefixlen, uchar *remote);
 extern Iplifc*	iplocalonifc(Ipifc *ifc, uchar *ip);
+extern Iplifc*	ipremoteonifc(Ipifc *ifc, uchar *ip);
 extern int	ipproxyifc(Fs *f, Ipifc *ifc, uchar *ip);
-extern int	ipismulticast(uchar *ip);
-extern int	ipisbooting(void);
-extern int	ipifccheckin(Ipifc *ifc, Medium *med);
-extern void	ipifccheckout(Ipifc *ifc);
-extern int	ipifcgrab(Ipifc *ifc);
-extern void	ipifcaddroute(Fs*, int, uchar*, uchar*, uchar*, int);
-extern void	ipifcremroute(Fs*, int, uchar*, uchar*);
 extern void	ipifcremmulti(Conv *c, uchar *ma, uchar *ia);
 extern void	ipifcaddmulti(Conv *c, uchar *ma, uchar *ia);
 extern char*	ipifcrem(Ipifc *ifc, char **argv, int argc);
 extern char*	ipifcadd(Ipifc *ifc, char **argv, int argc, int tentative, Iplifc *lifcp);
 extern long	ipselftabread(Fs*, char *a, ulong offset, int n);
-extern char*	ipifcaddpref6(Ipifc *ifc, char**argv, int argc);
-extern void	ipsendra6(Fs *f, int on);
-
+extern char*	ipifcadd6(Ipifc *ifc, char**argv, int argc);
+extern char*	ipifcremove6(Ipifc *ifc, char**argv, int argc);
 /*
  *  ip.c
  */
@@ -652,37 +684,26 @@
 extern void	iprouting(Fs*, int);
 extern void	icmpnoconv(Fs*, Block*);
 extern void	icmpcantfrag(Fs*, Block*, int);
-extern void	icmpttlexceeded(Fs*, uchar*, Block*);
+extern void	icmpttlexceeded(Fs*, Ipifc*, Block*);
 extern ushort	ipcsum(uchar*);
 extern void	ipiput4(Fs*, Ipifc*, Block*);
 extern void	ipiput6(Fs*, Ipifc*, Block*);
-extern int	ipoput4(Fs*, Block*, int, int, int, Conv*);
-extern int	ipoput6(Fs*, Block*, int, int, int, Conv*);
+extern int	ipoput4(Fs*, Block*, int, int, int, Routehint*);
+extern int	ipoput6(Fs*, Block*, int, int, int, Routehint*);
 extern int	ipstats(Fs*, char*, int);
 extern ushort	ptclbsum(uchar*, int);
 extern ushort	ptclcsum(Block*, int, int);
 extern void	ip_init(Fs*);
-extern void	update_mtucache(uchar*, ulong);
-extern ulong	restrict_mtu(uchar*, ulong);
+extern void	ip_init_6(Fs*);
 
 /*
  * bootp.c
  */
-char*	(*bootp)(Ipifc*);
-int	(*bootpread)(char*, ulong, int);
+extern int	bootpread(char*, ulong, int);
 
 /*
- *  iprouter.c
- */
-void	useriprouter(Fs*, Ipifc*, Block*);
-void	iprouteropen(Fs*);
-void	iprouterclose(Fs*);
-long	iprouterread(Fs*, void*, int);
-
-/*
  *  resolving inferno/plan9 differences
  */
-Chan*		commonfdtochan(int, int, int, int);
 char*		commonuser(void);
 char*		commonerror(void);
 
@@ -695,15 +716,3 @@
  *  global to all of the stack
  */
 extern void	(*igmpreportfn)(Ipifc*, uchar*);
-
-/*
- * nat.c
- */
-extern int	nato(Block*, Ipifc*, Fs*);
-extern void	nati(Block*, Ipifc*);
-extern int	natgc(uchar);
-
-extern int	addnataddr(uchar*, uchar*, Iplifc*);
-extern int	removenataddr(uchar*, uchar*, Iplifc*);
-extern void	shownataddr(void);
-extern void flushnataddr(void);
--- a/os/ip/ipaux.c
+++ b/os/ip/ipaux.c
@@ -5,49 +5,8 @@
 #include	"fns.h"
 #include	"../port/error.h"
 #include	"ip.h"
-#include  "ipv6.h"
+#include	"ipv6.h"
 
-/*
- *  well known IP addresses
- */
-uchar IPv4bcast[IPaddrlen] = {
-	0, 0, 0, 0,
-	0, 0, 0, 0,
-	0, 0, 0xff, 0xff,
-	0xff, 0xff, 0xff, 0xff
-};
-uchar IPv4allsys[IPaddrlen] = {
-	0, 0, 0, 0,
-	0, 0, 0, 0,
-	0, 0, 0xff, 0xff,
-	0xe0, 0, 0, 0x01
-};
-uchar IPv4allrouter[IPaddrlen] = {
-	0, 0, 0, 0,
-	0, 0, 0, 0,
-	0, 0, 0xff, 0xff,
-	0xe0, 0, 0, 0x02
-};
-uchar IPallbits[IPaddrlen] = {
-	0xff, 0xff, 0xff, 0xff,
-	0xff, 0xff, 0xff, 0xff,
-	0xff, 0xff, 0xff, 0xff,
-	0xff, 0xff, 0xff, 0xff
-};
-
-uchar IPnoaddr[IPaddrlen];
-
-/*
- *  prefix of all v4 addresses
- */
-uchar v4prefix[IPaddrlen] = {
-	0, 0, 0, 0,
-	0, 0, 0, 0,
-	0, 0, 0xff, 0xff,
-	0, 0, 0, 0
-};
-
-
 char *v6hdrtypes[Maxhdrtype] =
 {
 	[HBH]		"HopbyHop",
@@ -54,7 +13,7 @@
 	[ICMP]		"ICMP",
 	[IGMP]		"IGMP",
 	[GGP]		"GGP",
-	[IPINIP]		"IP",
+	[IPINIP]	"IP",
 	[ST]		"ST",
 	[TCP]		"TCP",
 	[UDP]		"UDP",
@@ -87,6 +46,7 @@
 	0, 0, 0, 0,
 	0, 0, 0, 0x01
 };
+
 uchar v6linklocal[IPaddrlen] = {
 	0xfe, 0x80, 0, 0,
 	0, 0, 0, 0,
@@ -99,26 +59,8 @@
 	0, 0, 0, 0,
 	0, 0, 0, 0
 };
-int v6llpreflen = 8;	// link-local prefix length
-uchar v6sitelocal[IPaddrlen] = {
-	0xfe, 0xc0, 0, 0,
-	0, 0, 0, 0,
-	0, 0, 0, 0,
-	0, 0, 0, 0
-};
-uchar v6sitelocalmask[IPaddrlen] = {
-	0xff, 0xff, 0xff, 0xff,
-	0xff, 0xff, 0xff, 0xff,
-	0, 0, 0, 0,
-	0, 0, 0, 0
-};
-int v6slpreflen = 6;	// site-local prefix length
-uchar v6glunicast[IPaddrlen] = {
-	0x08, 0, 0, 0,
-	0, 0, 0, 0,
-	0, 0, 0, 0,
-	0, 0, 0, 0
-};
+int v6llpreflen = 8;	/* link-local prefix length in bytes */
+
 uchar v6multicast[IPaddrlen] = {
 	0xff, 0, 0, 0,
 	0, 0, 0, 0,
@@ -131,7 +73,8 @@
 	0, 0, 0, 0,
 	0, 0, 0, 0
 };
-int v6mcpreflen = 1;	// multicast prefix length
+int v6mcpreflen = 1;	/* multicast prefix length */
+
 uchar v6allnodesN[IPaddrlen] = {
 	0xff, 0x01, 0, 0,
 	0, 0, 0, 0,
@@ -138,6 +81,12 @@
 	0, 0, 0, 0,
 	0, 0, 0, 0x01
 };
+uchar v6allroutersN[IPaddrlen] = {
+	0xff, 0x01, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0, 0x02
+};
 uchar v6allnodesNmask[IPaddrlen] = {
 	0xff, 0xff, 0, 0,
 	0, 0, 0, 0,
@@ -144,7 +93,8 @@
 	0, 0, 0, 0,
 	0, 0, 0, 0
 };
-int v6aNpreflen = 2;	// all nodes (N) prefix
+int v6aNpreflen = 2;	/* all nodes (N) prefix */
+
 uchar v6allnodesL[IPaddrlen] = {
 	0xff, 0x02, 0, 0,
 	0, 0, 0, 0,
@@ -151,19 +101,6 @@
 	0, 0, 0, 0,
 	0, 0, 0, 0x01
 };
-uchar v6allnodesLmask[IPaddrlen] = {
-	0xff, 0xff, 0, 0,
-	0, 0, 0, 0,
-	0, 0, 0, 0,
-	0, 0, 0, 0
-};
-int v6aLpreflen = 2;	// all nodes (L) prefix
-uchar v6allroutersN[IPaddrlen] = {
-	0xff, 0x01, 0, 0,
-	0, 0, 0, 0,
-	0, 0, 0, 0,
-	0, 0, 0, 0x02
-};
 uchar v6allroutersL[IPaddrlen] = {
 	0xff, 0x02, 0, 0,
 	0, 0, 0, 0,
@@ -170,12 +107,14 @@
 	0, 0, 0, 0,
 	0, 0, 0, 0x02
 };
-uchar v6allroutersS[IPaddrlen] = {
-	0xff, 0x05, 0, 0,
+uchar v6allnodesLmask[IPaddrlen] = {
+	0xff, 0xff, 0, 0,
 	0, 0, 0, 0,
 	0, 0, 0, 0,
-	0, 0, 0, 0x02
+	0, 0, 0, 0
 };
+int v6aLpreflen = 2;	/* all nodes (L) prefix */
+
 uchar v6solicitednode[IPaddrlen] = {
 	0xff, 0x02, 0, 0,
 	0, 0, 0, 0,
@@ -190,9 +129,6 @@
 };
 int v6snpreflen = 13;
 
-
-
-
 ushort
 ptclcsum(Block *bp, int offset, int len)
 {
@@ -215,7 +151,7 @@
 	if(bp->next == nil) {
 		if(blocklen < len)
 			len = blocklen;
-		return ~ptclbsum(addr, len) & 0xffff;
+		return ptclbsum(addr, len) ^ 0xffff;
 	}
 
 	losum = 0;
@@ -247,7 +183,7 @@
 	while((csum = losum>>16) != 0)
 		losum = csum + (losum & 0xffff);
 
-	return ~losum & 0xffff;
+	return losum ^ 0xffff;
 }
 
 enum
@@ -255,306 +191,9 @@
 	Isprefix= 16,
 };
 
-static uchar prefixvals[256] =
-{
-[0x00] 0 | Isprefix,
-[0x80] 1 | Isprefix,
-[0xC0] 2 | Isprefix,
-[0xE0] 3 | Isprefix,
-[0xF0] 4 | Isprefix,
-[0xF8] 5 | Isprefix,
-[0xFC] 6 | Isprefix,
-[0xFE] 7 | Isprefix,
-[0xFF] 8 | Isprefix,
-};
-
-int
-eipfmt(Fmt *f)
-{
-	char buf[5*8];
-	static char *efmt = "%.2lux%.2lux%.2lux%.2lux%.2lux%.2lux";
-	static char *ifmt = "%d.%d.%d.%d";
-	uchar *p, ip[16];
-	ulong *lp;
-	ushort s;
-	int i, j, n, eln, eli;
-
-	switch(f->r) {
-	case 'E':		/* Ethernet address */
-		p = va_arg(f->args, uchar*);
-		return fmtprint(f, efmt, p[0], p[1], p[2], p[3], p[4], p[5]);
-
-	case 'I':		/* Ip address */
-		p = va_arg(f->args, uchar*);
-common:
-		if(memcmp(p, v4prefix, 12) == 0)
-			return fmtprint(f, ifmt, p[12], p[13], p[14], p[15]);
-
-		/* find longest elision */
-		eln = eli = -1;
-		for(i = 0; i < 16; i += 2){
-			for(j = i; j < 16; j += 2)
-				if(p[j] != 0 || p[j+1] != 0)
-					break;
-			if(j > i && j - i > eln){
-				eli = i;
-				eln = j - i;
-			}
-		}
-
-		/* print with possible elision */
-		n = 0;
-		for(i = 0; i < 16; i += 2){
-			if(i == eli){
-				n += sprint(buf+n, "::");
-				i += eln;
-				if(i >= 16)
-					break;
-			} else if(i != 0)
-				n += sprint(buf+n, ":");
-			s = (p[i]<<8) + p[i+1];
-			n += sprint(buf+n, "%ux", s);
-		}
-		return fmtstrcpy(f, buf);
-
-	case 'i':		/* v6 address as 4 longs */
-		lp = va_arg(f->args, ulong*);
-		for(i = 0; i < 4; i++)
-			hnputl(ip+4*i, *lp++);
-		p = ip;
-		goto common;
-
-	case 'V':		/* v4 ip address */
-		p = va_arg(f->args, uchar*);
-		return fmtprint(f, ifmt, p[0], p[1], p[2], p[3]);
-
-	case 'M':		/* ip mask */
-		p = va_arg(f->args, uchar*);
-
-		/* look for a prefix mask */
-		for(i = 0; i < 16; i++)
-			if(p[i] != 0xff)
-				break;
-		if(i < 16){
-			if((prefixvals[p[i]] & Isprefix) == 0)
-				goto common;
-			for(j = i+1; j < 16; j++)
-				if(p[j] != 0)
-					goto common;
-			n = 8*i + (prefixvals[p[i]] & ~Isprefix);
-		} else
-			n = 8*16;
-
-		/* got one, use /xx format */
-		return fmtprint(f, "/%d", n);
-	}
-	return fmtstrcpy(f, "(eipfmt)");
-}
-
 #define CLASS(p) ((*(uchar*)(p))>>6)
 
-extern char*
-v4parseip(uchar *to, char *from)
-{
-	int i;
-	char *p;
-
-	p = from;
-	for(i = 0; i < 4 && *p; i++){
-		to[i] = strtoul(p, &p, 0);
-		if(*p == '.')
-			p++;
-	}
-	switch(CLASS(to)){
-	case 0:	/* class A - 1 uchar net */
-	case 1:
-		if(i == 3){
-			to[3] = to[2];
-			to[2] = to[1];
-			to[1] = 0;
-		} else if(i == 2){
-			to[3] = to[1];
-			to[1] = 0;
-		}
-		break;
-	case 2:	/* class B - 2 uchar net */
-		if(i == 3){
-			to[3] = to[2];
-			to[2] = 0;
-		}
-		break;
-	}
-	return p;
-}
-
-int
-isv4(uchar *ip)
-{
-	return memcmp(ip, v4prefix, IPv4off) == 0;
-}
-
-
-/*
- *  the following routines are unrolled with no memset's to speed
- *  up the usual case
- */
 void
-v4tov6(uchar *v6, uchar *v4)
-{
-	v6[0] = 0;
-	v6[1] = 0;
-	v6[2] = 0;
-	v6[3] = 0;
-	v6[4] = 0;
-	v6[5] = 0;
-	v6[6] = 0;
-	v6[7] = 0;
-	v6[8] = 0;
-	v6[9] = 0;
-	v6[10] = 0xff;
-	v6[11] = 0xff;
-	v6[12] = v4[0];
-	v6[13] = v4[1];
-	v6[14] = v4[2];
-	v6[15] = v4[3];
-}
-
-int
-v6tov4(uchar *v4, uchar *v6)
-{
-	if(v6[0] == 0
-	&& v6[1] == 0
-	&& v6[2] == 0
-	&& v6[3] == 0
-	&& v6[4] == 0
-	&& v6[5] == 0
-	&& v6[6] == 0
-	&& v6[7] == 0
-	&& v6[8] == 0
-	&& v6[9] == 0
-	&& v6[10] == 0xff
-	&& v6[11] == 0xff)
-	{
-		v4[0] = v6[12];
-		v4[1] = v6[13];
-		v4[2] = v6[14];
-		v4[3] = v6[15];
-		return 0;
-	} else {
-		memset(v4, 0, 4);
-		return -1;
-	}
-}
-
-ulong
-parseip(uchar *to, char *from)
-{
-	int i, elipsis = 0, v4 = 1;
-	ulong x;
-	char *p, *op;
-
-	memset(to, 0, IPaddrlen);
-	p = from;
-	for(i = 0; i < 16 && *p; i+=2){
-		op = p;
-		x = strtoul(p, &p, 16);
-		if(*p == '.' || (*p == 0 && i == 0)){
-			p = v4parseip(to+i, op);
-			i += 4;
-			break;
-		} else {
-			to[i] = x>>8;
-			to[i+1] = x;
-		}
-		if(*p == ':'){
-			v4 = 0;
-			if(*++p == ':'){
-				elipsis = i+2;
-				p++;
-			}
-		}
-	}
-	if(i < 16){
-		memmove(&to[elipsis+16-i], &to[elipsis], i-elipsis);
-		memset(&to[elipsis], 0, 16-i);
-	}
-	if(v4){
-		to[10] = to[11] = 0xff;
-		return nhgetl(to+12);
-	} else
-		return 6;
-}
-
-/*
- *  hack to allow ip v4 masks to be entered in the old
- *  style
- */
-ulong
-parseipmask(uchar *to, char *from)
-{
-	ulong x;
-	int i;
-	uchar *p;
-
-	if(*from == '/'){
-		/* as a number of prefix bits */
-		i = atoi(from+1);
-		if(i < 0)
-			i = 0;
-		if(i > 128)
-			i = 128;
-		memset(to, 0, IPaddrlen);
-		for(p = to; i >= 8; i -= 8)
-			*p++ = 0xff;
-		if(i > 0)
-			*p = ~((1<<(8-i))-1);
-		x = nhgetl(to+IPv4off);
-	} else {
-		/* as a straight bit mask */
-		x = parseip(to, from);
-		if(memcmp(to, v4prefix, IPv4off) == 0)
-			memset(to, 0xff, IPv4off);
-	}
-	return x;
-}
-
-void
-maskip(uchar *from, uchar *mask, uchar *to)
-{
-	int i;
-
-	for(i = 0; i < IPaddrlen; i++)
-		to[i] = from[i] & mask[i];
-}
-
-uchar classmask[4][16] = {
-	0xff,0xff,0xff,0xff,  0xff,0xff,0xff,0xff,  0xff,0xff,0xff,0xff,  0xff,0x00,0x00,0x00,
-	0xff,0xff,0xff,0xff,  0xff,0xff,0xff,0xff,  0xff,0xff,0xff,0xff,  0xff,0x00,0x00,0x00,
-	0xff,0xff,0xff,0xff,  0xff,0xff,0xff,0xff,  0xff,0xff,0xff,0xff,  0xff,0xff,0x00,0x00,
-	0xff,0xff,0xff,0xff,  0xff,0xff,0xff,0xff,  0xff,0xff,0xff,0xff,  0xff,0xff,0xff,0x00,
-};
-
-uchar*
-defmask(uchar *ip)
-{
-	if(isv4(ip))
-		return classmask[ip[IPv4off]>>6];
-	else {
-		if(ipcmp(ip, v6loopback) == 0)
-			return IPallbits;
-		else if(memcmp(ip, v6linklocal, v6llpreflen) == 0)
-			return v6linklocalmask;
-		else if(memcmp(ip, v6sitelocal, v6slpreflen) == 0)
-			return v6sitelocalmask;
-		else if(memcmp(ip, v6solicitednode, v6snpreflen) == 0)
-			return v6solicitednodemask;
-		else if(memcmp(ip, v6multicast, v6mcpreflen) == 0)
-			return v6multicastmask;
-		return IPallbits;
-	}
-}
-
-void
 ipv62smcast(uchar *smcast, uchar *a)
 {
 	assert(IPaddrlen == 16);
@@ -599,7 +238,7 @@
 ulong
 iphash(uchar *sa, ushort sp, uchar *da, ushort dp)
 {
-	return ((sa[IPaddrlen-1]<<24) ^ (sp << 16) ^ (da[IPaddrlen-1]<<8) ^ dp ) % Nhash;
+	return ((sa[IPaddrlen-1]<<24) ^ (sp << 16) ^ (da[IPaddrlen-1]<<8) ^ dp ) % Nipht;
 }
 
 void
@@ -678,7 +317,7 @@
 			return c;
 		}
 	}
-	
+
 	/* match local address and port */
 	hv = iphash(IPnoaddr, 0, da, dp);
 	for(h = ht->tab[hv]; h != nil; h = h->next){
@@ -690,7 +329,7 @@
 			return c;
 		}
 	}
-	
+
 	/* match just port */
 	hv = iphash(IPnoaddr, 0, IPnoaddr, dp);
 	for(h = ht->tab[hv]; h != nil; h = h->next){
@@ -702,7 +341,7 @@
 			return c;
 		}
 	}
-	
+
 	/* match local address */
 	hv = iphash(IPnoaddr, 0, da, 0);
 	for(h = ht->tab[hv]; h != nil; h = h->next){
@@ -714,7 +353,7 @@
 			return c;
 		}
 	}
-	
+
 	/* look for something that matches anything */
 	hv = iphash(IPnoaddr, 0, IPnoaddr, 0);
 	for(h = ht->tab[hv]; h != nil; h = h->next){
@@ -726,4 +365,13 @@
 	}
 	unlock(ht);
 	return nil;
+}
+
+int
+convipvers(Conv *c)
+{
+	if(isv4(c->raddr) && isv4(c->laddr) || ipcmp(c->raddr, IPnoaddr) == 0)
+		return V4;
+	else
+		return V6;
 }
--- a/os/ip/ipifc.c
+++ b/os/ip/ipifc.c
@@ -11,17 +11,14 @@
 #define DPRINT if(0)print
 
 enum {
-	Maxmedia = 32,
-	Nself = Maxmedia*5,
-	NHASH = (1<<6),
-	NCACHE = 256,
-	QMAX = 64*1024-1,
+	Maxmedia	= 32,
+	Nself		= Maxmedia*5,
+	NHASH		= 1<<6,
+	NCACHE		= 256,
+	QMAX		= 192*1024-1,
 };
 
-Medium *media[Maxmedia] =
-{
-	0
-};
+Medium *media[Maxmedia] = { 0 };
 
 /*
  *  cache of local addresses (addresses we answer to)
@@ -29,12 +26,10 @@
 struct Ipself
 {
 	uchar	a[IPaddrlen];
-	Ipself	*hnext;		/* next address in the hash table */
+	Ipself	*next;		/* next address in the hash table */
 	Iplink	*link;		/* binding twixt Ipself and Ipifc */
 	ulong	expire;
 	uchar	type;		/* type of address */
-	int	ref;
-	Ipself	*next;		/* free list */
 };
 
 struct Ipselftab
@@ -64,11 +59,47 @@
 
 static void	addselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a, int type);
 static void	remselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a);
-static char*	ipifcjoinmulti(Ipifc *ifc, char **argv, int argc);
-static char*	ipifcleavemulti(Ipifc *ifc, char **argv, int argc);
-static void	ipifcregisterproxy(Fs*, Ipifc*, uchar*);
-static char*	ipifcremlifc(Ipifc*, Iplifc*);
+static void	ipifcregisteraddr(Fs*, Ipifc*, Iplifc*, uchar*);
+static void	ipifcregisterproxy(Fs*, Ipifc*, uchar*, int);
+static char*	ipifcremlifc(Ipifc*, Iplifc**);
 
+static char Ebound[] = "interface already bound";
+static char Eunbound[] = "interface not bound";
+
+enum {
+	unknownv6,		/* UGH */
+	unspecifiedv6,
+	linklocalv6,
+	globalv6,
+};
+
+static int
+v6addrtype(uchar *addr)
+{
+	if(isv4(addr) || ipcmp(addr, IPnoaddr) == 0)
+		return unknownv6;
+	else if(islinklocal(addr) || ipcmp(addr, v6loopback) == 0 ||
+	    isv6mcast(addr) && (addr[1] & 0xF) <= Link_local_scop)
+		return linklocalv6;
+	else
+		return globalv6;
+}
+
+static int
+comprefixlen(uchar *a, uchar *b, int n)
+{
+	int i, c;
+
+	for(i = 0; i < n; i++){
+		if((c = a[i] ^ b[i]) == 0)
+			continue;
+		for(i <<= 3; (c & 0x80) == 0; i++)
+			c <<= 1;
+		return i;
+	}
+	return i << 3;
+}
+
 /*
  *  link in a new medium
  */
@@ -121,7 +152,7 @@
 	wlock(ifc);
 	if(ifc->m != nil){
 		wunlock(ifc);
-		return "interface already bound";	
+		return Ebound;
 	}
 	if(waserror()){
 		wunlock(ifc);
@@ -142,18 +173,14 @@
 	ifc->m = m;
 	ifc->mintu = ifc->m->mintu;
 	ifc->maxtu = ifc->m->maxtu;
+	ifc->delay = 40;
+	ifc->speed = 0;
 	if(ifc->m->unbindonclose == 0)
 		ifc->conv->inuse++;
-	ifc->rp.mflag = 0;		// default not managed
-	ifc->rp.oflag = 0;
-	ifc->rp.maxraint = 600000;	// millisecs
-	ifc->rp.minraint = 200000;
-	ifc->rp.linkmtu = 0;		// no mtu sent
-	ifc->rp.reachtime = 0;
-	ifc->rp.rxmitra = 0;
-	ifc->rp.ttl = MAXTTL;
-	ifc->rp.routerlt = 3*(ifc->rp.maxraint);
 
+	/* default router parameters */
+	ifc->rp = c->p->f->v6p->rp;
+
 	/* any ancillary structures (like routes) no longer pertain */
 	ifc->ifcid++;
 
@@ -170,29 +197,44 @@
 
 /*
  *  detach a device from an interface, close the interface
- *  called with ifc->conv closed
  */
 static char*
 ipifcunbind(Ipifc *ifc)
 {
-	char *err;
+	Medium *m;
 
-	if(waserror()){
+	wlock(ifc);
+	m = ifc->m;
+	if(m == nil){
 		wunlock(ifc);
-		nexterror();
+		return Eunbound;
 	}
-	wlock(ifc);
 
-	/* dissociate routes */
-	if(ifc->m != nil && ifc->m->unbindonclose == 0)
-		ifc->conv->inuse--;
-	ifc->ifcid++;
+	/* disassociate logical interfaces (before zeroing ifc->arg) */
+	while(ifc->lifc != nil)
+		ipifcremlifc(ifc, &ifc->lifc);
 
 	/* disassociate device */
-	if(ifc->m != nil && ifc->m->unbind)
-		(*ifc->m->unbind)(ifc);
+	if(m->unbind != nil){
+		extern Medium nullmedium;
+
+		/*
+		 * unbind() might unlock the ifc, so change the medium
+		 * to the nullmedium to prevent packets from getting
+		 * sent while the medium is shutting down.
+		 */
+		ifc->m = &nullmedium;
+
+		if(!waserror()){
+			(*m->unbind)(ifc);
+			poperror();
+		}
+	}
+
 	memset(ifc->dev, 0, sizeof(ifc->dev));
 	ifc->arg = nil;
+
+	ifc->reflect = 0;
 	ifc->reassemble = 0;
 
 	/* close queues to stop queuing of packets */
@@ -200,26 +242,22 @@
 	qclose(ifc->conv->wq);
 	qclose(ifc->conv->sq);
 
-	/* disassociate logical interfaces */
-	while(ifc->lifc){
-		err = ipifcremlifc(ifc, ifc->lifc);
-		if(err)
-			error(err);
-	}
-
+	/* dissociate routes */
+	ifc->ifcid++;
+	if(m->unbindonclose == 0)
+		ifc->conv->inuse--;
 	ifc->m = nil;
 	wunlock(ifc);
-	poperror();
+
 	return nil;
 }
 
+char sfixedformat[] = "device %s maxtu %d sendra %d recvra %d mflag %d oflag %d"
+" maxraint %d minraint %d linkmtu %d reachtime %d rxmitra %d ttl %d routerlt %d"
+" pktin %lud pktout %lud errin %lud errout %lud speed %d delay %d\n";
 
-
-char sfixedformat[] = "device %s maxtu %d sendra %d recvra %d mflag %d oflag %d maxraint %d minraint %d linkmtu %d reachtime %d rxmitra %d ttl %d routerlt %d pktin %lud pktout %lud errin %lud errout %lud\n";
-
 char slineformat[] = "	%-40I %-10M %-40I %-12lud %-12lud\n";
 
-
 static int
 ipifcstate(Conv *c, char *state, int n)
 {
@@ -228,19 +266,18 @@
 	int m;
 
 	ifc = (Ipifc*)c->ptcl;
-
 	m = snprint(state, n, sfixedformat,
 		ifc->dev, ifc->maxtu, ifc->sendra6, ifc->recvra6,
 		ifc->rp.mflag, ifc->rp.oflag, ifc->rp.maxraint,
 		ifc->rp.minraint, ifc->rp.linkmtu, ifc->rp.reachtime,
 		ifc->rp.rxmitra, ifc->rp.ttl, ifc->rp.routerlt,
-		ifc->in, ifc->out, ifc->inerr, ifc->outerr);
+		ifc->in, ifc->out, ifc->inerr, ifc->outerr,
+		ifc->speed, ifc->delay);
 
 	rlock(ifc);
-	for(lifc = ifc->lifc; lifc && n > m; lifc = lifc->next)
-		m += snprint(state+m, n - m, slineformat,
-			lifc->local, lifc->mask, lifc->remote,
-			lifc->validlt, lifc->preflt);
+	for(lifc = ifc->lifc; lifc != nil && n > m; lifc = lifc->next)
+		m += snprint(state+m, n - m, slineformat, lifc->local,
+			lifc->mask, lifc->remote, lifc->validlt, lifc->preflt);
 	if(ifc->lifc == nil)
 		m += snprint(state+m, n - m, "\n");
 	runlock(ifc);
@@ -256,13 +293,11 @@
 	int m;
 
 	ifc = (Ipifc*)c->ptcl;
-
-	m = 0;
-
 	rlock(ifc);
-	for(lifc = ifc->lifc; lifc; lifc = lifc->next){
+	m = 0;
+	for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next){
 		m += snprint(state+m, n - m, "%-40.40I ->", lifc->local);
-		for(link = lifc->link; link; link = link->lifclink)
+		for(link = lifc->link; link != nil; link = link->lifclink)
 			m += snprint(state+m, n - m, " %-40.40I", link->self->a);
 		m += snprint(state+m, n - m, "\n");
 	}
@@ -279,6 +314,59 @@
 	return ifc->m != nil;
 }
 
+static void
+ipifcadjustburst(Ipifc *ifc)
+{
+	int burst;
+
+	burst = ((vlong)ifc->delay * ifc->speed) / 8000;
+	if(burst < ifc->maxtu)
+		burst = ifc->maxtu;
+	ifc->burst = burst;
+}
+
+static void
+ipifcsetdelay(Ipifc *ifc, int delay)
+{
+	if(delay < 0)
+		delay = 0;
+	else if(delay > 1000)
+		delay = 1000;
+	ifc->delay = delay;
+	ipifcadjustburst(ifc);
+}
+
+static void
+ipifcsetspeed(Ipifc *ifc, int speed)
+{
+	if(speed < 0)
+		speed = 0;
+	ifc->speed = speed;
+	ifc->load = 0;
+	ipifcadjustburst(ifc);
+}
+
+void
+ipifcoput(Ipifc *ifc, Block *bp, int version, uchar *ip)
+{
+	if(ifc->speed){
+		ulong now = MACHP(0)->ticks;
+		int dt = TK2MS(now - ifc->ticks);
+		ifc->ticks = now;
+		ifc->load -= ((vlong)dt * ifc->speed) / 8000;
+		if(ifc->load < 0 || dt < 0 || dt > 1000)
+			ifc->load = 0;
+		else if(ifc->load > ifc->burst){
+			freeblist(bp);
+			return;
+		}
+	}
+	bp = concatblock(bp);
+	ifc->load += BLEN(bp);
+	ifc->m->bwrite(ifc, bp, version, ip);
+}
+
+
 /*
  *  called when a process writes to an interface's 'data'
  */
@@ -294,18 +382,15 @@
 		return;
 
 	ifc = (Ipifc*)c->ptcl;
-	if(!canrlock(ifc)){
-		freeb(bp);
-		return;
-	}
+	rlock(ifc);
 	if(waserror()){
 		runlock(ifc);
 		nexterror();
 	}
-	if(ifc->m == nil || ifc->m->pktin == nil)
-		freeb(bp);
-	else
+	if(ifc->m != nil && ifc->m->pktin != nil)
 		(*ifc->m->pktin)(c->p->f, ifc, bp);
+	else
+		freeb(bp);
 	runlock(ifc);
 	poperror();
 }
@@ -319,27 +404,26 @@
 	Ipifc *ifc;
 
 	c->rq = qopen(QMAX, 0, 0, 0);
-	c->sq = qopen(2*QMAX, 0, 0, 0);
 	c->wq = qopen(QMAX, Qkick, ipifckick, c);
+	c->sq = qopen(QMAX, 0, 0, 0);
+	if(c->rq == nil || c->wq == nil || c->sq == nil)
+		error(Enomem);
 	ifc = (Ipifc*)c->ptcl;
 	ifc->conv = c;
-	ifc->unbinding = 0;
 	ifc->m = nil;
+	ifc->reflect = 0;
 	ifc->reassemble = 0;
 }
 
 /*
  *  called after last close of ipifc data or ctl
- *  called with c locked, we must unlock
  */
 static void
 ipifcclose(Conv *c)
 {
-	Ipifc *ifc;
-	Medium *m;
+	Ipifc *ifc = (Ipifc*)c->ptcl;
+	Medium *m = ifc->m;
 
-	ifc = (Ipifc*)c->ptcl;
-	m = ifc->m;
 	if(m != nil && m->unbindonclose)
 		ipifcunbind(ifc);
 }
@@ -347,19 +431,17 @@
 /*
  *  change an interface's mtu
  */
-char*
-ipifcsetmtu(Ipifc *ifc, char **argv, int argc)
+static char*
+ipifcsetmtu(Ipifc *ifc, int mtu)
 {
-	int mtu;
+	Medium *m = ifc->m;
 
-	if(argc < 2)
+	if(m == nil)
+		return Eunbound;
+	if(mtu < m->mintu || mtu > m->maxtu)
 		return Ebadarg;
-	if(ifc->m == nil)
-		return Ebadarg;
-	mtu = strtoul(argv[1], 0, 0);
-	if(mtu < ifc->m->mintu || mtu > ifc->m->maxtu)
-		return Ebadarg;
 	ifc->maxtu = mtu;
+	ipifcadjustburst(ifc);
 	return nil;
 }
 
@@ -374,13 +456,8 @@
 	Iplifc *lifc, **l;
 	int i, type, mtu;
 	Fs *f;
-	int sendnbrdisc = 0;
 
-	if(ifc->m == nil)
-		return "ipifc not yet bound to device";
-
-	f = ifc->conv->p->f;
-
+	mtu = 0;
 	type = Rifc;
 	memset(ip, 0, IPaddrlen);
 	memset(mask, 0, IPaddrlen);
@@ -392,23 +469,21 @@
 		/* fall through */
 	case 5:
 		mtu = strtoul(argv[4], 0, 0);
-		if(mtu >= ifc->m->mintu && mtu <= ifc->m->maxtu)
-			ifc->maxtu = mtu;
 		/* fall through */
 	case 4:
-		parseip(ip, argv[1]);
-		parseipmask(mask, argv[2]);
-		parseip(rem, argv[3]);
+		if (parseipandmask(ip, mask, argv[1], argv[2]) == -1 || parseip(rem, argv[3]) == -1)
+			return Ebadip;
 		maskip(rem, mask, net);
 		break;
 	case 3:
-		parseip(ip, argv[1]);
-		parseipmask(mask, argv[2]);
+		if (parseipandmask(ip, mask, argv[1], argv[2]) == -1)
+			return Ebadip;
 		maskip(ip, mask, rem);
 		maskip(rem, mask, net);
 		break;
 	case 2:
-		parseip(ip, argv[1]);
+		if (parseip(ip, argv[1]) == -1)
+			return Ebadip;
 		memmove(mask, defmask(ip), IPaddrlen);
 		maskip(ip, mask, rem);
 		maskip(rem, mask, net);
@@ -415,26 +490,55 @@
 		break;
 	default:
 		return Ebadarg;
-		break;
 	}
-	if(isv4(ip))
+
+	/* check for point-to-point interface */
+	if(ipcmp(ip, v6loopback) != 0) /* skip v6 loopback, it's a special address */
+	if(ipcmp(mask, IPallbits) == 0)
+		type |= Rptpt;
+
+	if(isv4(ip) || ipcmp(ip, IPnoaddr) == 0){
+		type |= Rv4;
 		tentative = 0;
+	}
+
 	wlock(ifc);
+	if(ifc->m == nil){
+		wunlock(ifc);
+		return Eunbound;
+	}
+	f = ifc->conv->p->f;
+	if(waserror()){
+		wunlock(ifc);
+		return up->errstr;
+	}
 
+	if(mtu > 0)
+		ipifcsetmtu(ifc, mtu);
+
 	/* ignore if this is already a local address for this ifc */
-	for(lifc = ifc->lifc; lifc; lifc = lifc->next) {
-		if(ipcmp(lifc->local, ip) == 0) {
-			if(lifc->tentative != tentative)
-				lifc->tentative = tentative;
-			if(lifcp != nil) {
-				lifc->onlink = lifcp->onlink;
-				lifc->autoflag = lifcp->autoflag;
-				lifc->validlt = lifcp->validlt;
-				lifc->preflt = lifcp->preflt;
-				lifc->origint = lifcp->origint;
+	if((lifc = iplocalonifc(ifc, ip)) != nil){
+		if(lifcp != nil) {
+			if(!lifc->onlink && lifcp->onlink){
+				lifc->onlink = 1;
+				addroute(f, lifc->remote, lifc->mask, ip, IPallbits,
+					lifc->remote, lifc->type, ifc, tifc);
+				if(v6addrtype(ip) != linklocalv6)
+					addroute(f, lifc->remote, lifc->mask, ip, IPnoaddr,
+						lifc->remote, lifc->type, ifc, tifc);
 			}
-			goto out;
+			lifc->autoflag = lifcp->autoflag;
+			lifc->validlt = lifcp->validlt;
+			lifc->preflt = lifcp->preflt;
+			lifc->origint = lifcp->origint;
 		}
+		if(lifc->tentative != tentative){
+			lifc->tentative = tentative;
+			goto done;
+		}
+		wunlock(ifc);
+		poperror();
+		return nil;
 	}
 
 	/* add the address to the list of logical ifc's for this ifc */
@@ -443,6 +547,7 @@
 	ipmove(lifc->mask, mask);
 	ipmove(lifc->remote, rem);
 	ipmove(lifc->net, net);
+	lifc->type = type;
 	lifc->tentative = tentative;
 	if(lifcp != nil) {
 		lifc->onlink = lifcp->onlink;
@@ -450,39 +555,34 @@
 		lifc->validlt = lifcp->validlt;
 		lifc->preflt = lifcp->preflt;
 		lifc->origint = lifcp->origint;
+	} else {		/* default values */
+		lifc->onlink = lifc->autoflag = 1;
+		lifc->validlt = lifc->preflt = ~0UL;
+		lifc->origint = NOW / 1000;
 	}
-	else {		// default values
-		lifc->onlink = 1;
-		lifc->autoflag = 1;
-		lifc->validlt = 0xffffffff;
-		lifc->preflt = 0xffffffff;
-		lifc->origint = NOW / 10^3;
-	}
 	lifc->next = nil;
 
-	for(l = &ifc->lifc; *l; l = &(*l)->next)
+	for(l = &ifc->lifc; *l != nil; l = &(*l)->next)
 		;
 	*l = lifc;
 
-	/* check for point-to-point interface */
-	if(ipcmp(ip, v6loopback))  /* skip v6 loopback, it's a special address */
-	if(ipcmp(mask, IPallbits) == 0)
-		type |= Rptpt;
+	/* add route for this logical interface */
+	if(lifc->onlink){
+		addroute(f, rem, mask, ip, IPallbits, rem, type, ifc, tifc);
+		if(v6addrtype(ip) != linklocalv6)
+			addroute(f, rem, mask, ip, IPnoaddr, rem, type, ifc, tifc);
+	}
 
-	/* add local routes */
-	if(isv4(ip))
-		v4addroute(f, tifc, rem+IPv4off, mask+IPv4off, rem+IPv4off, type);
-	else
-		v6addroute(f, tifc, rem, mask, rem, type);
-
 	addselfcache(f, ifc, lifc, ip, Runi);
 
-	if((type & (Rproxy|Rptpt)) == (Rproxy|Rptpt)){
-		ipifcregisterproxy(f, ifc, rem);
-		goto out;
+	/* register proxy */
+	if(type & Rptpt){
+		if(type & Rproxy)
+			ipifcregisterproxy(f, ifc, rem, 1);
+		goto done;
 	}
 
-	if(isv4(ip) || ipcmp(ip, IPnoaddr) == 0) {
+	if(type & Rv4) {
 		/* add subnet directed broadcast address to the self cache */
 		for(i = 0; i < IPaddrlen; i++)
 			bcast[i] = (ip[i] & mask[i]) | ~mask[i];
@@ -504,174 +604,135 @@
 		for(i = 0; i < IPaddrlen; i++)
 			bcast[i] = (ip[i] & mask[i]) & mask[i];
 		addselfcache(f, ifc, lifc, bcast, Rbcast);
-		
+
 		addselfcache(f, ifc, lifc, IPv4bcast, Rbcast);
-	}
-	else {
+	} else {
 		if(ipcmp(ip, v6loopback) == 0) {
 			/* add node-local mcast address */
 			addselfcache(f, ifc, lifc, v6allnodesN, Rmulti);
 
 			/* add route for all node multicast */
-			v6addroute(f, tifc, v6allnodesN, v6allnodesNmask, v6allnodesN, Rmulti);
+			addroute(f, v6allnodesN, v6allnodesNmask,
+				ip, IPallbits,
+				v6allnodesN, Rmulti, ifc, tifc);
 		}
 
 		/* add all nodes multicast address */
 		addselfcache(f, ifc, lifc, v6allnodesL, Rmulti);
-		
+
 		/* add route for all nodes multicast */
-		v6addroute(f, tifc, v6allnodesL, v6allnodesLmask, v6allnodesL, Rmulti);
-		
+		addroute(f, v6allnodesL, v6allnodesLmask,
+			ip, IPallbits,
+			v6allnodesL, Rmulti, ifc, tifc);
+
 		/* add solicited-node multicast address */
 		ipv62smcast(bcast, ip);
 		addselfcache(f, ifc, lifc, bcast, Rmulti);
-
-		sendnbrdisc = 1;
 	}
 
-	/* register the address on this network for address resolution */
-	if(isv4(ip) && ifc->m->areg != nil)
-		(*ifc->m->areg)(ifc, ip);
-
-out:
+done:
 	wunlock(ifc);
-	if(tentative && sendnbrdisc)
-		icmpns(f, 0, SRC_UNSPEC, ip, TARG_MULTI, ifc->mac);
+	poperror();
+
+	rlock(ifc);
+	ipifcregisteraddr(f, ifc, lifc, ip);
+	runlock(ifc);
+
 	return nil;
 }
 
 /*
  *  remove a logical interface from an ifc
- *  always called with ifc wlock'd
+ *	called with ifc wlock'd
  */
 static char*
-ipifcremlifc(Ipifc *ifc, Iplifc *lifc)
+ipifcremlifc(Ipifc *ifc, Iplifc **l)
 {
-	Iplifc **l;
-	Fs *f;
+	Iplifc *lifc = *l;
+	Fs *f = ifc->conv->p->f;
 
-	f = ifc->conv->p->f;
-
-	/*
-	 *  find address on this interface and remove from chain.
-	 *  for pt to pt we actually specify the remote address as the
-	 *  addresss to remove.
-	 */
-	for(l = &ifc->lifc; *l != nil && *l != lifc; l = &(*l)->next)
-		;
-	if(*l == nil)
+	if(lifc == nil)
 		return "address not on this interface";
 	*l = lifc->next;
 
 	/* disassociate any addresses */
-	while(lifc->link)
+	while(lifc->link != nil)
 		remselfcache(f, ifc, lifc, lifc->link->self->a);
 
 	/* remove the route for this logical interface */
-	if(isv4(lifc->local))
-		v4delroute(f, lifc->remote+IPv4off, lifc->mask+IPv4off, 1);
-	else {
-		v6delroute(f, lifc->remote, lifc->mask, 1);
+	if(lifc->onlink){
+		remroute(f, lifc->remote, lifc->mask,
+			lifc->local, IPallbits,
+			lifc->remote, lifc->type, ifc, tifc);
+		if(v6addrtype(lifc->local) != linklocalv6)
+			remroute(f, lifc->remote, lifc->mask,
+				lifc->local, IPnoaddr,
+				lifc->remote, lifc->type, ifc, tifc);
+	}
+
+	/* unregister proxy */
+	if(lifc->type & Rptpt){
+		if(lifc->type & Rproxy)
+			ipifcregisterproxy(f, ifc, lifc->remote, 0);
+		goto done;
+	}
+
+	/* remove route for all nodes multicast */
+	if((lifc->type & Rv4) == 0){
 		if(ipcmp(lifc->local, v6loopback) == 0)
-			/* remove route for all node multicast */
-			v6delroute(f, v6allnodesN, v6allnodesNmask, 1);
-		else if(memcmp(lifc->local, v6linklocal, v6llpreflen) == 0)
-			/* remove route for all link multicast */
-			v6delroute(f, v6allnodesL, v6allnodesLmask, 1);
+			remroute(f, v6allnodesN, v6allnodesNmask,
+				lifc->local, IPallbits,
+				v6allnodesN, Rmulti, ifc, tifc);
+
+		remroute(f, v6allnodesL, v6allnodesLmask,
+			lifc->local, IPallbits,
+			v6allnodesL, Rmulti, ifc, tifc);
 	}
 
+done:
 	free(lifc);
 	return nil;
-
 }
 
 /*
  *  remove an address from an interface.
- *  called with c locked
  */
 char*
 ipifcrem(Ipifc *ifc, char **argv, int argc)
 {
-	uchar ip[IPaddrlen];
-	uchar mask[IPaddrlen];
-	uchar rem[IPaddrlen];
-	Iplifc *lifc;
-	char *rv;
+	uchar ip[IPaddrlen], mask[IPaddrlen], rem[IPaddrlen];
+	Iplifc *lifc, **l;
+	char *err;
 
 	if(argc < 3)
 		return Ebadarg;
-
-	parseip(ip, argv[1]);
-	parseipmask(mask, argv[2]);
+	if(parseipandmask(ip, mask, argv[1], argv[2]) == -1)
+		return Ebadip;
 	if(argc < 4)
 		maskip(ip, mask, rem);
-	else
-		parseip(rem, argv[3]);
+	else if(parseip(rem, argv[3]) == -1)
+		return Ebadip;
 
-	wlock(ifc);
-
 	/*
 	 *  find address on this interface and remove from chain.
 	 *  for pt to pt we actually specify the remote address as the
 	 *  addresss to remove.
 	 */
+	wlock(ifc);
+	l = &ifc->lifc;
 	for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next) {
-		if (memcmp(ip, lifc->local, IPaddrlen) == 0
-		&& memcmp(mask, lifc->mask, IPaddrlen) == 0
-		&& memcmp(rem, lifc->remote, IPaddrlen) == 0)
+		if(ipcmp(ip, lifc->local) == 0
+		&& ipcmp(mask, lifc->mask) == 0
+		&& ipcmp(rem, lifc->remote) == 0)
 			break;
+		l = &lifc->next;
 	}
-
-	rv = ipifcremlifc(ifc, lifc);
+	err = ipifcremlifc(ifc, l);
 	wunlock(ifc);
-	return rv;
+	return err;
 }
 
 /*
- * distribute routes to active interfaces like the
- * TRIP linecards
- */
-void
-ipifcaddroute(Fs *f, int vers, uchar *addr, uchar *mask, uchar *gate, int type)
-{
-	Medium *m;
-	Conv **cp, **e;
-	Ipifc *ifc;
-
-	e = &f->ipifc->conv[f->ipifc->nc];
-	for(cp = f->ipifc->conv; cp < e; cp++){
-		if(*cp != nil) {
-			ifc = (Ipifc*)(*cp)->ptcl;
-			m = ifc->m;
-			if(m == nil)
-				continue;
-			if(m->addroute != nil)
-				m->addroute(ifc, vers, addr, mask, gate, type);
-		}
-	}
-}
-
-void
-ipifcremroute(Fs *f, int vers, uchar *addr, uchar *mask)
-{
-	Medium *m;
-	Conv **cp, **e;
-	Ipifc *ifc;
-
-	e = &f->ipifc->conv[f->ipifc->nc];
-	for(cp = f->ipifc->conv; cp < e; cp++){
-		if(*cp != nil) {
-			ifc = (Ipifc*)(*cp)->ptcl;
-			m = ifc->m;
-			if(m == nil)
-				continue;
-			if(m->remroute != nil)
-				m->remroute(ifc, vers, addr, mask);
-		}
-	}
-}
-
-/*
  *  associate an address with the interface.  This wipes out any previous
  *  addresses.  This is a macro that means, remove all the old interfaces
  *  and add a new one.
@@ -679,170 +740,89 @@
 static char*
 ipifcconnect(Conv* c, char **argv, int argc)
 {
+	Ipifc *ifc = (Ipifc*)c->ptcl;
 	char *err;
-	Ipifc *ifc;
 
-	ifc = (Ipifc*)c->ptcl;
-
-	if(ifc->m == nil)
-		 return "ipifc not yet bound to device";
-
-	if(waserror()){
-		wunlock(ifc);
-		nexterror();
-	}
 	wlock(ifc);
-	while(ifc->lifc){
-		err = ipifcremlifc(ifc, ifc->lifc);
-		if(err)
-			error(err);
-	}
+	while(ifc->lifc != nil)
+		ipifcremlifc(ifc, &ifc->lifc);
 	wunlock(ifc);
-	poperror();
 
 	err = ipifcadd(ifc, argv, argc, 0, nil);
-	if(err)
+	if(err != nil)
 		return err;
 
 	Fsconnected(c, nil);
-
 	return nil;
 }
 
 char*
-ipifcsetpar6(Ipifc *ifc, char **argv, int argc)
+ipifcra6(Ipifc *ifc, char **argv, int argc)
 {
-	int i, argsleft, vmax = ifc->rp.maxraint, vmin = ifc->rp.minraint;
+	int i, argsleft;
+	uchar sendra, recvra;
+	Routerparams rp;
 
-	argsleft = argc - 1;
 	i = 1;
-
-	if(argsleft % 2 != 0)
+	argsleft = argc - 1;
+	if((argsleft % 2) != 0)
 		return Ebadarg;
 
+	sendra = ifc->sendra6;
+	recvra = ifc->recvra6;
+	rp = ifc->rp;
+
 	while (argsleft > 1) {
-		if(strcmp(argv[i],"recvra")==0)
-			ifc->recvra6 = (atoi(argv[i+1]) != 0);
-		else if(strcmp(argv[i],"sendra")==0)
-			ifc->sendra6 = (atoi(argv[i+1]) != 0);
-		else if(strcmp(argv[i],"mflag")==0)
-			ifc->rp.mflag = (atoi(argv[i+1]) != 0);
-		else if(strcmp(argv[i],"oflag")==0)
-			ifc->rp.oflag = (atoi(argv[i+1]) != 0);
-		else if(strcmp(argv[i],"maxraint")==0)
-			ifc->rp.maxraint = atoi(argv[i+1]);
-		else if(strcmp(argv[i],"minraint")==0)
-			ifc->rp.minraint = atoi(argv[i+1]);
-		else if(strcmp(argv[i],"linkmtu")==0)
-			ifc->rp.linkmtu = atoi(argv[i+1]);
-		else if(strcmp(argv[i],"reachtime")==0)
-			ifc->rp.reachtime = atoi(argv[i+1]);
-		else if(strcmp(argv[i],"rxmitra")==0)
-			ifc->rp.rxmitra = atoi(argv[i+1]);
-		else if(strcmp(argv[i],"ttl")==0)
-			ifc->rp.ttl = atoi(argv[i+1]);
-		else if(strcmp(argv[i],"routerlt")==0)
-			ifc->rp.routerlt = atoi(argv[i+1]);
+		if(strcmp(argv[i], "recvra") == 0)
+			recvra = atoi(argv[i+1]) != 0;
+		else if(strcmp(argv[i], "sendra") == 0)
+			sendra = atoi(argv[i+1]) != 0;
+		else if(strcmp(argv[i], "mflag") == 0)
+			rp.mflag = atoi(argv[i+1]) != 0;
+		else if(strcmp(argv[i], "oflag") == 0)
+			rp.oflag = atoi(argv[i+1]) != 0;
+		else if(strcmp(argv[i], "maxraint") == 0)
+			rp.maxraint = atoi(argv[i+1]);
+		else if(strcmp(argv[i], "minraint") == 0)
+			rp.minraint = atoi(argv[i+1]);
+		else if(strcmp(argv[i], "linkmtu") == 0)
+			rp.linkmtu = atoi(argv[i+1]);
+		else if(strcmp(argv[i], "reachtime") == 0)
+			rp.reachtime = atoi(argv[i+1]);
+		else if(strcmp(argv[i], "rxmitra") == 0)
+			rp.rxmitra = atoi(argv[i+1]);
+		else if(strcmp(argv[i], "ttl") == 0)
+			rp.ttl = atoi(argv[i+1]);
+		else if(strcmp(argv[i], "routerlt") == 0)
+			rp.routerlt = atoi(argv[i+1]);
 		else
-			return Ebadarg;	
+			return Ebadarg;
 
 		argsleft -= 2;
 		i += 2;
 	}
 
-	// consistency check
-	if(ifc->rp.maxraint < ifc->rp.minraint) {
-		ifc->rp.maxraint = vmax;
-		ifc->rp.minraint = vmin;
+	/* consistency check */
+	if(rp.maxraint < rp.minraint)
 		return Ebadarg;
-	}
 
-	return nil;
-}
+	ifc->rp = rp;
+	ifc->sendra6 = sendra;
+	ifc->recvra6 = recvra;
 
-char*
-ipifcsendra6(Ipifc *ifc, char **argv, int argc)
-{
-	int i;
-	
-	i = 0;
-	if(argc > 1)
-		i = atoi(argv[1]);
-	ifc->sendra6 = (i!=0);
 	return nil;
 }
 
-char*
-ipifcrecvra6(Ipifc *ifc, char **argv, int argc)
-{
-	int i;
-	
-	i = 0;
-	if(argc > 1)
-		i = atoi(argv[1]);
-	ifc->recvra6 = (i!=0);	
-	return nil;
-}
-
-char*
-ipifcnat(Ipifc *ifc, char **argv, int argc)
-{
-	uchar src[IPaddrlen], mask[IPaddrlen], dst[IPaddrlen];
-	Iplifc *lifc;
-
-	if(argc == 2){
-		if((strcmp(argv[1], "show") == 0)){
-			shownataddr();
-			return nil;
-		}else if((strcmp(argv[1], "flush") == 0)){
-			flushnataddr();
-			return nil;
-		}else
-			return Ebadarg;
-	}
-
-	if(argc != 5)
-		return Ebadarg;
-
-	if (parseip(src, argv[2]) == -1)
-		return Ebadip;
-
-	if (parseipmask(mask, argv[3]) == -1)
-		return Ebadip;
-
-	if (parseip(dst, argv[4]) == -1)
-		return Ebadip;
-
-	if((lifc=iplocalonifc(ifc, dst)) == nil)
-		return Ebadip;
-
-	if(strcmp(argv[1], "add") == 0){
-		if(addnataddr(src, mask, lifc) != 0)
-			return Ebadarg;
-	}else if(strcmp(argv[1], "remove") == 0){
-		if(removenataddr(src, mask, lifc) != 0)
-			return Ebadarg;
-	}else
-		return Ebadarg;
-
-	return nil;
-}
-
 /*
  *  non-standard control messages.
- *  called with c locked.
  */
 static char*
-ipifcctl(Conv* c, char**argv, int argc)
+ipifcctl(Conv* c, char **argv, int argc)
 {
-	Ipifc *ifc;
-	int i;
+	Ipifc *ifc = (Ipifc*)c->ptcl;
 
-	ifc = (Ipifc*)c->ptcl;
 	if(strcmp(argv[0], "add") == 0)
 		return ipifcadd(ifc, argv, argc, 0, nil);
-	else if(strcmp(argv[0], "bootp") == 0)
-		return bootp(ifc);
 	else if(strcmp(argv[0], "try") == 0)
 		return ipifcadd(ifc, argv, argc, 1, nil);
 	else if(strcmp(argv[0], "remove") == 0)
@@ -849,36 +829,38 @@
 		return ipifcrem(ifc, argv, argc);
 	else if(strcmp(argv[0], "unbind") == 0)
 		return ipifcunbind(ifc);
-	else if(strcmp(argv[0], "joinmulti") == 0)
-		return ipifcjoinmulti(ifc, argv, argc);
-	else if(strcmp(argv[0], "leavemulti") == 0)
-		return ipifcleavemulti(ifc, argv, argc);
 	else if(strcmp(argv[0], "mtu") == 0)
-		return ipifcsetmtu(ifc, argv, argc);
-	else if(strcmp(argv[0], "reassemble") == 0){
-		ifc->reassemble = 1;
+		return ipifcsetmtu(ifc, argc>1? strtoul(argv[1], 0, 0): 0);
+	else if(strcmp(argv[0], "speed") == 0){
+		ipifcsetspeed(ifc, argc>1? atoi(argv[1]): 0);
 		return nil;
 	}
+	else if(strcmp(argv[0], "delay") == 0){
+		ipifcsetdelay(ifc, argc>1? atoi(argv[1]): 0);
+		return nil;
+	}
 	else if(strcmp(argv[0], "iprouting") == 0){
-		i = 1;
-		if(argc > 1)
-			i = atoi(argv[1]);
-		iprouting(c->p->f, i);
+		iprouting(c->p->f, argc>1? atoi(argv[1]): 1);
 		return nil;
 	}
-	else if(strcmp(argv[0], "addpref6") == 0)
-		return ipifcaddpref6(ifc, argv, argc);
-	else if(strcmp(argv[0], "setpar6") == 0)
-		return ipifcsetpar6(ifc, argv, argc);
-	else if(strcmp(argv[0], "sendra6") == 0)
-		return ipifcsendra6(ifc, argv, argc);
-	else if(strcmp(argv[0], "recvra6") == 0)
-		return ipifcrecvra6(ifc, argv, argc);
-	else if(strcmp(argv[0], "nat") == 0)
-		return ipifcnat(ifc, argv, argc);
+	else if(strcmp(argv[0], "reflect") == 0){
+		ifc->reflect = argc>1? atoi(argv[1]): 1;
+		return nil;
+	}
+	else if(strcmp(argv[0], "reassemble") == 0){
+		ifc->reassemble = argc>1? atoi(argv[1]): 1;
+		return nil;
+	}
+	else if(strcmp(argv[0], "add6") == 0)
+		return ipifcadd6(ifc, argv, argc);
+	else if(strcmp(argv[0], "remove6") == 0)
+		return ipifcremove6(ifc, argv, argc);
+	else if(strcmp(argv[0], "ra6") == 0)
+		return ipifcra6(ifc, argv, argc);
 	return "unsupported ctl";
 }
 
+int
 ipifcstats(Proto *ipifc, char *buf, int len)
 {
 	return ipstats(ipifc->f, buf, len);
@@ -907,7 +889,7 @@
 	ipifc->nc = Maxmedia;
 	ipifc->ptclsize = sizeof(Ipifc);
 
-	f->ipifc = ipifc;			/* hack for ipifcremroute, findipifc, ... */
+	f->ipifc = ipifc;	/* hack for ipifcremroute, findipifc, ... */
 	f->self = smalloc(sizeof(Ipselftab));	/* hack for ipforme */
 
 	Fsproto(f, ipifc);
@@ -915,21 +897,25 @@
 
 /*
  *  add to self routing cache
- *	called with c locked
  */
 static void
 addselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a, int type)
 {
-	Ipself *p;
 	Iplink *lp;
+	Ipself *p;
 	int h;
 
+	type |= (lifc->type & Rv4);
 	qlock(f->self);
+	if(waserror()){
+		qunlock(f->self);
+		nexterror();
+	}
 
 	/* see if the address already exists */
 	h = hashipa(a);
-	for(p = f->self->hash[h]; p; p = p->next)
-		if(memcmp(a, p->a, IPaddrlen) == 0)
+	for(p = f->self->hash[h]; p != nil; p = p->next)
+		if(ipcmp(a, p->a) == 0)
 			break;
 
 	/* allocate a local address and add to hash chain */
@@ -946,7 +932,7 @@
 	}
 
 	/* look for a link for this lifc */
-	for(lp = p->link; lp; lp = lp->selflink)
+	for(lp = p->link; lp != nil; lp = lp->selflink)
 		if(lp->lifc == lifc)
 			break;
 
@@ -962,18 +948,19 @@
 		lifc->link = lp;
 
 		/* add to routing table */
-		if(isv4(a))
-			v4addroute(f, tifc, a+IPv4off, IPallbits+IPv4off, a+IPv4off, type);
-		else
-			v6addroute(f, tifc, a, IPallbits, a, type);
+		addroute(f, a, IPallbits,
+			lifc->local, 
+			((type & (Rbcast|Rmulti)) != 0 || v6addrtype(a) == linklocalv6) ?
+				IPallbits : IPnoaddr,
+			a, type, ifc, tifc);
 
 		if((type & Rmulti) && ifc->m->addmulti != nil)
 			(*ifc->m->addmulti)(ifc, a, lifc->local);
-	} else {
+	} else
 		lp->ref++;
-	}
 
 	qunlock(f->self);
+	poperror();
 }
 
 /*
@@ -992,8 +979,8 @@
 	ulong now = NOW;
 
 	l = &freeiplink;
-	for(np = *l; np; np = *l){
-		if(np->expire > now){
+	for(np = *l; np != nil; np = *l){
+		if((long)(now - np->expire) >= 0){
 			*l = np->next;
 			free(np);
 			continue;
@@ -1000,10 +987,11 @@
 		}
 		l = &np->next;
 	}
-	p->expire = now + 5000;		/* give other threads 5 secs to get out */
+	p->expire = now + 5000;	/* give other threads 5 secs to get out */
 	p->next = nil;
 	*l = p;
 }
+
 static void
 ipselffree(Ipself *p)
 {
@@ -1011,8 +999,8 @@
 	ulong now = NOW;
 
 	l = &freeipself;
-	for(np = *l; np; np = *l){
-		if(np->expire > now){
+	for(np = *l; np != nil; np = *l){
+		if((long)(now - np->expire) >= 0){
 			*l = np->next;
 			free(np);
 			continue;
@@ -1019,7 +1007,7 @@
 		}
 		l = &np->next;
 	}
-	p->expire = now + 5000;		/* give other threads 5 secs to get out */
+	p->expire = now + 5000;	/* give other threads 5 secs to get out */
 	p->next = nil;
 	*l = p;
 }
@@ -1027,7 +1015,6 @@
 /*
  *  Decrement reference for this address on this link.
  *  Unlink from selftab if this is the last ref.
- *	called with c locked
  */
 static void
 remselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a)
@@ -1039,7 +1026,7 @@
 
 	/* find the unique selftab entry */
 	l = &f->self->hash[hashipa(a)];
-	for(p = *l; p; p = *l){
+	for(p = *l; p != nil; p = *l){
 		if(ipcmp(p->a, a) == 0)
 			break;
 		l = &p->next;
@@ -1053,7 +1040,7 @@
 	 *  that matches the selftab entry
 	 */
 	l_lifc = &lifc->link;
-	for(link = *l_lifc; link; link = *l_lifc){
+	for(link = *l_lifc; link != nil; link = *l_lifc){
 		if(link->self == p)
 			break;
 		l_lifc = &link->lifclink;
@@ -1067,8 +1054,8 @@
 	 *  the one we just found
 	 */
 	l_self = &p->link;
-	for(link = *l_self; link; link = *l_self){
-		if(link == *(l_lifc))
+	for(link = *l_self; link != nil; link = *l_self){
+		if(link == *l_lifc)
 			break;
 		l_self = &link->selflink;
 	}
@@ -1079,9 +1066,20 @@
 	if(--(link->ref) != 0)
 		goto out;
 
-	if((p->type & Rmulti) && ifc->m->remmulti != nil)
-		(*ifc->m->remmulti)(ifc, a, lifc->local);
+	/* remove from routing table */
+	remroute(f, a, IPallbits,
+		lifc->local, 
+		((p->type & (Rbcast|Rmulti)) != 0 || v6addrtype(a) == linklocalv6) ?
+			IPallbits : IPnoaddr,
+		a, p->type, ifc, tifc);
 
+	if((p->type & Rmulti) && ifc->m->remmulti != nil){
+		if(!waserror()){
+			(*ifc->m->remmulti)(ifc, a, lifc->local);
+			poperror();
+		}
+	}
+
 	/* ref == 0, remove from both chains and free the link */
 	*l_lifc = link->lifclink;
 	*l_self = link->selflink;
@@ -1090,30 +1088,18 @@
 	if(p->link != nil)
 		goto out;
 
-	/* remove from routing table */
-	if(isv4(a))
-		v4delroute(f, a+IPv4off, IPallbits+IPv4off, 1);
-	else
-		v6delroute(f, a, IPallbits, 1);
-	
+	/* if null address, stop accepting everything (clear acceptall) */
+	if(ipcmp(a, v4prefix) == 0 || ipcmp(a, IPnoaddr) == 0)
+		f->self->acceptall = 0;
+
 	/* no more links, remove from hash and free */
 	*l = p->next;
 	ipselffree(p);
 
-	/* if IPnoaddr, forget */
-	if(ipcmp(a, v4prefix) == 0 || ipcmp(a, IPnoaddr) == 0)
-		f->self->acceptall = 0;
-
 out:
 	qunlock(f->self);
 }
 
-static char *stformat = "%-44.44I %2.2d %4.4s\n";
-enum
-{
-	Nstformat= 41,
-};
-
 long
 ipselftabread(Fs *f, char *cp, ulong offset, int n)
 {
@@ -1124,14 +1110,14 @@
 
 	m = 0;
 	off = offset;
-	qlock(f->self);
 	for(i = 0; i < NHASH && m < n; i++){
 		for(p = f->self->hash[i]; p != nil && m < n; p = p->next){
 			nifc = 0;
-			for(link = p->link; link; link = link->selflink)
+			for(link = p->link; link != nil; link = link->selflink)
 				nifc++;
 			routetype(p->type, state);
-			m += snprint(cp + m, n - m, stformat, p->a, nifc, state);
+			m += snprint(cp + m, n - m, "%-44.44I %2.2d %4.4s\n",
+				p->a, nifc, state);
 			if(off > 0){
 				off -= m;
 				m = 0;
@@ -1138,30 +1124,15 @@
 			}
 		}
 	}
-	qunlock(f->self);
 	return m;
 }
 
-int
-iptentative(Fs *f, uchar *addr)
-{
- 	Ipself *p;
-
-	p = f->self->hash[hashipa(addr)];
-	for(; p; p = p->next){
-		if(ipcmp(addr, p->a) == 0) {
-			return p->link->lifc->tentative;
-		}
-	}
-	return 0;
-}
-
 /*
  *  returns
  *	0		- no match
  *	Runi
  *	Rbcast
- *	Rmcast
+ *	Rmulti
  */
 int
 ipforme(Fs *f, uchar *addr)
@@ -1168,11 +1139,9 @@
 {
 	Ipself *p;
 
-	p = f->self->hash[hashipa(addr)];
-	for(; p; p = p->next){
+	for(p = f->self->hash[hashipa(addr)]; p != nil; p = p->next)
 		if(ipcmp(addr, p->a) == 0)
-			return p->type;
-	}
+			return p->type & (Runi|Rbcast|Rmulti);
 
 	/* hack to say accept anything */
 	if(f->self->acceptall)
@@ -1186,254 +1155,237 @@
  *  return nil.
  */
 Ipifc*
-findipifc(Fs *f, uchar *remote, int type)
+findipifc(Fs *f, uchar *local, uchar *remote, int type)
 {
+	uchar gnet[IPaddrlen];
+	int spec, xspec;
 	Ipifc *ifc, *x;
 	Iplifc *lifc;
-	Conv **cp, **e;
-	uchar gnet[IPaddrlen];
-	uchar xmask[IPaddrlen];
+	Conv **cp;
 
-	x = nil; memset(xmask, 0, IPaddrlen);
-
-	/* find most specific match */
-	e = &f->ipifc->conv[f->ipifc->nc];
-	for(cp = f->ipifc->conv; cp < e; cp++){
-		if(*cp == 0)
-			continue;
-
+	x = nil;
+	xspec = 0;
+	for(cp = f->ipifc->conv; *cp != nil; cp++){
 		ifc = (Ipifc*)(*cp)->ptcl;
-
-		for(lifc = ifc->lifc; lifc; lifc = lifc->next){
+		if(!canrlock(ifc))
+			continue;
+		for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next){
+			if(type & Runi){
+				if(ipcmp(remote, lifc->local) == 0){
+				Found:
+					runlock(ifc);
+					return ifc;
+				}
+			} else if(type & (Rbcast|Rmulti)) {
+				if(ipcmp(local, lifc->local) == 0)
+					goto Found;
+			}
 			maskip(remote, lifc->mask, gnet);
 			if(ipcmp(gnet, lifc->net) == 0){
-				if(x == nil || ipcmp(lifc->mask, xmask) > 0){
+				spec = comprefixlen(remote, lifc->local, IPaddrlen);
+				if(spec > xspec){
 					x = ifc;
-					ipmove(xmask, lifc->mask);
+					xspec = spec;
 				}
 			}
 		}
+		runlock(ifc);
 	}
-	if(x != nil)
-		return x;
+	return x;
+}
 
-	/* for now for broadcast and multicast, just use first interface */
-	if(type & (Rbcast|Rmulti)){
-		for(cp = f->ipifc->conv; cp < e; cp++){
-			if(*cp == 0)
-				continue;
-			ifc = (Ipifc*)(*cp)->ptcl;
-			if(ifc->lifc != nil)
-				return ifc;
-		}
+Ipifc*
+findipifcstr(Fs *f, char *s)
+{
+	uchar ip[IPaddrlen];
+	Conv *c;
+	char *p;
+	long x;
+
+	x = strtol(s, &p, 10);
+	if(p > s && *p == '\0'){
+		if(x < 0)
+			return nil;
+		if(x < f->ipifc->nc && (c = f->ipifc->conv[x]) != nil && ipifcinuse(c))
+			return (Ipifc*)c->ptcl;
 	}
-		
+	if(parseip(ip, s) != -1)
+		return findipifc(f, ip, ip, Runi);
 	return nil;
 }
 
-enum {
-	unknownv6,
-	multicastv6,
-	unspecifiedv6,
-	linklocalv6,
-	sitelocalv6,
-	globalv6,
-};
-
-int
-v6addrtype(uchar *addr)
-{
-	if(isv6global(addr))
-		return globalv6;
-	if(islinklocal(addr))
-		return linklocalv6;
-	if(isv6mcast(addr))
-		return multicastv6;
-	if(issitelocal(addr))
-		return sitelocalv6;
-	return unknownv6;
-}
-
-#define v6addrcurr(lifc) (( (lifc)->origint + (lifc)->preflt >= (NOW/10^3) ) || ( (lifc)->preflt == 0xffffffff ))
-
+/*
+ *  find "best" (global > link local > unspecified)
+ *  local address; address must be current.
+ */
 static void
 findprimaryipv6(Fs *f, uchar *local)
 {
-	Conv **cp, **e;
-	Ipifc *ifc;
-	Iplifc *lifc;
+	ulong now = NOW/1000;
 	int atype, atypel;
+	Iplifc *lifc;
+	Ipifc *ifc;
+	Conv **cp;
 
 	ipmove(local, v6Unspecified);
 	atype = unspecifiedv6;
 
-	/* find "best" (global > sitelocal > link local > unspecified)
-	 * local address; address must be current */
-
-	e = &f->ipifc->conv[f->ipifc->nc];
-	for(cp = f->ipifc->conv; cp < e; cp++){
-		if(*cp == 0)
-			continue;
+	for(cp = f->ipifc->conv; *cp != nil; cp++){
 		ifc = (Ipifc*)(*cp)->ptcl;
-		for(lifc = ifc->lifc; lifc; lifc = lifc->next){
+		rlock(ifc);
+		for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next){
 			atypel = v6addrtype(lifc->local);
 			if(atypel > atype)
-			if(v6addrcurr(lifc)) {
+			if(lifc->preflt == ~0UL || lifc->preflt >= now-lifc->origint) {
 				ipmove(local, lifc->local);
 				atype = atypel;
-				if(atype == globalv6)
+				if(atype == globalv6){
+					runlock(ifc);
 					return;
+				}
 			}
 		}
+		runlock(ifc);
 	}
 }
 
 /*
- *  returns first ip address configured
+ *  copies the first configured v4 address into local, or IPnoaddr if there is none
  */
 static void
 findprimaryipv4(Fs *f, uchar *local)
 {
-	Conv **cp, **e;
-	Ipifc *ifc;
 	Iplifc *lifc;
+	Ipifc *ifc;
+	Conv **cp;
 
 	/* find first ifc local address */
-	e = &f->ipifc->conv[f->ipifc->nc];
-	for(cp = f->ipifc->conv; cp < e; cp++){
-		if(*cp == 0)
-			continue;
+	for(cp = f->ipifc->conv; *cp != nil; cp++){
 		ifc = (Ipifc*)(*cp)->ptcl;
-		if((lifc = ifc->lifc) != nil){
-			ipmove(local, lifc->local);
-			return;
+		rlock(ifc);
+		for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next){
+			if((lifc->type & Rv4) != 0){
+				ipmove(local, lifc->local);
+				runlock(ifc);
+				return;
+			}
 		}
+		runlock(ifc);
 	}
+	ipmove(local, IPnoaddr);
 }
 
 /*
- *  find the local address 'closest' to the remote system, copy it to
- *  local and return the ifc for that address
+ * ipv4local, ipv6local:
+ *  return a local address associated with an interface close to remote.
+ *  prefixlen is the number of leading bits in the local address that
+ *  have to match an interface address to be considered. this is used
+ *  by source specific routes to filter on the source address.
+ *  return non-zero on success or zero when no address was found.
+ *
+ *  for ipv4local, all addresses are 4 byte format.
  */
-void
-findlocalip(Fs *f, uchar *local, uchar *remote)
+int
+ipv4local(Ipifc *ifc, uchar *local, int prefixlen, uchar *remote)
 {
-	Ipifc *ifc;
 	Iplifc *lifc;
-	Route *r;
-	uchar gate[IPaddrlen];
-	uchar gnet[IPaddrlen];
-	int version;
-	int atype = unspecifiedv6, atypel = unknownv6;
+	int a, b;
 
-	USED(atype);
-	USED(atypel);
-	qlock(f->ipifc);
-	r = v6lookup(f, remote, nil);
- 	version = (memcmp(remote, v4prefix, IPv4off) == 0) ? V4 : V6;
-	
-	if(r != nil){
-		ifc = r->ifc;
-		if(r->type & Rv4)
-			v4tov6(gate, r->v4.gate);
-		else {
-			ipmove(gate, r->v6.gate);
-			ipmove(local, v6Unspecified);
-		}
+	b = -1;
+	for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next){
+		if((lifc->type & Rv4) == 0 || ipcmp(lifc->local, IPnoaddr) == 0)
+			continue;
 
-		/* find ifc address closest to the gateway to use */
-		switch(version) {
-		case V4:
-			for(lifc = ifc->lifc; lifc; lifc = lifc->next){
-				maskip(gate, lifc->mask, gnet);
-				if(ipcmp(gnet, lifc->net) == 0){
-					ipmove(local, lifc->local);
-					goto out;
-				}
-			}
-			break;
-		case V6:
-			for(lifc = ifc->lifc; lifc; lifc = lifc->next){
-				atypel = v6addrtype(lifc->local);
-				maskip(gate, lifc->mask, gnet);
-				if(ipcmp(gnet, lifc->net) == 0)
-				if(atypel > atype)
-				if(v6addrcurr(lifc)) {
-					ipmove(local, lifc->local);
-					atype = atypel;
-					if(atype == globalv6)
-						break;
-				}
-			}
-			if(atype > unspecifiedv6)
-				goto out;
-			break;
-		default:
-			panic("findlocalip: version %d", version);
+		if(prefixlen && comprefixlen(lifc->local+IPv4off, local, IPv4addrlen) < prefixlen)
+			continue;
+		
+		a = comprefixlen(lifc->local+IPv4off, remote, IPv4addrlen);
+		if(a > b){
+			b = a;
+			memmove(local, lifc->local+IPv4off, IPv4addrlen);
 		}
 	}
-
-	switch(version){
-	case V4:
-		findprimaryipv4(f, local);
-		break;
-	case V6:
-		findprimaryipv6(f, local);
-		break;
-	default:
-		panic("findlocalip2: version %d", version);
-	}
-
-out:
-	qunlock(f->ipifc);
+	return b >= 0;
 }
 
-/*
- *  return first v4 address associated with an interface
- */
 int
-ipv4local(Ipifc *ifc, uchar *addr)
+ipv6local(Ipifc *ifc, uchar *local, int prefixlen, uchar *remote)
 {
+	struct {
+		int	atype;
+		int	deprecated;
+		int	comprefixlen;
+	} a, b;
+	int atype;
+	ulong now;
 	Iplifc *lifc;
 
-	for(lifc = ifc->lifc; lifc; lifc = lifc->next){
-		if(isv4(lifc->local)){
-			memmove(addr, lifc->local+IPv4off, IPv4addrlen);
-			return 1;
-		}
+	if(isv4(remote)){
+		memmove(local, v4prefix, IPv4off);
+		if((prefixlen -= IPv4off*8) < 0)
+			prefixlen = 0;
+		return ipv4local(ifc, local+IPv4off, prefixlen, remote+IPv4off);
 	}
-	return 0;
-}
 
-/*
- *  return first v6 address associated with an interface
- */
-int
-ipv6local(Ipifc *ifc, uchar *addr)
-{
-	Iplifc *lifc;
+	atype = v6addrtype(remote);
+	b.atype = unknownv6;
+	b.deprecated = 1;
+	b.comprefixlen = 0;
 
-	for(lifc = ifc->lifc; lifc; lifc = lifc->next){
-		if(!isv4(lifc->local) && !(lifc->tentative)){
-			ipmove(addr, lifc->local);
-			return 1;
+	now = NOW/1000;
+	for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next){
+		if(lifc->tentative)
+			continue;
+
+		if(prefixlen && comprefixlen(lifc->local, local, IPaddrlen) < prefixlen)
+			continue;
+
+		a.atype = v6addrtype(lifc->local);
+		a.deprecated = lifc->preflt != ~0UL && lifc->preflt < now-lifc->origint;
+		a.comprefixlen = comprefixlen(lifc->local, remote, IPaddrlen);
+
+		/* prefer appropriate scope */
+		if(a.atype != b.atype){
+			if(a.atype > b.atype && b.atype < atype ||
+			   a.atype < b.atype && b.atype > atype)
+				goto Good;
+			continue;
 		}
+		/* prefer non-deprecated addresses */
+		if(a.deprecated != b.deprecated){
+			if(b.deprecated)
+				goto Good;
+			continue;
+		}
+		/* prefer longer common prefix */
+		if(a.comprefixlen != b.comprefixlen){
+			if(a.comprefixlen > b.comprefixlen)
+				goto Good;
+			continue;
+		}
+		continue;
+	Good:
+		b = a;
+		ipmove(local, lifc->local);
 	}
-	return 0;
+
+	return b.atype >= atype;
 }
 
-int
-ipv6anylocal(Ipifc *ifc, uchar *addr)
+/*
+ *  find the local address for a remote destination, falling back to the primary v4/v6 address when v4source()/v6source() returns nil
+ */
+void
+findlocalip(Fs *f, uchar *local, uchar *remote)
 {
-	Iplifc *lifc;
-
-	for(lifc = ifc->lifc; lifc; lifc = lifc->next){
-		if(!isv4(lifc->local)){
-			ipmove(addr, lifc->local);
-			return SRC_UNI;
-		}
+	if(isv4(remote)) {
+		memmove(local, v4prefix, IPv4off);
+		if(v4source(f, remote+IPv4off, local+IPv4off) == nil)
+			findprimaryipv4(f, local);
+	} else {
+		if(v6source(f, remote, local) == nil)
+			findprimaryipv6(f, local);
 	}
-	return SRC_UNSPEC;
 }
 
 /*
@@ -1444,13 +1396,28 @@
 {
 	Iplifc *lifc;
 
-	for(lifc = ifc->lifc; lifc; lifc = lifc->next)
+	for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next)
 		if(ipcmp(ip, lifc->local) == 0)
 			return lifc;
+
 	return nil;
 }
 
+Iplifc*
+ipremoteonifc(Ipifc *ifc, uchar *ip)
+{
+	uchar net[IPaddrlen];
+	Iplifc *lifc;
 
+	for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next){
+		maskip(ip, lifc->mask, net);
+		if(ipcmp(net, lifc->remote) == 0)
+			return lifc;
+	}
+	return nil;
+}
+
+
 /*
  *  See if we're proxying for this address on this interface
  */
@@ -1458,24 +1425,13 @@
 ipproxyifc(Fs *f, Ipifc *ifc, uchar *ip)
 {
 	Route *r;
-	uchar net[IPaddrlen];
-	Iplifc *lifc;
 
 	/* see if this is a direct connected pt to pt address */
-	r = v6lookup(f, ip, nil);
-	if(r == nil)
+	r = v6lookup(f, ip, ip, nil);
+	if(r == nil || (r->type & (Rifc|Rproxy)) != (Rifc|Rproxy))
 		return 0;
-	if((r->type & (Rifc|Rproxy)) != (Rifc|Rproxy))
-		return 0;
 
-	/* see if this is on the right interface */
-	for(lifc = ifc->lifc; lifc; lifc = lifc->next){
-		maskip(ip, lifc->mask, net);
-		if(ipcmp(net, lifc->remote) == 0)
-			return 1;
-	}
-
-	return 0;
+	return ipremoteonifc(ifc, ip) != nil;
 }
 
 /*
@@ -1487,73 +1443,53 @@
 	if(isv4(ip)){
 		if(ip[IPv4off] >= 0xe0 && ip[IPv4off] < 0xf0)
 			return V4;
-	} else {
-		if(ip[0] == 0xff)
-			return V6;
 	}
+	else if(ip[0] == 0xff)
+		return V6;
 	return 0;
 }
 
-int
-ipisbm(uchar *ip)
-{
-	if(isv4(ip)){
-		if(ip[IPv4off] >= 0xe0 && ip[IPv4off] < 0xf0)
-			return V4;
-		if(ipcmp(ip, IPv4bcast) == 0)
-			return V4;
-	} else {
-		if(ip[0] == 0xff)
-			return V6;
-	}
-	return 0;
-}
-
-
 /*
- *  add a multicast address to an interface, called with c locked
+ *  add a multicast address to an interface.
  */
 void
 ipifcaddmulti(Conv *c, uchar *ma, uchar *ia)
 {
-	Ipifc *ifc;
-	Iplifc *lifc;
-	Conv **p;
 	Ipmulti *multi, **l;
+	Iplifc *lifc;
+	Ipifc *ifc;
 	Fs *f;
 
-	f = c->p->f;
-	
-	for(l = &c->multi; *l; l = &(*l)->next)
-		if(ipcmp(ma, (*l)->ma) == 0)
-		if(ipcmp(ia, (*l)->ia) == 0)
+	if(isv4(ma) != isv4(ia))
+		error("incompatible multicast/interface ip address");
+
+	for(l = &c->multi; *l != nil; l = &(*l)->next)
+		if(ipcmp(ma, (*l)->ma) == 0 && ipcmp(ia, (*l)->ia) == 0)
 			return;		/* it's already there */
 
-	multi = *l = smalloc(sizeof(*multi));
-	ipmove(multi->ma, ma);
-	ipmove(multi->ia, ia);
-	multi->next = nil;
-
-	for(p = f->ipifc->conv; *p; p++){
-		if((*p)->inuse == 0)
-			continue;
-		ifc = (Ipifc*)(*p)->ptcl;
+	f = c->p->f;
+	if((ifc = findipifc(f, ia, ma, Rmulti)) != nil){
+		rlock(ifc);
 		if(waserror()){
-			wunlock(ifc);
+			runlock(ifc);
 			nexterror();
 		}
-		wlock(ifc);
-		for(lifc = ifc->lifc; lifc; lifc = lifc->next)
-			if(ipcmp(ia, lifc->local) == 0)
-				addselfcache(f, ifc, lifc, ma, Rmulti);
-		wunlock(ifc);
+		if((lifc = iplocalonifc(ifc, ia)) != nil)
+			addselfcache(f, ifc, lifc, ma, Rmulti);
+		runlock(ifc);
 		poperror();
 	}
+
+	multi = smalloc(sizeof(*multi));
+	ipmove(multi->ma, ma);
+	ipmove(multi->ia, ia);
+	multi->next = nil;
+	*l = multi;
 }
 
 
 /*
- *  remove a multicast address from an interface, called with c locked
+ *  remove a multicast address from an interface.
  */
 void
 ipifcremmulti(Conv *c, uchar *ma, uchar *ia)
@@ -1560,15 +1496,11 @@
 {
 	Ipmulti *multi, **l;
 	Iplifc *lifc;
-	Conv **p;
 	Ipifc *ifc;
 	Fs *f;
 
-	f = c->p->f;
-	
-	for(l = &c->multi; *l; l = &(*l)->next)
-		if(ipcmp(ma, (*l)->ma) == 0)
-		if(ipcmp(ia, (*l)->ia) == 0)
+	for(l = &c->multi; *l != nil; l = &(*l)->next)
+		if(ipcmp(ma, (*l)->ma) == 0 && ipcmp(ia, (*l)->ia) == 0)
 			break;
 
 	multi = *l;
@@ -1576,161 +1508,101 @@
 		return; 	/* we don't have it open */
 
 	*l = multi->next;
+	multi->next = nil;
 
-	for(p = f->ipifc->conv; *p; p++){
-		if((*p)->inuse == 0)
-			continue;
-
-		ifc = (Ipifc*)(*p)->ptcl;
-		if(waserror()){
-			wunlock(ifc);
-			nexterror();
-		}
-		wlock(ifc);
-		for(lifc = ifc->lifc; lifc; lifc = lifc->next)
-			if(ipcmp(ia, lifc->local) == 0)
+	f = c->p->f;
+	if((ifc = findipifc(f, ia, ma, Rmulti)) != nil){
+		rlock(ifc);
+		if(!waserror()){
+			if((lifc = iplocalonifc(ifc, ia)) != nil)
 				remselfcache(f, ifc, lifc, ma);
-		wunlock(ifc);
-		poperror();
+			poperror();
+		}
+		runlock(ifc);
 	}
-
 	free(multi);
 }
 
-/*
- *  make lifc's join and leave multicast groups
- */
-static char*
-ipifcjoinmulti(Ipifc *ifc, char **argv, int argc)
+/* register the address on this network for address resolution; errors are printed, not propagated */
+static void
+ipifcregisteraddr(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *ip)
 {
-	USED(ifc, argv, argc);
-	return nil;
+	if(waserror()){
+		print("ipifcregisteraddr %s %I %I: %s\n", ifc->dev, lifc->local, ip, up->errstr);
+		return;
+	}
+	if(ifc->m != nil && ifc->m->areg != nil)
+		(*ifc->m->areg)(f, ifc, lifc, ip);
+	poperror();
 }
 
-static char*
-ipifcleavemulti(Ipifc *ifc, char **argv, int argc)
-{
-	USED(ifc, argv, argc);
-	return nil;
-}
-
 static void
-ipifcregisterproxy(Fs *f, Ipifc *ifc, uchar *ip)
+ipifcregisterproxy(Fs *f, Ipifc *ifc, uchar *ip, int add)
 {
-	Conv **cp, **e;
-	Ipifc *nifc;
+	uchar a[IPaddrlen];
 	Iplifc *lifc;
-	Medium *m;
-	uchar net[IPaddrlen];
+	Ipifc *nifc;
+	Conv **cp;
 
-	/* register the address on any network that will proxy for us */
-	e = &f->ipifc->conv[f->ipifc->nc];
+	/* register the address on any interface that will proxy for the ip */
+	for(cp = f->ipifc->conv; *cp != nil; cp++){
+		nifc = (Ipifc*)(*cp)->ptcl;
+		if(nifc == ifc || !canrlock(nifc))
+			continue;
 
-	if(!isv4(ip)) { // V6
-		for(cp = f->ipifc->conv; cp < e; cp++){
-			if(*cp == nil)
-				continue;
-			nifc = (Ipifc*)(*cp)->ptcl;
-			if(nifc == ifc)
-				continue;
-	
-			rlock(nifc);
-			m = nifc->m;
-			if(m == nil || m->addmulti == nil) {
-				runlock(nifc);
-				continue;
-			}
-			for(lifc = nifc->lifc; lifc; lifc = lifc->next){
-				maskip(ip, lifc->mask, net);
-				if(ipcmp(net, lifc->remote) == 0) { /* add solicited-node multicast address */
-					ipv62smcast(net, ip);
-					addselfcache(f, nifc, lifc, net, Rmulti);
-					arpenter(f, V6, ip, nifc->mac, 6, 0);
-					//(*m->addmulti)(nifc, net, ip);
-					break;
-				}
-			}
+		if(nifc->m == nil
+		|| (lifc = ipremoteonifc(nifc, ip)) == nil
+		|| (lifc->type & Rptpt) != 0
+		|| waserror()){
 			runlock(nifc);
+			continue;
 		}
-		return;
-	}
-	else { // V4
-		for(cp = f->ipifc->conv; cp < e; cp++){
-			if(*cp == nil)
-				continue;
-			nifc = (Ipifc*)(*cp)->ptcl;
-			if(nifc == ifc)
-				continue;
-	
-			rlock(nifc);
-			m = nifc->m;
-			if(m == nil || m->areg == nil){
-				runlock(nifc);
-				continue;
-			}
-			for(lifc = nifc->lifc; lifc; lifc = lifc->next){
-				maskip(ip, lifc->mask, net);
-				if(ipcmp(net, lifc->remote) == 0){
-					(*m->areg)(nifc, ip);
-					break;
-				}
-			}
-			runlock(nifc);
+		if((lifc->type & Rv4) == 0){
+			/* add solicited-node multicast addr */
+			ipv62smcast(a, ip);
+			if(add)
+				addselfcache(f, nifc, lifc, a, Rmulti);
+			else
+				remselfcache(f, nifc, lifc, a);
 		}
+		if(add)
+			ipifcregisteraddr(f, nifc, lifc, ip);
+		runlock(nifc);
+		poperror();
 	}
 }
 
-
-// added for new v6 mesg types
-static void
-adddefroute6(Fs *f, uchar *gate, int force)
-{
-	Route *r;
-
-	r = v6lookup(f, v6Unspecified, nil);
-	if(r!=nil)
-	if(!(force) && (strcmp(r->tag,"ra")!=0))	// route entries generated
-		return;			// by all other means take
-					// precedence over router annc
-
-	v6delroute(f, v6Unspecified, v6Unspecified, 1);
-	v6addroute(f, "ra", v6Unspecified, v6Unspecified, gate, 0);
-}
-
-enum
-{
-	Ngates = 3,
-};
-
 char*
-ipifcaddpref6(Ipifc *ifc, char**argv, int argc)
+ipifcadd6(Ipifc *ifc, char **argv, int argc)
 {
-	uchar	onlink = 1;
-	uchar	autoflag = 1;
-	long 	validlt = 0xffffffff;
-	long 	preflt = 0xffffffff;
-	long	origint = NOW / 10^3;
-	uchar	prefix[IPaddrlen];
-	int	plen = 64;
-	Iplifc	*lifc;
-	char	addr[40], preflen[6];
-	char	*params[3];
+	int plen = 64;
+	char addr[40], preflen[6];
+	char *params[3];
+	uchar prefix[IPaddrlen];
+	Iplifc lifc;
+	Medium *m;
 
+	lifc.onlink = 1;
+	lifc.autoflag = 1;
+	lifc.validlt = lifc.preflt = ~0UL;
+	lifc.origint = NOW / 1000;
+
 	switch(argc) {
 	case 7:
-		preflt = atoi(argv[6]);
+		lifc.preflt = strtoul(argv[6], 0, 10);
 		/* fall through */
 	case 6:
-		validlt = atoi(argv[5]);
+		lifc.validlt = strtoul(argv[5], 0, 10);
 		/* fall through */
 	case 5:
-		autoflag =  atoi(argv[4]);
+		lifc.autoflag = atoi(argv[4]) != 0;
 		/* fall through */
 	case 4:
-		onlink = atoi(argv[3]);
+		lifc.onlink = atoi(argv[3]) != 0;
 		/* fall through */
 	case 3:
 		plen = atoi(argv[2]);
+		/* fall through */
 	case 2:
 		break;
 	default:
@@ -1737,25 +1609,16 @@
 		return Ebadarg;
 	}
 
-	if((parseip(prefix, argv[1])!=6) ||
-	 	(validlt < preflt) ||
-		(plen < 0) || (plen > 64) ||
-		(islinklocal(prefix))
-	)
+	if (parseip(prefix, argv[1]) != 6 || lifc.validlt < lifc.preflt || plen < 0 ||
+	    plen > 64 || islinklocal(prefix))
 		return Ebadarg;
 
-	lifc = smalloc(sizeof(Iplifc));
-	lifc->onlink = (onlink!=0);
-	lifc->autoflag = (autoflag!=0);
-	lifc->validlt = validlt;
-	lifc->preflt = preflt;
-	lifc->origint = origint;
+	/* issue "add" ctl msg for v6 link-local addr and prefix len */
+	m = ifc->m;
+	if(m == nil || m->pref2addr == nil)
+		return Eunbound;
+	(*m->pref2addr)(prefix, ifc->mac);	/* mac → v6 link-local addr */
 
-	if(ifc->m->pref2addr!=nil)
-		ifc->m->pref2addr(prefix, ifc->mac);
-	else
-		return Ebadarg;
-	
 	sprint(addr, "%I", prefix);
 	sprint(preflen, "/%d", plen);
 	params[0] = "add";
@@ -1762,6 +1625,28 @@
 	params[1] = addr;
 	params[2] = preflen;
 
-	return ipifcadd(ifc, params, 3, 0, lifc);
+	return ipifcadd(ifc, params, 3, 0, &lifc);
 }
 
+char*
+ipifcremove6(Ipifc *ifc, char**, int argc)
+{
+	Iplifc *lifc, **l;
+	ulong now;
+
+	if(argc != 1)
+		return Ebadarg;
+
+	wlock(ifc);
+	now = NOW/1000;
+	for(l = &ifc->lifc; (lifc = *l) != nil;) {
+		if((lifc->type & Rv4) == 0)
+		if(lifc->validlt != ~0UL && lifc->validlt < now-lifc->origint)
+			if(ipifcremlifc(ifc, l) == nil)
+				continue;
+		l = &lifc->next;
+	}
+	wunlock(ifc);
+
+	return nil;
+}
--- a/os/ip/ipmux.c
+++ b/os/ip/ipmux.c
@@ -1,3 +1,6 @@
+/*
+ * IP packet filter
+ */
 #include "u.h"
 #include "../port/lib.h"
 #include "mem.h"
@@ -6,30 +9,14 @@
 #include "../port/error.h"
 
 #include "ip.h"
-#define DPRINT if(0)print
+#include "ipv6.h"
 
 typedef struct Ipmuxrock  Ipmuxrock;
 typedef struct Ipmux      Ipmux;
-typedef struct Ip6hdr     Ip6hdr;
 
 enum
 {
-	IPHDR		= 20,		/* sizeof(Ip4hdr) */
-};
-
-struct Ip6hdr
-{
-	uchar vcf[4];		/* version, class label, and flow label */ 
-	uchar ploadlen[2];	/* payload length */
-	uchar proto;		/* next header, i.e. proto */
-	uchar ttl;		/* hop limit, i.e. ttl */
-	uchar src[16];		/* IP source */
-	uchar dst[16];		/* IP destination */
-};
-
-
-enum
-{
+	Tver,
 	Tproto,
 	Tdata,
 	Tiph,
@@ -36,28 +23,8 @@
 	Tdst,
 	Tsrc,
 	Tifc,
-
-	Cother = 0,
-	Cbyte,		/* single byte */
-	Cmbyte,		/* single byte with mask */
-	Cshort,		/* single short */
-	Cmshort,	/* single short with mask */
-	Clong,		/* single long */
-	Cmlong,		/* single long with mask */
-	Cifc,
-	Cmifc,
 };
 
-char *ftname[] = 
-{
-[Tproto]	"proto",
-[Tdata]		"data",
-[Tiph]	 	"iph",
-[Tdst]		"dst",
-[Tsrc]		"src",
-[Tifc]		"ifc",
-};
-
 /*
  *  a node in the decision tree
  */
@@ -66,16 +33,12 @@
 	Ipmux	*yes;
 	Ipmux	*no;
 	uchar	type;		/* type of field(Txxxx) */
-	uchar	ctype;		/* tupe of comparison(Cxxxx) */
 	uchar	len;		/* length in bytes of item to compare */
 	uchar	n;		/* number of items val points to */
-	short	off;		/* offset of comparison */
-	short	eoff;		/* end offset of comparison */
-	uchar	skiphdr;	/* should offset start after ipheader */
+	int	off;		/* offset of comparison */
 	uchar	*val;
 	uchar	*mask;
 	uchar	*e;		/* val+n*len*/
-
 	int	ref;		/* so we can garbage collect */
 	Conv	*conv;
 };
@@ -90,6 +53,7 @@
 
 static int	ipmuxsprint(Ipmux*, int, char*, int);
 static void	ipmuxkick(void *x);
+static void	ipmuxfree(Ipmux *f);
 
 static char*
 skipwhite(char *p)
@@ -122,27 +86,33 @@
 	Ipmux *f;
 
 	p = skipwhite(p);
-	if(strncmp(p, "dst", 3) == 0){
+	if(strncmp(p, "ver", 3) == 0){
+		type = Tver;
+		off = 0;
+		len = 1;
+		p += 3;
+	}
+	else if(strncmp(p, "dst", 3) == 0){
 		type = Tdst;
-		off = offsetof(Ip4hdr, dst[0]);
-		len = IPv4addrlen;
+		off = offsetof(Ip6hdr, dst[0]);
+		len = IPaddrlen;
 		p += 3;
 	}
 	else if(strncmp(p, "src", 3) == 0){
 		type = Tsrc;
-		off = offsetof(Ip4hdr, src[0]);
-		len = IPv4addrlen;
+		off = offsetof(Ip6hdr, src[0]);
+		len = IPaddrlen;
 		p += 3;
 	}
 	else if(strncmp(p, "ifc", 3) == 0){
 		type = Tifc;
-		off = -IPv4addrlen;
-		len = IPv4addrlen;
+		off = -IPaddrlen;
+		len = IPaddrlen;
 		p += 3;
 	}
 	else if(strncmp(p, "proto", 5) == 0){
 		type = Tproto;
-		off = offsetof(Ip4hdr, proto);
+		off = offsetof(Ip6hdr, proto);
 		len = 1;
 		p += 5;
 	}
@@ -160,7 +130,7 @@
 			return nil;
 		p++;
 		off = strtoul(p, &p, 0);
-		if(off < 0 || off > (64-IPHDR))
+		if(off < 0)
 			return nil;
 		p = skipwhite(p);
 		if(*p != ':')
@@ -189,11 +159,6 @@
 	f->mask = nil;
 	f->n = 1;
 	f->ref = 1;
-	if(type == Tdata)
-		f->skiphdr = 1;
-	else
-		f->skiphdr = 0;
-
 	return f;	
 }
 
@@ -229,7 +194,7 @@
 static Ipmux*
 parsemux(char *p)
 {
-	int n, nomask;
+	int n;
 	Ipmux *f;
 	char *val;
 	char *mask;
@@ -247,7 +212,7 @@
 		goto parseerror;
 
 	/* parse mask */
-	mask = follows(val, '&');
+	mask = follows(p, '&');
 	if(mask != nil){
 		switch(f->type){
 		case Tsrc:
@@ -254,7 +219,7 @@
 		case Tdst:
 		case Tifc:
 			f->mask = smalloc(f->len);
-			v4parseip(f->mask, mask);
+			parseipmask(f->mask, mask, 0);
 			break;
 		case Tdata:
 		case Tiph:
@@ -264,15 +229,13 @@
 		default:
 			goto parseerror;
 		}
-		nomask = 0;
-	} else {
-		nomask = 1;
+	} else if(f->type == Tver){
 		f->mask = smalloc(f->len);
-		memset(f->mask, 0xff, f->len);
+		f->mask[0] = 0xF0;
 	}
 
 	/* parse vals */
-	f->n = getfields(val, vals, sizeof(vals)/sizeof(char*), 1, "|");
+	f->n = getfields(val, vals, nelem(vals), 1, "|");
 	if(f->n == 0)
 		goto parseerror;
 	f->val = smalloc(f->n*f->len);
@@ -279,10 +242,21 @@
 	v = f->val;
 	for(n = 0; n < f->n; n++){
 		switch(f->type){
+		case Tver:
+			if(f->n != 1)
+				goto parseerror;
+			if(strcmp(vals[n], "6") == 0)
+				*v = IP_VER6;
+			else if(strcmp(vals[n], "4") == 0)
+				*v = IP_VER4;
+			else
+				goto parseerror;
+			break;
 		case Tsrc:
 		case Tdst:
 		case Tifc:
-			v4parseip(v, vals[n]);
+			if(parseip(v, vals[n]) == -1)
+				goto parseerror;
 			break;
 		case Tproto:
 		case Tdata:
@@ -292,34 +266,11 @@
 		}
 		v += f->len;
 	}
-
-	f->eoff = f->off + f->len;
 	f->e = f->val + f->n*f->len;
-	f->ctype = Cother;
-	if(f->n == 1){
-		switch(f->len){
-		case 1:
-			f->ctype = nomask ? Cbyte : Cmbyte;
-			break;
-		case 2:
-			f->ctype = nomask ? Cshort : Cmshort;
-			break;
-		case 4:
-			if(f->type == Tifc)
-				f->ctype = nomask ? Cifc : Cmifc;
-			else
-				f->ctype = nomask ? Clong : Cmlong;
-			break;
-		}
-	}
 	return f;
 
 parseerror:
-	if(f->mask)
-		free(f->mask);
-	if(f->val)
-		free(f->val);
-	free(f);
+	ipmuxfree(f);
 	return nil;
 }
 
@@ -342,8 +293,7 @@
 		return n;
 
 	/* compare offsets, call earlier ones more specific */
-	n = (a->off+((int)a->skiphdr)*offsetof(Ip4hdr, data[0])) - 
-		(b->off+((int)b->skiphdr)*offsetof(Ip4hdr, data[0]));
+	n = a->off - b->off;
 	if(n != 0)
 		return n;
 
@@ -413,6 +363,10 @@
 	*nf = *f;
 	nf->no = ipmuxcopy(f->no);
 	nf->yes = ipmuxcopy(f->yes);
+	if(f->mask != nil){
+		nf->mask = smalloc(f->len);
+		memmove(nf->mask, f->mask, f->len);
+	}
 	nf->val = smalloc(f->n*f->len);
 	nf->e = nf->val + f->len*f->n;
 	memmove(nf->val, f->val, f->n*f->len);
@@ -422,8 +376,10 @@
 static void
 ipmuxfree(Ipmux *f)
 {
-	if(f->val != nil)
-		free(f->val);
+	if(f == nil)
+		return;
+	free(f->val);
+	free(f->mask);
 	free(f);
 }
 
@@ -432,10 +388,8 @@
 {
 	if(f == nil)
 		return;
-	if(f->no != nil)
-		ipmuxfree(f->no);
-	if(f->yes != nil)
-		ipmuxfree(f->yes);
+	ipmuxfree(f->no);
+	ipmuxfree(f->yes);
 	ipmuxfree(f);
 }
 
@@ -510,6 +464,8 @@
 		return ipmuxremove(&ft->no, f);
 	}
 
+	ipmuxremove(&ft->no, f->no);
+
 	/* we found a match */
 	if(--(ft->ref) == 0){
 		/*
@@ -531,8 +487,55 @@
 }
 
 /*
+ * convert to ipv4 filter
+ */
+static Ipmux*
+ipmuxconv4(Ipmux *f)
+{
+	int i, n;
+
+	if(f == nil)
+		return nil;
+
+	switch(f->type){
+	case Tproto:
+		f->off = offsetof(Ip4hdr, proto);
+		break;
+	case Tdst:
+		f->off = offsetof(Ip4hdr, dst[0]);
+		if(0){
+	case Tsrc:
+		f->off = offsetof(Ip4hdr, src[0]);
+		}
+		if(f->len != IPaddrlen)
+			break;
+		n = 0;
+		for(i = 0; i < f->n; i++){
+			if(isv4(f->val + i*IPaddrlen)){
+				memmove(f->val + n*IPv4addrlen, f->val + i*IPaddrlen + IPv4off, IPv4addrlen);
+				n++;
+			}
+		}
+		if(n == 0){
+			ipmuxtreefree(f);
+			return nil;
+		}
+		f->n = n;
+		f->len = IPv4addrlen;
+		if(f->mask != nil)
+			memmove(f->mask, f->mask+IPv4off, IPv4addrlen);
+	}
+	f->e = f->val + f->n*f->len;
+
+	f->yes = ipmuxconv4(f->yes);
+	f->no = ipmuxconv4(f->no);
+
+	return f;
+}
+
+/*
  *  connection request is a semi separated list of filters
- *  e.g. proto=17;dat[0:4]=11aa22bb;ifc=135.104.9.2&255.255.255.0
+ *  e.g. ver=4;proto=17;data[0:4]=11aa22bb;ifc=135.104.9.2&255.255.255.0
  *
  *  there's no protection against overlapping specs.
  */
@@ -568,6 +571,18 @@
 		return Ebadarg;
 	mux->conv = c;
 
+	if(chain->type != Tver) {
+		char ver6[] = "ver=6";
+		mux = parsemux(ver6);
+		mux->yes = chain;
+		mux->no = ipmuxcopy(chain);
+		chain = mux;
+	}
+	if(*chain->val == IP_VER4)
+		chain->yes = ipmuxconv4(chain->yes);
+	else
+		chain->no = ipmuxconv4(chain->no);
+
 	/* save a copy of the chain so we can later remove it */
 	mux = ipmuxcopy(chain);
 	r = (Ipmuxrock*)(c->ptcl);
@@ -642,95 +657,84 @@
 	Block *bp;
 
 	bp = qget(c->wq);
-	if(bp == nil)
-		return;
-	else {
+	if(bp != nil) {
 		Ip4hdr *ih4 = (Ip4hdr*)(bp->rp);
-		if((ih4->vihl)&0xF0 != 0x60)
+
+		if((ih4->vihl & 0xF0) != IP_VER6)
 			ipoput4(c->p->f, bp, 0, ih4->ttl, ih4->tos, nil);
-		else {
-			Ip6hdr *ih6 = (Ip6hdr*)(bp->rp);
-			ipoput6(c->p->f, bp, 0, ih6->ttl, 0, nil);
-		}
+		else
+			ipoput6(c->p->f, bp, 0, ((Ip6hdr*)ih4)->ttl, 0, nil);
 	}
 }
 
+static int
+maskmemcmp(uchar *m, uchar *v, uchar *c, int n)
+{
+	int i;
+
+	if(m == nil)
+		return memcmp(v, c, n) != 0;
+
+	for(i = 0; i < n; i++)
+		if((v[i] & m[i]) != c[i])
+			return 1;
+	return 0;
+}
+
 static void
 ipmuxiput(Proto *p, Ipifc *ifc, Block *bp)
 {
-	int len, hl;
 	Fs *f = p->f;
-	uchar *m, *h, *v, *e, *ve, *hp;
 	Conv *c;
+	Iplifc *lifc;
 	Ipmux *mux;
-	Ip4hdr *ip;
+	uchar *v;
+	Ip4hdr *ip4;
 	Ip6hdr *ip6;
+	int off, hl;
 
-	ip = (Ip4hdr*)bp->rp;
-	hl = (ip->vihl&0x0F)<<2;
+	ip4 = (Ip4hdr*)bp->rp;
+	if((ip4->vihl & 0xF0) == IP_VER4) {
+		hl = (ip4->vihl&0x0F)<<2;
+		ip6 = nil;
+	} else {
+		hl = IP6HDR;
+		ip6 = (Ip6hdr*)ip4;
+	}
 
 	if(p->priv == nil)
 		goto nomatch;
 
-	h = bp->rp;
-	len = BLEN(bp);
+	c = nil;
+	lifc = nil;
 
-	/* run the v4 filter */
+	/* run the filter */
 	rlock(f);
-	c = nil;
 	mux = f->ipmux->priv;
 	while(mux != nil){
-		if(mux->eoff > len){
-			mux = mux->no;
-			continue;
-		}
-		hp = h + mux->off + ((int)mux->skiphdr)*hl;
-		switch(mux->ctype){
-		case Cbyte:
-			if(*mux->val == *hp)
-				goto yes;
+		switch(mux->type){
+		case Tifc:
+			if(mux->len != IPaddrlen)
+				goto no;
+			for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next)
+				for(v = mux->val; v < mux->e; v += IPaddrlen)
+					if(maskmemcmp(mux->mask, lifc->local, v, IPaddrlen) == 0)
+						goto yes;
+			goto no;
+		case Tdata:
+			off = hl;
 			break;
-		case Cmbyte:
-			if((*hp & *mux->mask) == *mux->val)
-				goto yes;
-			break;
-		case Cshort:
-			if(*((ushort*)mux->val) == *(ushort*)hp)
-				goto yes;
-			break;
-		case Cmshort:
-			if((*(ushort*)hp & (*((ushort*)mux->mask))) == *((ushort*)mux->val))
-				goto yes;
-			break;
-		case Clong:
-			if(*((ulong*)mux->val) == *(ulong*)hp)
-				goto yes;
-			break;
-		case Cmlong:
-			if((*(ulong*)hp & (*((ulong*)mux->mask))) == *((ulong*)mux->val))
-				goto yes;
-			break;
-		case Cifc:
-			if(*((ulong*)mux->val) == *(ulong*)(ifc->lifc->local + IPv4off))
-				goto yes;
-			break;
-		case Cmifc:
-			if((*(ulong*)(ifc->lifc->local + IPv4off) & (*((ulong*)mux->mask))) == *((ulong*)mux->val))
-				goto yes;
-			break;
 		default:
-			v = mux->val;
-			for(e = mux->e; v < e; v = ve){
-				m = mux->mask;
-				hp = h + mux->off;
-				for(ve = v + mux->len; v < ve; v++){
-					if((*hp++ & *m++) != *v)
-						break;
-				}
-				if(v == ve)
-					goto yes;
-			}
+			off = 0;
+			break;
 		}
+		off += mux->off;
+		if(off < 0 || off + mux->len > BLEN(bp))
+			goto no;
+		for(v = mux->val; v < mux->e; v += mux->len)
+			if(maskmemcmp(mux->mask, bp->rp + off, v, mux->len) == 0)
+				goto yes;
+no:
 		mux = mux->no;
 		continue;
 yes:
@@ -743,28 +747,24 @@
 	if(c != nil){
 		/* tack on interface address */
 		bp = padblock(bp, IPaddrlen);
-		ipmove(bp->rp, ifc->lifc->local);
-		bp = concatblock(bp);
-		if(bp != nil)
-			if(qpass(c->rq, bp) < 0)
-				print("Q");
+		if(lifc == nil)
+			lifc = ifc->lifc;
+		ipmove(bp->rp, lifc != nil ? lifc->local : IPnoaddr);
+		qpass(c->rq, concatblock(bp));
 		return;
 	}
 
 nomatch:
 	/* doesn't match any filter, hand it to the specific protocol handler */
-	ip = (Ip4hdr*)bp->rp;
-	if((ip->vihl&0xF0)==0x40) {
-		p = f->t2p[ip->proto];
-	} else {
-		ip6 = (Ip6hdr*)bp->rp;
+	if(ip6 != nil)
 		p = f->t2p[ip6->proto];
-	}
-	if(p && p->rcv)
-		(*p->rcv)(p, ifc, bp);
 	else
-		freeblist(bp);
-	return;
+		p = f->t2p[ip4->proto];
+	if(p != nil && p->rcv != nil){
+		(*p->rcv)(p, ifc, bp);
+		return;
+	}
+	freeblist(bp);
 }
 
 static int
@@ -780,11 +780,14 @@
 		n += snprint(buf+n, len-n, "\n");
 		return n;
 	}
-	n += snprint(buf+n, len-n, "h[%d:%d]&", 
-               mux->off+((int)mux->skiphdr)*((int)offsetof(Ip4hdr, data[0])), 
-               mux->off+(((int)mux->skiphdr)*((int)offsetof(Ip4hdr, data[0])))+mux->len-1);
-	for(i = 0; i < mux->len; i++)
-		n += snprint(buf+n, len - n, "%2.2ux", mux->mask[i]);
+	n += snprint(buf+n, len-n, "%s[%d:%d]", 
+		mux->type == Tdata ? "data": "iph",
+		mux->off, mux->off+mux->len-1);
+	if(mux->mask != nil){
+		n += snprint(buf+n, len-n, "&");
+		for(i = 0; i < mux->len; i++)
+			n += snprint(buf+n, len - n, "%2.2ux", mux->mask[i]);
+	}
 	n += snprint(buf+n, len-n, "=");
 	v = mux->val;
 	for(j = 0; j < mux->n; j++){
--- a/os/ip/iproute.c
+++ b/os/ip/iproute.c
@@ -12,10 +12,10 @@
 static void	calcd(Route*);
 
 /* these are used for all instances of IP */
-Route*	v4freelist;
-Route*	v6freelist;
-RWlock	routelock;
-ulong	v4routegeneration, v6routegeneration;
+static Route*	v4freelist;
+static Route*	v6freelist;
+static RWlock	routelock;
+static ulong	v4routegeneration, v6routegeneration;
 
 static void
 freeroute(Route *r)
@@ -22,6 +22,7 @@
 {
 	Route **l;
 
+	r->ref = 0;
 	r->left = nil;
 	r->right = nil;
 	if(r->type & Rv4)
@@ -35,9 +36,8 @@
 static Route*
 allocroute(int type)
 {
-	Route *r;
+	Route *r, **l;
 	int n;
-	Route **l;
 
 	if(type & Rv4){
 		n = sizeof(RouteTree) + sizeof(V4route);
@@ -72,9 +72,9 @@
 		return;
 
 	l = allocroute(r->type);
+	l->left = r;
 	l->mid = *q;
 	*q = l;
-	l->left = r;
 }
 
 /*
@@ -99,11 +99,11 @@
  */
 enum
 {
-	Rpreceeds,
-	Rfollows,
-	Requals,
-	Rcontains,
-	Rcontained,
+	Rpreceeds,	/* a left of b */
+	Rfollows,	/* a right of b */
+	Requals,	/* a equals b */
+	Rcontains,	/* a contains b */
+	Roverlaps,	/* a overlaps b */
 };
 
 static int
@@ -112,44 +112,88 @@
 	if(a->type & Rv4){
 		if(a->v4.endaddress < b->v4.address)
 			return Rpreceeds;
-
 		if(a->v4.address > b->v4.endaddress)
 			return Rfollows;
-
 		if(a->v4.address <= b->v4.address
 		&& a->v4.endaddress >= b->v4.endaddress){
 			if(a->v4.address == b->v4.address
-			&& a->v4.endaddress == b->v4.endaddress)
-				return Requals;
+			&& a->v4.endaddress == b->v4.endaddress){
+				if(a->v4.source <= b->v4.source
+				&& a->v4.endsource >= b->v4.endsource){
+					if(a->v4.source == b->v4.source
+					&& a->v4.endsource == b->v4.endsource)
+						return Requals;
+					return Rcontains;
+				}
+				return Roverlaps;
+			}
 			return Rcontains;
 		}
-		return Rcontained;
+		return Roverlaps;
 	}
 
 	if(lcmp(a->v6.endaddress, b->v6.address) < 0)
 		return Rpreceeds;
-
 	if(lcmp(a->v6.address, b->v6.endaddress) > 0)
 		return Rfollows;
-
 	if(lcmp(a->v6.address, b->v6.address) <= 0
 	&& lcmp(a->v6.endaddress, b->v6.endaddress) >= 0){
 		if(lcmp(a->v6.address, b->v6.address) == 0
-		&& lcmp(a->v6.endaddress, b->v6.endaddress) == 0)
-				return Requals;
+		&& lcmp(a->v6.endaddress, b->v6.endaddress) == 0){
+			if(lcmp(a->v6.source, b->v6.source) <= 0
+			&& lcmp(a->v6.endsource, b->v6.endsource) >= 0){
+				if(lcmp(a->v6.source, b->v6.source) == 0
+				&& lcmp(a->v6.endsource, b->v6.endsource) == 0)
+					return Requals;
+				return Rcontains;
+			}
+			return Roverlaps;
+		}
 		return Rcontains;
 	}
+	return Roverlaps;
+}
 
-	return Rcontained;
+/* return 1 if a matches b, otherwise 0 */
+static int
+matchroute(Route *a, Route *b)
+{
+	if(a == b)
+		return 1;
+
+	if((a->type^b->type) & (Rifc|Runi|Rmulti|Rbcast))
+		return 0;
+
+	if(a->type & Rv4){
+		if(memcmp(a->v4.gate, IPnoaddr+IPv4off, IPv4addrlen) != 0
+		&& memcmp(a->v4.gate, b->v4.gate, IPv4addrlen) != 0)
+			return 0;
+	} else {
+		if(ipcmp(a->v6.gate, IPnoaddr) != 0
+		&& ipcmp(a->v6.gate, b->v6.gate) != 0)
+			return 0;
+	}
+
+	if(a->ifc != nil && b->ifc != nil && (a->ifc != b->ifc || a->ifcid != b->ifcid))
+		return 0;
+
+	if(*a->tag != 0 && strncmp(a->tag, b->tag, sizeof(a->tag)) != 0)
+		return 0;
+
+	return 1;
 }
 
 static void
 copygate(Route *old, Route *new)
 {
+	old->type = new->type;
+	old->ifc = new->ifc;
+	old->ifcid = new->ifcid;
 	if(new->type & Rv4)
 		memmove(old->v4.gate, new->v4.gate, IPv4addrlen);
 	else
-		memmove(old->v6.gate, new->v6.gate, IPaddrlen);
+		ipmove(old->v6.gate, new->v6.gate);
+	strncpy(old->tag, new->tag, sizeof(new->tag));
 }
 
 /*
@@ -162,12 +206,12 @@
 
 	l = p->left;
 	r = p->right;
-	p->left = 0;
-	p->right = 0;
+	p->left = nil;
+	p->right = nil;
 	addnode(f, root, p);
-	if(l)
+	if(l != nil)
 		walkadd(f, root, l);
-	if(r)
+	if(r != nil)
 		walkadd(f, root, r);
 }
 
@@ -180,16 +224,16 @@
 	Route *q;
 	int d;
 
-	if(p) {
+	if(p != nil) {
 		d = 0;
 		q = p->left;
-		if(q)
+		if(q != nil)
 			d = q->depth;
 		q = p->right;
-		if(q && q->depth > d)
+		if(q != nil && q->depth > d)
 			d = q->depth;
 		q = p->mid;
-		if(q && q->depth > d)
+		if(q != nil && q->depth > d)
 			d = q->depth;
 		p->depth = d+1;
 	}
@@ -210,8 +254,8 @@
 	 * rotate tree node
 	 */
 	p = *cur;
-	dl = 0; if(l = p->left) dl = l->depth;
-	dr = 0; if(r = p->right) dr = r->depth;
+	dl = 0; if((l = p->left) != nil) dl = l->depth;
+	dr = 0; if((r = p->right) != nil) dr = r->depth;
 
 	if(dl > dr+1) {
 		p->left = l->right;
@@ -239,7 +283,7 @@
 	Route *p;
 
 	p = *cur;
-	if(p == 0) {
+	if(p == nil) {
 		*cur = new;
 		new->depth = 1;
 		return;
@@ -269,15 +313,13 @@
 		 *  supercede the old entry if the old one isn't
 		 *  a local interface.
 		 */
-		if((p->type & Rifc) == 0){
-			p->type = new->type;
-			p->ifcid = -1;
+		if((p->type & Rifc) == 0)
 			copygate(p, new);
-		} else if(new->type & Rifc)
+		else if(new->type & Rifc)
 			p->ref++;
 		freeroute(new);
 		break;
-	case Rcontained:
+	case Roverlaps:
 		addnode(f, &p->mid, new);
 		break;
 	}
@@ -285,241 +327,316 @@
 	balancetree(cur);
 }
 
-#define	V4H(a)	((a&0x07ffffff)>>(32-Lroot-5))
-
-void
-v4addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type)
+/*
+ *  find node matching r
+ */
+static Route**
+looknode(Route **cur, Route *r)
 {
 	Route *p;
-	ulong sa;
-	ulong m;
-	ulong ea;
-	int h, eh;
 
-	m = nhgetl(mask);
-	sa = nhgetl(a) & m;
-	ea = sa | ~m;
+	for(;;){
+		p = *cur;
+		if(p == nil)
+			return nil;
+		switch(rangecompare(r, p)){
+		case Rcontains:
+			return nil;
+		case Rpreceeds:
+			cur = &p->left;
+			break;
+		case Rfollows:
+			cur = &p->right;
+			break;
+		case Roverlaps:
+			cur = &p->mid;
+			break;
+		case Requals:
+			if((p->type & Rifc) == 0 && !matchroute(r, p))
+				return nil;
+			return cur;
+		}
+	}
+}
 
-	eh = V4H(ea);
-	for(h=V4H(sa); h<=eh; h++) {
-		p = allocroute(Rv4 | type);
-		p->v4.address = sa;
-		p->v4.endaddress = ea;
-		memmove(p->v4.gate, gate, sizeof(p->v4.gate));
-		memmove(p->tag, tag, sizeof(p->tag));
+static Route*
+looknodetag(Route *r, char *tag)
+{
+	Route *x;
 
-		wlock(&routelock);
-		addnode(f, &f->v4root[h], p);
-		while(p = f->queue) {
-			f->queue = p->mid;
-			walkadd(f, &f->v4root[h], p->left);
-			freeroute(p);
-		}
-		wunlock(&routelock);
+	if(r == nil)
+		return nil;
+
+	if((x = looknodetag(r->mid, tag)) != nil)
+		return x;
+	if((x = looknodetag(r->left, tag)) != nil)
+		return x;
+	if((x = looknodetag(r->right, tag)) != nil)
+		return x;
+
+	if((r->type & Rifc) == 0){
+		if(tag == nil || strncmp(tag, r->tag, sizeof(r->tag)) == 0)
+			return r;
 	}
-	v4routegeneration++;
 
-	ipifcaddroute(f, Rv4, a, mask, gate, type);
+	return nil;
 }
 
-#define	V6H(a)	(((a)[IPllen-1] & 0x07ffffff)>>(32-Lroot-5))
-#define ISDFLT(a, mask, tag) ((ipcmp((a),v6Unspecified)==0) && (ipcmp((mask),v6Unspecified)==0) && (strcmp((tag), "ra")!=0))
+#define	V4H(a)	((a&0x07ffffff)>>(32-Lroot-5))
+#define	V6H(a)	(((a)[IPllen-1]&0x07ffffff)>>(32-Lroot-5))
 
-void
-v6addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type)
+static void
+routeadd(Fs *f, Route *r)
 {
-	Route *p;
-	ulong sa[IPllen], ea[IPllen];
-	ulong x, y;
-	int h, eh;
+	Route **h, **e, *p;
 
-	/*
-	if(ISDFLT(a, mask, tag))
-		f->v6p->cdrouter = -1;
-	*/
+	if(r->type & Rv4){
+		h = &f->v4root[V4H(r->v4.address)];
+		e = &f->v4root[V4H(r->v4.endaddress)];
+	} else {
+		h = &f->v6root[V6H(r->v6.address)];
+		e = &f->v6root[V6H(r->v6.endaddress)];
+	}
 
+	for(; h <= e; h++) {
+		p = allocroute(r->type);
 
-	for(h = 0; h < IPllen; h++){
-		x = nhgetl(a+4*h);
-		y = nhgetl(mask+4*h);
-		sa[h] = x & y;
-		ea[h] = x | ~y;
-	}
+		p->ifc = r->ifc;
+		p->ifcid = r->ifcid;
 
-	eh = V6H(ea);
-	for(h = V6H(sa); h <= eh; h++) {
-		p = allocroute(type);
-		memmove(p->v6.address, sa, IPaddrlen);
-		memmove(p->v6.endaddress, ea, IPaddrlen);
-		memmove(p->v6.gate, gate, IPaddrlen);
-		memmove(p->tag, tag, sizeof(p->tag));
+		if(r->type & Rv4)
+			memmove(&p->v4, &r->v4, sizeof(r->v4));
+		else
+			memmove(&p->v6, &r->v6, sizeof(r->v6));
 
-		wlock(&routelock);
-		addnode(f, &f->v6root[h], p);
-		while(p = f->queue) {
+		memmove(p->tag, r->tag, sizeof(r->tag));
+
+		addnode(f, h, p);
+		while((p = f->queue) != nil) {
 			f->queue = p->mid;
-			walkadd(f, &f->v6root[h], p->left);
+			walkadd(f, h, p->left);
 			freeroute(p);
 		}
-		wunlock(&routelock);
 	}
-	v6routegeneration++;
 
-	ipifcaddroute(f, 0, a, mask, gate, type);
+	if(r->type & Rv4)
+		v4routegeneration++;
+	else
+		v6routegeneration++;
 }
 
-Route**
-looknode(Route **cur, Route *r)
+static void
+routerem(Fs *f, Route *r)
 {
-	Route *p;
+	Route **h, **e, **l, *p;
 
-	for(;;){
-		p = *cur;
-		if(p == 0)
-			return 0;
-	
-		switch(rangecompare(r, p)){
-		case Rcontains:
-			return 0;
-		case Rpreceeds:
-			cur = &p->left;
-			break;
-		case Rfollows:
-			cur = &p->right;
-			break;
-		case Rcontained:
-			cur = &p->mid;
-			break;
-		case Requals:
-			return cur;
+	if(r->type & Rv4){
+		h = &f->v4root[V4H(r->v4.address)];
+		e = &f->v4root[V4H(r->v4.endaddress)];
+	} else {
+		h = &f->v6root[V6H(r->v6.address)];
+		e = &f->v6root[V6H(r->v6.endaddress)];
+	}
+
+	for(; h <= e; h++) {
+		if((l = looknode(h, r)) == nil)
+			continue;
+		p = *l;
+		if(--(p->ref) != 0)
+			continue;
+		*l = nil;
+		addqueue(&f->queue, p->left);
+		addqueue(&f->queue, p->mid);
+		addqueue(&f->queue, p->right);
+		freeroute(p);
+
+		while((p = f->queue) != nil) {
+			f->queue = p->mid;
+			walkadd(f, h, p->left);
+			freeroute(p);
 		}
 	}
+
+	if(r->type & Rv4)
+		v4routegeneration++;
+	else
+		v6routegeneration++;
 }
 
-void
-v4delroute(Fs *f, uchar *a, uchar *mask, int dolock)
+static Route
+mkroute(uchar *a, uchar *mask, uchar *s, uchar *smask, uchar *gate, int type, Ipifc *ifc, char *tag)
 {
-	Route **r, *p;
-	Route rt;
-	int h, eh;
-	ulong m;
+	ulong x, y;
+	Route r;
+	int h;
 
-	m = nhgetl(mask);
-	rt.v4.address = nhgetl(a) & m;
-	rt.v4.endaddress = rt.v4.address | ~m;
-	rt.type = Rv4;
+	memset(&r, 0, sizeof(r));
 
-	eh = V4H(rt.v4.endaddress);
-	for(h=V4H(rt.v4.address); h<=eh; h++) {
-		if(dolock)
-			wlock(&routelock);
-		r = looknode(&f->v4root[h], &rt);
-		if(r) {
-			p = *r;
-			if(--(p->ref) == 0){
-				*r = 0;
-				addqueue(&f->queue, p->left);
-				addqueue(&f->queue, p->mid);
-				addqueue(&f->queue, p->right);
-				freeroute(p);
-				while(p = f->queue) {
-					f->queue = p->mid;
-					walkadd(f, &f->v4root[h], p->left);
-					freeroute(p);
-				}
-			}
+	r.type = type;
+
+	if(type & Rv4){
+		x = nhgetl(a+IPv4off);
+		y = nhgetl(mask+IPv4off);
+		r.v4.address = x & y;
+		r.v4.endaddress = x | ~y;
+
+		x = nhgetl(s+IPv4off);
+		y = nhgetl(smask+IPv4off);
+		if(y != 0)
+			r.type |= Rsrc;
+		r.v4.source = x & y;
+		r.v4.endsource = x | ~y;
+
+		memmove(r.v4.gate, gate+IPv4off, IPv4addrlen);
+	} else {
+		for(h = 0; h < IPllen; h++){
+			x = nhgetl(a+4*h);
+			y = nhgetl(mask+4*h);
+			r.v6.address[h] = x & y;
+			r.v6.endaddress[h] = x | ~y;
+
+			x = nhgetl(s+4*h);
+			y = nhgetl(smask+4*h);
+			if(y != 0)
+				r.type |= Rsrc;
+			r.v6.source[h] = x & y;
+			r.v6.endsource[h] = x | ~y;
 		}
-		if(dolock)
-			wunlock(&routelock);
+
+		memmove(r.v6.gate, gate, IPaddrlen);
 	}
-	v4routegeneration++;
 
-	ipifcremroute(f, Rv4, a, mask);
+	if(ifc != nil){
+		r.ifc = ifc;
+		r.ifcid = ifc->ifcid;
+	}
+
+	if(tag != nil)
+		strncpy(r.tag, tag, sizeof(r.tag));
+
+	return r;
 }
 
 void
-v6delroute(Fs *f, uchar *a, uchar *mask, int dolock)
+addroute(Fs *f, uchar *a, uchar *mask, uchar *s, uchar *smask, uchar *gate, int type, Ipifc *ifc, char *tag)
 {
-	Route **r, *p;
-	Route rt;
-	int h, eh;
-	ulong x, y;
+	Route r = mkroute(a, mask, s, smask, gate, type, ifc, tag);
+	wlock(&routelock);
+	routeadd(f, &r);
+	wunlock(&routelock);
+}
 
-	for(h = 0; h < IPllen; h++){
-		x = nhgetl(a+4*h);
-		y = nhgetl(mask+4*h);
-		rt.v6.address[h] = x & y;
-		rt.v6.endaddress[h] = x | ~y;
+void
+remroute(Fs *f, uchar *a, uchar *mask, uchar *s, uchar *smask, uchar *gate, int type, Ipifc *ifc, char *tag)
+{
+	Route r = mkroute(a, mask, s, smask, gate, type, ifc, tag);
+	wlock(&routelock);
+	routerem(f, &r);
+	wunlock(&routelock);
+}
+
+/* get the outgoing interface for route r */
+static Ipifc*
+routefindipifc(Route *r, Fs *f)
+{
+	uchar local[IPaddrlen], gate[IPaddrlen];
+	Ipifc *ifc;
+	int i;
+
+	ifc = r->ifc;
+	if(ifc != nil && ifc->ifcid == r->ifcid)
+		return ifc;
+
+	if(r->type & Rsrc) {
+		if(r->type & Rv4) {
+			hnputl(local+IPv4off, r->v4.source);
+			memmove(local, v4prefix, IPv4off);
+		} else {
+			for(i = 0; i < IPllen; i++)
+				hnputl(local+4*i, r->v6.source[i]);
+		}
+	} else {
+		ipmove(local, IPnoaddr);
 	}
-	rt.type = 0;
 
-	eh = V6H(rt.v6.endaddress);
-	for(h=V6H(rt.v6.address); h<=eh; h++) {
-		if(dolock)
-			wlock(&routelock);
-		r = looknode(&f->v6root[h], &rt);
-		if(r) {
-			p = *r;
-			if(--(p->ref) == 0){
-				*r = 0;
-				addqueue(&f->queue, p->left);
-				addqueue(&f->queue, p->mid);
-				addqueue(&f->queue, p->right);
-				freeroute(p);
-				while(p = f->queue) {
-					f->queue = p->mid;
-					walkadd(f, &f->v6root[h], p->left);
-					freeroute(p);
-				}
-			}
+	if(r->type & Rifc) {
+		if(r->type & Rv4) {
+			hnputl(gate+IPv4off, r->v4.address);
+			memmove(gate, v4prefix, IPv4off);
+		} else {
+			for(i = 0; i < IPllen; i++)
+				hnputl(gate+4*i, r->v6.address[i]);
 		}
-		if(dolock)
-			wunlock(&routelock);
+	} else {
+		if(r->type & Rv4)
+			v4tov6(gate, r->v4.gate);
+		else
+			ipmove(gate, r->v6.gate);
 	}
-	v6routegeneration++;
 
-	ipifcremroute(f, 0, a, mask);
+	if((ifc = findipifc(f, local, gate, r->type)) == nil)
+		return nil;
+
+	r->ifc = ifc;
+	r->ifcid = ifc->ifcid;
+	return ifc;
 }
 
+/*
+ * v4lookup, v6lookup:
+ *  lookup a route to destination address a from source address s
+ *  and return the route. returns nil if no route was found.
+ *  an optional Routehint can be passed in rh to cache the lookup.
+ *
+ *  for v4lookup, addresses are in 4 byte format.
+ */
 Route*
-v4lookup(Fs *f, uchar *a, Conv *c)
+v4lookup(Fs *f, uchar *a, uchar *s, Routehint *rh)
 {
+	ulong la, ls;
 	Route *p, *q;
-	ulong la;
-	uchar gate[IPaddrlen];
 	Ipifc *ifc;
 
-	if(c != nil && c->r != nil && c->r->ifc != nil && c->rgen == v4routegeneration)
-		return c->r;
+	if(rh != nil
+	&& rh->rgen == v4routegeneration
+	&& (q = rh->r) != nil
+	&& (ifc = q->ifc) != nil
+	&& q->ifcid == ifc->ifcid
+	&& q->ref > 0)
+		return q;
 
 	la = nhgetl(a);
+	ls = nhgetl(s);
 	q = nil;
-	for(p=f->v4root[V4H(la)]; p;)
-		if(la >= p->v4.address) {
-			if(la <= p->v4.endaddress) {
-				q = p;
-				p = p->mid;
-			} else
-				p = p->right;
-		} else
+	for(p = f->v4root[V4H(la)]; p != nil;){
+		if(la < p->v4.address){
 			p = p->left;
-
-	if(q && (q->ifc == nil || q->ifcid != q->ifc->ifcid)){
-		if(q->type & Rifc) {
-			hnputl(gate+IPv4off, q->v4.address);
-			memmove(gate, v4prefix, IPv4off);
-		} else
-			v4tov6(gate, q->v4.gate);
-		ifc = findipifc(f, gate, q->type);
-		if(ifc == nil)
-			return nil;
-		q->ifc = ifc;
-		q->ifcid = ifc->ifcid;
+			continue;
+		}
+		if(la > p->v4.endaddress){
+			p = p->right;
+			continue;
+		}
+		if(p->type & Rsrc){
+			if(ls < p->v4.source){
+				p = p->mid;
+				continue;
+			}
+			if(ls > p->v4.endsource){
+				p = p->mid;
+				continue;
+			}
+		}
+		q = p;
+		p = p->mid;
 	}
 
-	if(c != nil){
-		c->r = q;
-		c->rgen = v4routegeneration;
+	if(q == nil || q->ref == 0 || routefindipifc(q, f) == nil)
+		return nil;
+
+	if(rh != nil){
+		rh->r = q;
+		rh->rgen = v4routegeneration;
 	}
 
 	return q;
@@ -526,29 +643,35 @@
 }
 
 Route*
-v6lookup(Fs *f, uchar *a, Conv *c)
+v6lookup(Fs *f, uchar *a, uchar *s, Routehint *rh)
 {
-	Route *p, *q;
-	ulong la[IPllen];
-	int h;
+	ulong la[IPllen], ls[IPllen];
 	ulong x, y;
-	uchar gate[IPaddrlen];
+	Route *p, *q;
 	Ipifc *ifc;
+	int h;
 
-	if(memcmp(a, v4prefix, IPv4off) == 0){
-		q = v4lookup(f, a+IPv4off, c);
-		if(q != nil)
-			return q;
+	if(isv4(s)){
+		if(isv4(a))
+			return v4lookup(f, a+IPv4off, s+IPv4off, rh);
+		return nil;
 	}
 
-	if(c != nil && c->r != nil && c->r->ifc != nil && c->rgen == v6routegeneration)
-		return c->r;
+	if(rh != nil
+	&& rh->rgen == v6routegeneration
+	&& (q = rh->r) != nil
+	&& (ifc = q->ifc) != nil
+	&& q->ifcid == ifc->ifcid
+	&& q->ref > 0)
+		return q;
 
-	for(h = 0; h < IPllen; h++)
+	for(h = 0; h < IPllen; h++){
 		la[h] = nhgetl(a+4*h);
+		ls[h] = nhgetl(s+4*h);
+	}
 
-	q = 0;
-	for(p=f->v6root[V6H(la)]; p;){
+	q = nil;
+	for(p = f->v6root[V6H(la)]; p != nil;){
 		for(h = 0; h < IPllen; h++){
 			x = la[h];
 			y = p->v6.address[h];
@@ -571,42 +694,202 @@
 			}
 			break;
 		}
+		if(p->type & Rsrc){
+			for(h = 0; h < IPllen; h++){
+				x = ls[h];
+				y = p->v6.source[h];
+				if(x == y)
+					continue;
+				if(x < y){
+					p = p->mid;
+					goto next;
+				}
+				break;
+			}
+			for(h = 0; h < IPllen; h++){
+				x = ls[h];
+				y = p->v6.endsource[h];
+				if(x == y)
+					continue;
+				if(x > y){
+					p = p->mid;
+					goto next;
+				}
+				break;
+			}
+		}
 		q = p;
 		p = p->mid;
 next:		;
 	}
 
-	if(q && (q->ifc == nil || q->ifcid != q->ifc->ifcid)){
-		if(q->type & Rifc) {
-			for(h = 0; h < IPllen; h++)
-				hnputl(gate+4*h, q->v6.address[h]);
-			ifc = findipifc(f, gate, q->type);
-		} else
-			ifc = findipifc(f, q->v6.gate, q->type);
-		if(ifc == nil)
-			return nil;
-		q->ifc = ifc;
-		q->ifcid = ifc->ifcid;
+	if(q == nil || q->ref == 0 || routefindipifc(q, f) == nil)
+		return nil;
+
+	if(rh != nil){
+		rh->r = q;
+		rh->rgen = v6routegeneration;
 	}
-	if(c != nil){
-		c->r = q;
-		c->rgen = v6routegeneration;
-	}
 	
 	return q;
 }
 
+/*
+ * v4source, v6source:
+ *  lookup a route to destination address a and also find
+ *  a suitable source address s on the outgoing interface.
+ *  return the route on success or nil when no route
+ *  was found.
+ *
+ *  for v4source, addresses are in 4 byte format.
+ */
+Route*
+v4source(Fs *f, uchar *a, uchar *s)
+{
+	uchar src[IPv4addrlen];
+	int splen;
+	ulong x, la;
+	Route *p, *q;
+	Ipifc *ifc;
+
+	q = nil;
+	la = nhgetl(a);
+	rlock(&routelock);
+	for(p = f->v4root[V4H(la)]; p != nil;){
+		if(la < p->v4.address){
+			p = p->left;
+			continue;
+		}
+		if(la > p->v4.endaddress){
+			p = p->right;
+			continue;
+		}
+		splen = 0;
+		if(p->type & Rsrc){
+			/* calculate local prefix length for source specific routes */
+			for(x = ~(p->v4.endsource ^ p->v4.source); x & 0x80000000UL; x <<= 1)
+				splen++;
+			hnputl(src, p->v4.source);
+		}
+		if((ifc = routefindipifc(p, f)) == nil
+		|| !ipv4local(ifc, src, splen, (p->type & (Rifc|Rbcast|Rmulti|Rv4))==Rv4? p->v4.gate: a)){
+			p = p->mid;
+			continue;
+		}
+		memmove(s, src, IPv4addrlen);
+		q = p;
+		p = p->mid;
+	}
+	runlock(&routelock);
+	return q;
+}
+
+Route*
+v6source(Fs *f, uchar *a, uchar *s)
+{
+	uchar src[IPaddrlen];
+	int splen, h;
+	ulong x, y, la[IPllen];
+	Route *p, *q;
+	Ipifc *ifc;
+
+	q = nil;
+	for(h = 0; h < IPllen; h++)
+		la[h] = nhgetl(a+4*h);
+	rlock(&routelock);
+	for(p = f->v6root[V6H(la)]; p != nil;){
+		for(h = 0; h < IPllen; h++){
+			x = la[h];
+			y = p->v6.address[h];
+			if(x == y)
+				continue;
+			if(x < y){
+				p = p->left;
+				goto next;
+			}
+			break;
+		}
+		for(h = 0; h < IPllen; h++){
+			x = la[h];
+			y = p->v6.endaddress[h];
+			if(x == y)
+				continue;
+			if(x > y){
+				p = p->right;
+				goto next;
+			}
+			break;
+		}
+		splen = 0;
+		if(p->type & Rsrc){
+			/* calculate local prefix length for source specific routes */
+			for(h = 0; h < IPllen; h++){
+				hnputl(src+4*h, p->v6.source[h]);
+				if((x = ~(p->v6.endsource[h] ^ p->v6.source[h])) != ~0UL){
+					for(; x & 0x80000000UL; x <<= 1)
+						splen++;
+					break;
+				}
+				splen += 32;
+			}
+		}
+		if((ifc = routefindipifc(p, f)) == nil
+		|| !ipv6local(ifc, src, splen, a)){
+			p = p->mid;
+			continue;
+		}
+		ipmove(s, src);
+		q = p;
+		p = p->mid;
+next:		;
+	}
+	runlock(&routelock);
+	return q;
+}
+
+static int
+parseroutetype(char *p)
+{
+	int type = 0;
+	switch(*p++){
+	default:	return -1;	
+	case '4':	type |= Rv4;
+	case '6':	break;
+	}
+	for(;;) switch(*p++){
+	default: 
+		return -1;
+	case 'i':
+		if(((type ^= Rifc) & Rifc) != Rifc) return -1;
+		break;
+	case 'u':
+		if(((type ^= Runi) & (Runi|Rbcast|Rmulti)) != Runi) return -1;
+		break;
+	case 'b':
+		if(((type ^= Rbcast) & (Runi|Rbcast|Rmulti)) != Rbcast) return -1;
+		break;
+	case 'm':
+		if(((type ^= Rmulti) & (Runi|Rbcast|Rmulti)) != Rmulti) return -1;
+		break;
+	case 'p':
+		if(((type ^= Rptpt) & Rptpt) != Rptpt) return -1;
+		break;
+	case '\0':
+		return type;
+	}
+}
+
 void
-routetype(int type, char *p)
+routetype(int type, char p[8])
 {
-	memset(p, ' ', 4);
-	p[4] = 0;
 	if(type & Rv4)
 		*p++ = '4';
 	else
 		*p++ = '6';
+
 	if(type & Rifc)
 		*p++ = 'i';
+
 	if(type & Runi)
 		*p++ = 'u';
 	else if(type & Rbcast)
@@ -613,14 +896,14 @@
 		*p++ = 'b';
 	else if(type & Rmulti)
 		*p++ = 'm';
+
 	if(type & Rptpt)
-		*p = 'p';
+		*p++ = 'p';
+	*p = 0;
 }
 
-char *rformat = "%-15I %-4M %-15I %4.4s %4.4s %3s\n";
-
-void
-convroute(Route *r, uchar *addr, uchar *mask, uchar *gate, char *t, int *nifc)
+static void
+convroute(Route *r, uchar *addr, uchar *mask, uchar *src, uchar *smask, uchar *gate)
 {
 	int i;
 
@@ -627,8 +910,16 @@
 	if(r->type & Rv4){
 		memmove(addr, v4prefix, IPv4off);
 		hnputl(addr+IPv4off, r->v4.address);
+
 		memset(mask, 0xff, IPv4off);
 		hnputl(mask+IPv4off, ~(r->v4.endaddress ^ r->v4.address));
+
+		memmove(src, v4prefix, IPv4off);
+		hnputl(src+IPv4off, r->v4.source);
+
+		memset(smask, 0xff, IPv4off);
+		hnputl(smask+IPv4off, ~(r->v4.endsource ^ r->v4.source));
+
 		memmove(gate, v4prefix, IPv4off);
 		memmove(gate+IPv4off, r->v4.gate, IPv4addrlen);
 	} else {
@@ -635,162 +926,186 @@
 		for(i = 0; i < IPllen; i++){
 			hnputl(addr + 4*i, r->v6.address[i]);
 			hnputl(mask + 4*i, ~(r->v6.endaddress[i] ^ r->v6.address[i]));
+			hnputl(src + 4*i, r->v6.source[i]);
+			hnputl(smask + 4*i, ~(r->v6.endsource[i] ^ r->v6.source[i]));
 		}
 		memmove(gate, r->v6.gate, IPaddrlen);
 	}
+}
 
-	routetype(r->type, t);
+static char*
+seprintroute(char *p, char *e, Route *r)
+{
+	uchar addr[IPaddrlen], mask[IPaddrlen], src[IPaddrlen], smask[IPaddrlen], gate[IPaddrlen];
+	char type[8], ifbuf[4], *iname;
 
-	if(r->ifc)
-		*nifc = r->ifc->conv->x;
+	convroute(r, addr, mask, src, smask, gate);
+	routetype(r->type, type);
+	if(r->ifc != nil && r->ifcid == r->ifc->ifcid)
+		snprint(iname = ifbuf, sizeof ifbuf, "%d", r->ifc->conv->x);
 	else
-		*nifc = -1;
+		iname = "-";
+	return seprint(p, e, "%-15I %-4M %-15I %-4s %4.4s %3s %-15I %-4M\n",
+		addr, mask, gate, type, r->tag, iname, src, smask);
 }
 
-/*
- *  this code is not in rr to reduce stack size
- */
-static void
-sprintroute(Route *r, Routewalk *rw)
+typedef struct Routewalk Routewalk;
+struct Routewalk
 {
-	int nifc, n;
-	char t[5], *iname, ifbuf[5];
-	uchar addr[IPaddrlen], mask[IPaddrlen], gate[IPaddrlen];
-	char *p;
+	int	o;
+	int	h;
+	char*	p;
+	char*	e;
+};
 
-	convroute(r, addr, mask, gate, t, &nifc);
-	iname = "-";
-	if(nifc != -1) {
-		iname = ifbuf;
-		snprint(ifbuf, sizeof ifbuf, "%d", nifc);
-	}
-	p = seprint(rw->p, rw->e, rformat, addr, mask, gate, t, r->tag, iname);
+static int
+rr1(Routewalk *rw, Route *r)	/* prints route r at rw->p, honouring the skip count kept in rw->o; returns 0 once the buffer is full */
+{
+	int n = seprintroute(rw->p, rw->e, r) - rw->p;	/* number of bytes seprintroute wrote for this route */
 	if(rw->o < 0){
-		n = p - rw->p;
 		if(n > -rw->o){
-			memmove(rw->p, rw->p-rw->o, n+rw->o);
-			rw->p = p + rw->o;
+			memmove(rw->p, rw->p - rw->o, n + rw->o);	/* the text extends past the skip point: slide that tail down to rw->p */
+			rw->p += n + rw->o;	/* keep only the n + rw->o bytes beyond the skip point */
 		}
 		rw->o += n;
 	} else
-		rw->p = p;
+		rw->p += n;	/* nothing left to skip: keep the whole line */
+	return rw->p < rw->e;	/* zero when no room remains, which makes rr() stop walking */
 }
 
-/*
- *  recurse descending tree, applying the function in Routewalk
- */
 static int
 rr(Route *r, Routewalk *rw)
 {
 	int h;
 
-	if(rw->e <= rw->p)
-		return 0;
 	if(r == nil)
 		return 1;
-
 	if(rr(r->left, rw) == 0)
 		return 0;
-
 	if(r->type & Rv4)
 		h = V4H(r->v4.address);
 	else
 		h = V6H(r->v6.address);
-
-	if(h == rw->h)
-		rw->walk(r, rw);
-
+	if(h == rw->h){
+		if(rr1(rw, r) == 0)
+			return 0;
+	}
 	if(rr(r->mid, rw) == 0)
 		return 0;
-
 	return rr(r->right, rw);
 }
 
-void
-ipwalkroutes(Fs *f, Routewalk *rw)
+long
+routeread(Fs *f, char *p, ulong offset, int n)
 {
+	Routewalk rw[1];
+
+	rw->p = p;
+	rw->e = p+n;
+	rw->o = -offset;
+	if(rw->o > 0)
+		return 0;
+
 	rlock(&routelock);
-	if(rw->e > rw->p) {
+	if(rw->p < rw->e) {
 		for(rw->h = 0; rw->h < nelem(f->v4root); rw->h++)
 			if(rr(f->v4root[rw->h], rw) == 0)
 				break;
 	}
-	if(rw->e > rw->p) {
+	if(rw->p < rw->e) {
 		for(rw->h = 0; rw->h < nelem(f->v6root); rw->h++)
 			if(rr(f->v6root[rw->h], rw) == 0)
 				break;
 	}
 	runlock(&routelock);
-}
 
-long
-routeread(Fs *f, char *p, ulong offset, int n)
-{
-	Routewalk rw;
-
-	rw.p = p;
-	rw.e = p+n;
-	rw.o = -offset;
-	rw.walk = sprintroute;
-
-	ipwalkroutes(f, &rw);
-
-	return rw.p - p;
+	return rw->p - p;
 }
 
 /*
- *  this code is not in routeflush to reduce stack size
+ *	4	add	addr	mask	gate
+ *	5	add	addr	mask	gate			ifc
+ *	6	add	addr	mask	gate				src	smask
+ *	7	add	addr	mask	gate			ifc	src	smask
+ *	8	add	addr	mask	gate		tag	ifc	src	smask
+ *	9	add	addr	mask	gate	type	tag	ifc	src	smask
+ *	3	remove	addr	mask
+ *	4	remove	addr	mask	gate
+ *	5	remove	addr	mask					src	smask
+ *	6	remove	addr	mask	gate				src	smask
+ *	7	remove	addr	mask	gate			ifc	src	smask
+ *	8	remove	addr	mask	gate		tag	ifc	src	smask
+ *	9	remove	addr	mask	gate	type	tag	ifc	src	smask
  */
-void
-delroute(Fs *f, Route *r, int dolock)
+static Route
+parseroute(Fs *f, char **argv, int argc)
 {
-	uchar addr[IPaddrlen];
-	uchar mask[IPaddrlen];
+	uchar addr[IPaddrlen], mask[IPaddrlen];
+	uchar src[IPaddrlen], smask[IPaddrlen];
 	uchar gate[IPaddrlen];
-	char t[5];
-	int nifc;
+	Ipifc *ifc;
+	char *tag;
+	int type;
 
-	convroute(r, addr, mask, gate, t, &nifc);
-	if(r->type & Rv4)
-		v4delroute(f, addr+IPv4off, mask+IPv4off, dolock);
-	else
-		v6delroute(f, addr, mask, dolock);
-}
+	type = 0;
+	tag = nil;
+	ifc = nil;
+	ipmove(gate, IPnoaddr);
+	ipmove(src, IPnoaddr);
+	ipmove(smask, IPnoaddr);
 
-/*
- *  recurse until one route is deleted
- *    returns 0 if nothing is deleted, 1 otherwise
- */
-int
-routeflush(Fs *f, Route *r, char *tag)
-{
-	if(r == nil)
-		return 0;
-	if(routeflush(f, r->mid, tag))
-		return 1;
-	if(routeflush(f, r->left, tag))
-		return 1;
-	if(routeflush(f, r->right, tag))
-		return 1;
-	if((r->type & Rifc) == 0){
-		if(tag == nil || strncmp(tag, r->tag, sizeof(r->tag)) == 0){
-			delroute(f, r, 0);
-			return 1;
-		}
+	if(argc < 3)
+		error(Ebadctl);
+	if(parseipandmask(addr, mask, argv[1], argv[2]) == -1)
+		error(Ebadip);
+
+	if(strcmp(argv[0], "add") == 0 || (argc > 3 && argc != 5)){
+		if(argc < 4)
+			error(Ebadctl);
+		if(parseip(gate, argv[3]) == -1)
+			error(Ebadip);
 	}
-	return 0;
+	if(argc > 4 && (strcmp(argv[0], "add") != 0 || argc != 5)){
+		if(parseipandmask(src, smask, argv[argc-2], argv[argc-1]) == -1)
+			error(Ebadip);
+	}
+	if(argc == 5 && strcmp(argv[0], "add") == 0)
+		ifc = findipifcstr(f, argv[4]);
+	if(argc > 6)
+		ifc = findipifcstr(f, argv[argc-3]);
+	if(argc > 7)
+		tag = argv[argc-4];
+	if(argc > 8){
+		if((type = parseroutetype(argv[argc-5])) < 0)
+			error(Ebadctl);
+	} else {
+		if(isv4(addr))
+			type |= Rv4;
+	}
+	if(argc > 9)
+		error(Ebadctl);
+
+	if(type & Rv4){
+		if(!isv4(addr))
+			error(Ebadip);
+		if(ipcmp(smask, IPnoaddr) != 0 && !isv4(src))
+			error(Ebadip);
+		if(ipcmp(gate, IPnoaddr) != 0 && !isv4(gate))
+			error(Ebadip);
+	} else {
+		if(isv4(addr))
+			error(Ebadip);
+	}
+
+	return mkroute(addr, mask, src, smask, gate, type, ifc, tag);	
 }
 
 long
 routewrite(Fs *f, Chan *c, char *p, int n)
 {
-	int h, changed;
-	char *tag;
 	Cmdbuf *cb;
-	uchar addr[IPaddrlen];
-	uchar mask[IPaddrlen];
-	uchar gate[IPaddrlen];
-	IPaux *a, *na;
+	IPaux *a;
+	Route *x, r;
 
 	cb = parsecmd(p, n);
 	if(waserror()){
@@ -797,54 +1112,44 @@
 		free(cb);
 		nexterror();
 	}
-
+	if(cb->nf < 1)
+		error("short control request");
 	if(strcmp(cb->f[0], "flush") == 0){
-		tag = cb->f[1];
+		char *tag = cb->nf < 2 ? nil : cb->f[1];
+		int h;
+
+		wlock(&routelock);
 		for(h = 0; h < nelem(f->v4root); h++)
-			for(changed = 1; changed;){
-				wlock(&routelock);
-				changed = routeflush(f, f->v4root[h], tag);
-				wunlock(&routelock);
+			while((x = looknodetag(f->v4root[h], tag)) != nil){
+				memmove(&r, x, sizeof(RouteTree) + sizeof(V4route));
+				routerem(f, &r);
 			}
 		for(h = 0; h < nelem(f->v6root); h++)
-			for(changed = 1; changed;){
-				wlock(&routelock);
-				changed = routeflush(f, f->v6root[h], tag);
-				wunlock(&routelock);
+			while((x = looknodetag(f->v6root[h], tag)) != nil){
+				memmove(&r, x, sizeof(RouteTree) + sizeof(V6route));
+				routerem(f, &r);
 			}
-	} else if(strcmp(cb->f[0], "remove") == 0){
-		if(cb->nf < 3)
-			error(Ebadarg);
-		parseip(addr, cb->f[1]);
-		parseipmask(mask, cb->f[2]);
-		if(memcmp(addr, v4prefix, IPv4off) == 0)
-			v4delroute(f, addr+IPv4off, mask+IPv4off, 1);
-		else
-			v6delroute(f, addr, mask, 1);
-	} else if(strcmp(cb->f[0], "add") == 0){
-		if(cb->nf < 4)
-			error(Ebadarg);
-		parseip(addr, cb->f[1]);
-		parseipmask(mask, cb->f[2]);
-		parseip(gate, cb->f[3]);
-		tag = "none";
-		if(c != nil){
+		wunlock(&routelock);
+	} else if(strcmp(cb->f[0], "add") == 0 || strcmp(cb->f[0], "remove") == 0){
+		r = parseroute(f, cb->f, cb->nf);
+		if(*r.tag == 0){
 			a = c->aux;
-			tag = a->tag;
+			strncpy(r.tag, a->tag, sizeof(r.tag));
 		}
-		if(memcmp(addr, v4prefix, IPv4off) == 0)
-			v4addroute(f, tag, addr+IPv4off, mask+IPv4off, gate+IPv4off, 0);
+		wlock(&routelock);
+		if(strcmp(cb->f[0], "add") == 0)
+			routeadd(f, &r);
 		else
-			v6addroute(f, tag, addr, mask, gate, 0);
+			routerem(f, &r);
+		wunlock(&routelock);
 	} else if(strcmp(cb->f[0], "tag") == 0) {
 		if(cb->nf < 2)
 			error(Ebadarg);
-
 		a = c->aux;
-		na = newipaux(a->owner, cb->f[1]);
-		c->aux = na;
+		c->aux = newipaux(a->owner, cb->f[1]);
 		free(a);
-	}
+	} else
+		error(Ebadctl);
 
 	poperror();
 	free(cb);
--- a/os/ip/iprouter.c
+++ /dev/null
@@ -1,56 +1,0 @@
-#include	"u.h"
-#include	"../port/lib.h"
-#include	"mem.h"
-#include	"dat.h"
-#include	"fns.h"
-#include	"../port/error.h"
-#include	"../ip/ip.h"
-
-IProuter iprouter;
-
-/*
- *  User level routing.  Ip packets we don't know what to do with
- *  come here.
- */
-void
-useriprouter(Fs *f, Ipifc *ifc, Block *bp)
-{
-	qlock(&f->iprouter);
-	if(f->iprouter.q != nil){
-		bp = padblock(bp, IPaddrlen);
-		if(bp == nil)
-			return;
-		ipmove(bp->rp, ifc->lifc->local);
-		qpass(f->iprouter.q, bp);
-	}else
-		freeb(bp);
-	qunlock(&f->iprouter);
-}
-
-void
-iprouteropen(Fs *f)
-{
-	qlock(&f->iprouter);
-	f->iprouter.opens++;
-	if(f->iprouter.q == nil)
-		f->iprouter.q = qopen(64*1024, 0, 0, 0);
-	else if(f->iprouter.opens == 1)
-		qreopen(f->iprouter.q);
-	qunlock(&f->iprouter);
-}
-
-void
-iprouterclose(Fs *f)
-{
-	qlock(&f->iprouter);
-	f->iprouter.opens--;
-	if(f->iprouter.opens == 0)
-		qclose(f->iprouter.q);
-	qunlock(&f->iprouter);
-}
-
-long
-iprouterread(Fs *f, void *a, int n)
-{
-	return qread(f->iprouter.q, a, n);
-}
--- a/os/ip/ipv6.c
+++ b/os/ip/ipv6.c
@@ -8,250 +8,127 @@
 #include	"ip.h"
 #include	"ipv6.h"
 
-enum
-{
-	IP4HDR		= 20,		/* sizeof(Ip4hdr) */
-	IP6HDR		= 40,		/* sizeof(Ip6hdr) */
-	IP_HLEN4	= 0x05,		/* Header length in words */
-	IP_DF		= 0x4000,	/* Don't fragment */
-	IP_MF		= 0x2000,	/* More fragments */
-	IP6FHDR		= 8, 		/* sizeof(Fraghdr6) */
-	IP_MAX		= (32*1024),	/* Maximum Internet packet size */
-};
-
-#define IPV6CLASS(hdr) ((hdr->vcf[0]&0x0F)<<2 | (hdr->vcf[1]&0xF0)>>2)
-#define BLKIPVER(xp)	(((Ip6hdr*)((xp)->rp))->vcf[0]&0xF0)
-/*
- * This sleazy macro is stolen shamelessly from ip.c, see comment there.
- */
-#define BKFG(xp)	((Ipfrag*)((xp)->base))
-
-typedef struct	IP	IP;
-typedef struct	Fragment4	Fragment4;
-typedef struct	Fragment6	Fragment6;
-typedef struct	Ipfrag	Ipfrag;
-
-Block*		ip6reassemble(IP*, int, Block*, Ip6hdr*);
-void		ipfragfree6(IP*, Fragment6*);
-Fragment6*	ipfragallo6(IP*);
+static Block*		ip6reassemble(IP*, int, Block*);
+static Fragment6*	ipfragallo6(IP*);
+static void		ipfragfree6(IP*, Fragment6*);
+static Block*		procopts(Block *bp);
 static Block*		procxtns(IP *ip, Block *bp, int doreasm);
-int		unfraglen(Block *bp, uchar *nexthdr, int setfh);
-Block*		procopts(Block *bp);
+static int		unfraglen(Block *bp, uchar *nexthdr, int setfh, int popfh);
 
-/* MIB II counters */
-enum
+void
+ip_init_6(Fs *f)
 {
-	Forwarding,
-	DefaultTTL,
-	InReceives,
-	InHdrErrors,
-	InAddrErrors,
-	ForwDatagrams,
-	InUnknownProtos,
-	InDiscards,
-	InDelivers,
-	OutRequests,
-	OutDiscards,
-	OutNoRoutes,
-	ReasmTimeout,
-	ReasmReqds,
-	ReasmOKs,
-	ReasmFails,
-	FragOKs,
-	FragFails,
-	FragCreates,
+	v6params *v6p;
 
-	Nstats,
-};
+	v6p = smalloc(sizeof(v6params));
 
-static char *statnames[] =
-{
-[Forwarding]	"Forwarding",
-[DefaultTTL]	"DefaultTTL",
-[InReceives]	"InReceives",
-[InHdrErrors]	"InHdrErrors",
-[InAddrErrors]	"InAddrErrors",
-[ForwDatagrams]	"ForwDatagrams",
-[InUnknownProtos]	"InUnknownProtos",
-[InDiscards]	"InDiscards",
-[InDelivers]	"InDelivers",
-[OutRequests]	"OutRequests",
-[OutDiscards]	"OutDiscards",
-[OutNoRoutes]	"OutNoRoutes",
-[ReasmTimeout]	"ReasmTimeout",
-[ReasmReqds]	"ReasmReqds",
-[ReasmOKs]	"ReasmOKs",
-[ReasmFails]	"ReasmFails",
-[FragOKs]	"FragOKs",
-[FragFails]	"FragFails",
-[FragCreates]	"FragCreates",
-};
+	v6p->rp.mflag		= 0;		/* default not managed */
+	v6p->rp.oflag		= 0;
+	v6p->rp.maxraint	= 600000;	/* millisecs */
+	v6p->rp.minraint	= 200000;
+	v6p->rp.linkmtu		= 0;		/* no mtu sent */
+	v6p->rp.reachtime	= 0;
+	v6p->rp.rxmitra		= 0;
+	v6p->rp.ttl		= MAXTTL;
+	v6p->rp.routerlt	= (3 * v6p->rp.maxraint) / 1000;
 
-struct Fragment4
-{
-	Block*	blist;
-	Fragment4*	next;
-	ulong 	src;
-	ulong 	dst;
-	ushort	id;
-	ulong 	age;
-};
+	v6p->hp.rxmithost	= 1000;		/* v6 RETRANS_TIMER */
 
-struct Fragment6
-{
-	Block*	blist;
-	Fragment6*	next;
-	uchar 	src[IPaddrlen];
-	uchar 	dst[IPaddrlen];
-	uint	id;
-	ulong 	age;
-};
+	f->v6p			= v6p;
+}
 
-struct Ipfrag
-{
-	ushort	foff;
-	ushort	flen;
-};
-
-/* an instance of IP */
-struct IP
-{
-	ulong		stats[Nstats];
-
-	QLock		fraglock4;
-	Fragment4*	flisthead4;
-	Fragment4*	fragfree4;
-	Ref		id4;
-
-	QLock		fraglock6;
-	Fragment6*	flisthead6;
-	Fragment6*	fragfree6;
-	Ref		id6;
-
-	int		iprouting;	/* true if we route like a gateway */
-};
-
 int
-ipoput6(Fs *f, Block *bp, int gating, int ttl, int tos, Conv *c)
+ipoput6(Fs *f, Block *bp, int gating, int ttl, int tos, Routehint *rh)
 {
-	int tentative;
-	Ipifc *ifc;
+	int medialen, len, chunk, uflen, flen, seglen, lid, offset, fragoff;
+	int morefrags, blklen, rv = 0;
 	uchar *gate, nexthdr;
-	Ip6hdr *eh;
-	int medialen, len, chunk, uflen, flen, seglen, lid, offset, fragoff, morefrags, blklen;
-	Route *r, *sr;
-	Fraghdr6 fraghdr;
 	Block *xp, *nb;
+	Fraghdr6 fraghdr;
 	IP *ip;
-	int rv = 0;
+	Ip6hdr *eh;
+	Ipifc *ifc;
+	Route *r;
 
 	ip = f->ip;
-
-	/* Fill out the ip header */
-	eh = (Ip6hdr*)(bp->rp);
-
 	ip->stats[OutRequests]++;
 
-	/* Number of uchars in data and ip header to write */
+	/* Fill out the ip header */
+	eh = (Ip6hdr*)bp->rp;
+	assert(BLEN(bp) >= IP6HDR);
 	len = blocklen(bp);
-	
-	tentative = iptentative(f, eh->src);
-	if(tentative){
-		netlog(f, Logip, "reject tx of packet with tentative src address\n");
-		goto free;
-	}
-
-	if(gating){
-		chunk = nhgets(eh->ploadlen);
-		if(chunk > len){
-			ip->stats[OutDiscards]++;
-			netlog(f, Logip, "short gated packet\n");
-			goto free;
-		}
-		if(chunk + IPV6HDR_LEN < len)
-			len = chunk + IPV6HDR_LEN;
-	}
-
 	if(len >= IP_MAX){
-//		print("len > IP_MAX, free\n");
 		ip->stats[OutDiscards]++;
-		netlog(f, Logip, "exceeded ip max size %I\n", eh->dst);
+		netlog(f, Logip, "%I -> %I: exceeded ip max size: %d\n", eh->src, eh->dst, len);
 		goto free;
 	}
 
-	r = v6lookup(f, eh->dst, c);
-	if(r == nil){
-//		print("no route for %I, src %I free\n", eh->dst, eh->src);
+	r = v6lookup(f, eh->dst, eh->src, rh);
+	if(r == nil || (r->type & Rv4) != 0 || (ifc = r->ifc) == nil){
 		ip->stats[OutNoRoutes]++;
-		netlog(f, Logip, "no interface %I\n", eh->dst);
+		netlog(f, Logip, "%I -> %I: no interface\n", eh->src, eh->dst);
 		rv = -1;
 		goto free;
 	}
 
-	ifc = r->ifc;
-	if(r->type & (Rifc|Runi))
+	if(r->type & (Rifc|Runi|Rbcast|Rmulti))
 		gate = eh->dst;
 	else
-	if(r->type & (Rbcast|Rmulti)) {
-		gate = eh->dst;
-		sr = v6lookup(f, eh->src, nil);
-		if(sr != nil && (sr->type & Runi))
-			ifc = sr->ifc;
-	}
-	else
 		gate = r->v6.gate;
 
-	if(!gating)
-		eh->vcf[0] = IP_VER6;
-	eh->ttl = ttl;
-	if(!gating) {
-		eh->vcf[0] |= (tos >> 4);
-		eh->vcf[1] = (tos << 4);
-	}
-
-	if(!canrlock(ifc)) {
+	if(!canrlock(ifc)){
+		ip->stats[OutDiscards]++;
 		goto free;
 	}
-
 	if(waserror()){
 		runlock(ifc);
 		nexterror();
 	}
 
-	if(ifc->m == nil) {
+	if(ifc->m == nil)
 		goto raise;
+
+	if(!gating){
+		eh->vcf[0] = IP_VER6;
+		eh->vcf[0] |= tos >> 4;
+		eh->vcf[1]  = tos << 4;
 	}
+	eh->ttl = ttl;
 
 	/* If we dont need to fragment just send it */
 	medialen = ifc->maxtu - ifc->m->hsize;
 	if(len <= medialen) {
-		hnputs(eh->ploadlen, len-IPV6HDR_LEN);
-		ifc->m->bwrite(ifc, bp, V6, gate);
+		hnputs(eh->ploadlen, len - IP6HDR);
+		ipifcoput(ifc, bp, V6, gate);
 		runlock(ifc);
 		poperror();
 		return 0;
 	}
 
-	if(gating) 
-	if(ifc->reassemble <= 0) {
-
-		/* v6 intermediate nodes are not supposed to fragment pkts;
-		   we fragment if ifc->reassemble is turned on; an exception
-		   needed for nat.
+	if(gating && !ifc->reassemble) {
+		/*
+		 * v6 intermediate nodes are not supposed to fragment pkts;
+		 * we fragment if ifc->reassemble is turned on; an exception
+		 * needed for nat.
 		 */
-
 		ip->stats[OutDiscards]++;
 		icmppkttoobig6(f, ifc, bp);
-		netlog(f, Logip, "%I: gated pkts not fragmented\n", eh->dst);
+		netlog(f, Logip, "%I -> %I: gated pkts not fragmented\n", eh->src, eh->dst);
 		goto raise;
 	}
-		
+
 	/* start v6 fragmentation */
-	uflen = unfraglen(bp, &nexthdr, 1);
+	uflen = unfraglen(bp, &nexthdr, 1, 0);
+	if(uflen < IP6HDR || nexthdr == FH) {
+		ip->stats[FragFails]++;
+		ip->stats[OutDiscards]++;
+		netlog(f, Logip, "%I -> %I: fragment header botch\n", eh->src, eh->dst);
+		goto raise;
+	}
 	if(uflen > medialen) {
 		ip->stats[FragFails]++;
 		ip->stats[OutDiscards]++;
-		netlog(f, Logip, "%I: unfragmentable part too big\n", eh->dst);
+		netlog(f, Logip, "%I -> %I: unfragmentable part too big: %d\n", eh->src, eh->dst, uflen);
 		goto raise;
 	}
 
@@ -260,7 +137,7 @@
 	if(seglen < 8) {
 		ip->stats[FragFails]++;
 		ip->stats[OutDiscards]++;
-		netlog(f, Logip, "%I: seglen < 8\n", eh->dst);
+		netlog(f, Logip, "%I -> %I: seglen < 8\n", eh->src, eh->dst);
 		goto raise;
 	}
 
@@ -271,13 +148,13 @@
 
 	xp = bp;
 	offset = uflen;
-	while (xp != nil && offset && offset >= BLEN(xp)) {
+	while (offset && offset >= BLEN(xp)) {
 		offset -= BLEN(xp);
 		xp = xp->next;
 	}
 	xp->rp += offset;
 
-	fragoff = 0; 
+	fragoff = 0;
 	morefrags = 1;
 
 	for(; fragoff < flen; fragoff += seglen) {
@@ -292,7 +169,7 @@
 		memmove(nb->wp, eh, uflen);
 		nb->wp += uflen;
 
-		hnputs(fraghdr.offsetRM, fragoff); // last 3 bits must be 0
+		hnputs(fraghdr.offsetRM, fragoff); /* last 3 bits must be 0 */
 		fraghdr.offsetRM[1] |= morefrags;
 		memmove(nb->wp, &fraghdr, IP6FHDR);
 		nb->wp += IP6FHDR;
@@ -300,11 +177,11 @@
 		/* Copy data */
 		chunk = seglen;
 		while (chunk) {
-			if(!xp) {
+			if(xp == nil) {
 				ip->stats[OutDiscards]++;
 				ip->stats[FragFails]++;
 				freeblist(nb);
-				netlog(f, Logip, "!xp: chunk in v6%d\n", chunk);
+				netlog(f, Logip, "xp == nil: chunk in v6%d\n", chunk);
 				goto raise;
 			}
 			blklen = chunk;
@@ -316,10 +193,9 @@
 			xp->rp += blklen;
 			chunk -= blklen;
 			if(xp->rp == xp->wp)
-				xp = xp->next; 
+				xp = xp->next;
 		}
-
-		ifc->m->bwrite(ifc, nb, V6, gate);
+		ipifcoput(ifc, nb, V6, gate);
 		ip->stats[FragCreates]++;
 	}
 	ip->stats[FragOKs]++;
@@ -328,7 +204,7 @@
 	runlock(ifc);
 	poperror();
 free:
-	freeblist(bp);	
+	freeblist(bp);
 	return rv;
 }
 
@@ -335,16 +211,10 @@
 void
 ipiput6(Fs *f, Ipifc *ifc, Block *bp)
 {
-	int hl;
-	int hop, tos;
-	uchar proto;
+	int hl, len, hop, tos;
+	IP *ip;
 	Ip6hdr *h;
 	Proto *p;
-	int notforme;
-	int tentative;
-	uchar v6dst[IPaddrlen];
-	IP *ip;
-	Route *r, *sr;
 
 	ip = f->ip;
 	ip->stats[InReceives]++;
@@ -365,40 +235,44 @@
 			return;
 	}
 
-	h = (Ip6hdr *)(bp->rp);
-
-	memmove(&v6dst[0], &(h->dst)[0], IPaddrlen);
-	notforme = ipforme(f, v6dst) == 0;
-	tentative = iptentative(f, v6dst);
-  
-	if(tentative && (h->proto != ICMPv6)) {
-		print("tentative addr, drop\n");
-		freeblist(bp);
-		return;
-	}
-
 	/* Check header version */
-	if(BLKIPVER(bp) != IP_VER6) {
+	h = (Ip6hdr*)bp->rp;
+	if((h->vcf[0] & 0xF0) != IP_VER6) {
 		ip->stats[InHdrErrors]++;
 		netlog(f, Logip, "ip: bad version %ux\n", (h->vcf[0]&0xF0)>>2);
-		freeblist(bp);
+		goto drop;
+	}
+	len = IP6HDR + nhgets(h->ploadlen);
+	if((bp = trimblock(bp, 0, len)) == nil){
+		ip->stats[InHdrErrors]++;
+		netlog(f, Logip, "%I -> %I: bogus packet length: %d\n", h->src, h->dst, len);
 		return;
 	}
+	h = (Ip6hdr*)bp->rp;
 
 	/* route */
-	if(notforme) {
-		if(!ip->iprouting){
-			freeb(bp);
-			return;
+	if(!ipforme(f, h->dst)) {
+		Route *r;
+		Routehint rh;
+		Ipifc *nifc;
+
+		if(!ip->iprouting)
+			goto drop;
+
+		/* don't forward to link-local destinations */
+		if(islinklocal(h->dst) ||
+		   (isv6mcast(h->dst) && (h->dst[1]&0xF) <= Link_local_scop)){
+			ip->stats[OutDiscards]++;
+			goto drop;
 		}
+			
 		/* don't forward to source's network */
-		sr = v6lookup(f, h->src, nil);
-		r = v6lookup(f, h->dst, nil);
-
-		if(r == nil || sr == r){
+		rh.r = nil;
+		r  = v6lookup(f, h->dst, h->src, &rh);
+		if(r == nil || (nifc = r->ifc) == nil || (r->type & Rv4) != 0
+		|| (nifc == ifc && !ifc->reflect)){
 			ip->stats[OutDiscards]++;
-			freeblist(bp);
-			return;
+			goto drop;
 		}
 
 		/* don't forward if packet has timed out */
@@ -406,33 +280,29 @@
 		if(hop < 1) {
 			ip->stats[InHdrErrors]++;
 			icmpttlexceeded6(f, ifc, bp);
-			freeblist(bp);
-			return;
+			goto drop;
 		}
 
 		/* process headers & reassemble if the interface expects it */
-		bp = procxtns(ip, bp, r->ifc->reassemble);
-
+		bp = procxtns(ip, bp, nifc->reassemble);
 		if(bp == nil)
 			return;
 
 		ip->stats[ForwDatagrams]++;
-		h = (Ip6hdr *) (bp->rp);
-		tos = IPV6CLASS(h);
+		h = (Ip6hdr*)bp->rp;
+		tos = (h->vcf[0]&0x0F)<<2 | (h->vcf[1]&0xF0)>>2;
 		hop = h->ttl;
-		ipoput6(f, bp, 1, hop-1, tos, nil);
+		ipoput6(f, bp, 1, hop-1, tos, &rh);
 		return;
 	}
 
 	/* reassemble & process headers if needed */
 	bp = procxtns(ip, bp, 1);
-
 	if(bp == nil)
 		return;
 
-	h = (Ip6hdr *) (bp->rp);
-	proto = h->proto;
-	p = Fsrcvpcol(f, proto);
+	h = (Ip6hdr*)bp->rp;
+	p = Fsrcvpcol(f, h->proto);
 	if(p != nil && p->rcv != nil) {
 		ip->stats[InDelivers]++;
 		(*p->rcv)(p, ifc, bp);
@@ -441,6 +311,7 @@
 
 	ip->stats[InDiscards]++;
 	ip->stats[InUnknownProtos]++;
+drop:
 	freeblist(bp);
 }
 
@@ -447,20 +318,20 @@
 /*
  * ipfragfree6 - copied from ipfragfree4 - assume hold fraglock6
  */
-void
+static void
 ipfragfree6(IP *ip, Fragment6 *frag)
 {
 	Fragment6 *fl, **l;
 
-	if(frag->blist)
+	if(frag->blist != nil)
 		freeblist(frag->blist);
-
-	memset(frag->src, 0, IPaddrlen);
-	frag->id = 0;
 	frag->blist = nil;
+	frag->id = 0;
+	memset(frag->src, 0, IPaddrlen);
+	memset(frag->dst, 0, IPaddrlen);
 
 	l = &ip->flisthead6;
-	for(fl = *l; fl; fl = fl->next) {
+	for(fl = *l; fl != nil; fl = fl->next) {
 		if(fl == frag) {
 			*l = frag->next;
 			break;
@@ -470,13 +341,12 @@
 
 	frag->next = ip->fragfree6;
 	ip->fragfree6 = frag;
-
 }
 
 /*
  * ipfragallo6 - copied from ipfragalloc4
  */
-Fragment6*
+static Fragment6*
 ipfragallo6(IP *ip)
 {
 	Fragment6 *f;
@@ -483,7 +353,7 @@
 
 	while(ip->fragfree6 == nil) {
 		/* free last entry on fraglist */
-		for(f = ip->flisthead6; f->next; f = f->next)
+		for(f = ip->flisthead6; f->next != nil; f = f->next)
 			;
 		ipfragfree6(ip, f);
 	}
@@ -497,108 +367,109 @@
 }
 
 static Block*
-procxtns(IP *ip, Block *bp, int doreasm) {
-
-	int offset;
+procxtns(IP *ip, Block *bp, int doreasm)
+{
 	uchar proto;
-	Ip6hdr *h;
+	int offset;
 
-	h = (Ip6hdr *) (bp->rp);
-	offset = unfraglen(bp, &proto, 0);
-
-	if((proto == FH) && (doreasm != 0)) {
-		bp = ip6reassemble(ip, offset, bp, h);
-		if(bp == nil) 
-			return nil; 
-		offset = unfraglen(bp, &proto, 0);
+	offset = unfraglen(bp, &proto, 0, doreasm);
+	if(offset >= IP6HDR && proto == FH && doreasm) {
+		bp = ip6reassemble(ip, offset, bp);
+		if(bp == nil)
+			return nil;
+		offset = unfraglen(bp, &proto, 0, 0);
+		if(proto == FH)
+			offset = -1;
 	}
-
-	if(proto == DOH || offset > IP6HDR) 
+	if(offset < IP6HDR){
+		ip->stats[InHdrErrors]++;
+		ip->stats[InDiscards]++;
+		freeblist(bp);
+		return nil;
+	}
+	if(proto == DOH || offset > IP6HDR)
 		bp = procopts(bp);
-
 	return bp;
 }
 
-
-/*	returns length of "Unfragmentable part", i.e., sum of lengths of ipv6 hdr,
- *	hop-by-hop & routing headers if present; *nexthdr is set to nexthdr value
- *	of the last header in the "Unfragmentable part"; if setfh != 0, nexthdr
- *	field of the last header in the "Unfragmentable part" is set to FH.
+/*
+ * returns length of "Unfragmentable part", i.e., sum of lengths of ipv6 hdr,
+ * hop-by-hop & routing headers if present; *nexthdr is set to nexthdr value
+ * of the last header in the "Unfragmentable part"; if setfh != 0, nexthdr
+ * field of the last header in the "Unfragmentable part" is set to FH.
+ * When the last header is a fragment header and popfh != 0 then set
+ * the nexthdr value of the previous header to the nexthdr value of the
+ * fragment header. returns -1 on error.
  */
-int
-unfraglen(Block *bp, uchar *nexthdr, int setfh)
+static int
+unfraglen(Block *bp, uchar *nexthdr, int setfh, int popfh)
 {
-	uchar *p, *q;
-	int ufl, hs;
+	uchar *e, *p, *q;
 
+	e = bp->wp;
 	p = bp->rp;
-	q = p+6;	/* proto, = p+sizeof(Ip6hdr.vcf)+sizeof(Ip6hdr.ploadlen) */
+	q = p+6;   /* proto, = p+sizeof(Ip6hdr.vcf)+sizeof(Ip6hdr.ploadlen) */
 	*nexthdr = *q;
-	ufl = IP6HDR;
-	p += ufl;
-
-	for(;;) {
-		if(*nexthdr == HBH || *nexthdr == RH) {
-			*nexthdr = *p;
-			hs = ((int)*(p+1) + 1) * 8;
-			ufl += hs;
-			q = p;
-			p += hs;
-		}
-		else
-			break;
+	p += IP6HDR;
+	while(*nexthdr == HBH || *nexthdr == RH){
+		if(p+2 > e)
+			return -1;
+		q = p;
+		*nexthdr = *q;
+		p += ((int)p[1] + 1) * 8;
 	}
-
-	if(*nexthdr == FH)
-		*q = *p;
-
-	if(setfh)
+	if(p > e)
+		return -1;
+	if(*nexthdr == FH){
+		if(p+IP6FHDR > e || *p == FH)
+			return -1;
+		if(popfh)
+			*q = *p;
+	} else if(setfh)
 		*q = FH;
-
-	return ufl;
+	return p - bp->rp;
 }
 
-Block*
+static Block*
 procopts(Block *bp)
 {
 	return bp;
 }
 
-Block*
-ip6reassemble(IP* ip, int uflen, Block* bp, Ip6hdr* ih)
+static Block*
+ip6reassemble(IP* ip, int uflen, Block* bp)
 {
-
-	int fend, offset;
+	int offset, ovlap, fragsize, len;
+	uchar src[IPaddrlen], dst[IPaddrlen];
 	uint id;
-	Fragment6 *f, *fnext;
+	Block *bl, **l, *prev;
 	Fraghdr6 *fraghdr;
-	uchar src[IPaddrlen], dst[IPaddrlen];
-	Block *bl, **l, *last, *prev;
-	int ovlap, len, fragsize, pktposn;
+	Fragment6 *f, *fnext;
+	Ipfrag *fp, *fq;
+	Ip6hdr* ih;
 
-	fraghdr = (Fraghdr6 *) (bp->rp + uflen);
-	memmove(src, ih->src, IPaddrlen);
-	memmove(dst, ih->dst, IPaddrlen);
-	id = nhgetl(fraghdr->id);
-	offset = nhgets(fraghdr->offsetRM) & ~7;
-
 	/*
-	 *  block lists are too hard, pullupblock into a single block
+	 *  block lists are too hard, concatblock into a single block
 	 */
-	if(bp->next){
-		bp = pullupblock(bp, blocklen(bp));
-		ih = (Ip6hdr *)(bp->rp);
-	}
+	bp = concatblock(bp);
 
+	ih = (Ip6hdr*)bp->rp;
+	fraghdr = (Fraghdr6*)(bp->rp + uflen);
+	id = nhgetl(fraghdr->id);
+	offset = nhgets(fraghdr->offsetRM);
+	fragsize = BLEN(bp) - uflen - IP6FHDR;
 
+	memmove(src, ih->src, IPaddrlen);
+	memmove(dst, ih->dst, IPaddrlen);
+
 	qlock(&ip->fraglock6);
 
 	/*
 	 *  find a reassembly queue for this fragment
 	 */
-	for(f = ip->flisthead6; f; f = fnext){
+	for(f = ip->flisthead6; f != nil; f = fnext){
 		fnext = f->next;
-		if(ipcmp(f->src, src)==0 && ipcmp(f->dst, dst)==0 && f->id == id)
+		if(f->id == id && ipcmp(f->src, src) == 0 && ipcmp(f->dst, dst) == 0)
 			break;
 		if(f->age < NOW){
 			ip->stats[ReasmTimeout]++;
@@ -606,28 +477,35 @@
 		}
 	}
 
-
 	/*
 	 *  if this isn't a fragmented packet, accept it
 	 *  and get rid of any fragments that might go
 	 *  with it.
 	 */
-	if(nhgets(fraghdr->offsetRM)==0) {	// first frag is also the last
+	if((offset & ~6) == 0) {	/* 1st frag is also last */
 		if(f != nil) {
-			ipfragfree6(ip, f);
 			ip->stats[ReasmFails]++;
+			ipfragfree6(ip, f);
 		}
 		qunlock(&ip->fraglock6);
+
+		/* get rid of frag header */
+		memmove(bp->rp + IP6FHDR, bp->rp, uflen);
+		bp->rp += IP6FHDR;
+		ih = (Ip6hdr*)bp->rp;
+		hnputs(ih->ploadlen, BLEN(bp)-IP6HDR);
+
 		return bp;
 	}
 
-	if(bp->base+sizeof(Ipfrag) >= bp->rp){
-		bp = padblock(bp, sizeof(Ipfrag));
-		bp->rp += sizeof(Ipfrag);
+	if(bp->base+IPFRAGSZ > bp->rp){
+		bp = padblock(bp, IPFRAGSZ);
+		bp->rp += IPFRAGSZ;
 	}
 
-	BKFG(bp)->foff = offset;
-	BKFG(bp)->flen = nhgets(ih->ploadlen) + IP6HDR - uflen - IP6FHDR;
+	fp = (Ipfrag*)bp->base;
+	fp->foff = offset & ~7;
+	fp->flen = fragsize;
 
 	/* First fragment allocates a reassembly queue */
 	if(f == nil) {
@@ -638,8 +516,9 @@
 
 		f->blist = bp;
 
-		qunlock(&ip->fraglock6);
 		ip->stats[ReasmReqds]++;
+		qunlock(&ip->fraglock6);
+
 		return nil;
 	}
 
@@ -649,7 +528,7 @@
 	prev = nil;
 	l = &f->blist;
 	bl = f->blist;
-	while(bl != nil && BKFG(bp)->foff > BKFG(bl)->foff) {
+	while(bl != nil && fp->foff > ((Ipfrag*)bl->base)->foff) {
 		prev = bl;
 		l = &bl->next;
 		bl = bl->next;
@@ -656,15 +535,16 @@
 	}
 
 	/* Check overlap of a previous fragment - trim away as necessary */
-	if(prev) {
-		ovlap = BKFG(prev)->foff + BKFG(prev)->flen - BKFG(bp)->foff;
+	if(prev != nil) {
+		fq = (Ipfrag*)prev->base;
+		ovlap = fq->foff + fq->flen - fp->foff;
 		if(ovlap > 0) {
-			if(ovlap >= BKFG(bp)->flen) {
-				freeblist(bp);
+			if(ovlap >= fp->flen) {
 				qunlock(&ip->fraglock6);
+				freeb(bp);
 				return nil;
 			}
-			BKFG(prev)->flen -= ovlap;
+			fq->flen -= ovlap;
 		}
 	}
 
@@ -673,29 +553,27 @@
 	*l = bp;
 
 	/* Check to see if succeeding segments overlap */
-	if(bp->next) {
+	if(bp->next != nil) {
 		l = &bp->next;
-		fend = BKFG(bp)->foff + BKFG(bp)->flen;
+		offset = fp->foff + fp->flen;
 
 		/* Take completely covered segments out */
-
-		while(*l) {
-			ovlap = fend - BKFG(*l)->foff;
-
-			if(ovlap <= 0) 
-				break; 
-			if(ovlap < BKFG(*l)->flen) {
-				BKFG(*l)->flen -= ovlap;
-				BKFG(*l)->foff += ovlap;
-				/* move up ih hdrs */
-				memmove((*l)->rp + ovlap, (*l)->rp, uflen);
-				(*l)->rp += ovlap;
+		while((bl = *l) != nil) {
+			fq = (Ipfrag*)bl->base;
+			ovlap = offset - fq->foff;
+			if(ovlap <= 0)
 				break;
+			if(ovlap < fq->flen) {
+				/* move up ip and frag header */
+				memmove(bl->rp + ovlap, bl->rp, BLEN(bl) - fq->flen);
+				bl->rp += ovlap;
+				fq->flen -= ovlap;
+				fq->foff += ovlap;
+				break;
 			}
-			last = (*l)->next;
-			(*l)->next = nil;
-			freeblist(*l);
-			*l = last;
+			*l = bl->next;
+			bl->next = nil;
+			freeb(bl);
 		}
 	}
 
@@ -703,45 +581,55 @@
 	 *  look for a complete packet.  if we get to a fragment
 	 *  with the trailing bit of fraghdr->offsetRM[1] set, we're done.
 	 */
-	pktposn = 0;
-	for(bl = f->blist; bl; bl = bl->next) {
-		if(BKFG(bl)->foff != pktposn)
+	offset = 0;
+	for(bl = f->blist; bl != nil; bl = bl->next, offset += fp->flen) {
+		fp = (Ipfrag*)bl->base;
+		if(fp->foff != offset)
 			break;
-	
-		fraghdr = (Fraghdr6 *) (bl->rp + uflen);
-		if((fraghdr->offsetRM[1] & 1) == 0) {
 
-			bl = f->blist;
+		fraghdr = (Fraghdr6*)(bl->wp - fp->flen - IP6FHDR);
+		if(fraghdr->offsetRM[1] & 1)
+			continue;
 
-			/* get rid of frag header in first fragment */
+		bl = f->blist;
+		fq = (Ipfrag*)bl->base;
 
-			memmove(bl->rp + IP6FHDR, bl->rp, uflen);
-			bl->rp += IP6FHDR;
-			len = nhgets(((Ip6hdr*)(bl->rp))->ploadlen) - IP6FHDR;
-			bl->wp = bl->rp + len + IP6HDR;
+		/* get rid of frag header in first fragment */
+		memmove(bl->rp + IP6FHDR, bl->rp, BLEN(bl) - fq->flen - IP6FHDR);
+		bl->rp += IP6FHDR;
+		len = BLEN(bl);
 
-			/* Pullup all the fragment headers and
-			 * return a complete packet
-			 */
-			for(bl = bl->next; bl; bl = bl->next) {
-				fragsize = BKFG(bl)->flen;
-				len += fragsize;
-				bl->rp += uflen + IP6FHDR;
-				bl->wp = bl->rp + fragsize;
-			}
+		/*
+		 * Pullup all the fragment headers and
+		 * return a complete packet
+		 */
+		for(bl = bl->next; bl != nil && len < IP_MAX; bl = bl->next) {
+			fq = (Ipfrag*)bl->base;
+			fragsize = fq->flen;
+			bl->rp = bl->wp - fragsize;
+			len += fragsize;
+		}
 
-			bl = f->blist;
-			f->blist = nil;
+		if(len >= IP_MAX){
 			ipfragfree6(ip, f);
-			ih = (Ip6hdr*)(bl->rp);
-			hnputs(ih->ploadlen, len);
+			ip->stats[ReasmFails]++;
 			qunlock(&ip->fraglock6);
-			ip->stats[ReasmOKs]++;
-			return bl;		
+
+			return nil;
 		}
-		pktposn += BKFG(bl)->flen;
+
+		bl = f->blist;
+		f->blist = nil;
+		ipfragfree6(ip, f);
+
+		ih = (Ip6hdr*)bl->rp;
+		hnputs(ih->ploadlen, len-IP6HDR);
+
+		ip->stats[ReasmOKs]++;
+		qunlock(&ip->fraglock6);
+
+		return bl;
 	}
 	qunlock(&ip->fraglock6);
 	return nil;
 }
-
--- a/os/ip/ipv6.h
+++ b/os/ip/ipv6.h
@@ -1,53 +1,31 @@
-#undef MIN
-#define MIN(a, b) ((a) <= (b) ? (a) : (b))
-
-/* rfc 3513 defines the address prefices */
+/*
+ * Internet Protocol Version 6
+ *
+ * rfc2460 defines the protocol, rfc2461 neighbour discovery, and
+ * rfc2462 address autoconfiguration.  rfc4443 defines ICMPv6; was rfc2463.
+ * rfc4291 defines the address architecture (including prefixes); was rfc3513.
+ * rfc4007 defines the scoped address architecture.
+ *
+ * global unicast is anything but unspecified (::), loopback (::1),
+ * multicast (ff00::/8), and link-local unicast (fe80::/10).
+ *
+ * site-local (fec0::/10) is now deprecated, originally by rfc3879.
+ *
+ * Unique Local IPv6 Unicast Addresses are defined by rfc4193.
+ * prefix is fc00::/7, scope is global, routing is limited to roughly a site.
+ */
 #define isv6mcast(addr)	  ((addr)[0] == 0xff)
 #define islinklocal(addr) ((addr)[0] == 0xfe && ((addr)[1] & 0xc0) == 0x80)
-#define issitelocal(addr) ((addr)[0] == 0xfe && ((addr)[1] & 0xc0) == 0xc0)
-#define isv6global(addr) (((addr)[0] & 0xe0) == 0x20)
 
-#define optexsts(np) (nhgets((np)->ploadlen) > 24)
-#define issmcast(addr) (memcmp((addr), v6solicitednode, 13) == 0)
+#define optexsts(np)	(nhgets((np)->ploadlen) > 24)
+#define issmcast(addr)	(memcmp((addr), v6solicitednode, 13) == 0)
 
-/* from RFC 2460 */
+#ifndef MIN
+#define MIN(a, b) ((a) <= (b)? (a): (b))
+#endif
 
-typedef struct Ip6hdr     Ip6hdr;
-typedef struct Opthdr     Opthdr;
-typedef struct Routinghdr Routinghdr;
-typedef struct Fraghdr6    Fraghdr6;
-
-struct Ip6hdr {
-	uchar vcf[4];       	// version:4, traffic class:8, flow label:20
-	uchar ploadlen[2];  	// payload length: packet length - 40
-	uchar proto;		// next header type
-	uchar ttl;          	// hop limit
-	uchar src[IPaddrlen];
-	uchar dst[IPaddrlen];
-};
-
-struct Opthdr {
-	uchar nexthdr;
-	uchar len;
-};
-
-struct Routinghdr {
-	uchar nexthdr;
-	uchar len;
-	uchar rtetype;
-	uchar segrem;
-};
-
-struct Fraghdr6 {
-	uchar nexthdr;
-	uchar res;
-	uchar offsetRM[2];	// Offset, Res, M flag
-	uchar id[4];
-};
-
-
-enum {			/* Header Types */
-	HBH		= 0,	//?
+enum {				/* Header Types */
+	HBH		= 0,	/* hop-by-hop multicast routing protocol */
 	ICMP		= 1,
 	IGMP		= 2,
 	GGP		= 3,
@@ -72,89 +50,113 @@
 	Maxhdrtype	= 256,
 };
 
-
 enum {
-	//	multicast flgs and scop
+	/* multicast flags and scopes */
 
-	well_known_flg				= 0,
-	transient_flg				= 1,
+//	Well_known_flg	= 0,
+//	Transient_flg	= 1,
 
-	node_local_scop 			= 1,
-	link_local_scop 			= 2,
-	site_local_scop 			= 5,
-	org_local_scop				= 8,
-	global_scop				= 14,
+//	Interface_local_scop = 1,
+	Link_local_scop	= 2,
+//	Site_local_scop	= 5,
+//	Org_local_scop	= 8,
+	Global_scop	= 14,
 
-	//	various prefix lengths
+	/* various prefix lengths */
+	SOLN_PREF_LEN	= 13,
 
-	SOLN_PREF_LEN				= 13,
+	/* icmpv6 unreachability codes */
+	Icmp6_no_route		= 0,
+	Icmp6_ad_prohib		= 1,
+	Icmp6_out_src_scope	= 2,
+	Icmp6_adr_unreach	= 3,
+	Icmp6_port_unreach	= 4,
+	Icmp6_gress_src_fail	= 5,
+	Icmp6_rej_route		= 6,
+	Icmp6_unknown		= 7,  /* our own invention for internal use */
 
-	//	icmpv6 unreach codes
-	icmp6_no_route				= 0,
-	icmp6_ad_prohib				= 1,
-	icmp6_unassigned			= 2,
-	icmp6_adr_unreach			= 3,
-	icmp6_port_unreach			= 4,
-	icmp6_unkn_code				= 5,
+	/* various flags & constants */
+	v6MINTU		= 1280,
+	HOP_LIMIT	= 255,
+	IP6HDR		= 40,		/* sizeof(Ip6hdr) = 8 + 2*16 */
+	IP6FHDR		= 8, 		/* sizeof(Fraghdr6) */
 
-	// 	various flags & constants
+	/* option types */
 
-	v6MINTU      				= 1280,
-	HOP_LIMIT    				= 255,
-	ETHERHDR_LEN 				= 14,
-	IPV6HDR_LEN  				= 40,
-	IPV4HDR_LEN  				= 20,
+	/* neighbour discovery */
+	SRC_LLADDR	= 1,
+	TARGET_LLADDR	= 2,
+	PREFIX_INFO	= 3,
+	REDIR_HEADER	= 4,
+	MTU_OPTION	= 5,
+	/* new since rfc2461; see iana.org/assignments/icmpv6-parameters */
+	V6nd_home	= 8,
+	V6nd_srcaddrs	= 9,		/* rfc3122 */
+	V6nd_ip		= 17,
+	/* /lib/rfc/drafts/draft-jeong-dnsop-ipv6-dns-discovery-12.txt */
+	V6nd_rdns	= 25,
+	/* plan 9 extensions */
+	V6nd_9fs	= 250,
+	V6nd_9auth	= 251,
 
-	// 	option types
+	SRC_UNSPEC	= 0,
+	SRC_UNI		= 1,
+	TARG_UNI	= 2,
+	TARG_MULTI	= 3,
 
-	SRC_LLADDRESS    			= 1,
-	TARGET_LLADDRESS 			= 2,
-	PREFIX_INFO      			= 3,
-	REDIR_HEADER     			= 4,
-	MTU_OPTION       			= 5,
+	Tunitent	= 1,
+	Tuniproxy	= 2,
+	Tunirany	= 3,
 
-	SRC_UNSPEC  				= 0,
-	SRC_UNI     				= 1,
-	TARG_UNI    				= 2,
-	TARG_MULTI  				= 3,
+	/* Node constants */
+	MAX_MULTICAST_SOLICIT	= 3,
+	RETRANS_TIMER		= 1000,
+};
 
-	t_unitent   				= 1,
-	t_uniproxy  				= 2,
-	t_unirany   				= 3,
+typedef struct Ip6hdr	Ip6hdr;
+typedef struct Opthdr	Opthdr;
+typedef struct Routinghdr Routinghdr;
+typedef struct Fraghdr6	Fraghdr6;
 
-	//	Router constants (all times in milliseconds)
+/* we do this in case there's padding at the end of Ip6hdr */
+#define IPV6HDR \
+	uchar	vcf[4];		/* version:4, traffic class:8, flow label:20 */\
+	uchar	ploadlen[2];	/* payload length: packet length - 40 */ \
+	uchar	proto;		/* next header type */ \
+	uchar	ttl;		/* hop limit */ \
+	uchar	src[IPaddrlen]; \
+	uchar	dst[IPaddrlen]
 
-	MAX_INITIAL_RTR_ADVERT_INTERVAL 	= 16000,
-	MAX_INITIAL_RTR_ADVERTISEMENTS  	= 3,
-	MAX_FINAL_RTR_ADVERTISEMENTS    	= 3,
-	MIN_DELAY_BETWEEN_RAS 			= 3000,
-	MAX_RA_DELAY_TIME     			= 500,
+struct	Ip6hdr {
+	IPV6HDR;
+	uchar	payload[];
+};
 
-	//	Host constants
+struct	Opthdr {		/* unused */
+	uchar	nexthdr;
+	uchar	len;
+};
 
-	MAX_RTR_SOLICITATION_DELAY 		= 1000,
-	RTR_SOLICITATION_INTERVAL  		= 4000,
-	MAX_RTR_SOLICITATIONS      		= 3,
+/*
+ * Beware routing header type 0 (loose source routing); see
+ * http://www.secdev.org/conf/IPv6_RH_security-csw07.pdf.
+ * Type 1 is unused.  Type 2 is for MIPv6 (mobile IPv6) filtering
+ * against type 0 header.
+ */
+struct	Routinghdr {		/* unused */
+	uchar	nexthdr;
+	uchar	len;
+	uchar	rtetype;
+	uchar	segrem;
+};
 
-	//	Node constants
-
-	MAX_MULTICAST_SOLICIT   		= 3,
-	MAX_UNICAST_SOLICIT     		= 3,
-	MAX_ANYCAST_DELAY_TIME  		= 1000,
-	MAX_NEIGHBOR_ADVERTISEMENT 		= 3,
-	REACHABLE_TIME 				= 30000,
-	RETRANS_TIMER  				= 1000,
-	DELAY_FIRST_PROBE_TIME 			= 5000,
-
+struct	Fraghdr6 {
+	uchar	nexthdr;
+	uchar	res;
+	uchar	offsetRM[2];	/* Offset, Res, M flag */
+	uchar	id[4];
 };
 
-extern void ipv62smcast(uchar *, uchar *);
-extern void icmpns(Fs *f, uchar* src, int suni, uchar* targ, int tuni, uchar* mac);
-extern void icmpna(Fs *f, uchar* src, uchar* dst, uchar* targ, uchar* mac, uchar flags);
-extern void icmpttlexceeded6(Fs *f, Ipifc *ifc, Block *bp);
-extern void icmppkttoobig6(Fs *f, Ipifc *ifc, Block *bp);
-extern void icmphostunr(Fs *f, Ipifc *ifc, Block *bp, int code, int free);
-
 extern uchar v6allnodesN[IPaddrlen];
 extern uchar v6allnodesL[IPaddrlen];
 extern uchar v6allroutersN[IPaddrlen];
@@ -161,23 +163,16 @@
 extern uchar v6allroutersL[IPaddrlen];
 extern uchar v6allnodesNmask[IPaddrlen];
 extern uchar v6allnodesLmask[IPaddrlen];
-extern uchar v6allroutersS[IPaddrlen];
 extern uchar v6solicitednode[IPaddrlen];
 extern uchar v6solicitednodemask[IPaddrlen];
 extern uchar v6Unspecified[IPaddrlen];
 extern uchar v6loopback[IPaddrlen];
-extern uchar v6loopbackmask[IPaddrlen];
 extern uchar v6linklocal[IPaddrlen];
 extern uchar v6linklocalmask[IPaddrlen];
-extern uchar v6sitelocal[IPaddrlen];
-extern uchar v6sitelocalmask[IPaddrlen];
-extern uchar v6glunicast[IPaddrlen];
 extern uchar v6multicast[IPaddrlen];
 extern uchar v6multicastmask[IPaddrlen];
 
 extern int v6llpreflen;
-extern int v6slpreflen;
-extern int v6lbpreflen;
 extern int v6mcpreflen;
 extern int v6snpreflen;
 extern int v6aNpreflen;
@@ -184,3 +179,10 @@
 extern int v6aLpreflen;
 
 extern int ReTransTimer;
+
+void ipv62smcast(uchar *, uchar *);
+void icmpns(Fs *f, uchar* src, int suni, uchar* targ, int tuni, uchar* mac);
+void icmpna(Fs *f, uchar* src, uchar* dst, uchar* targ, uchar* mac, uchar flags);
+void icmpttlexceeded6(Fs *f, Ipifc *ifc, Block *bp);
+void icmppkttoobig6(Fs *f, Ipifc *ifc, Block *bp);
+void icmphostunr6(Fs *f, Ipifc *ifc, Block *bp, int code, int tome);
--- a/os/ip/kernel.h
+++ /dev/null
@@ -1,10 +1,0 @@
-extern	int	kclose(int);
-extern	int	kdial(char*, char*, char*, int*);
-extern	int	kannounce(char*, char*);
-extern	void	kerrstr(char*);
-extern	void	kgerrstr(char*);
-extern	int	kopen(char*, int);
-extern	long	kread(int, void*, long);
-extern	long	kseek(int, vlong, int);
-extern	long	kwrite(int, void*, long);
-extern	void	kwerrstr(char *, ...);
--- a/os/ip/loopbackmedium.c
+++ b/os/ip/loopbackmedium.c
@@ -28,13 +28,12 @@
 	LB *lb;
 
 	lb = smalloc(sizeof(*lb));
+	lb->readp = (void*)-1;
 	lb->f = ifc->conv->p->f;
-	/* TO DO: make queue size a function of kernel memory */
-	lb->q = qopen(128*1024, Qmsg, nil, nil);
+	lb->q = qopen(1024*1024, Qmsg, nil, nil);
 	ifc->arg = lb;
-	ifc->mbps = 1000;
 
-	kproc("loopbackread", loopbackread, ifc, 0);
+	kproc("loopbackread", loopbackread, ifc);
 
 }
 
@@ -43,13 +42,29 @@
 {
 	LB *lb = ifc->arg;
 
-	if(lb->readp)
+	while(waserror())
+		;
+
+	/* wait for reader to start */
+	while(lb->readp == (void*)-1)
+		tsleep(&up->sleep, return0, 0, 300);
+		
+	if(lb->readp != nil)
 		postnote(lb->readp, 1, "unbind", 0);
 
+	poperror();
+
+	wunlock(ifc);
+	while(waserror())
+		;
+
 	/* wait for reader to die */
-	while(lb->readp != 0)
+	while(lb->readp != nil)
 		tsleep(&up->sleep, return0, 0, 300);
 
+	poperror();
+	wlock(ifc);
+
 	/* clean up */
 	qfree(lb->q);
 	free(lb);
@@ -76,23 +91,14 @@
 	ifc = a;
 	lb = ifc->arg;
 	lb->readp = up;	/* hide identity under a rock for unbind */
-	if(waserror()){
-		lb->readp = 0;
-		pexit("hangup", 1);
-	}
-	for(;;){
-		bp = qbread(lb->q, Maxtu);
-		if(bp == nil)
-			continue;
-		ifc->in++;
-		if(!canrlock(ifc)){
-			freeb(bp);
-			continue;
-		}
+	if(!waserror())
+	while((bp = qbread(lb->q, Maxtu)) != nil){
+		rlock(ifc);
 		if(waserror()){
 			runlock(ifc);
 			nexterror();
 		}
+		ifc->in++;
 		if(ifc->lifc == nil)
 			freeb(bp);
 		else
@@ -100,6 +106,8 @@
 		runlock(ifc);
 		poperror();
 	}
+	lb->readp = nil;
+	pexit("hangup", 1);
 }
 
 Medium loopbackmedium =
--- a/os/ip/nat.c
+++ /dev/null
@@ -1,549 +1,0 @@
-#include		"u.h"
-#include		"../port/lib.h"
-#include		"mem.h"
-#include		"dat.h"
-#include		"fns.h"
-#include		"../port/error.h"
-
-#include		"ip.h"
-
-typedef struct NatProto NatProto;
-typedef struct NatAddr NatAddr;
-
-/*
- * NAT.
- */
-struct Nat
-{
-	uchar	src[IPv4addrlen];	/* Source address */
-	uchar	sport[2];		/* Source port */
-	uchar	lport[2];		/* Local port */
-	uchar	proto;			/* Protocol */
-	long	time;			/* Time */
-	Conv	*conv;			/* Conversation */
-	Nat	*next;			/* Next node */
-};
-
-/*
- * Protocol list.
- */
-struct NatProto
-{
-	uchar	proto;			/* Protocol */
-	int	sport;			/* Source port offset */
-	int	dport;			/* Destination port offset */
-	int	cksum;			/* Checksum offset */
-	int	timeout;		/* Timeout */
-};
-
-/*
- * Address list.
- */
-struct NatAddr
-{
-	uchar	src[IPaddrlen];		/* Source address */
-	uchar	mask[IPaddrlen];	/* Source address mask */
-	uchar	net[IPaddrlen];		/* Source network address */
-	Iplifc	*dst;			/* Destination interface */
-	NatAddr	*next;			/* Next node */
-};
-
-static Nat *head = nil;
-static NatAddr *addrhead = nil;
-
-/*
- * Timeouts for ICMP, TCP and UDP are respectively confirmed
- * in RFC 5508, RFC 5382 and RFC 4787.
- */
-static NatProto prototab[] =
-{
-	{ 1, 4, 4, 2, 60*1000 },		/* ICMP */
-	{ 6, 0, 2, 16, (2*60*60+4*60)*1000 },	/* TCP */
-	{ 17, 0, 2, 6, 2*60*1000 },		/* UDP */
-	{ 40, 6, 8, 0, 10*30*1000 },		/* IL */
-	{ 255, 0, 2, 6, 2*60*1000 },		/* RUDP */
-	{ 0 }
-};
-
-NatProto*	parseproto(uchar);
-void		natprepend(Nat*);
-Nat*		natexistout(uchar*, uchar, uchar*);
-Nat*		natexistin(uchar, uchar*);
-int		natdelete(uchar*, uchar, uchar*);
-int		natpurge(uchar);
-Nat*		natlport(Proto*, Ip4hdr*, uchar*);
-int		natgc(uchar);
-void		checksumadjust(uchar*, uchar*, int, uchar*, int);
-Iplifc*		natonifco(Ipifc*, Ip4hdr*);
-Iplifc*		natonifci(Ipifc*);
-void		nataddrprepend(NatAddr*);
-NatAddr*	nataddrexist(uchar*, uchar*, Iplifc*);
-int		addnataddr(uchar*, uchar*, Iplifc*);
-int		removenataddr(uchar*, uchar*, Iplifc*);
-void		shownataddr(void);
-void		flushnataddr(void);
-
-/*
- * Return protocol attributes if known.
- */
-NatProto*
-parseproto(uchar proto)
-{
-	NatProto *np;
-
-	for(np = prototab; np->proto; np++)
-		if(proto == np->proto)
-			return np;
-
-	return nil;
-}
-
-/*
- * Output NAT.
- * Return -1 if the packet must be NATed but the protocol is unknown.
- */
-int
-nato(Block *b, Ipifc *ifc, Fs *f)
-{
-	Nat *n;		/* NAT table */
-	NatProto *np;	/* Protocol list */
-	Iplifc *lifc;	/* Logical interface */
-	Ip4hdr *h;	/* Source IPv4 header */
-	Proto *p;	/* New protocol */
-	uchar *laddr;	/* Local address on Iplifc */
-	uchar *sport;	/* Source port */
-	uchar *cksum;	/* Source checksum */
-
-	h = (Ip4hdr*)(b->rp);
-
-	/* Verify on which logical interface NAT is enabled,
-           and if this source address must be translated */
-	if((lifc=natonifco(ifc, h)) == nil)
-		return 0;
-
-	laddr = lifc->local+IPv4off;
-	p = Fsrcvpcolx(f, h->proto);
-
-	if(ip4cmp(h->src, laddr) != 0){
-		if((np=parseproto(h->proto)) != nil){
-			/* Protocol layer */
-			sport = (b->rp)+sizeof(Ip4hdr)+np->sport;
-			cksum = (b->rp)+sizeof(Ip4hdr)+np->cksum;
-			if((n = natlport(p, h, sport)) == nil)
-				return -1;
-			memmove(sport, n->lport, 2);
-			checksumadjust(cksum, n->sport, 2, n->lport, 2);
-			if(np->proto != 1)
-				/* ICMP checksum doesn't include IP header */
-				checksumadjust(cksum, n->src, IPv4addrlen,
-					laddr, IPv4addrlen);
-			/* IP layer */
-			ip4move(h->src, laddr);
-			checksumadjust(h->cksum, n->src, IPv4addrlen,
-				h->src, IPv4addrlen);
-			return 0;
-		}else{
-			netlog(f, Lognat, "nat: unknown protocol %d\n", h->proto);
-			return -1;
-		}
-	}
-
-	return 0;
-}
-
-/*
- * Input NAT.
- */
-void
-nati(Block *b, Ipifc *ifc)
-{
-	Nat *n;		/* NAT table */
-	NatProto *np;	/* Protocol list */
-	Ip4hdr *h;	/* Source IPv4 header */
-	uchar *lport;	/* Our local port, and dst port for the packet */
-	uchar *cksum;	/* Source checksum */
-
-	h = (Ip4hdr*)(b->rp);
-
-	/* Verify if NAT is enabled on this interface */
-	if(natonifci(ifc) == nil)
-		return;
-
-	if((np=parseproto(h->proto)) != nil){
-		lport = (b->rp)+sizeof(Ip4hdr)+np->dport;
-		if((n=natexistin(h->proto, lport)) != nil){
-			/* Protocol layer */
-			cksum = (b->rp)+sizeof(Ip4hdr)+np->cksum;
-			checksumadjust(cksum, lport, 2, n->sport, 2);
-			memmove(lport, n->sport, 2);
-			if(np->proto != 1)
-				/* ICMP checksum doesn't include IP header */
-		   		checksumadjust(cksum, h->dst, IPv4addrlen,
-					n->src, IPv4addrlen);
-			/* IP layer */
-			checksumadjust(h->cksum, h->dst, IPv4addrlen,
-				n->src, IPv4addrlen);
-			ip4move(h->dst, n->src);
-		}
-	}
-}
-
-/*
- * Add Nat to Nat list.
- */
-void
-natprepend(Nat *n)
-{
-	n->next = head;
-	head = n;
-}
-
-/*
- * Return Nat if it exists in Nat list.
- */
-Nat*
-natexistout(uchar *src, uchar proto, uchar *sport)
-{
-	Nat *c;		/* Current node */
-
-	for(c=head; c!=nil; c=c->next)
-		if(ip4cmp(src, c->src) == 0 &&
-			memcmp(sport, c->sport, 2) == 0 &&
-			proto == c->proto){
-			c->time = NOW;
-			return c;
-		}
-
-	return nil;
-}
-
-/*
- * Return Nat if it exists in Nat list.
- */
-Nat*
-natexistin(uchar proto, uchar *lport)
-{
-	Nat *c;		/* Current node */
-
-	for(c=head; c!=nil; c=c->next)
-		if(memcmp(lport, c->lport, 2) == 0 &&
-			proto == c->proto){
-			c->time = NOW;
-			return c;
-		}
-
-	return nil;
-}
-
-/*
- * Delete Nat in Nat list.
- * Return -1 if it doesn't exist.
- */
-int
-natdelete(uchar src[IPv4addrlen], uchar proto, uchar sport[2])
-{
-	Nat *p;		/* Precedent node */
-	Nat *c;		/* Current node */
-
-	for(p=nil, c=head; c!=nil; p=c, c=c->next)
-		if(ip4cmp(src, c->src) == 0 &&
-			memcmp(sport, c->sport, 2) == 0 &&
-			proto == c->proto)
-			break;
-
-	if(c == nil)
-		return -1;
-
-	if(p == nil)
-		head = head->next;
-	else
-		p->next = c->next;
-
-	closeconv(c->conv);
-	free(c);
-
-	return 0;
-}
-
-/*
- * Purge Nat list.
- */
-int
-natpurge(uchar proto)
-{
-	Nat *c;		/* Current node */
-	int n;		/* Number of purged connections */
-
-	for(n = 0;; n++){
-		do{
-			if((c = head) == nil)
-				return n;
-			head = head->next;
-		}while(c->proto != proto);
-		closeconv(c->conv);
-		free(c);
-	}
-}
-
-/*
- * Create a new Nat if necessary.
- */
-Nat*
-natlport(Proto *p, Ip4hdr *h, uchar *sport)
-{
-	Nat *n;		/* New NAT node */
-	Conv *s;	/* New conversation */
-
-	if((n=natexistout(h->src, h->proto, sport)) == nil){
-		qlock(p);
-		s = Fsprotoclone(p, "network");
-		qunlock(p);
-		if(s == nil){
-			error(Enodev);
-			return nil;
-		}
-		setlport(s);
-		n = malloc(sizeof(Nat));
-		ip4move(n->src, h->src);
-		memmove(n->sport, sport, 2);
-		memmove(n->lport, &s->lport, 2);
-		n->proto = h->proto;
-		n->time = NOW;
-		n->conv = s;
-		natprepend(n);
-	}
-
-	return n;
-}
-
-/*
- * Nat list garbage collector.
- */
-int
-natgc(uchar proto){
-	Nat *p;		/* Precedent node */
-	Nat *c;		/* Current node */
-	NatProto *np;	/* Protocol list */
-	int n;		/* Number of garbage collected connections */
-
-	n = 0;
-	p = nil;
-	c = head;
-
-	np = parseproto(proto);
-
-	while(c != nil){
-		if(NOW - c->time > np->timeout){
- 			if(p == nil){
- 				head = head->next;
-				if(proto == c->proto)
-					n++;
-				closeconv(c->conv);
-				free(c);
-				p = nil;
-				c = head;
- 			}else{
- 				p->next = c->next;
-				if(proto == c->proto)
-					n++;
-				closeconv(c->conv);
-				free(c);
- 				c = p->next;
- 			}
-		}else{
-			p = c;
-			c = c->next;
-		}
-	}
-
-	if(n == 0)	/* Prevent Conv saturation */
-		n = natpurge(proto);
-
-	return n;
-}
-
-/*
- * Function checksumadjust from RFC 3022.
- */
-void
-checksumadjust(uchar *chksum, uchar *optr, int olen, uchar *nptr, int nlen)
-{
-	long x, old, new;
-
-	x=chksum[0]*256+chksum[1];
-	x=~x & 0xffff;
-	while(olen){
-		old=optr[0]*256+optr[1];
-		optr+=2;
-		x-=old & 0xffff;
-		if(x<=0){
-			x--;
-			x&=0xffff;
-		}
-		olen-=2;
-	}
-	while(nlen){
-		new=nptr[0]*256+nptr[1];
-		nptr+=2;
-		x+=new & 0xffff;
-		if(x & 0x10000){
-			x++;
-			x&=0xffff;
-		}
-		nlen-=2;
-	}
-	x=~x & 0xffff;
-	chksum[0]=x/256;
-	chksum[1]=x & 0xff;
-}
-
-/*
- * Add NatAddr to NatAddr list.
- */
-void
-nataddrprepend(NatAddr *na)
-{
-	na->next = addrhead;
-	addrhead = na;
-}
-
-/*
- * Return NatAddr if it exists in NatAddr list.
- */
-NatAddr*
-nataddrexist(uchar *src, uchar *mask, Iplifc *dst)
-{
-	NatAddr *c;	/* Current node */
-
-	for(c=addrhead; c!=nil; c=c->next)
-		if(ipcmp(src, c->src) == 0 &&
-			ipcmp(mask, c->mask) == 0 &&
-			dst == c->dst)
-			return c;
-
-	return nil;
-}
-
-/*
- * Create a new NatAddr.
- * Return -1 if it already exist.
- */
-int
-addnataddr(uchar *src, uchar *mask, Iplifc *dst)
-{
-	NatAddr *na;		/* New address node */
-	uchar net[IPaddrlen];	/* Network address */
-
-	maskip(src, mask, net);
-
-	if(nataddrexist(src, mask, dst) != nil)
-		return -1;
-
-	na = malloc(sizeof(NatAddr));
-	ipmove(na->src, src);
-	ipmove(na->mask, mask);
-	ipmove(na->net, net);
-	na->dst = dst;
-
-	nataddrprepend(na);
-
-	return 0;
-}
-
-/*
- * Remove a NatAddr.
- * Return -1 if it doesn't exist.
- */
-int
-removenataddr(uchar *src, uchar *mask, Iplifc *dst)
-{
-	NatAddr *c;	/* Current node */
-	NatAddr *p;	/* Precedent node */
-
-	for(p=nil, c=addrhead; c!=nil; p=c, c=c->next)
-		if(ipcmp(src, c->src) == 0 &&
-			ipcmp(mask, c->mask) == 0 &&
-			dst == c->dst)
-			break;
-
-	if(c == nil)
-		return -1;
-
-	if(p == nil)
-		addrhead = addrhead->next;
-	else
-		p->next = c->next;
-
-	return 0;
-}
-
-/*
- * Display NatAddr list.
- */
-void
-shownataddr(void)
-{
-	NatAddr *c;	/* Current node */
-
-	for(c=addrhead; c!=nil; c=c->next)
-		print("%I %V %I\n", c->src, c->mask+IPv4off, c->dst->local);
-}
-
-/*
- * Flush NatAddr list.
- */
-void
-flushnataddr(void)
-{
-	NatAddr *c;	/* Current node */
-
-	while((c=addrhead) != nil){
-		addrhead = addrhead->next;
-		free(c);
-	}
-}
-
-/*
- * Return logical interface if NAT is enabled on this interface,
- * and the source address must be translated.
- */
-Iplifc*
-natonifco(Ipifc *ifc, Ip4hdr* h)
-{
-	NatAddr *na;		/* Address list */
-	Iplifc *lifc;		/* Logical interface */
-	uchar src[IPaddrlen];	/* Source address */
-	uchar net[IPaddrlen];	/* Source network address */
-
-	for(lifc=ifc->lifc; lifc!=nil; lifc=lifc->next)
-		for(na=addrhead; na; na=na->next)
-			if(lifc == na->dst){
-				/* NAT enabled on this logical interface */
-				v4tov6(src, h->src);
-				maskip(src, na->mask, net);
-				if(ipcmp(net, na->net) == 0)
-					/* Source address must be translated */
-					return lifc;
-			}
-
-	return nil;
-}
-
-/*
- * Return logical interface if NAT is enabled on this interface.
- */
-Iplifc*
-natonifci(Ipifc *ifc)
-{
-	NatAddr *na;		/* Address list */
-	Iplifc *lifc;		/* Logical interface */
-
-	for(lifc=ifc->lifc; lifc!=nil; lifc=lifc->next)
-		for(na=addrhead; na; na=na->next)
-			if(lifc == na->dst){
-				/* NAT enabled on this logical interface */
-				return lifc;
-			}
-
-	return nil;
-}
--- a/os/ip/netdevmedium.c
+++ b/os/ip/netdevmedium.c
@@ -49,12 +49,13 @@
 	mchan = namec(argv[2], Aopen, ORDWR, 0);
 
 	er = smalloc(sizeof(*er));
+	er->readp = (void*)-1;
 	er->mchan = mchan;
 	er->f = ifc->conv->p->f;
 
 	ifc->arg = er;
 
-	kproc("netdevread", netdevread, ifc, 0);
+	kproc("netdevread", netdevread, ifc);
 }
 
 /*
@@ -65,13 +66,29 @@
 {
 	Netdevrock *er = ifc->arg;
 
+	while(waserror())
+		;
+
+	/* wait for reader to start */
+	while(er->readp == (void*)-1)
+		tsleep(&up->sleep, return0, 0, 300);
+
 	if(er->readp != nil)
 		postnote(er->readp, 1, "unbind", 0);
 
-	/* wait for readers to die */
+	poperror();
+
+	wunlock(ifc);
+	while(waserror())
+		;
+
+	/* wait for reader to die */
 	while(er->readp != nil)
 		tsleep(&up->sleep, return0, 0, 300);
 
+	poperror();
+	wlock(ifc);
+
 	if(er->mchan != nil)
 		cclose(er->mchan);
 
@@ -86,8 +103,6 @@
 {
 	Netdevrock *er = ifc->arg;
 
-	if(bp->next)
-		bp = concatblock(bp);
 	if(BLEN(bp) < ifc->mintu)
 		bp = adjustblock(bp, ifc->mintu);
 
@@ -104,34 +119,22 @@
 	Ipifc *ifc;
 	Block *bp;
 	Netdevrock *er;
-	char *argv[1];
 
 	ifc = a;
 	er = ifc->arg;
 	er->readp = up;	/* hide identity under a rock for unbind */
-	if(waserror()){
-		er->readp = nil;
-		pexit("hangup", 1);
-	}
+	if(!waserror())
 	for(;;){
 		bp = devtab[er->mchan->type]->bread(er->mchan, ifc->maxtu, 0);
 		if(bp == nil){
-			/*
-			 * get here if mchan is a pipe and other side hangs up
-			 * clean up this interface & get out
-ZZZ is this a good idea?
-			 */
 			poperror();
-			er->readp = nil;
-			argv[0] = "unbind";
-			if(!waserror())
+			if(!waserror()){
+				static char *argv[]  = { "unbind" };
 				ifc->conv->p->ctl(ifc->conv, argv, 1);
-			pexit("hangup", 1);
+			}
+			break;
 		}
-		if(!canrlock(ifc)){
-			freeb(bp);
-			continue;
-		}
+		rlock(ifc);
 		if(waserror()){
 			runlock(ifc);
 			nexterror();
@@ -144,6 +147,8 @@
 		runlock(ifc);
 		poperror();
 	}
+	er->readp = nil;
+	pexit("hangup", 1);
 }
 
 void
--- a/os/ip/netlog.c
+++ b/os/ip/netlog.c
@@ -7,7 +7,7 @@
 #include	"../ip/ip.h"
 
 enum {
-	Nlog		= 4*1024,
+	Nlog		= 16*1024,
 };
 
 /*
@@ -39,12 +39,12 @@
 	{ "ppp",	Logppp, },
 	{ "ip",		Logip, },
 	{ "fs",		Logfs, },
-	{ "tcp",	Logtcp, },
 	{ "il",		Logil, },
+	{ "tcp",	Logtcp, },
 	{ "icmp",	Logicmp, },
 	{ "udp",	Logudp, },
 	{ "compress",	Logcompress, },
-	{ "ilmsg",	Logil|Logilmsg, },
+	{ "logilmsg",	Logilmsg, },
 	{ "gre",	Loggre, },
 	{ "tcpwin",	Logtcp|Logtcpwin, },
 	{ "tcprxmt",	Logtcp|Logtcprxmt, },
@@ -85,8 +85,11 @@
 		nexterror();
 	}
 	if(f->alog->opens == 0){
-		if(f->alog->buf == nil)
+		if(f->alog->buf == nil){
 			f->alog->buf = malloc(Nlog);
+			if(f->alog->buf == nil)
+				error(Enomem);
+		}
 		f->alog->rptr = f->alog->buf;
 		f->alog->end = f->alog->buf + Nlog;
 	}
@@ -202,6 +205,7 @@
 		else
 			f->alog->iponlyset = 1;
 		free(cb);
+		poperror();
 		return;
 
 	default:
@@ -227,7 +231,7 @@
 void
 netlog(Fs *f, int mask, char *fmt, ...)
 {
-	char buf[128], *t, *fp;
+	char buf[256], *t, *fp;
 	int i, n;
 	va_list arg;
 
--- a/os/ip/nullmedium.c
+++ b/os/ip/nullmedium.c
@@ -19,8 +19,9 @@
 }
 
 static void
-nullbwrite(Ipifc*, Block*, int, uchar*)
+nullbwrite(Ipifc*, Block *bp, int, uchar*)
 {
+	freeb(bp);
 	error("nullbwrite");
 }
 
--- a/os/ip/pktmedium.c
+++ b/os/ip/pktmedium.c
@@ -16,10 +16,10 @@
 Medium pktmedium =
 {
 .name=		"pkt",
-.hsize=		14,
-.mintu=		40,
+.hsize=		0,
+.mintu=		0,
 .maxtu=		4*1024,
-.maclen=	6,
+.maclen=	0,
 .bind=		pktbind,
 .unbind=	pktunbind,
 .bwrite=	pktbwrite,
@@ -28,12 +28,13 @@
 };
 
 /*
- *  called to bind an IP ifc to an ethernet device
+ *  called to bind an IP ifc to a packet device
  *  called with ifc wlock'd
  */
 static void
-pktbind(Ipifc*, int, char**)
+pktbind(Ipifc*, int argc, char **argv)
 {
+	USED(argc, argv);
 }
 
 /*
@@ -51,7 +52,6 @@
 pktbwrite(Ipifc *ifc, Block *bp, int, uchar*)
 {
 	/* enqueue onto the conversation's rq */
-	bp = concatblock(bp);
 	if(ifc->conv->snoopers.ref > 0)
 		qpass(ifc->conv->sq, copyblock(bp, BLEN(bp)));
 	qpass(ifc->conv->rq, bp);
--- a/os/ip/plan9.c
+++ /dev/null
@@ -1,36 +1,0 @@
-#include	"u.h"
-#include	"../port/lib.h"
-#include	"mem.h"
-#include	"dat.h"
-#include	"fns.h"
-#include	"../port/error.h"
-#include	"ip.h"
-
-/*
- *  some hacks for commonality twixt inferno and plan9
- */
-
-char*
-commonuser(void)
-{
-	return up->env->user;
-}
-
-Chan*
-commonfdtochan(int fd, int mode, int a, int b)
-{
-	return fdtochan(up->env->fgrp, fd, mode, a, b);
-}
-
-char*
-commonerror(void)
-{
-	return up->env->errstr;
-}
-
-int
-postnote(Proc *p, int, char *, int)
-{
-	swiproc(p, 0);
-	return 0;
-}
--- a/os/ip/ppp.c
+++ /dev/null
@@ -1,1656 +1,0 @@
-#include	"u.h"
-#include	"../port/lib.h"
-#include	"mem.h"
-#include	"dat.h"
-#include	"fns.h"
-#include	"../port/error.h"
-#include	<libcrypt.h>
-#include	<kernel.h>
-#include	"ip.h"
-#include	"ppp.h"
-
-int	nocompress;
-Ipaddr	pppdns[2];
-
-/*
- * Calculate FCS - rfc 1331
- */
-ushort fcstab[256] =
-{
-      0x0000, 0x1189, 0x2312, 0x329b, 0x4624, 0x57ad, 0x6536, 0x74bf,
-      0x8c48, 0x9dc1, 0xaf5a, 0xbed3, 0xca6c, 0xdbe5, 0xe97e, 0xf8f7,
-      0x1081, 0x0108, 0x3393, 0x221a, 0x56a5, 0x472c, 0x75b7, 0x643e,
-      0x9cc9, 0x8d40, 0xbfdb, 0xae52, 0xdaed, 0xcb64, 0xf9ff, 0xe876,
-      0x2102, 0x308b, 0x0210, 0x1399, 0x6726, 0x76af, 0x4434, 0x55bd,
-      0xad4a, 0xbcc3, 0x8e58, 0x9fd1, 0xeb6e, 0xfae7, 0xc87c, 0xd9f5,
-      0x3183, 0x200a, 0x1291, 0x0318, 0x77a7, 0x662e, 0x54b5, 0x453c,
-      0xbdcb, 0xac42, 0x9ed9, 0x8f50, 0xfbef, 0xea66, 0xd8fd, 0xc974,
-      0x4204, 0x538d, 0x6116, 0x709f, 0x0420, 0x15a9, 0x2732, 0x36bb,
-      0xce4c, 0xdfc5, 0xed5e, 0xfcd7, 0x8868, 0x99e1, 0xab7a, 0xbaf3,
-      0x5285, 0x430c, 0x7197, 0x601e, 0x14a1, 0x0528, 0x37b3, 0x263a,
-      0xdecd, 0xcf44, 0xfddf, 0xec56, 0x98e9, 0x8960, 0xbbfb, 0xaa72,
-      0x6306, 0x728f, 0x4014, 0x519d, 0x2522, 0x34ab, 0x0630, 0x17b9,
-      0xef4e, 0xfec7, 0xcc5c, 0xddd5, 0xa96a, 0xb8e3, 0x8a78, 0x9bf1,
-      0x7387, 0x620e, 0x5095, 0x411c, 0x35a3, 0x242a, 0x16b1, 0x0738,
-      0xffcf, 0xee46, 0xdcdd, 0xcd54, 0xb9eb, 0xa862, 0x9af9, 0x8b70,
-      0x8408, 0x9581, 0xa71a, 0xb693, 0xc22c, 0xd3a5, 0xe13e, 0xf0b7,
-      0x0840, 0x19c9, 0x2b52, 0x3adb, 0x4e64, 0x5fed, 0x6d76, 0x7cff,
-      0x9489, 0x8500, 0xb79b, 0xa612, 0xd2ad, 0xc324, 0xf1bf, 0xe036,
-      0x18c1, 0x0948, 0x3bd3, 0x2a5a, 0x5ee5, 0x4f6c, 0x7df7, 0x6c7e,
-      0xa50a, 0xb483, 0x8618, 0x9791, 0xe32e, 0xf2a7, 0xc03c, 0xd1b5,
-      0x2942, 0x38cb, 0x0a50, 0x1bd9, 0x6f66, 0x7eef, 0x4c74, 0x5dfd,
-      0xb58b, 0xa402, 0x9699, 0x8710, 0xf3af, 0xe226, 0xd0bd, 0xc134,
-      0x39c3, 0x284a, 0x1ad1, 0x0b58, 0x7fe7, 0x6e6e, 0x5cf5, 0x4d7c,
-      0xc60c, 0xd785, 0xe51e, 0xf497, 0x8028, 0x91a1, 0xa33a, 0xb2b3,
-      0x4a44, 0x5bcd, 0x6956, 0x78df, 0x0c60, 0x1de9, 0x2f72, 0x3efb,
-      0xd68d, 0xc704, 0xf59f, 0xe416, 0x90a9, 0x8120, 0xb3bb, 0xa232,
-      0x5ac5, 0x4b4c, 0x79d7, 0x685e, 0x1ce1, 0x0d68, 0x3ff3, 0x2e7a,
-      0xe70e, 0xf687, 0xc41c, 0xd595, 0xa12a, 0xb0a3, 0x8238, 0x93b1,
-      0x6b46, 0x7acf, 0x4854, 0x59dd, 0x2d62, 0x3ceb, 0x0e70, 0x1ff9,
-      0xf78f, 0xe606, 0xd49d, 0xc514, 0xb1ab, 0xa022, 0x92b9, 0x8330,
-      0x7bc7, 0x6a4e, 0x58d5, 0x495c, 0x3de3, 0x2c6a, 0x1ef1, 0x0f78
-};
-
-static char *snames[] =
-{
-	"Sclosed",
-	"Sclosing",
-	"Sreqsent",
-	"Sackrcvd",
-	"Sacksent",
-	"Sopened",
-};
-
-static void	init(PPP*);
-static void	setphase(PPP*, int);
-static void	pinit(PPP*, Pstate*);
-static void	ppptimer(void*);
-static void	ptimer(PPP*, Pstate*);
-static int	getframe(PPP*, Block**);
-static Block*	putframe(PPP*, int, Block*);
-static uchar*	escapebyte(PPP*, ulong, uchar*, ushort*);
-static void	config(PPP*, Pstate*, int);
-static int	getopts(PPP*, Pstate*, Block*);
-static void	rejopts(PPP*, Pstate*, Block*, int);
-static void	newstate(PPP*, Pstate*, int);
-static void	rcv(PPP*, Pstate*, Block*);
-static void	getchap(PPP*, Block*);
-static void	getpap(PPP*, Block*);
-static void	sendpap(PPP*);
-static void	getlqm(PPP*, Block*);
-static void	putlqm(PPP*);
-static void	hangup(PPP*);
-static void	remove(PPP*);
-
-static	int		validv4(Ipaddr);
-static	void		invalidate(Ipaddr);
-static	void		ipconnect(PPP *);
-static	void		setdefroute(PPP *, Ipaddr);
-static	void		printopts(PPP *, Pstate*, Block*, int);
-static	void		sendtermreq(PPP*, Pstate*);
-
-static void
-errlog(PPP *ppp, char *err)
-{
-	int n;
-	char msg[64];
-
-	n = snprint(msg, sizeof(msg), "%s\n", err);
-	qproduce(ppp->ifc->conv->eq, msg, n);
-}
-
-static void
-init(PPP* ppp)
-{
-	if(ppp->inbuf == nil){
-		ppp->inbuf = allocb(4096);
-		ppp->outbuf = allocb(4096);
-
-		ppp->lcp = malloc(sizeof(Pstate));
-		ppp->ipcp = malloc(sizeof(Pstate));
-		if(ppp->lcp == nil || ppp->ipcp == nil)
-			error("ppp init: malloc");
-
-		ppp->lcp->proto = Plcp;
-		ppp->lcp->state = Sclosed;
-		ppp->ipcp->proto = Pipcp;
-		ppp->ipcp->state = Sclosed;
-
-		kproc("ppptimer", ppptimer, ppp, KPDUPPG|KPDUPFDG);
-	}
-
-	pinit(ppp, ppp->lcp);
-	setphase(ppp, Plink);
-}
-
-static void
-setphase(PPP *ppp, int phase)
-{
-	int oldphase;
-
-	oldphase = ppp->phase;
-
-	ppp->phase = phase;
-	switch(phase){
-	default:
-		panic("ppp: unknown phase %d", phase);
-	case Pdead:
-		/* restart or exit? */
-		pinit(ppp, ppp->lcp);
-		setphase(ppp, Plink);
-		break;
-	case Plink:
-		/* link down */
-		switch(oldphase) {
-		case Pnet:
-			newstate(ppp, ppp->ipcp, Sclosed);
-		}
-		break;
-	case Pauth:
-		if(ppp->usepap)
-			sendpap(ppp);
-		else if(!ppp->usechap)
-			setphase(ppp, Pnet);
-		break;
-	case Pnet:
-		pinit(ppp, ppp->ipcp);
-		break;
-	case Pterm:
-		/* what? */
-		break;
-	}
-}
-
-static void
-pinit(PPP *ppp, Pstate *p)
-{
-	p->timeout = 0;
-
-	switch(p->proto){
-	case Plcp:
-		ppp->magic = TK2MS(MACHP(0)->ticks);
-		ppp->xctlmap = 0xffffffff;
-		ppp->period = 0;
-		p->optmask = 0xffffffff;
-		ppp->rctlmap = 0;
-		ppp->ipcp->state = Sclosed;
-		ppp->ipcp->optmask = 0xffffffff;
-
-		/* quality goo */
-		ppp->timeout = 0;
-		memset(&ppp->in, 0, sizeof(ppp->in));
-		memset(&ppp->out, 0, sizeof(ppp->out));
-		memset(&ppp->pin, 0, sizeof(ppp->pin));
-		memset(&ppp->pout, 0, sizeof(ppp->pout));
-		memset(&ppp->sin, 0, sizeof(ppp->sin));
-		break;
-	case Pipcp:
-		if(ppp->localfrozen == 0)
-			invalidate(ppp->local);
-		if(ppp->remotefrozen == 0)
-			invalidate(ppp->remote);
-		p->optmask = 0xffffffff;
-		ppp->ctcp = compress_init(ppp->ctcp);
-		ppp->usedns = 3;
-		invalidate(ppp->dns1);
-		invalidate(ppp->dns2);
-		break;
-	}
-	p->confid = p->rcvdconfid = -1;
-	config(ppp, p, 1);
-	newstate(ppp, p, Sreqsent);
-}
-
-/*
- *  change protocol to a new state.
- */
-static void
-newstate(PPP *ppp, Pstate *p, int state)
-{
-	netlog(ppp->f, Logppp, "%ux %ux %s->%s ctlmap %lux/%lux flags %ux mtu %d mru %d\n", ppp, p->proto,
-		snames[p->state], snames[state], ppp->rctlmap, ppp->xctlmap, p->flags,
-		ppp->mtu, ppp->mru);
-
-	if(p->proto == Plcp) {
-		if(state == Sopened)
-			setphase(ppp, Pauth);
-		else if(state == Sclosed)
-			setphase(ppp, Pdead);
-		else if(p->state == Sopened)
-			setphase(ppp, Plink);
-	}
-
-	if(p->proto == Pipcp && state == Sopened && validv4(ppp->local) && validv4(ppp->remote)){
-		netlog(ppp->f, Logppp, "pppnewstate: local %I remote %I\n", ppp->local, ppp->remote);
-		ipmove(pppdns[0], ppp->dns1);
-		ipmove(pppdns[1], ppp->dns2);
-		ipconnect(ppp);
-		/* if this is the only network, set up a default route */
-//		if(ppp->ifc->link==nil)		/* how??? */
-			setdefroute(ppp, ppp->remote);
-		errlog(ppp, Enoerror);
-	}
-
-	p->state = state;
-}
-
-static void
-remove(PPP *ppp)
-{
-	free(ppp->ipcp);
-	ppp->ipcp = 0;
-	free(ppp->ctcp);
-	ppp->ctcp = 0;
-	free(ppp->lcp);
-	ppp->lcp = 0;
-	if (ppp->inbuf) {
-		freeb(ppp->inbuf);
-		ppp->inbuf = nil;
-	}
-	if (ppp->outbuf) {
-		freeb(ppp->outbuf);
-		ppp->outbuf = nil;
-	}
-	free(ppp);
-}
-
-void
-pppclose(PPP *ppp)
-{
-	hangup(ppp);
-	remove(ppp);
-}
-
-static void
-dumpblock(Block *b)
-{
-	char x[256];
-	int i;
-
-	for(i = 0; i < (sizeof(x)-1)/3 && b->rp+i < b->wp; i++)
-		sprint(&x[3*i], "%2.2ux ", b->rp[i]);
-	print("%s\n", x);
-}
-
-/* returns (protocol, information) */
-static int
-getframe(PPP *ppp, Block **info)
-{
-	uchar *p, *from, *to;
-	int n, len, proto;
-	ulong c;
-	ushort fcs;
-	Block *buf, *b;
-
-	buf = ppp->inbuf;
-	for(;;){
-		/* read till we hit a frame byte or run out of room */
-		for(p = buf->rp; buf->wp < buf->lim;){
-			for(; p < buf->wp; p++)
-				if(*p == HDLC_frame)
-					goto break2;
-
-			len = buf->lim - buf->wp;
-			n = 0;
-			if(ppp->dchan != nil)
-				n = kchanio(ppp->dchan, buf->wp, len, OREAD);
-				netlog(ppp->f, Logppp, "ppp kchanio %d bytes\n", n);
-			if(n <= 0){
-				buf->wp = buf->rp;
-//				if(n < 0)
-//					print("ppp kchanio(%s) returned %d: %r",
-//						ppp->dchan->path->elem, n);
-				*info = nil;
-				return 0;
-			}
-			buf->wp += n;
-		}
-break2:
-
-		/* copy into block, undoing escapes, and caculating fcs */
-		fcs = PPP_initfcs;
-		b = allocb(p - buf->rp);
-		to = b->wp;
-		for(from = buf->rp; from != p;){
-			c = *from++;
-			if(c == HDLC_esc){
-				if(from == p)
-					break;
-				c = *from++ ^ 0x20;
-			} else if((c < 0x20) && (ppp->rctlmap & (1 << c)))
-				continue;
-			*to++ = c;
-			fcs = (fcs >> 8) ^ fcstab[(fcs ^ c) & 0xff];
-		}
-
-		/* copy down what's left in buffer */
-		p++;
-		memmove(buf->rp, p, buf->wp - p);
-		n = p - buf->rp;
-		buf->wp -= n;
-		b->wp = to - 2;
-
-		/* return to caller if checksum matches */
-		if(fcs == PPP_goodfcs){
-			if(b->rp[0] == PPP_addr && b->rp[1] == PPP_ctl)
-				b->rp += 2;
-			proto = *b->rp++;
-			if((proto & 0x1) == 0)
-				proto = (proto<<8) | *b->rp++;
-			if(b->rp < b->wp){
-				ppp->in.bytes += n;
-				ppp->in.packets++;
-				*info = b;
-				return proto;
-			}
-		} else if(BLEN(b) > 0){
-			ppp->ifc->inerr++;
-			ppp->in.discards++;
-			netlog(ppp->f, Logppp, "len %d/%d cksum %ux (%ux %ux %ux %ux)\n",
-				BLEN(b), BLEN(buf), fcs, b->rp[0],
-				b->rp[1], b->rp[2], b->rp[3]);
-		}
-
-		freeblist(b);
-	}
-	*info = nil;
-	return 0;
-}
-
-/* send a PPP frame */
-static Block *
-putframe(PPP *ppp, int proto, Block *b)
-{
-	Block *buf;
-	uchar *to, *from;
-	ushort fcs;
-	ulong ctlmap;
-	int c;
-	Block *bp;
-
-	if(ppp->dchan == nil){
-		netlog(ppp->f, Logppp, "putframe: dchan down\n");
-		errlog(ppp, Ehungup);
-		return b;
-	}
-	netlog(ppp->f, Logppp, "putframe %ux %d %d (%d bytes)\n", proto, b->rp[0], b->rp[1], BLEN(b));
-
-	ppp->out.packets++;
-
-	if(proto == Plcp)
-		ctlmap = 0xffffffff;
-	else
-		ctlmap = ppp->xctlmap;
-
-	/* make sure we have head room */
-	if(b->rp - b->base < 4){
-		b = padblock(b, 4);
-		b->rp += 4;
-	}
-
-	/* add in the protocol and address, we'd better have left room */
-	from = b->rp;
-	*--from = proto;
-	if(!(ppp->lcp->flags&Fpc) || proto > 0x100 || proto == Plcp)
-		*--from = proto>>8;
-	if(!(ppp->lcp->flags&Fac) || proto == Plcp){
-		*--from = PPP_ctl;
-		*--from = PPP_addr;
-	}
-
-	qlock(&ppp->outlock);
-	buf = ppp->outbuf;
-
-	/* escape and checksum the body */
-	fcs = PPP_initfcs;
-	to = buf->rp;
-
-	*to++ = HDLC_frame;
-
-	for(bp = b; bp; bp = bp->next){
-		if(bp != b)
-			from = bp->rp;
-		for(; from < bp->wp; from++){
-			c = *from;
-			if(c == HDLC_frame || c == HDLC_esc
-			   || (c < 0x20 && ((1<<c) & ctlmap))){
-				*to++ = HDLC_esc;
-				*to++ = c ^ 0x20;
-			} else 
-				*to++ = c;
-			fcs = (fcs >> 8) ^ fcstab[(fcs ^ c) & 0xff];
-		}
-	}
-
-	/* add on and escape the checksum */
-	fcs = ~fcs;
-	c = fcs;
-	if(c == HDLC_frame || c == HDLC_esc
-	   || (c < 0x20 && ((1<<c) & ctlmap))){
-		*to++ = HDLC_esc;
-		*to++ = c ^ 0x20;
-	} else 
-		*to++ = c;
-	c = fcs>>8;
-	if(c == HDLC_frame || c == HDLC_esc
-	   || (c < 0x20 && ((1<<c) & ctlmap))){
-		*to++ = HDLC_esc;
-		*to++ = c ^ 0x20;
-	} else 
-		*to++ = c;
-
-	/* add frame marker and send */
-	*to++ = HDLC_frame;
-	buf->wp = to;
-	if(ppp->dchan == nil){
-		netlog(ppp->f, Logppp, "putframe: dchan down\n");
-		errlog(ppp, Ehungup);
-	}else{
-		kchanio(ppp->dchan, buf->rp, BLEN(buf), OWRITE);
-		ppp->out.bytes += BLEN(buf);
-	}
-
-	qunlock(&ppp->outlock);
-	return b;
-}
-
-#define IPB2LCP(b) ((Lcpmsg*)((b)->wp-4))
-
-static Block*
-alloclcp(int code, int id, int len)
-{
-	Block *b;
-	Lcpmsg *m;
-
-	/*
-	 *  leave room for header
-	 */
-	b = allocb(len);
-
-	m = (Lcpmsg*)b->wp;
-	m->code = code;
-	m->id = id;
-	b->wp += 4;
-
-	return b;
-}
-
-static void
-putao(Block *b, int type, int aproto, int alg)
-{
-	*b->wp++ = type;
-	*b->wp++ = 5;
-	hnputs(b->wp, aproto);
-	b->wp += 2;
-	*b->wp++ = alg;
-}
-
-static void
-putlo(Block *b, int type, ulong val)
-{
-	*b->wp++ = type;
-	*b->wp++ = 6;
-	hnputl(b->wp, val);
-	b->wp += 4;
-}
-
-static void
-putv4o(Block *b, int type, Ipaddr val)
-{
-	*b->wp++ = type;
-	*b->wp++ = 6;
-	if(v6tov4(b->wp, val) < 0){
-		/*panic("putv4o")*/;
-	}
-	b->wp += 4;
-}
-
-static void
-putso(Block *b, int type, ulong val)
-{
-	*b->wp++ = type;
-	*b->wp++ = 4;
-	hnputs(b->wp, val);
-	b->wp += 2;
-}
-
-static void
-puto(Block *b, int type)
-{
-	*b->wp++ = type;
-	*b->wp++ = 2;
-}
-
-/*
- *  send configuration request
- */
-static void
-config(PPP *ppp, Pstate *p, int newid)
-{
-	Block *b;
-	Lcpmsg *m;
-	int id;
-
-	if(newid){
-		id = ++(p->id);
-		p->confid = id;
-		p->timeout = Timeout;
-	} else
-		id = p->confid;
-	b = alloclcp(Lconfreq, id, 256);
-	m = IPB2LCP(b);
-	USED(m);
-
-	switch(p->proto){
-	case Plcp:
-		if(p->optmask & Fmagic)
-			putlo(b, Omagic, ppp->magic);
-		if(p->optmask & Fmtu)
-			putso(b, Omtu, ppp->mru);
-		if(p->optmask & Fac)
-			puto(b, Oac);
-		if(p->optmask & Fpc)
-			puto(b, Opc);
-		if(p->optmask & Fctlmap)
-			putlo(b, Octlmap, 0);	/* we don't want anything escaped */
-		break;
-	case Pipcp:
-		if((p->optmask & Fipaddr) /*&& validv4(ppp->local)*/)
-			putv4o(b, Oipaddr, ppp->local);
-		if(!nocompress && (p->optmask & Fipcompress)){
-			*b->wp++ = Oipcompress;
-			*b->wp++ = 6;
-			hnputs(b->wp, Pvjctcp);
-			b->wp += 2;
-			*b->wp++ = MAX_STATES-1;
-			*b->wp++ = 1;
-		}
-		if(ppp->usedns & 1)
-			putlo(b, Oipdns, 0);
-		if(ppp->usedns & 2)
-			putlo(b, Oipdns2, 0);
-		break;
-	}
-
-	hnputs(m->len, BLEN(b));
-	b = putframe(ppp, p->proto, b);
-	freeblist(b);
-}
-
-/*
- *  parse configuration request, sends an ack or reject packet
- *
- *	returns:	-1 if request was syntacticly incorrect
- *			 0 if packet was accepted
- *			 1 if packet was rejected
- */
-static int
-getopts(PPP *ppp, Pstate *p, Block *b)
-{
-	Lcpmsg *m, *repm;	
-	Lcpopt *o;
-	uchar *cp;
-	ulong rejecting, nacking, flags, proto;
-	ulong mtu, ctlmap, period;
-	ulong x;
-	Block *repb;
-	Ipaddr ipaddr;
-
-	rejecting = 0;
-	nacking = 0;
-	flags = 0;
-
-	/* defaults */
-	invalidate(ipaddr);
-	mtu = ppp->mtu;
-
-	ctlmap = 0xffffffff;
-	period = 0;
-
-	m = (Lcpmsg*)b->rp;
-	repb = alloclcp(Lconfack, m->id, BLEN(b));
-	repm = IPB2LCP(repb);
-
-	/* copy options into ack packet */
-	memmove(repm->data, m->data, b->wp - m->data);
-	repb->wp += b->wp - m->data;
-
-	/* look for options we don't recognize or like */
-	for(cp = m->data; cp < b->wp; cp += o->len){
-		o = (Lcpopt*)cp;
-		if(cp + o->len > b->wp || o->len == 0){
-			freeblist(repb);
-			netlog(ppp->f, Logppp, "ppp %s: bad option length %ux\n", ppp->ifc->dev,
-				o->type);
-			return -1;
-		}
-
-		switch(p->proto){
-		case Plcp:
-			switch(o->type){
-			case Oac:
-				flags |= Fac;
-				continue;
-			case Opc:
-				flags |= Fpc;
-				continue;
-			case Omtu:
-				mtu = nhgets(o->data);
-				if(mtu < ppp->ifc->m->mintu){
-					netlog(ppp->f, Logppp, "bogus mtu %d\n", mtu);
-					mtu = ppp->ifc->m->mintu;
-				}
-				continue;
-			case Omagic:
-				if(ppp->magic == nhgetl(o->data))
-					netlog(ppp->f, Logppp, "ppp: possible loop\n");
-				continue;
-			case Octlmap:
-				ctlmap = nhgetl(o->data);
-				continue;
-			case Oquality:
-				proto = nhgets(o->data);
-				if(proto != Plqm)
-					break;
-				x = nhgetl(o->data+2)*10;
-				period = (x+Period-1)/Period;
-				continue;
-			case Oauth:
-				proto = nhgets(o->data);
-				if(proto == Ppap && ppp->chapname[0] && ppp->secret[0]){
-					ppp->usepap = 1;
-					netlog(ppp->f, Logppp, "PPP %s: select PAP\n", ppp->ifc->dev);
-					continue;
-				}
-				if(proto != Pchap || o->data[2] != APmd5){
-					if(!nacking){
-						nacking = 1;
-						repb->wp = repm->data;
-						repm->code = Lconfnak;
-					}
-					putao(repb, Oauth, Pchap, APmd5);
-				}
-				else
-					ppp->usechap = 1;
-				ppp->usepap = 0;
-				continue;
-			}
-			break;
-		case Pipcp:
-			switch(o->type){
-			case Oipaddr:	
-				v4tov6(ipaddr, o->data);
-				if(!validv4(ppp->remote))
-					continue;
-				if(!validv4(ipaddr) && !rejecting){
-					/* other side requesting an address */
-					if(!nacking){
-						nacking = 1;
-						repb->wp = repm->data;
-						repm->code = Lconfnak;
-					}
-					putv4o(repb, Oipaddr, ppp->remote);
-				}
-				continue;
-			case Oipcompress:
-				proto = nhgets(o->data);
-				if(nocompress || proto != Pvjctcp || compress_negotiate(ppp->ctcp, o->data+2) < 0)
-					break;
-				flags |= Fipcompress;
-				continue;
-			}
-			break;
-		}
-
-		/* come here if option is not recognized */
-		if(!rejecting){
-			rejecting = 1;
-			repb->wp = repm->data;
-			repm->code = Lconfrej;
-		}
-		netlog(ppp->f, Logppp, "ppp %s: bad %ux option %d\n", ppp->ifc->dev, p->proto, o->type);
-		memmove(repb->wp, o, o->len);
-		repb->wp += o->len;
-	}
-
-	/* permanent changes only after we know that we liked the packet */
-	if(!rejecting && !nacking){
-		switch(p->proto){
-		case Plcp:
-			netlog(ppp->f, Logppp, "Plcp: mtu: %d %d x:%lux/r:%lux %lux\n", mtu, ppp->mtu, ppp->xctlmap, ppp->rctlmap, ctlmap);
-			ppp->period = period;
-			ppp->xctlmap = ctlmap;
-			if(mtu > Maxmtu)
-				mtu = Maxmtu;
-			if(mtu < Minmtu)
-				mtu = Minmtu;
-			ppp->mtu = mtu;
-			break;
-		case Pipcp:
-			if(validv4(ipaddr) && ppp->remotefrozen == 0)
- 				ipmove(ppp->remote, ipaddr);
-			break;
-		}
-		p->flags = flags;
-	}
-
-	hnputs(repm->len, BLEN(repb));
-	repb = putframe(ppp, p->proto, repb);
-	freeblist(repb);
-
-	return rejecting || nacking;
-}
-
-/*
- *  parse configuration rejection, just stop sending anything that they
- *  don't like (except for ipcp address nak).
- */
-static void
-rejopts(PPP *ppp, Pstate *p, Block *b, int code)
-{
-	Lcpmsg *m;
-	Lcpopt *o;
-
-	/* just give up trying what the other side doesn't like */
-	m = (Lcpmsg*)b->rp;
-	for(b->rp = m->data; b->rp < b->wp; b->rp += o->len){
-		o = (Lcpopt*)b->rp;
-		if(b->rp + o->len > b->wp || o->len == 0){
-			netlog(ppp->f, Logppp, "ppp %s: bad roption length %ux\n", ppp->ifc->dev,
-				o->type);
-			return;
-		}
-
-		if(code == Lconfrej){
-			if(o->type < 8*sizeof(p->optmask))
-				p->optmask &= ~(1<<o->type);
-			if(o->type == Oipdns)
-				ppp->usedns &= ~1;
-			else if(o->type == Oipdns2)
-				ppp->usedns &= ~2;
-			netlog(ppp->f, Logppp, "ppp %s: %ux rejecting %d\n", ppp->ifc->dev, p->proto,
-				o->type);
-			continue;
-		}
-
-		switch(p->proto){
-		case Plcp:
-			switch(o->type){
-			case Octlmap:
-				ppp->rctlmap = nhgetl(o->data);
-				break;
-			default:
-				if(o->type < 8*sizeof(p->optmask))
-					p->optmask &= ~(1<<o->type);
-				break;
-			};
-		case Pipcp:
-			switch(o->type){
-			case Oipaddr:
-				if(!validv4(ppp->local))
-					v4tov6(ppp->local, o->data);
-//				if(o->type < 8*sizeof(p->optmask))
-//					p->optmask &= ~(1<<o->type);
-				break;
-			case Oipdns:
-				if(!validv4(ppp->dns1))
-					v4tov6(ppp->dns1, o->data);
-				ppp->usedns &= ~1;
-				break;
-			case Oipdns2:
-				if(!validv4(ppp->dns2))
-					v4tov6(ppp->dns2, o->data);
-				ppp->usedns &= ~2;
-				break;
-			default:
-				if(o->type < 8*sizeof(p->optmask))
-					p->optmask &= ~(1<<o->type);
-				break;
-			}
-			break;
-		}
-	}
-}
-
-
-/*
- *  put a messages through the lcp or ipcp state machine.  They are
- *  very similar.
- */
-static void
-rcv(PPP *ppp, Pstate *p, Block *b)
-{
-	ulong len;
-	int err;
-	Lcpmsg *m;
-
-	if(BLEN(b) < 4){
-		netlog(ppp->f, Logppp, "ppp %s: short lcp message\n", ppp->ifc->dev);
-		freeblist(b);
-		return;
-	}
-	m = (Lcpmsg*)b->rp;
-	len = nhgets(m->len);
-	if(BLEN(b) < len){
-		netlog(ppp->f, Logppp, "ppp %s: short lcp message\n", ppp->ifc->dev);
-		freeblist(b);
-		return;
-	}
-
-	netlog(ppp->f, Logppp, "ppp: %ux rcv %d len %d id %d/%d/%d\n",
-		p->proto, m->code, len, m->id, p->confid, p->id);
-
-	if(p->proto != Plcp && ppp->lcp->state != Sopened){
-		netlog(ppp->f, Logppp, "ppp: non-lcp with lcp not open\n");
-		freeb(b);
-		return;
-	}
-
-	qlock(ppp);
-	switch(m->code){
-	case Lconfreq:
-		/* flush the output queue */
-		if(p->state == Sopened && p->proto == Plcp)
-			kchanio(ppp->cchan, "f", 1, OWRITE);
-
-		printopts(ppp, p, b, 0);
-		err = getopts(ppp, p, b);
-		if(err < 0)
-			break;
-
-		if(m->id == p->rcvdconfid)
-			break;			/* don't change state for duplicates */
-		p->rcvdconfid = m->id;
-
-		switch(p->state){
-		case Sackrcvd:
-			if(err)
-				break;
-			newstate(ppp, p, Sopened);
-			break;
-		case Sclosed:
-		case Sopened:
-			config(ppp, p, 1);
-			if(err == 0)
-				newstate(ppp, p, Sacksent);
-			else
-				newstate(ppp, p, Sreqsent);
-			break;
-			break;
-		case Sreqsent:
-		case Sacksent:
-			if(err == 0)
-				newstate(ppp, p, Sacksent);
-			else
-				newstate(ppp, p, Sreqsent);
-			break;
-		}
-		break;
-	case Lconfack:
-		if(p->confid != m->id){
-			/* ignore if it isn't the message we're sending */
-			netlog(ppp->f, Logppp, "ppp: dropping confack\n");
-			break;
-		}
-		p->confid = -1;		/* ignore duplicates */
-		p->id++;		/* avoid sending duplicates */
-
-		switch(p->state){
-		case Sopened:
-		case Sackrcvd:
-			config(ppp, p, 1);
-			newstate(ppp, p, Sreqsent);
-			break;
-		case Sreqsent:
-			newstate(ppp, p, Sackrcvd);
-			break;
-		case Sacksent:
-			newstate(ppp, p, Sopened);
-			break;
-		}
-		break;
-	case Lconfrej:
-	case Lconfnak:
-		if(p->confid != m->id) {
-			/* ignore if it isn't the message we're sending */
-			netlog(ppp->f, Logppp, "ppp: dropping confrej or confnak\n");
-			break;
-		}
-		p->confid = -1;		/* ignore duplicates */
-		p->id++;		/* avoid sending duplicates */
-
-		switch(p->state){
-		case Sopened:
-		case Sackrcvd:
-			config(ppp, p, 1);
-			newstate(ppp, p, Sreqsent);
-			break;
-		case Sreqsent:
-		case Sacksent:
-			printopts(ppp, p, b, 0);
-			rejopts(ppp, p, b, m->code);
-			config(ppp, p, 1);
-			break;
-		}
-		break;
-	case Ltermreq:
-		m->code = Ltermack;
-		b = putframe(ppp, p->proto, b);
-
-		switch(p->state){
-		case Sackrcvd:
-		case Sacksent:
-			newstate(ppp, p, Sreqsent);
-			break;
-		case Sopened:
-			newstate(ppp, p, Sclosing);
-			break;
-		}
-		break;
-	case Ltermack:
-		if(p->termid != m->id)	/* ignore if it isn't the message we're sending */
-			break;
-
-		if(p->proto == Plcp)
-			ppp->ipcp->state = Sclosed;
-		switch(p->state){
-		case Sclosing:
-			newstate(ppp, p, Sclosed);
-			break;
-		case Sackrcvd:
-			newstate(ppp, p, Sreqsent);
-			break;
-		case Sopened:
-			config(ppp, p, 0);
-			newstate(ppp, p, Sreqsent);
-			break;
-		}
-		break;
-	case Lcoderej:
-		netlog(ppp->f, Logppp, "ppp %s: code reject %d\n", ppp->ifc->dev, m->data[0]);
-		break;
-	case Lprotorej:
-		netlog(ppp->f, Logppp, "ppp %s: proto reject %lux\n", ppp->ifc->dev, nhgets(m->data));
-		break;
-	case Lechoreq:
-		m->code = Lechoack;
-		b = putframe(ppp, p->proto, b);
-		break;
-	case Lechoack:
-	case Ldiscard:
-		/* nothing to do */
-		break;
-	}
-
-	qunlock(ppp);
-	freeblist(b);
-}
-
-/*
- *  timer for protocol state machine
- */
-static void
-ptimer(PPP *ppp, Pstate *p)
-{
-	if(p->state == Sopened || p->state == Sclosed)
-		return;
-
-	p->timeout--;
-	switch(p->state){
-	case Sclosing:
-		sendtermreq(ppp, p);
-		break;
-	case Sreqsent:
-	case Sacksent:
-		if(p->timeout <= 0){
-			if(p->proto && ppp->cchan != nil)
-				kchanio(ppp->cchan, "f", 1, OWRITE); /* flush output queue */
-			newstate(ppp, p, Sclosed);
-		} else {
-			config(ppp, p, 0);
-		}
-		break;
-	case Sackrcvd:
-		if(p->timeout <= 0){
-			if(p->proto && ppp->cchan != nil)
-				kchanio(ppp->cchan, "f", 1, OWRITE); /* flush output queue */
-			newstate(ppp, p, Sclosed);
-		}
-		else {
-			config(ppp, p, 0);
-			newstate(ppp, p, Sreqsent);
-		}
-		break;
-	}
-}
-
-/*
- *  timer for ppp
- */
-static void
-ppptimer(void *arg)
-{
-	PPP *ppp;
-
-	ppp = arg;
-	ppp->timep = up;
-	if(waserror()){
-		netlog(ppp->f, Logppp, "ppptimer: %I: %s\n", ppp->local, up->env->errstr);
-		ppp->timep = 0;
-		pexit("hangup", 1);
-	}
-	for(;;){
-		tsleep(&up->sleep, return0, nil, Period);
-		if(ppp->pppup){
-			qlock(ppp);
-
-			ptimer(ppp, ppp->lcp);
-			if(ppp->lcp->state == Sopened)
-				ptimer(ppp, ppp->ipcp);
-
-			if(ppp->period && --(ppp->timeout) <= 0){
-				ppp->timeout = ppp->period;
-				putlqm(ppp);
-			}
-
-			qunlock(ppp);
-		}
-	}
-}
-
-static void
-setdefroute(PPP *ppp, Ipaddr gate)
-{
-	int fd, n;
-	char path[128], msg[128];
-
-	snprint(path, sizeof path, "#I%d/iproute", ppp->f->dev);
-	fd = kopen(path, ORDWR);
-	if(fd < 0)
-		return;
-	n = snprint(msg, sizeof(msg), "add 0 0 %I", gate);
-	kwrite(fd, msg, n);
-	kclose(fd);
-}
-
-static void
-ipconnect(PPP *ppp)
-{
-	int fd, n;
-	char path[128], msg[128];
-
-	snprint(path, sizeof path, "#I%d/ipifc/%d/ctl", ppp->f->dev, ppp->ifc->conv->x);
-	fd = kopen(path, ORDWR);
-	if(fd < 0)
-		return;
-	n = snprint(msg, sizeof(msg), "connect %I 255.255.255.255 %I", ppp->local, ppp->remote);
-	if (kwrite(fd, msg, n) != n)
-		print("ppp ipconnect: %s: %r\n", msg);
-	kclose(fd);
-}
-
-PPP*
-pppopen(PPP *ppp, char *dev,
-	Ipaddr ipaddr, Ipaddr remip,
-	int mtu, int framing,
-	char *chapname, char *secret)
-{
-	int fd, cfd;
-	char ctl[Maxpath];
-
-	invalidate(ppp->remote);
-	invalidate(ppp->local);
-	invalidate(ppp->dns1);
-	invalidate(ppp->dns2);
-	ppp->mtu = Defmtu;
-	ppp->mru = mtu;
-	ppp->framing = framing;
-
-	if(remip != nil && validv4(remip)){
-		ipmove(ppp->remote, remip);
-		ppp->remotefrozen = 1;
-	}
-	if(ipaddr != nil && validv4(ipaddr)){
-		ipmove(ppp->local, ipaddr);
-		ppp->localfrozen = 1;
-	}
-
-	/* authentication goo */
-	ppp->secret[0] = 0;
-	if(secret != nil)
-		strncpy(ppp->secret, secret, sizeof(ppp->secret));
-	ppp->chapname[0] = 0;
-	if(chapname != nil)
-		strncpy(ppp->chapname, chapname, sizeof(ppp->chapname));
-
-	if(strchr(dev, '!'))
-		fd = kdial(dev, nil, nil, nil);
-	else
-		fd = kopen(dev, ORDWR);
-	if(fd < 0){
-		netlog(ppp->f, Logppp, "ppp: can't open %s\n", dev);
-		return nil;
-	}
-	ppp->dchan = fdtochan(up->env->fgrp, fd, ORDWR, 0, 1);
-	kclose(fd);
-
-	/* set up serial line */
-/* XXX this stuff belongs in application, not driver */
-	sprint(ctl, "%sctl", dev);
-	cfd = kopen(ctl, ORDWR);
-	if(cfd >= 0){
-		ppp->cchan = fdtochan(up->env->fgrp, cfd, ORDWR, 0, 1);
-		kclose(cfd);
-		kchanio(ppp->cchan, "m1", 2, OWRITE);	/* cts/rts flow control/fifo's) on */
-		kchanio(ppp->cchan, "q64000", 6, OWRITE);/* increas q size to 64k */
-		kchanio(ppp->cchan, "n1", 2, OWRITE);	/* nonblocking writes on */
-		kchanio(ppp->cchan, "r1", 2, OWRITE);	/* rts on */
-		kchanio(ppp->cchan, "d1", 2, OWRITE);	/* dtr on */
-	}
-
-	ppp->pppup = 1;
-	init(ppp);
-	return ppp;
-}
-
-static void
-hangup(PPP *ppp)
-{
-	qlock(ppp);
-	if(waserror()){
-		qunlock(ppp);
-		nexterror();
-	}
-	netlog(ppp->f, Logppp, "PPP Hangup\n");
-	errlog(ppp, Ehungup);
-	if(ppp->pppup && ppp->cchan != nil){
-		kchanio(ppp->cchan, "f", 1, OWRITE);	/* flush */
-		kchanio(ppp->cchan, "h", 1, OWRITE);	/* hangup */
-	}
-	cclose(ppp->dchan);
-	cclose(ppp->cchan);
-	ppp->dchan = nil;
-	ppp->cchan = nil;
-	ppp->pppup = 0;
-	qunlock(ppp);
-	poperror();
-}
-
-/* return next input IP packet */
-Block*
-pppread(PPP *ppp)
-{
-	Block *b;
-	int proto;
-	Lcpmsg *m;
-
-	for(;;){
-		proto = getframe(ppp, &b);
-		if(b == nil)
-			return nil;
-		netlog(ppp->f, Logppp, "ppp: read proto %d len %d\n", proto, blocklen(b));
-		switch(proto){
-		case Plcp:
-			rcv(ppp, ppp->lcp, b);
-			break;
-		case Pipcp:
-			rcv(ppp, ppp->ipcp, b);
-			break;
-		case Pip:
-			if(ppp->ipcp->state == Sopened)
-				return b;
-			freeblist(b);
-			break;
-		case Plqm:
-			getlqm(ppp, b);
-			break;
-		case Pchap:
-			getchap(ppp, b);
-			break;
-		case Ppap:
-			getpap(ppp, b);
-			break;
-		case Pvjctcp:
-		case Pvjutcp:
-			if(ppp->ipcp->state == Sopened){
-				b = tcpuncompress(ppp->ctcp, b, proto, ppp->f);
-				if(b != nil)
-					return b;
-			}
-			freeblist(b);
-			break;
-		default:
-			netlog(ppp->f, Logppp, "unknown proto %ux\n", proto);
-			if(ppp->lcp->state == Sopened){
-				/* reject the protocol */
-				b->rp -= 6;
-				m = (Lcpmsg*)b->rp;
-				m->code = Lprotorej;
-				m->id = ++ppp->lcp->id;
-				hnputs(m->data, proto);
-				hnputs(m->len, BLEN(b));
-				b = putframe(ppp, Plcp, b);
-			}
-			freeblist(b);
-			break;
-		}
-	}
-	return nil;		/* compiler confused */
-}
-
-/* transmit an IP packet */
-int
-pppwrite(PPP *ppp, Block *b)
-{
-	ushort proto;
-	int r;
-
-	qlock(ppp);
-
-	/* can't send ip packets till we're established */
-	if(ppp->ipcp->state != Sopened)
-		goto ret;
-
-	/* link hung up */
-	if(ppp->dchan == nil)
-		goto ret;
-
-	b = concatblock(b);		/* or else compression will barf */
-
-	proto = Pip;
-	if(ppp->ipcp->flags & Fipcompress)
-		proto = compress(ppp->ctcp, b, ppp->f);
-	b = putframe(ppp, proto, b);
-
-
-ret:
-	qunlock(ppp);
-
-	r = blocklen(b);
-	netlog(ppp->f, Logppp, "ppp wrt len %d\n", r);
-
-	freeblist(b);
-	return r;
-}
-
-/*
- *  link quality management
- */
-static void
-getlqm(PPP *ppp, Block *b)
-{
-	Qualpkt *p;
-
-	p = (Qualpkt*)b->rp;
-	if(BLEN(b) == sizeof(Qualpkt)){
-		ppp->in.reports++;
-		ppp->pout.reports = nhgetl(p->peeroutreports);
-		ppp->pout.packets = nhgetl(p->peeroutpackets);
-		ppp->pout.bytes = nhgetl(p->peeroutbytes);
-		ppp->pin.reports = nhgetl(p->peerinreports);
-		ppp->pin.packets = nhgetl(p->peerinpackets);
-		ppp->pin.discards = nhgetl(p->peerindiscards);
-		ppp->pin.errors = nhgetl(p->peerinerrors);
-		ppp->pin.bytes = nhgetl(p->peerinbytes);
-
-		/* save our numbers at time of reception */
-		memmove(&ppp->sin, &ppp->in, sizeof(Qualstats));
-
-	}
-	freeblist(b);
-	if(ppp->period == 0)
-		putlqm(ppp);
-
-}
-static void
-putlqm(PPP *ppp)
-{
-	Qualpkt *p;
-	Block *b;
-
-	b = allocb(sizeof(Qualpkt));
-	b->wp += sizeof(Qualpkt);
-	p = (Qualpkt*)b->rp;
-	hnputl(p->magic, 0);
-
-	/* heresay (what he last told us) */
-	hnputl(p->lastoutreports, ppp->pout.reports);
-	hnputl(p->lastoutpackets, ppp->pout.packets);
-	hnputl(p->lastoutbytes, ppp->pout.bytes);
-
-	/* our numbers at time of last reception */
-	hnputl(p->peerinreports, ppp->sin.reports);
-	hnputl(p->peerinpackets, ppp->sin.packets);
-	hnputl(p->peerindiscards, ppp->sin.discards);
-	hnputl(p->peerinerrors, ppp->sin.errors);
-	hnputl(p->peerinbytes, ppp->sin.bytes);
-
-	/* our numbers now */
-	hnputl(p->peeroutreports, ppp->out.reports+1);
-	hnputl(p->peeroutpackets, ppp->out.packets+1);
-	hnputl(p->peeroutbytes, ppp->out.bytes+53/*hack*/);
-
-	b = putframe(ppp, Plqm, b);
-	freeblist(b);
-	ppp->out.reports++;
-}
-
-/*
- *  challenge response dialog
- */
-static void
-getchap(PPP *ppp, Block *b)
-{
-	Lcpmsg *m;
-	int len, vlen, n;
-	char md5buf[512];
-
-	m = (Lcpmsg*)b->rp;
-	len = nhgets(m->len);
-	if(BLEN(b) < len){
-		netlog(ppp->f, Logppp, "ppp %s: short chap message\n", ppp->ifc->dev);
-		freeblist(b);
-		return;
-	}
-
-	switch(m->code){
-	case Cchallenge:
-		vlen = m->data[0];
-		if(vlen > len - 5){
-			netlog(ppp->f, Logppp, "PPP %s: bad challenge len\n", ppp->ifc->dev);
-			freeblist(b);
-			break;
-		}
-
-		netlog(ppp->f, Logppp, "PPP %s: CHAP Challenge\n", ppp->ifc->dev);
-netlog(ppp->f, Logppp, "(secret %s chapname %s id %d)\n", ppp->secret, ppp->chapname, m->id);
-		/* create string to hash */
-		md5buf[0] = m->id;
-		strcpy(md5buf+1, ppp->secret);
-		n = strlen(ppp->secret) + 1;
-		memmove(md5buf+n, m->data+1, vlen);
-		n += vlen;
-		freeblist(b);
-
-		/* send reply */
-		len = 4 + 1 + 16 + strlen(ppp->chapname);
-		b = alloclcp(2, md5buf[0], len);
-		m = IPB2LCP(b);
-		m->data[0] = 16;
-		md5((uchar*)md5buf, n, m->data+1, 0);
-		memmove((char*)m->data+17, ppp->chapname, strlen(ppp->chapname));
-		hnputs(m->len, len);
-		b->wp += len-4;
-		b = putframe(ppp, Pchap, b);
-		break;
-	case Cresponse:
-		netlog(ppp->f, Logppp, "PPP %s: chap response?\n", ppp->ifc->dev);
-		break;
-	case Csuccess:
-		netlog(ppp->f, Logppp, "PPP %s: chap succeeded\n", ppp->ifc->dev);
-		setphase(ppp, Pnet);
-		break;
-	case Cfailure:
-		netlog(ppp->f, Logppp, "PPP %s: chap failed: %.*s\n", ppp->ifc->dev, len-4, m->data);
-		errlog(ppp, Eperm);
-		break;
-	default:
-		netlog(ppp->f, Logppp, "PPP %s: chap code %d?\n", ppp->ifc->dev, m->code);
-		break;
-	}
-	freeblist(b);
-}
-
-/*
- *  password authentication protocol dialog
- *	-- obsolete but all we know how to use with NT just now
- */
-static void
-sendpap(PPP *ppp)
-{
-	Lcpmsg *m;
-	int clen, slen, len;
-	Block *b;
-	uchar *p;
-
-	clen = strlen(ppp->chapname);
-	slen = strlen(ppp->secret);
-	len = 4 + 1 + clen + 1 + slen;
-	ppp->papid = ++ppp->lcp->id;
-	b = alloclcp(Cpapreq, ppp->papid, len);
-	m = IPB2LCP(b);
-	p = m->data;
-	p[0] = clen;
-	memmove(p+1, ppp->chapname, clen);
-	p += clen + 1;
-	p[0] = slen;
-	memmove(p+1, ppp->secret, slen);
-	hnputs(m->len, len);
-	b->wp += len-4;
-	b = putframe(ppp, Ppap, b);
-	netlog(ppp->f, Logppp, "PPP %s: sent pap auth req (%d)\n", ppp->ifc->dev, len);
-	freeblist(b);
-}
-
-static void
-getpap(PPP *ppp, Block *b)
-{
-	Lcpmsg *m;
-	int len;
-
-	m = (Lcpmsg*)b->rp;
-	len = nhgets(m->len);
-	if(BLEN(b) < len){
-		netlog(ppp->f, Logppp, "ppp %s: short pap message\n", ppp->ifc->dev);
-		freeblist(b);
-		return;
-	}
-
-	switch(m->code){
-	case Cpapreq:
-		netlog(ppp->f, Logppp, "PPP %s: pap request?\n", ppp->ifc->dev);
-		break;
-	case Cpapack:
-		netlog(ppp->f, Logppp, "PPP %s: PAP succeeded\n", ppp->ifc->dev);
-		setphase(ppp, Pnet);
-		break;
-	case Cpapnak:
-		if(m->data[0])
-			netlog(ppp->f, Logppp, "PPP %s: PAP failed: %.*s\n", ppp->ifc->dev, len-5, m->data+1);
-		else
-			netlog(ppp->f, Logppp, "PPP %s: PAP failed\n", ppp->ifc->dev);
-		errlog(ppp, Eperm);
-		break;
-	default:
-		netlog(ppp->f, Logppp, "PPP %s: pap code %d?\n", ppp->ifc->dev, m->code);
-		break;
-	}
-	freeblist(b);
-}
-
-static void
-printopts(PPP *ppp, Pstate *p, Block *b, int send)
-{
-	Lcpmsg *m;	
-	Lcpopt *o;
-	int proto, x, period;
-	uchar *cp;
-	char *code, *dir;
-
-	m = (Lcpmsg*)b->rp;
-	switch(m->code) {
-	default: code = "<unknown>"; break;
-	case Lconfreq: code = "confrequest"; break;
-	case Lconfack: code = "confack"; break;
-	case Lconfnak: code = "confnak"; break;
-	case Lconfrej: code = "confreject"; break;
-	}
-
-	if(send)
-		dir = "send";
-	else
-		dir = "recv";
-
-	netlog(ppp->f, Logppp, "ppp: %s %s: id=%d\n", dir, code, m->id);
-
-	for(cp = m->data; cp < b->wp; cp += o->len){
-		o = (Lcpopt*)cp;
-		if(cp + o->len > b->wp || o->len == 0){
-			netlog(ppp->f, Logppp, "\tbad option length %ux\n", o->type);
-			return;
-		}
-
-		switch(p->proto){
-		case Plcp:
-			switch(o->type){
-			default:
-				netlog(ppp->f, Logppp, "\tunknown %d len=%d\n", o->type, o->len);
-				break;
-			case Omtu:
-				netlog(ppp->f, Logppp, "\tmtu = %d\n", nhgets(o->data));
-				break;
-			case Octlmap:
-				netlog(ppp->f, Logppp, "\tctlmap = %ux\n", nhgetl(o->data));
-				break;
-			case Oauth:
-				netlog(ppp->f, Logppp, "\tauth = ", nhgetl(o->data));
-				proto = nhgets(o->data);
-				switch(proto) {
-				default:
-					netlog(ppp->f, Logppp, "unknown auth proto %d\n", proto);
-					break;
-				case Ppap:
-					netlog(ppp->f, Logppp, "password\n");
-					break;
-				case Pchap:
-					netlog(ppp->f, Logppp, "chap %ux\n", o->data[2]);
-					break;
-				}
-				break;
-			case Oquality:
-				proto = nhgets(o->data);
-				switch(proto) {
-				default:
-					netlog(ppp->f, Logppp, "\tunknown quality proto %d\n", proto);
-					break;
-				case Plqm:
-					x = nhgetl(o->data+2)*10;
-					period = (x+Period-1)/Period;
-					netlog(ppp->f, Logppp, "\tlqm period = %d\n", period);
-					break;
-				}
-			case Omagic:
-				netlog(ppp->f, Logppp, "\tmagic = %ux\n", nhgetl(o->data));
-				break;
-			case Opc:
-				netlog(ppp->f, Logppp, "\tprotocol compress\n");
-				break;
-			case Oac:
-				netlog(ppp->f, Logppp, "\taddr compress\n");
-				break;
-			}
-			break;
-		case Pccp:
-			switch(o->type){
-			default:
-				netlog(ppp->f, Logppp, "\tunknown %d len=%d\n", o->type, o->len);
-				break;
-			case Ocoui:	
-				netlog(ppp->f, Logppp, "\tOUI\n");
-				break;
-			case Ocstac:
-				netlog(ppp->f, Logppp, "\tstac LZS\n");
-				break;
-			case Ocmppc:	
-				netlog(ppp->f, Logppp, "\tMicrosoft PPC len=%d %ux\n", o->len, nhgetl(o->data));
-				break;
-			}
-			break;
-		case Pecp:
-			switch(o->type){
-			default:
-				netlog(ppp->f, Logppp, "\tunknown %d len=%d\n", o->type, o->len);
-				break;
-			case Oeoui:	
-				netlog(ppp->f, Logppp, "\tOUI\n");
-				break;
-			case Oedese:
-				netlog(ppp->f, Logppp, "\tDES\n");
-				break;
-			}
-			break;
-		case Pipcp:
-			switch(o->type){
-			default:
-				netlog(ppp->f, Logppp, "\tunknown %d len=%d\n", o->type, o->len);
-				break;
-			case Oipaddrs:	
-				netlog(ppp->f, Logppp, "\tip addrs - deprecated\n");
-				break;
-			case Oipcompress:
-				netlog(ppp->f, Logppp, "\tip compress\n");
-				break;
-			case Oipaddr:	
-				netlog(ppp->f, Logppp, "\tip addr %V\n", o->data);
-				break;
-			case Oipdns:
-				netlog(ppp->f, Logppp, "\tdns addr %V\n", o->data);
-				break;
-			case Oipwins:	
-				netlog(ppp->f, Logppp, "\twins addr %V\n", o->data);
-				break;
-			case Oipdns2:
-				netlog(ppp->f, Logppp, "\tdns2 addr %V\n", o->data);
-				break;
-			case Oipwins2:	
-				netlog(ppp->f, Logppp, "\twins2 addr %V\n", o->data);
-				break;
-			}
-			break;
-		}
-	}
-}
-
-static void
-sendtermreq(PPP *ppp, Pstate *p)
-{
-	Block *b;
-	Lcpmsg *m;
-
-	p->termid = ++(p->id);
-	b = alloclcp(Ltermreq, p->termid, 4);
-	m = IPB2LCP(b);
-	hnputs(m->len, 4);
-	putframe(ppp, p->proto, b);
-	freeb(b);
-	newstate(ppp, p, Sclosing);
-}
-
-static void
-sendechoreq(PPP *ppp, Pstate *p)
-{
-	Block *b;
-	Lcpmsg *m;
-
-	p->termid = ++(p->id);
-	b = alloclcp(Lechoreq, p->id, 4);
-	m = IPB2LCP(b);
-	hnputs(m->len, 4);
-	putframe(ppp, p->proto, b);
-	freeb(b);
-}
-
-/*
- *  return non-zero if this is a valid v4 address
- */
-static int
-validv4(Ipaddr addr)
-{
-	return memcmp(addr, v4prefix, IPv4off) == 0;
-}
-
-static void
-invalidate(Ipaddr addr)
-{
-	ipmove(addr, IPnoaddr);
-}
--- a/os/ip/ppp.h
+++ /dev/null
@@ -1,258 +1,0 @@
-typedef struct PPP	PPP;
-typedef struct Pstate	Pstate;
-typedef struct Lcpmsg	Lcpmsg;
-typedef struct Lcpopt	Lcpopt;
-typedef struct Qualpkt	Qualpkt;
-typedef struct Qualstats Qualstats;
-typedef struct Tcpc	Tcpc;
-
-typedef uchar Ipaddr[IPaddrlen];
-
-enum
-{
-	HDLC_frame=	0x7e,
-	HDLC_esc=	0x7d,
-
-	/* PPP frame fields */
-	PPP_addr=	0xff,
-	PPP_ctl=	0x3,
-	PPP_initfcs=	0xffff,
-	PPP_goodfcs=	0xf0b8,
-
-	/* PPP phases */
-	Pdead=		0,	
-	Plink,				/* doing LCP */
-	Pauth,				/* doing chap */
-	Pnet,				/* doing IPCP, CCP */
-	Pterm,				/* closing down */
-
-	/* PPP protocol types */
-	Pip=		0x21,		/* internet */
-	Pvjctcp=	0x2d,		/* compressing van jacobson tcp */
-	Pvjutcp=	0x2f,		/* uncompressing van jacobson tcp */
-	Pcdata=		0xfd,		/* compressed datagram */
-	Pipcp=		0x8021,		/* ip control */
-	Pecp=		0x8053,		/* encryption control */
-	Pccp=		0x80fd,		/* compressed datagram control */
-	Plcp=		0xc021,		/* link control */
-	Ppap=		0xc023,		/* password auth. protocol */
-	Plqm=		0xc025,		/* link quality monitoring */
-	Pchap=		0xc223,		/* challenge/response */
-
-	/* LCP codes */
-	Lconfreq=	1,
-	Lconfack=	2,
-	Lconfnak=	3,
-	Lconfrej=	4,
-	Ltermreq=	5,
-	Ltermack=	6,
-	Lcoderej=	7,
-	Lprotorej=	8,
-	Lechoreq=	9,
-	Lechoack=	10,
-	Ldiscard=	11,
-
-	/* Lcp configure options */
-	Omtu=		1,
-	Octlmap=	2,
-	Oauth=		3,
-	Oquality=	4,
-	Omagic=		5,
-	Opc=		7,
-	Oac=		8,
-	Obad=		12,		/* for testing */
-
-	/* authentication protocols */
-	APmd5=		5,
-
-	/* lcp flags */
-	Fmtu=		1<<Omtu,
-	Fctlmap=	1<<Octlmap,
-	Fauth=		1<<Oauth,
-	Fquality=	1<<Oquality,
-	Fmagic=		1<<Omagic,
-	Fpc=		1<<Opc,
-	Fac=		1<<Oac,
-	Fbad=		1<<Obad,
-
-	/* Chap codes */
-	Cchallenge=	1,
-	Cresponse=	2,
-	Csuccess=	3,
-	Cfailure=	4,
-
-	/* Pap codes */
-	Cpapreq=		1,
-	Cpapack=		2,
-	Cpapnak=		3,
-
-	/* link states */
-	Sclosed=		0,
-	Sclosing,
-	Sreqsent,
-	Sackrcvd,
-	Sacksent,
-	Sopened,
-
-	/* ccp configure options */
-	Ocoui=		0,	/* proprietary compression */
-	Ocstac=		17,	/* stac electronics LZS */
-	Ocmppc=		18,	/* microsoft ppc */
-
-	/* ccp flags */
-	Fcoui=		1<<Ocoui,
-	Fcstac=		1<<Ocstac,
-	Fcmppc=		1<<Ocmppc,
-
-	/* ecp configure options */
-	Oeoui=		0,	/* proprietary compression */
-	Oedese=		1,	/* DES */
-
-	/* ecp flags */
-	Feoui=		1<<Oeoui,
-	Fedese=		1<<Oedese,
-
-	/* ipcp configure options */
-	Oipaddrs=	1,
-	Oipcompress=	2,
-	Oipaddr=	3,
-	Oipdns=		129,
-	Oipwins=	130,
-	Oipdns2=	131,
-	Oipwins2=	132,
-
-	/* ipcp flags */
-	Fipaddrs=	1<<Oipaddrs,
-	Fipcompress=	1<<Oipcompress,
-	Fipaddr=	1<<Oipaddr,
-
-	Period=		3*1000,	/* period of retransmit process (in ms) */
-	Timeout=	10,	/* xmit timeout (in Periods) */
-
-	MAX_STATES	= 16,		/* van jacobson compression states */
-	Defmtu=		1450,		/* default that we will ask for */
-	Minmtu=		128,		/* minimum that we will accept */
-	Maxmtu=		2000,		/* maximum that we will accept */
-};
-
-
-struct Pstate
-{
-	int	proto;		/* protocol type */
-	int	timeout;		/* for current state */
-	int	rxtimeout;	/* for current retransmit */
-	ulong	flags;		/* options received */
-	uchar	id;		/* id of current message */
-	uchar	confid;		/* id of current config message */
-	uchar	termid;		/* id of current termination message */
-	uchar	rcvdconfid;	/* id of last conf message received */
-	uchar	state;		/* PPP link state */
-	ulong	optmask;		/* which options to request */
-	int	echoack;	/* recieved echo ack */
-	int	echotimeout;	/* echo timeout */
-};
-
-struct Qualstats
-{
-	ulong	reports;
-	ulong	packets;
-	ulong	bytes;
-	ulong	discards;
-	ulong	errors;
-};
-
-struct PPP
-{
-	QLock;
-
-	Chan*	dchan;			/* serial line */
-	Chan*	cchan;			/* serial line control */
-	int		framing;	/* non-zero to use framing characters */
-	Ipaddr	local;
-	int		localfrozen;
-	Ipaddr	remote;
-	int		remotefrozen;
-
-	int	pppup;
-	Fs	*f;		/* file system we belong to */
-	Ipifc*	ifc;
-	Proc*	readp;			/* reading process */
-	Proc*	timep;			/* timer process */
-	Block*	inbuf;			/* input buffer */
-	Block*	outbuf;			/* output buffer */
-	QLock	outlock;		/*  and its lock */
-
-	ulong	magic;			/* magic number to detect loop backs */
-	ulong	rctlmap;		/* map of chars to ignore in rcvr */
-	ulong	xctlmap;		/* map of chars to excape in xmit */
-	int		phase;		/* PPP phase */
-	Pstate*	lcp;			/* lcp state */
-	Pstate*	ipcp;			/* ipcp state */
-	char	secret[256];		/* md5 key */
-	char	chapname[256];		/* chap system name */
-	Tcpc*	ctcp;
-	ulong		mtu;		/* maximum xmit size */
-	ulong		mru;		/* maximum recv size */
-
-	int	baud;
-	int	usepap;	/* authentication is PAP in every sense, not CHAP */
-	int	papid;
-	int	usechap;
-
-	/* rfc */
-	int	usedns;
-	Ipaddr	dns1;
-	Ipaddr	dns2;
-
-	/* link quality monitoring */
-	int		period;		/* lqm period */
-	int		timeout;	/* time to next lqm packet */
-	Qualstats	in;		/* local */
-	Qualstats	out;
-	Qualstats	pin;		/* peer */
-	Qualstats	pout;
-	Qualstats	sin;		/* saved */
-};
-
-PPP*		pppopen(PPP*, char*, Ipaddr, Ipaddr, int, int, char*, char*);
-Block*	pppread(PPP*);
-int		pppwrite(PPP*, Block*);
-void		pppclose(PPP*);
-
-struct Lcpmsg
-{
-	uchar	code;
-	uchar	id;
-	uchar	len[2];
-	uchar	data[1];
-};
-
-struct Lcpopt
-{
-	uchar	type;
-	uchar	len;
-	uchar	data[1];
-};
-
-struct Qualpkt
-{
-	uchar	magic[4];
-
-	uchar	lastoutreports[4];
-	uchar	lastoutpackets[4];
-	uchar	lastoutbytes[4];
-	uchar	peerinreports[4];
-	uchar	peerinpackets[4];
-	uchar	peerindiscards[4];
-	uchar	peerinerrors[4];
-	uchar	peerinbytes[4];
-	uchar	peeroutreports[4];
-	uchar	peeroutpackets[4];
-	uchar	peeroutbytes[4];
-};
-
-ushort	compress(Tcpc*, Block*, Fs*);
-Tcpc*	compress_init(Tcpc*);
-int		compress_negotiate(Tcpc*, uchar*);
-ushort	tcpcompress(Tcpc*, Block*, Fs*);
-Block*	tcpuncompress(Tcpc*, Block*, ushort, Fs*);
--- a/os/ip/pppmedium.c
+++ /dev/null
@@ -1,192 +1,0 @@
-#include "u.h"
-#include "../port/lib.h"
-#include "mem.h"
-#include "dat.h"
-#include "fns.h"
-#include "../port/error.h"
-
-#include "ip.h"
-#include "kernel.h"
-#include "ppp.h"
-
-static void	pppreader(void *a);
-static void	pppbind(Ipifc *ifc, int argc, char **argv);
-static void	pppunbind(Ipifc *ifc);
-static void	pppbwrite(Ipifc *ifc, Block *bp, int version, uchar *ip);
-static void	deadremote(Ipifc *ifc);
-
-Medium pppmedium =
-{
-.name=	"ppp",
-.hsize=	4,
-.mintu=	Minmtu,
-.maxtu=	Maxmtu,
-.maclen=	0,
-.bind=	pppbind,
-.unbind=	pppunbind,
-.bwrite=	pppbwrite,
-.unbindonclose=	0,		/* don't unbind on last close */
-};
-
-/*
- *  called to bind an IP ifc to an ethernet device
- *  called with ifc wlock'd
- */
-static void
-pppbind(Ipifc *ifc, int argc, char **argv)
-{
-	PPP *ppp;
-	Ipaddr ipaddr, remip;
-	int mtu, framing;
-	char *chapname, *secret;
-
-	if(argc < 3)
-		error(Ebadarg);
-
-	ipmove(ipaddr, IPnoaddr);
-	ipmove(remip, IPnoaddr);
-	mtu = Defmtu;
-	framing = 1;
-	chapname = nil;
-	secret = nil;
-
-	switch(argc){
-	default:
-	case 9:
-		if(argv[8][0] != '-')
-			secret = argv[8];
-	case 8:
-		if(argv[7][0] != '-')
-			chapname = argv[7];
-	case 7:
-		if(argv[6][0] != '-')
-			framing = strtoul(argv[6], 0, 0);
-	case 6:
-		if(argv[5][0] != '-')
-			mtu = strtoul(argv[5], 0, 0);
-	case 5:
-		if(argv[4][0] != '-')
-			parseip(remip, argv[4]);
-	case 4:
-		if(argv[3][0] != '-')
-			parseip(ipaddr, argv[3]);
-	case 3:
-		break;
-	}
-
-	ppp = smalloc(sizeof(*ppp));
-	ppp->ifc = ifc;
-	ppp->f = ifc->conv->p->f;
-	ifc->arg = ppp;
-	if(waserror()){
-		pppunbind(ifc);
-		nexterror();
-	}
-	if(pppopen(ppp, argv[2], ipaddr, remip, mtu, framing, chapname, secret) == nil)
-		error("ppp open failed");
-	poperror();
-	kproc("pppreader", pppreader, ifc, KPDUPPG|KPDUPFDG);
-}
-
-static void
-pppreader(void *a)
-{
-	Ipifc *ifc;
-	Block *bp;
-	PPP *ppp;
-
-	ifc = a;
-	ppp = ifc->arg;
-	ppp->readp = up;	/* hide identity under a rock for unbind */
-	setpri(PriHi);
-
-	if(waserror()){
-		netlog(ppp->f, Logppp, "pppreader: %I: %s\n", ppp->local, up->env->errstr);
-		ppp->readp = 0;
-		deadremote(ifc);
-		pexit("hangup", 1);
-	}
-
-	for(;;){
-		bp = pppread(ppp);
-		if(bp == nil)
-			error("hungup");
-		if(!canrlock(ifc)){
-			freeb(bp);
-			continue;
-		}
-		if(waserror()){
-			runlock(ifc);
-			nexterror();
-		}
-		ifc->in++;
-		if(ifc->lifc == nil)
-			freeb(bp);
-		else
-			ipiput(ppp->f, ifc, bp);
-		runlock(ifc);
-		poperror();
-	}
-}
-
-/*
- *  called with ifc wlock'd
- */
-static void
-pppunbind(Ipifc *ifc)
-{
-	PPP *ppp = ifc->arg;
-
-	if(ppp == nil)
-		return;
-	if(ppp->readp)
-		postnote(ppp->readp, 1, "unbind", 0);
-	if(ppp->timep)
-		postnote(ppp->timep, 1, "unbind", 0);
-
-	/* wait for kprocs to die */
-	while(ppp->readp != 0 || ppp->timep != 0)
-		tsleep(&up->sleep, return0, 0, 300);
-
-	pppclose(ppp);
-	qclose(ifc->conv->eq);
-	ifc->arg = nil;
-}
-
-/*
- *  called by ipoput with a single packet to write with ifc rlock'd
- */
-static void
-pppbwrite(Ipifc *ifc, Block *bp, int, uchar*)
-{
-	PPP *ppp = ifc->arg;
-
-	pppwrite(ppp, bp);
-	ifc->out++;
-}
-
-/*
- *	If the other end hangs up, we have to unbind the interface.  An extra
- *	unbind (in the case where we are hanging up) won't do any harm.
- */
-static void
-deadremote(Ipifc *ifc)
-{
-	int fd;
-	char path[128];
-	PPP *ppp;
-
-	ppp = ifc->arg;
-	snprint(path, sizeof path, "#I%d/ipifc/%d/ctl", ppp->f->dev, ifc->conv->x);
-	fd = kopen(path, ORDWR);
-	if(fd < 0)
-		return;
-	kwrite(fd, "unbind", sizeof("unbind")-1);
-	kclose(fd);
-}
-
-void
-pppmediumlink(void)
-{
-	addipmedium(&pppmedium);
-}
--- a/os/ip/ptclbsum.c
+++ /dev/null
@@ -1,72 +1,0 @@
-#include	"u.h"
-#include	"../port/lib.h"
-#include	"mem.h"
-#include	"dat.h"
-#include	"fns.h"
-#include	"../port/error.h"
-#include	"ip.h"
-
-static	short	endian	= 1;
-static	uchar*	aendian	= (uchar*)&endian;
-#define	LITTLE	*aendian
-
-ushort
-ptclbsum(uchar *addr, int len)
-{
-	ulong losum, hisum, mdsum, x;
-	ulong t1, t2;
-
-	losum = 0;
-	hisum = 0;
-	mdsum = 0;
-
-	x = 0;
-	if((uintptr)addr & 1) {
-		if(len) {
-			hisum += addr[0];
-			len--;
-			addr++;
-		}
-		x = 1;
-	}
-	while(len >= 16) {
-		t1 = *(ushort*)(addr+0);
-		t2 = *(ushort*)(addr+2);	mdsum += t1;
-		t1 = *(ushort*)(addr+4);	mdsum += t2;
-		t2 = *(ushort*)(addr+6);	mdsum += t1;
-		t1 = *(ushort*)(addr+8);	mdsum += t2;
-		t2 = *(ushort*)(addr+10);	mdsum += t1;
-		t1 = *(ushort*)(addr+12);	mdsum += t2;
-		t2 = *(ushort*)(addr+14);	mdsum += t1;
-		mdsum += t2;
-		len -= 16;
-		addr += 16;
-	}
-	while(len >= 2) {
-		mdsum += *(ushort*)addr;
-		len -= 2;
-		addr += 2;
-	}
-	if(x) {
-		if(len)
-			losum += addr[0];
-		if(LITTLE)
-			losum += mdsum;
-		else
-			hisum += mdsum;
-	} else {
-		if(len)
-			hisum += addr[0];
-		if(LITTLE)
-			hisum += mdsum;
-		else
-			losum += mdsum;
-	}
-
-	losum += hisum >> 8;
-	losum += (hisum & 0xff) << 8;
-	while(hisum = losum>>16)
-		losum = hisum + (losum & 0xffff);
-
-	return losum & 0xffff;
-}
--- a/os/ip/rudp.c
+++ b/os/ip/rudp.c
@@ -1,4 +1,5 @@
 /*
+ *  Reliable User Datagram Protocol, currently only for IPv4.
  *  This protocol is compatible with UDP's packet format.
  *  It could be done over UDP if need be.
  */
@@ -25,20 +26,17 @@
 
 enum
 {
-	UDP_HDRSIZE	= 20,	/* pseudo header + udp header */
 	UDP_PHDRSIZE	= 12,	/* pseudo header */
+//	UDP_HDRSIZE	= 20,	/* pseudo header + udp header */
 	UDP_RHDRSIZE	= 36,	/* pseudo header + udp header + rudp header */
 	UDP_IPHDR	= 8,	/* ip header */
 	IP_UDPPROTO	= 254,
-	UDP_USEAD7	= 52,
-	UDP_USEAD6	= 36,
-	UDP_USEAD4	= 12,
+	UDP_USEAD7	= 52,	/* size of new ipv6 headers struct */
 
 	Rudprxms	= 200,
 	Rudptickms	= 50,
 	Rudpmaxxmit	= 10,
 	Maxunacked	= 100,
-
 };
 
 #define Hangupgen	0xffffffff	/* used only in hangup messages */
@@ -205,7 +203,7 @@
 		qlock(&rpriv->apl);
 		if(rpriv->ackprocstarted == 0){
 			sprint(kpname, "#I%drudpack", rudp->f->dev);
-			kproc(kpname, relackproc, rudp, 0);
+			kproc(kpname, relackproc, rudp);
 			rpriv->ackprocstarted = 1;
 		}
 		qunlock(&rpriv->apl);
@@ -240,6 +238,7 @@
 	qlock(ucb);
 	for(r = ucb->r; r; r = r->next)
 		m += snprint(state+m, n-m, " %I/%ld", r->addr, UNACKED(r));
+	m += snprint(state+m, n-m, "\n");
 	qunlock(ucb);
 	return m;
 }
@@ -281,7 +280,7 @@
 	/* force out any delayed acks */
 	ucb = (Rudpcb*)c->ptcl;
 	qlock(ucb);
-	for(r = ucb->r; r; r = r->next){
+	for(r = ucb->r; r != nil; r = r->next){
 		if(r->acksent != r->rcvseq)
 			relsendack(c, r, 0);
 	}
@@ -374,27 +373,10 @@
 		rport = nhgets(bp->rp);
 		bp->rp += 2+2;			/* Ignore local port */
 		break;
-	case 6:
-		/* get user specified addresses */
-		bp = pullupblock(bp, UDP_USEAD6);
-		if(bp == nil)
-			return;
-		ipmove(raddr, bp->rp);
-		bp->rp += IPaddrlen;
-		ipmove(laddr, bp->rp);
-		bp->rp += IPaddrlen;
-		/* pick interface closest to dest */
-		if(ipforme(f, laddr) != Runi)
-			findlocalip(f, laddr, raddr);
-		rport = nhgets(bp->rp);
-
-		bp->rp += 4;			/* Igonore local port */
-		break;
 	default:
 		ipmove(raddr, c->raddr);
 		ipmove(laddr, c->laddr);
 		rport = c->rport;
-
 		break;
 	}
 
@@ -402,9 +384,6 @@
 
 	/* Make space to fit rudp & ip header */
 	bp = padblock(bp, UDP_IPHDR+UDP_RHDRSIZE);
-	if(bp == nil)
-		return;
-
 	uh = (Udphdr *)(bp->rp);
 	uh->vihl = IP_VER4;
 
@@ -417,7 +396,6 @@
 	uh->frag[1] = 0;
 	hnputs(uh->udpplen, ptcllen);
 	switch(ucb->headers){
-	case 6:
 	case 7:
 		v6tov4(uh->udpdst, raddr);
 		hnputs(uh->udpdport, rport);
@@ -528,7 +506,7 @@
 
 	c = iphtlook(&upriv->ht, raddr, rport, laddr, lport);
 	if(c == nil){
-		/* no converstation found */
+		/* no conversation found */
 		upriv->ustats.rudpNoPorts++;
 		qunlock(rudp);
 		netlog(f, Logudp, "udp: no conv %I!%d -> %I!%d\n", raddr, rport,
@@ -574,45 +552,32 @@
 		p = bp->rp;
 		ipmove(p, raddr); p += IPaddrlen;
 		ipmove(p, laddr); p += IPaddrlen;
-		ipmove(p, ifc->lifc->local); p += IPaddrlen;
+		if(!ipv6local(ifc, p, 0, raddr))
+			ipmove(p, ifc->lifc != nil ? ifc->lifc->local : IPnoaddr);
+		p += IPaddrlen;
 		hnputs(p, rport); p += 2;
 		hnputs(p, lport);
 		break;
-	case 6:
-		/* pass the src address */
-		bp = padblock(bp, UDP_USEAD6);
-		p = bp->rp;
-		ipmove(p, raddr); p += IPaddrlen;
-		ipmove(p, ipforme(f, laddr)==Runi ? laddr : ifc->lifc->local); p += IPaddrlen;
-		hnputs(p, rport); p += 2;
-		hnputs(p, lport);
-		break;
 	default:
 		/* connection oriented rudp */
 		if(ipcmp(c->raddr, IPnoaddr) == 0){
-			/* save the src address in the conversation */
+			/* reply with the same ip address (if not broadcast) */
+			if(ipforme(f, laddr) != Runi)
+				ipv6local(ifc, laddr, 0, raddr);
+			ipmove(c->laddr, laddr);
 		 	ipmove(c->raddr, raddr);
 			c->rport = rport;
-
-			/* reply with the same ip address (if not broadcast) */
-			if(ipforme(f, laddr) == Runi)
-				ipmove(c->laddr, laddr);
-			else
-				v4tov6(c->laddr, ifc->lifc->local);
 		}
 		break;
 	}
-	if(bp->next)
-		bp = concatblock(bp);
 
 	if(qfull(c->rq)) {
-		netlog(f, Logrudp, "rudp: qfull %I.%d -> %I.%d\n", raddr, rport,
-			laddr, lport);
+		netlog(f, Logrudp, "rudp: qfull %I.%d -> %I.%d\n",
+			raddr, rport, laddr, lport);
 		freeblist(bp);
+	} else {
+		qpass(c->rq, concatblock(bp));
 	}
-	else
-		qpass(c->rq, bp);
-	
 	qunlock(ucb);
 }
 
@@ -629,16 +594,14 @@
 	if(n < 1)
 		return rudpunknown;
 
-	if(strcmp(f[0], "headers++4") == 0){
-		ucb->headers = 7;
+	if(strcmp(f[0], "headers") == 0){
+		ucb->headers = 7;		/* new headers format */
 		return nil;
-	} else if(strcmp(f[0], "headers") == 0){
-		ucb->headers = 6;
-		return nil;
 	} else if(strcmp(f[0], "hangup") == 0){
 		if(n < 3)
 			return "bad syntax";
-		parseip(ip, f[1]);
+		if (parseip(ip, f[1]) == -1)
+			return Ebadip;
 		x = atoi(f[2]);
 		qlock(ucb);
 		relforget(c, ip, x, 1);
@@ -645,7 +608,7 @@
 		qunlock(ucb);
 		return nil;
 	} else if(strcmp(f[0], "randdrop") == 0){
-		x = 10;		/* default is 10% */
+		x = 10;			/* default is 10% */
 		if(n > 1)
 			x = atoi(f[1]);
 		if(x > 100 || x < 0)
@@ -672,12 +635,13 @@
 	pdest = nhgets(h->udpdport);
 
 	/* Look for a connection */
-	for(p = rudp->conv; *p; p++) {
-		s = *p;
+	for(p = rudp->conv; (s = *p) != nil; p++) {
 		if(s->rport == pdest)
 		if(s->lport == psource)
 		if(ipcmp(s->raddr, dest) == 0)
 		if(ipcmp(s->laddr, source) == 0){
+			if(s->ignoreadvice)
+				break;
 			qhangup(s->rq, msg);
 			qhangup(s->wq, msg);
 			break;
@@ -701,12 +665,6 @@
 		upriv->orders);
 }
 
-int
-rudpgc(Proto *rudp)
-{
-	return natgc(rudp->ipproto);
-}
-
 void
 rudpinit(Fs *fs)
 {
@@ -725,9 +683,8 @@
 	rudp->rcv = rudpiput;
 	rudp->advise = rudpadvise;
 	rudp->stats = rudpstats;
-	rudp->gc = rudpgc;
 	rudp->ipproto = IP_UDPPROTO;
-	rudp->nc = 16;
+	rudp->nc = 32;
 	rudp->ptclsize = sizeof(Rudpcb);
 
 	Fsproto(fs, rudp);
@@ -770,6 +727,8 @@
 
 	rudp = (Proto *)a;
 
+	while(waserror())
+		;
 loop:
 	tsleep(&up->sleep, return0, 0, Rudptickms);
 
@@ -989,8 +948,6 @@
 	Fs *f;
 
 	bp = allocb(UDP_IPHDR + UDP_RHDRSIZE);
-	if(bp == nil)
-		return;
 	bp->wp += UDP_IPHDR + UDP_RHDRSIZE;
 	f = c->p->f;
 	uh = (Udphdr *)(bp->rp);
--- a/os/ip/tcp.c
+++ b/os/ip/tcp.c
@@ -41,13 +41,13 @@
 	EOLOPT		= 0,
 	NOOPOPT		= 1,
 	MSSOPT		= 2,
-	MSS_LENGTH	= 4,		/* Mean segment size */
+	MSS_LENGTH	= 4,		/* Maximum segment size */
 	WSOPT		= 3,
 	WS_LENGTH	= 3,		/* Bits to scale window size by */
 	MSL2		= 10,
 	MSPTICK		= 50,		/* Milliseconds per timer tick */
-	DEF_MSS		= 1460,		/* Default mean segment */
-	DEF_MSS6	= 1280,		/* Default mean segment (min) for v6 */
+	DEF_MSS		= 1460,		/* Default maximum segment */
+	DEF_MSS6	= 1220,		/* Default maximum segment (min) for v6 */
 	DEF_RTT		= 500,		/* Default round trip */
 	DEF_KAT		= 120000,	/* Default time (ms) between keep alives */
 	TCP_LISTEN	= 0,		/* Listen connection */
@@ -81,7 +81,13 @@
 	NLHT		= 256,		/* hash table size, must be a power of 2 */
 	LHTMASK		= NLHT-1,
 
-	HaveWS		= 1<<8,
+	/*
+	 * window is 64kb · 2ⁿ
+	 * these factors determine the ultimate bandwidth-delay product.
+	 * at Maxqscale = 4 the queue is 64kb · 2⁴ = 1mb, a bit over 100mbps · 70ms.
+	 */
+	Maxqscale	= 4,		/* maximum queuing scale */
+	Defadvscale	= 4,		/* default advertisement */
 };
 
 /* Must correspond to the enumeration above */
@@ -169,8 +175,9 @@
 	ulong	seq;
 	ulong	ack;
 	uchar	flags;
-	ushort	ws;	/* window scale option (if not zero) */
-	ulong	wnd;
+	uchar	update;
+	ushort	ws;	/* window scale option */
+	ulong	wnd;	/* prescaled window */
 	ushort	urg;
 	ushort	mss;	/* max segment size option (if not zero) */
 	ushort	len;	/* size of data */
@@ -205,44 +212,53 @@
 		ulong	wnd;		/* Tcp send window */
 		ulong	urg;		/* Urgent data pointer */
 		ulong	wl2;
-		int	scale;		/* how much to right shift window in xmitted packets */
+		uint	scale;		/* how much to right shift window in xmitted packets */
 		/* to implement tahoe and reno TCP */
 		ulong	dupacks;	/* number of duplicate acks rcvd */
+		ulong	partialack;
 		int	recovery;	/* loss recovery flag */
-		ulong	rxt;		/* right window marker for recovery */
+		int	retransmit;	/* retransmit 1 packet @ una flag */
+		int	rto;
+		ulong	rxt;		/* right window marker for recovery "recover" rfc3782 */
 	} snd;
 	struct {
 		ulong	nxt;		/* Receive pointer to next uchar slot */
 		ulong	wnd;		/* Receive window incoming */
+		ulong	wsnt;		/* Last wptr sent.  important to track for large bdp */
+		ulong	wptr;
 		ulong	urg;		/* Urgent pointer */
+		ulong	ackptr;		/* last acked sequence */
 		int	blocked;
-		int	una;		/* unacked data segs */
-		int	scale;		/* how much to left shift window in rcved packets */
+		uint	scale;		/* how much to left shift window in rcv'd packets */
 	} rcv;
 	ulong	iss;			/* Initial sequence number */
-	int	sawwsopt;		/* true if we saw a wsopt on the incoming SYN */
 	ulong	cwind;			/* Congestion window */
-	int	scale;			/* desired snd.scale */
-	ushort	ssthresh;		/* Slow start threshold */
+	ulong	abcbytes;		/* appropriate byte counting rfc 3465 */
+	uint	scale;			/* desired snd.scale */
+	ulong	ssthresh;		/* Slow start threshold */
 	int	resent;			/* Bytes just resent */
 	int	irs;			/* Initial received squence */
-	ushort	mss;			/* Mean segment size */
+	ushort	mss;			/* Maximum segment size */
 	int	rerecv;			/* Overlap of data rerecevived */
-	ulong	window;			/* Recevive window */
+	ulong	window;			/* Our receive window (queue) */
+	uint	qscale;			/* Log2 of our receive window (queue) */
 	uchar	backoff;		/* Exponential backoff counter */
 	int	backedoff;		/* ms we've backed off for rexmits */
 	uchar	flags;			/* State flags */
 	Reseq	*reseq;			/* Resequencing queue */
+	int	nreseq;
+	int	reseqlen;
 	Tcptimer	timer;			/* Activity timer */
 	Tcptimer	acktimer;		/* Acknowledge timer */
 	Tcptimer	rtt_timer;		/* Round trip timer */
 	Tcptimer	katimer;		/* keep alive timer */
 	ulong	rttseq;			/* Round trip sequence */
-	int	srtt;			/* Shortened round trip */
+	int	srtt;			/* Smoothed round trip */
 	int	mdev;			/* Mean deviation of round trip */
 	int	kacounter;		/* count down for keep alive */
 	uint	sndsyntime;		/* time syn sent */
 	ulong	time;			/* time Finwait2 or Syn_received was sent */
+	ulong	timeuna;			/* snd.una when time was set */
 	int	nochecksum;		/* non-zero means don't send checksums */
 	int	flgcnt;			/* number of flags in the sequence (FIN,SEQ) */
 
@@ -285,11 +301,11 @@
 };
 
 int	tcp_irtt = DEF_RTT;	/* Initial guess at round trip time */
-ushort	tcp_mss = DEF_MSS;	/* Maximum segment size to be sent */
 
 enum {
 	/* MIB stats */
 	MaxConn,
+	Mss,
 	ActiveOpens,
 	PassiveOpens,
 	EstabResets,
@@ -297,6 +313,7 @@
 	InSegs,
 	OutSegs,
 	RetransSegs,
+	RetransSegsSent,
 	RetransTimeouts,
 	InErrs,
 	OutRsts,
@@ -305,14 +322,27 @@
 	CsumErrs,
 	HlenErrs,
 	LenErrs,
+	Resequenced,
 	OutOfOrder,
+	ReseqBytelim,
+	ReseqPktlim,
+	Delayack,
+	Wopenack,
 
+	Recovery,
+	RecoveryDone,
+	RecoveryRTO,
+	RecoveryNoSeq,
+	RecoveryCwind,
+	RecoveryPA,
+
 	Nstats
 };
 
-static char *statnames[] =
+static char *statnames[Nstats] =
 {
 [MaxConn]	"MaxConn",
+[Mss]		"MaxSegment",
 [ActiveOpens]	"ActiveOpens",
 [PassiveOpens]	"PassiveOpens",
 [EstabResets]	"EstabResets",
@@ -320,6 +350,7 @@
 [InSegs]	"InSegs",
 [OutSegs]	"OutSegs",
 [RetransSegs]	"RetransSegs",
+[RetransSegsSent]	"RetransSegsSent",
 [RetransTimeouts]	"RetransTimeouts",
 [InErrs]	"InErrs",
 [OutRsts]	"OutRsts",
@@ -327,6 +358,19 @@
 [HlenErrs]	"HlenErrs",
 [LenErrs]	"LenErrs",
 [OutOfOrder]	"OutOfOrder",
+[Resequenced]	"Resequenced",
+[ReseqBytelim]	"ReseqBytelim",
+[ReseqPktlim]	"ReseqPktlim",
+[Delayack]	"Delayack",
+[Wopenack]	"Wopenack",
+
+[Recovery]	"Recovery",
+[RecoveryDone]	"RecoveryDone",
+[RecoveryRTO]	"RecoveryRTO",
+
+[RecoveryNoSeq]	"RecoveryNoSeq",
+[RecoveryCwind]	"RecoveryCwind",
+[RecoveryPA]	"RecoveryPA",
 };
 
 typedef struct Tcppriv Tcppriv;
@@ -347,7 +391,7 @@
 	QLock	apl;
 	int	ackprocstarted;
 
-	ulong	stats[Nstats];
+	uvlong	stats[Nstats];
 };
 
 /*
@@ -356,34 +400,34 @@
  *  of DoS attack.
  *
  *  To avoid stateless Conv hogs, we pick a sequence number at random.  If
- *  it that number gets acked by the other end, we shut down the connection.
- *  Look for tcpporthogedefense in the code.
+ *  that number gets acked by the other end, we shut down the connection.
+ *  Look for tcpporthogdefense in the code.
  */
 int tcpporthogdefense = 0;
 
-int	addreseq(Tcpctl*, Tcppriv*, Tcp*, Block*, ushort);
-void	getreseq(Tcpctl*, Tcp*, Block**, ushort*);
-void	localclose(Conv*, char*);
-void	procsyn(Conv*, Tcp*);
-void	tcpiput(Proto*, Ipifc*, Block*);
-void	tcpoutput(Conv*);
-int	tcptrim(Tcpctl*, Tcp*, Block**, ushort*);
-void	tcpstart(Conv*, int);
-void	tcptimeout(void*);
-void	tcpsndsyn(Conv*, Tcpctl*);
-void	tcprcvwin(Conv*);
-void	tcpacktimer(void*);
-void	tcpkeepalive(void*);
-void	tcpsetkacounter(Tcpctl*);
-void	tcprxmit(Conv*);
-void	tcpsettimer(Tcpctl*);
-void	tcpsynackrtt(Conv*);
-void	tcpsetscale(Conv*, Tcpctl*, ushort, ushort);
+static	int	addreseq(Fs*, Tcpctl*, Tcppriv*, Tcp*, Block*, ushort);
+static	int	dumpreseq(Tcpctl*);
+static	void	getreseq(Tcpctl*, Tcp*, Block**, ushort*);
+static	void	limbo(Conv*, uchar*, uchar*, Tcp*, int);
+static	void	limborexmit(Proto*);
+static	void	localclose(Conv*, char*);
+static	void	procsyn(Conv*, Tcp*);
+static	void	tcpacktimer(void*);
+static	void	tcpiput(Proto*, Ipifc*, Block*);
+static	void	tcpkeepalive(void*);
+static	void	tcpoutput(Conv*);
+static	void	tcprcvwin(Conv*);
+static	void	tcprxmit(Conv*);
+static	void	tcpsetkacounter(Tcpctl*);
+static	void	tcpsetscale(Conv*, Tcpctl*, ushort, ushort);
+static	void	tcpsettimer(Tcpctl*);
+static	void	tcpsndsyn(Conv*, Tcpctl*);
+static	void	tcpstart(Conv*, int);
+static	void	tcpsynackrtt(Conv*);
+static	void	tcptimeout(void*);
+static	int	tcptrim(Tcpctl*, Tcp*, Block**, ushort*);
 
-static void limborexmit(Proto*);
-static void limbo(Conv*, uchar*, uchar*, Tcp*, int);
-
-void
+static void
 tcpsetstate(Conv *s, uchar newstate)
 {
 	Tcpctl *tcb;
@@ -403,11 +447,6 @@
 	if(newstate == Established)
 		tpriv->stats[CurrEstab]++;
 
-	/**
-	print( "%d/%d %s->%s CurrEstab=%d\n", s->lport, s->rport,
-		tcpstates[oldstate], tcpstates[newstate], tpriv->tstats.tcpCurrEstab );
-	**/
-
 	switch(newstate) {
 	case Closed:
 		qclose(s->rq);
@@ -430,7 +469,12 @@
 tcpconnect(Conv *c, char **argv, int argc)
 {
 	char *e;
+	Tcpctl *tcb;
 
+	tcb = (Tcpctl*)(c->ptcl);
+	if(tcb->state != Closed)
+		return Econinuse;
+
 	e = Fsstdconnect(c, argv, argc);
 	if(e != nil)
 		return e;
@@ -447,12 +491,14 @@
 	s = (Tcpctl*)(c->ptcl);
 
 	return snprint(state, n,
-		"%s qin %d qout %d srtt %d mdev %d cwin %lud swin %lud>>%d rwin %lud>>%d timer.start %d timer.count %d rerecv %d katimer.start %d katimer.count %d\n",
+		"%s qin %d qout %d rq %d.%d srtt %d mdev %d sst %lud cwin %lud swin %lud>>%d rwin %lud>>%d qscale %d timer.start %d timer.count %d rerecv %d katimer.start %d katimer.count %d\n",
 		tcpstates[s->state],
 		c->rq ? qlen(c->rq) : 0,
 		c->wq ? qlen(c->wq) : 0,
-		s->srtt, s->mdev,
-		s->cwind, s->snd.wnd, s->rcv.scale, s->rcv.wnd, s->snd.scale,
+		s->nreseq, s->reseqlen,
+		s->srtt, s->mdev, s->ssthresh,
+		s->cwind, s->snd.wnd, s->snd.scale, s->rcv.wnd, s->rcv.scale,
+		s->qscale,
 		s->timer.start, s->timer.count, s->rerecv,
 		s->katimer.start, s->katimer.count);
 }
@@ -470,7 +516,12 @@
 tcpannounce(Conv *c, char **argv, int argc)
 {
 	char *e;
+	Tcpctl *tcb;
 
+	tcb = (Tcpctl*)(c->ptcl);
+	if(tcb->state != Closed)
+		return Econinuse;
+
 	e = Fsstdannounce(c, argv, argc);
 	if(e != nil)
 		return e;
@@ -524,7 +575,7 @@
 	}
 }
 
-void
+static void
 tcpkick(void *x)
 {
 	Conv *s = x;
@@ -546,7 +597,6 @@
 		/*
 		 * Push data
 		 */
-		tcprcvwin(s);
 		tcpoutput(s);
 		break;
 	default:
@@ -558,7 +608,9 @@
 	poperror();
 }
 
-void
+static int seq_lt(ulong, ulong);
+
+static void
 tcprcvwin(Conv *s)				/* Call with tcb locked */
 {
 	int w;
@@ -568,12 +620,20 @@
 	w = tcb->window - qlen(s->rq);
 	if(w < 0)
 		w = 0;
-	tcb->rcv.wnd = w;
-	if(w == 0)
+	/* RFC 1122 § 4.2.2.17 do not move right edge of window left */
+	if(seq_lt(tcb->rcv.nxt + w, tcb->rcv.wptr))
+		w = tcb->rcv.wptr - tcb->rcv.nxt;
+	if(w != tcb->rcv.wnd)
+	if(w>>tcb->rcv.scale == 0 || tcb->window > 4*tcb->mss && w < tcb->mss/4){
 		tcb->rcv.blocked = 1;
+		netlog(s->p->f, Logtcp, "tcprcvwin: window %lud qlen %d ws %ud lport %d\n",
+			tcb->window, qlen(s->rq), tcb->rcv.scale, s->lport);
+	}
+	tcb->rcv.wnd = w;
+	tcb->rcv.wptr = tcb->rcv.nxt + w;
 }
 
-void
+static void
 tcpacktimer(void *v)
 {
 	Tcpctl *tcb;
@@ -589,7 +649,6 @@
 	qlock(s);
 	if(tcb->state != Closed){
 		tcb->flags |= FORCE;
-		tcprcvwin(s);
 		tcpoutput(s);
 	}
 	qunlock(s);
@@ -597,10 +656,52 @@
 }
 
 static void
+tcpcongestion(Tcpctl *tcb)
+{
+	ulong unacked;
+
+	/* ssthresh = max(min(unacked bytes, cwind)/2, 2*mss) */
+	unacked = tcb->snd.nxt - tcb->snd.una;
+	if(unacked > tcb->cwind)
+		unacked = tcb->cwind;
+	if((tcb->ssthresh = unacked/2) < 2*tcb->mss)
+		tcb->ssthresh = 2*tcb->mss;
+}
+
+enum {
+	L		= 2,		/* slow-start growth cap per ack, in mss units; 2 is aggressive, RFC 3465 allows 1 ≤ L ≤ 2 */
+};
+
+static void
+tcpabcincr(Tcpctl *tcb, uint acked)
+{
+	uint cap;
+
+	/* grow cwind from the count of newly acked bytes */
+	tcb->abcbytes += acked;
+	if(tcb->cwind >= tcb->ssthresh){
+		/* avoidance: add one mss for each cwind's worth of acked bytes */
+		tcb->snd.rto = 0;
+		if(tcb->abcbytes >= tcb->cwind){
+			tcb->abcbytes -= tcb->cwind;
+			tcb->cwind += tcb->mss;
+		}
+		return;
+	}
+
+	/* slow start: add the acked bytes, capped at L*mss (1*mss after an rto) */
+	cap = L*tcb->mss;
+	if(tcb->snd.rto)
+		cap = 1*tcb->mss;
+	tcb->cwind += MIN(tcb->abcbytes, cap);
+	tcb->abcbytes = 0;
+}
+
+static void
 tcpcreate(Conv *c)
 {
 	c->rq = qopen(QMAX, Qcoalesce, tcpacktimer, c);
-	c->wq = qopen((3*QMAX)/2, Qkick, tcpkick, c);
+	c->wq = qopen(QMAX, Qkick, tcpkick, c);
 }
 
 static void
@@ -608,7 +709,7 @@
 {
 	if(newstate != TcptimerON){
 		if(t->state == TcptimerON){
-			// unchain
+			/* unchain */
 			if(priv->timers == t){
 				priv->timers = t->next;
 				if(t->prev != nil)
@@ -622,7 +723,7 @@
 		}
 	} else {
 		if(t->state != TcptimerON){
-			// chain
+			/* chain */
 			if(t->prev != nil || t->next != nil)
 				panic("timerstate2");
 			t->prev = nil;
@@ -635,7 +736,7 @@
 	t->state = newstate;
 }
 
-void
+static void
 tcpackproc(void *a)
 {
 	Tcptimer *t, *tp, *timeo;
@@ -646,6 +747,9 @@
 	tcp = a;
 	priv = tcp->priv;
 
+	while(waserror())
+		;
+
 	for(;;) {
 		tsleep(&up->sleep, return0, 0, MSPTICK);
 
@@ -681,7 +785,7 @@
 	}
 }
 
-void
+static void
 tcpgo(Tcppriv *priv, Tcptimer *t)
 {
 	if(t == nil || t->start == 0)
@@ -693,7 +797,7 @@
 	qunlock(&priv->tl);
 }
 
-void
+static void
 tcphalt(Tcppriv *priv, Tcptimer *t)
 {
 	if(t == nil)
@@ -704,17 +808,16 @@
 	qunlock(&priv->tl);
 }
 
-int
+static int
 backoff(int n)
 {
 	return 1 << n;
 }
 
-void
+static void
 localclose(Conv *s, char *reason)	/* called with tcb locked */
 {
 	Tcpctl *tcb;
-	Reseq *rp,*rp1;
 	Tcppriv *tpriv;
 
 	tpriv = s->p->priv;
@@ -728,12 +831,7 @@
 	tcphalt(tpriv, &tcb->katimer);
 
 	/* Flush reassembly queue; nothing more can arrive */
-	for(rp = tcb->reseq; rp != nil; rp = rp1) {
-		rp1 = rp->next;
-		freeblist(rp->bp);
-		free(rp);
-	}
-	tcb->reseq = nil;
+	dumpreseq(tcb);
 
 	if(tcb->state == Syn_sent)
 		Fsconnected(s, reason);
@@ -747,45 +845,46 @@
 }
 
 /* mtu (- TCP + IP hdr len) of 1st hop */
-int
-tcpmtu(Proto *tcp, uchar *addr, int version, int *scale)
+static int
+tcpmtu(Route *r, int version, uint *scale)
 {
 	Ipifc *ifc;
 	int mtu;
 
-	ifc = findipifc(tcp->f, addr, 0);
-	switch(version){
-	default:
-	case V4:
-		mtu = DEF_MSS;
-		if(ifc != nil)
-			mtu = ifc->maxtu - ifc->m->hsize - (TCP4_PKT + TCP4_HDRSIZE);
-		break;
-	case V6:
-		mtu = DEF_MSS6;
-		if(ifc != nil)
-			mtu = ifc->maxtu - ifc->m->hsize - (TCP6_PKT + TCP6_HDRSIZE);
-		break;
-	}
-	if(ifc != nil){
-		if(ifc->mbps > 100)
-			*scale = HaveWS | 3;
-		else if(ifc->mbps > 10)
-			*scale = HaveWS | 1;
-		else
-			*scale = HaveWS | 0;
-	} else
-		*scale = HaveWS | 0;
+	/*
+	 * set the ws.  it doesn't commit us to anything.
+	 * ws is the ultimate limit to the bandwidth-delay product.
+	 */
+	*scale = Defadvscale;
 
-	return mtu;
+	/*
+	 * currently we do not implement path MTU discovery
+	 * so use interface MTU *only* if directly reachable
+	 * or when we use V4 which allows routers to fragment.
+	 * otherwise, we use the default MSS which assumes a
+	 * safe minimum MTU of 1280 bytes for V6.
+	 */  
+	if(r != nil && (ifc = r->ifc) != nil){
+		mtu = ifc->maxtu - ifc->m->hsize;
+		if(version == V4)
+			return mtu - (TCP4_PKT + TCP4_HDRSIZE);
+		mtu -= TCP6_PKT + TCP6_HDRSIZE;
+		if((r->type & (Rifc|Runi)) != 0 || mtu <= DEF_MSS6)
+			return mtu;
+	}
+	if(version == V6)
+		return DEF_MSS6;
+	else
+		return DEF_MSS;
 }
 
-void
+static void
 inittcpctl(Conv *s, int mode)
 {
 	Tcpctl *tcb;
 	Tcp4hdr* h4;
 	Tcp6hdr* h6;
+	Tcppriv *tpriv;
 	int mss;
 
 	tcb = (Tcpctl*)s->ptcl;
@@ -792,7 +891,7 @@
 
 	memset(tcb, 0, sizeof(Tcpctl));
 
-	tcb->ssthresh = 65535;
+	tcb->ssthresh = QMAX;			/* reset by tcpsetscale() */
 	tcb->srtt = tcp_irtt<<LOGAGAIN;
 	tcb->mdev = 0;
 
@@ -841,19 +940,18 @@
 	}
 
 	tcb->mss = tcb->cwind = mss;
+	tcb->abcbytes = 0;
+	tpriv = s->p->priv;
+	tpriv->stats[Mss] = tcb->mss;
 
 	/* default is no window scaling */
-	tcb->window = QMAX;
-	tcb->rcv.wnd = QMAX;
-	tcb->rcv.scale = 0;
-	tcb->snd.scale = 0;
-	qsetlimit(s->rq, QMAX);
+	tcpsetscale(s, tcb, 0, 0);
 }
 
 /*
  *  called with s qlocked
  */
-void
+static void
 tcpstart(Conv *s, int mode)
 {
 	Tcpctl *tcb;
@@ -865,8 +963,8 @@
 	if(tpriv->ackprocstarted == 0){
 		qlock(&tpriv->apl);
 		if(tpriv->ackprocstarted == 0){
-			sprint(kpname, "#I%dtcpack", s->p->f->dev);
-			kproc(kpname, tcpackproc, s->p, 0);
+			snprint(kpname, sizeof(kpname), "#I%dtcpack", s->p->f->dev);
+			kproc(kpname, tcpackproc, s->p);
 			tpriv->ackprocstarted = 1;
 		}
 		qunlock(&tpriv->apl);
@@ -895,28 +993,28 @@
 }
 
 static char*
-tcpflag(ushort flag)
+tcpflag(char *buf, char *e, ushort flag)
 {
-	static char buf[128];
+	char *p;
 
-	sprint(buf, "%d", flag>>10);	/* Head len */
+	p = seprint(buf, e, "%d", flag>>10);	/* Head len */
 	if(flag & URG)
-		strcat(buf, " URG");
+		p = seprint(p, e, " URG");
 	if(flag & ACK)
-		strcat(buf, " ACK");
+		p = seprint(p, e, " ACK");
 	if(flag & PSH)
-		strcat(buf, " PSH");
+		p = seprint(p, e, " PSH");
 	if(flag & RST)
-		strcat(buf, " RST");
+		p = seprint(p, e, " RST");
 	if(flag & SYN)
-		strcat(buf, " SYN");
+		p = seprint(p, e, " SYN");
 	if(flag & FIN)
-		strcat(buf, " FIN");
-
+		p = seprint(p, e, " FIN");
+	USED(p);
 	return buf;
 }
 
-Block *
+static Block*
 htontcp6(Tcp *tcph, Block *data, Tcp6hdr *ph, Tcpctl *tcb)
 {
 	int dlen;
@@ -940,14 +1038,10 @@
 	if(data) {
 		dlen = blocklen(data);
 		data = padblock(data, hdrlen + TCP6_PKT);
-		if(data == nil)
-			return nil;
 	}
 	else {
 		dlen = 0;
 		data = allocb(hdrlen + TCP6_PKT + 64);	/* the 64 pad is to meet mintu's */
-		if(data == nil)
-			return nil;
 		data->wp += hdrlen + TCP6_PKT;
 	}
 
@@ -1000,7 +1094,7 @@
 	return data;
 }
 
-Block *
+static Block*
 htontcp4(Tcp *tcph, Block *data, Tcp4hdr *ph, Tcpctl *tcb)
 {
 	int dlen;
@@ -1013,7 +1107,7 @@
 	if(tcph->flags & SYN){
 		if(tcph->mss)
 			hdrlen += MSS_LENGTH;
-		if(tcph->ws)
+		if(1)
 			hdrlen += WS_LENGTH;
 		optpad = hdrlen & 3;
 		if(optpad)
@@ -1024,14 +1118,10 @@
 	if(data) {
 		dlen = blocklen(data);
 		data = padblock(data, hdrlen + TCP4_PKT);
-		if(data == nil)
-			return nil;
 	}
 	else {
 		dlen = 0;
 		data = allocb(hdrlen + TCP4_PKT + 64);	/* the 64 pad is to meet mintu's */
-		if(data == nil)
-			return nil;
 		data->wp += hdrlen + TCP4_PKT;
 	}
 
@@ -1055,7 +1145,8 @@
 			hnputs(opt, tcph->mss);
 			opt += 2;
 		}
-		if(tcph->ws != 0){
+		/* always offer.  rfc1323 §2.2 */
+		if(1){
 			*opt++ = WSOPT;
 			*opt++ = WS_LENGTH;
 			*opt++ = tcph->ws;
@@ -1074,7 +1165,7 @@
 	return data;
 }
 
-int
+static int
 ntohtcp6(Tcp *tcph, Block **bpp)
 {
 	Tcp6hdr *h;
@@ -1103,6 +1194,7 @@
 	tcph->urg = nhgets(h->tcpurg);
 	tcph->mss = 0;
 	tcph->ws = 0;
+	tcph->update = 0;
 	tcph->len = nhgets(h->ploadlen) - hdrlen;
 
 	*bpp = pullupblock(*bpp, hdrlen+TCP6_PKT);
@@ -1127,7 +1219,7 @@
 			break;
 		case WSOPT:
 			if(optlen == WS_LENGTH && *(optr+2) <= 14)
-				tcph->ws = HaveWS | *(optr+2);
+				tcph->ws = *(optr+2);
 			break;
 		}
 		n -= optlen;
@@ -1136,7 +1228,7 @@
 	return hdrlen;
 }
 
-int
+static int
 ntohtcp4(Tcp *tcph, Block **bpp)
 {
 	Tcp4hdr *h;
@@ -1166,6 +1258,7 @@
 	tcph->urg = nhgets(h->tcpurg);
 	tcph->mss = 0;
 	tcph->ws = 0;
+	tcph->update = 0;
 	tcph->len = nhgets(h->length) - (hdrlen + TCP4_PKT);
 
 	*bpp = pullupblock(*bpp, hdrlen+TCP4_PKT);
@@ -1190,7 +1283,7 @@
 			break;
 		case WSOPT:
 			if(optlen == WS_LENGTH && *(optr+2) <= 14)
-				tcph->ws = HaveWS | *(optr+2);
+				tcph->ws = *(optr+2);
 			break;
 		}
 		n -= optlen;
@@ -1200,16 +1293,19 @@
 }
 
 /*
- *  For outgiing calls, generate an initial sequence
+ *  For outgoing calls, generate an initial sequence
  *  number and put a SYN on the send queue
  */
-void
+static void
 tcpsndsyn(Conv *s, Tcpctl *tcb)
 {
+	Tcppriv *tpriv;
+
 	tcb->iss = (nrand(1<<16)<<16)|nrand(1<<16);
 	tcb->rttseq = tcb->iss;
 	tcb->snd.wl2 = tcb->iss;
 	tcb->snd.una = tcb->iss;
+	tcb->snd.rxt = tcb->iss;
 	tcb->snd.ptr = tcb->rttseq;
 	tcb->snd.nxt = tcb->rttseq;
 	tcb->flgcnt++;
@@ -1217,7 +1313,9 @@
 	tcb->sndsyntime = NOW;
 
 	/* set desired mss and scale */
-	tcb->mss = tcpmtu(s->p, s->laddr, s->ipversion, &tcb->scale);
+	tcb->mss = tcpmtu(v6lookup(s->p->f, s->raddr, s->laddr, s), s->ipversion, &tcb->scale);
+	tpriv = s->p->priv;
+	tpriv->stats[Mss] = tcb->mss;
 }
 
 void
@@ -1229,7 +1327,7 @@
 	Tcp4hdr ph4;
 	Tcp6hdr ph6;
 
-	netlog(tcp->f, Logtcp, "sndrst: %s", reason);
+	netlog(tcp->f, Logtcp, "sndrst: %s\n", reason);
 
 	tpriv = tcp->priv;
 
@@ -1307,7 +1405,7 @@
  *  send a reset to the remote side and close the conversation
  *  called with s qlocked
  */
-char*
+static char*
 tcphangup(Conv *s)
 {
 	Tcp seg;
@@ -1322,7 +1420,7 @@
 			memset(&seg, 0, sizeof seg);
 			seg.flags = RST | ACK;
 			seg.ack = tcb->rcv.nxt;
-			tcb->rcv.una = 0;
+			tcb->rcv.ackptr = seg.ack;
 			seg.seq = tcb->snd.ptr;
 			seg.wnd = 0;
 			seg.urg = 0;
@@ -1353,7 +1451,7 @@
 /*
  *  (re)send a SYN ACK
  */
-int
+static int
 sndsynack(Proto *tcp, Limbo *lp)
 {
 	Block *hbp;
@@ -1360,7 +1458,7 @@
 	Tcp4hdr ph4;
 	Tcp6hdr ph6;
 	Tcp seg;
-	int scale;
+	uint scale;
 
 	/* make pseudo header */
 	switch(lp->version) {
@@ -1388,11 +1486,12 @@
 		panic("sndrst: version %d", lp->version);
 	}
 
+	memset(&seg, 0, sizeof seg);
 	seg.seq = lp->iss;
 	seg.ack = lp->irs+1;
 	seg.flags = SYN|ACK;
 	seg.urg = 0;
-	seg.mss = tcpmtu(tcp, lp->laddr, lp->version, &scale);
+	seg.mss = tcpmtu(v6lookup(tcp->f, lp->raddr, lp->laddr, nil), lp->version, &scale);
 	seg.wnd = QMAX;
 
 	/* if the other side set scale, we should too */
@@ -1570,6 +1669,18 @@
 	}
 }
 
+static void
+initialwindow(Tcpctl *tcb)
+{
+	/* RFC 3390 initial window: cwind = min(4*mss, max(2*mss, 4380)) */
+	if(tcb->mss >= 2190)
+		tcb->cwind = 2*tcb->mss;
+	else if(tcb->mss >= 1095)
+		tcb->cwind = 4380;
+	else
+		tcb->cwind = 4*tcb->mss;
+}
+
 /*
  *  come here when we finally get an ACK to our SYN-ACK.
  *  lookup call in limbo.  if found, create a new conversation
@@ -1596,7 +1707,7 @@
 	/* find a call in limbo */
 	h = hashipa(src, segp->source);
 	for(l = &tpriv->lht[h]; (lp = *l) != nil; l = &lp->next){
-		netlog(s->p->f, Logtcp, "tcpincoming s %I,%ux/%I,%ux d %I,%ux/%I,%ux v %d/%d",
+		netlog(s->p->f, Logtcp, "tcpincoming s %I!%ud/%I!%ud d %I!%ud/%I!%ud v %d/%d\n",
 			src, segp->source, lp->raddr, lp->rport,
 			dst, segp->dest, lp->laddr, lp->lport,
 			version, lp->version
@@ -1611,7 +1722,7 @@
 
 		/* we're assuming no data with the initial SYN */
 		if(segp->seq != lp->irs+1 || segp->ack != lp->iss+1){
-			netlog(s->p->f, Logtcp, "tcpincoming s %lux/%lux a %lux %lux",
+			netlog(s->p->f, Logtcp, "tcpincoming s %lux/%lux a %lux %lux\n",
 				segp->seq, lp->irs+1, segp->ack, lp->iss+1);
 			lp = nil;
 		} else {
@@ -1641,6 +1752,8 @@
 
 	tcb->irs = lp->irs;
 	tcb->rcv.nxt = tcb->irs+1;
+	tcb->rcv.wptr = tcb->rcv.nxt;
+	tcb->rcv.wsnt = 0;
 	tcb->rcv.urg = tcb->rcv.nxt;
 
 	tcb->iss = lp->iss;
@@ -1649,19 +1762,24 @@
 	tcb->snd.una = tcb->iss+1;
 	tcb->snd.ptr = tcb->iss+1;
 	tcb->snd.nxt = tcb->iss+1;
+	tcb->snd.rxt = tcb->iss+1;
 	tcb->flgcnt = 0;
 	tcb->flags |= SYNACK;
 
+	/* set desired mss and scale */
+	tcb->mss = tcpmtu(v6lookup(s->p->f, src, dst, s), version, &tcb->scale);
+
 	/* our sending max segment size cannot be bigger than what he asked for */
 	if(lp->mss != 0 && lp->mss < tcb->mss)
 		tcb->mss = lp->mss;
+	tpriv->stats[Mss] = tcb->mss;
 
 	/* window scaling */
 	tcpsetscale(new, tcb, lp->rcvscale, lp->sndscale);
 
-	/* the congestion window always starts out as a single segment */
+	/* congestion window */
 	tcb->snd.wnd = segp->wnd;
-	tcb->cwind = tcb->mss;
+	initialwindow(tcb);
 
 	/* set initial round trip time */
 	tcb->sndsyntime = lp->lastsend+lp->rexmits*SYNACK_RXTIMER;
@@ -1700,7 +1818,7 @@
 	return new;
 }
 
-int
+static int
 seq_within(ulong x, ulong low, ulong high)
 {
 	if(low <= high){
@@ -1714,25 +1832,25 @@
 	return 0;
 }
 
-int
+static int
 seq_lt(ulong x, ulong y)
 {
 	return (int)(x-y) < 0;
 }
 
-int
+static int
 seq_le(ulong x, ulong y)
 {
 	return (int)(x-y) <= 0;
 }
 
-int
+static int
 seq_gt(ulong x, ulong y)
 {
 	return (int)(x-y) > 0;
 }
 
-int
+static int
 seq_ge(ulong x, ulong y)
 {
 	return (int)(x-y) >= 0;
@@ -1742,7 +1860,7 @@
  *  use the time between the first SYN and it's ack as the
  *  initial round trip time
  */
-void
+static void
 tcpsynackrtt(Conv *s)
 {
 	Tcpctl *tcb;
@@ -1760,46 +1878,59 @@
 	tcphalt(tpriv, &tcb->rtt_timer);
 }
 
-void
+static void
 update(Conv *s, Tcp *seg)
 {
 	int rtt, delta;
 	Tcpctl *tcb;
 	ulong acked;
-	ulong expand;
 	Tcppriv *tpriv;
 
+	if(seg->update)
+		return;
+	seg->update = 1;
+
 	tpriv = s->p->priv;
 	tcb = (Tcpctl*)s->ptcl;
 
-	/* if everything has been acked, force output(?) */
-	if(seq_gt(seg->ack, tcb->snd.nxt)) {
-		tcb->flags |= FORCE;
-		return;
+	/* catch zero-window updates, update window & recover */
+	if(tcb->snd.wnd == 0 && seg->wnd > 0)
+	if(seq_lt(seg->ack,  tcb->snd.ptr)){
+		netlog(s->p->f, Logtcp, "tcp: zwu ack %lud una %lud ptr %lud win %lud\n",
+			seg->ack,  tcb->snd.una, tcb->snd.ptr, seg->wnd);
+		tcb->snd.wnd = seg->wnd;
+		goto recovery;
 	}
 
-	/* added by Dong Lin for fast retransmission */
-	if(seg->ack == tcb->snd.una
-	&& tcb->snd.una != tcb->snd.nxt
-	&& seg->len == 0
-	&& seg->wnd == tcb->snd.wnd) {
-
-		/* this is a pure ack w/o window update */
-		netlog(s->p->f, Logtcprxmt, "dupack %lud ack %lud sndwnd %d advwin %d\n",
-			tcb->snd.dupacks, seg->ack, tcb->snd.wnd, seg->wnd);
-
-		if(++tcb->snd.dupacks == TCPREXMTTHRESH) {
-			/*
-			 *  tahoe tcp rxt the packet, half sshthresh,
- 			 *  and set cwnd to one packet
-			 */
+	/* newreno fast retransmit */
+	if(seg->ack == tcb->snd.una)
+	if(tcb->snd.una != tcb->snd.nxt)
+	if(++tcb->snd.dupacks == 3){
+recovery:
+		if(tcb->snd.recovery){
+			tpriv->stats[RecoveryCwind]++;
+			tcb->cwind += tcb->mss;
+		}else if(seq_le(tcb->snd.rxt, seg->ack)){
+			tpriv->stats[Recovery]++;
+			tcb->abcbytes = 0;
 			tcb->snd.recovery = 1;
+			tcb->snd.partialack = 0;
 			tcb->snd.rxt = tcb->snd.nxt;
-			netlog(s->p->f, Logtcprxmt, "fast rxt %lud, nxt %lud\n", tcb->snd.una, tcb->snd.nxt);
+			tcpcongestion(tcb);
+			tcb->cwind = tcb->ssthresh + 3*tcb->mss;
+			netlog(s->p->f, Logtcpwin, "recovery inflate %ld ss %ld @%lud\n",
+				tcb->cwind, tcb->ssthresh, tcb->snd.rxt);
 			tcprxmit(s);
-		} else {
-			/* do reno tcp here. */
+		}else{
+			tpriv->stats[RecoveryNoSeq]++;
+			netlog(s->p->f, Logtcpwin, "!recov %lud not ≤ %lud %ld\n",
+				tcb->snd.rxt, seg->ack, tcb->snd.rxt - seg->ack);
+			/* do not enter fast retransmit */
+			/* do not change ssthresh */
 		}
+	}else if(tcb->snd.recovery){
+		tpriv->stats[RecoveryCwind]++;
+		tcb->cwind += tcb->mss;
 	}
 
 	/*
@@ -1807,6 +1938,9 @@
 	 */
 	if(seq_gt(seg->ack, tcb->snd.wl2)
 	|| (tcb->snd.wl2 == seg->ack && seg->wnd > tcb->snd.wnd)){
+		/* clear dupack if we advance wl2 */
+		if(tcb->snd.wl2 != seg->ack)
+			tcb->snd.dupacks = 0;
 		tcb->snd.wnd = seg->wnd;
 		tcb->snd.wl2 = seg->ack;
 	}
@@ -1816,22 +1950,11 @@
 		 *  don't let us hangup if sending into a closed window and
 		 *  we're still getting acks
 		 */
-		if((tcb->flags&RETRAN) && tcb->snd.wnd == 0){
+		if((tcb->flags&RETRAN) && tcb->snd.wnd == 0)
 			tcb->backedoff = MAXBACKMS/4;
-		}
 		return;
 	}
 
-	/*
-	 *  any positive ack turns off fast rxt,
-	 *  (should we do new-reno on partial acks?)
-	 */
-	if(!tcb->snd.recovery || seq_ge(seg->ack, tcb->snd.rxt)) {
-		tcb->snd.dupacks = 0;
-		tcb->snd.recovery = 0;
-	} else
-		netlog(s->p->f, Logtcp, "rxt next %lud, cwin %ud\n", seg->ack, tcb->cwind);
-
 	/* Compute the new send window size */
 	acked = seg->ack - tcb->snd.una;
 
@@ -1843,24 +1966,41 @@
 		goto done;
 	}
 
-	/* slow start as long as we're not recovering from lost packets */
-	if(tcb->cwind < tcb->snd.wnd && !tcb->snd.recovery) {
-		if(tcb->cwind < tcb->ssthresh) {
-			expand = tcb->mss;
-			if(acked < expand)
-				expand = acked;
+	/*
+	 *  congestion control
+	 */
+	if(tcb->snd.recovery){
+		if(seq_ge(seg->ack, tcb->snd.rxt)){
+			/* recovery finished; deflate window */
+			tpriv->stats[RecoveryDone]++;
+			tcb->snd.dupacks = 0;
+			tcb->snd.recovery = 0;
+			tcb->cwind = (tcb->snd.nxt - tcb->snd.una) + tcb->mss;
+			if(tcb->ssthresh < tcb->cwind)
+				tcb->cwind = tcb->ssthresh;
+			netlog(s->p->f, Logtcpwin, "recovery deflate %ld %ld\n",
+				tcb->cwind, tcb->ssthresh);
+		} else {
+			/* partial ack; we lost more than one segment */
+			tpriv->stats[RecoveryPA]++;
+			if(tcb->cwind > acked)
+				tcb->cwind -= acked;
+			else{
+				netlog(s->p->f, Logtcpwin, "partial ack neg\n");
+				tcb->cwind = tcb->mss;
+			}
+			netlog(s->p->f, Logtcpwin, "partial ack %ld left %ld cwind %ld\n",
+				acked, tcb->snd.rxt - seg->ack, tcb->cwind);
+
+			if(acked >= tcb->mss)
+				tcb->cwind += tcb->mss;
+			tcb->snd.partialack++;
 		}
-		else
-			expand = ((int)tcb->mss * tcb->mss) / tcb->cwind;
+	} else
+		tcpabcincr(tcb, acked);
 
-		if(tcb->cwind + expand < tcb->cwind)
-			expand = tcb->snd.wnd - tcb->cwind;
-		if(tcb->cwind + expand > tcb->snd.wnd)
-			expand = tcb->snd.wnd - tcb->cwind;
-		tcb->cwind += expand;
-	}
-
 	/* Adjust the timers according to the round trip time */
+	/* todo: fix sloppy treatment of overflow cases here. */
 	if(tcb->rtt_timer.state == TcptimerON && seq_ge(seg->ack, tcb->rttseq)) {
 		tcphalt(tpriv, &tcb->rtt_timer);
 		if((tcb->flags&RETRAN) == 0) {
@@ -1891,13 +2031,23 @@
 done:
 	if(qdiscard(s->wq, acked) < acked)
 		tcb->flgcnt--;
-
 	tcb->snd.una = seg->ack;
+
+	/* newreno fast recovery */
+	if(tcb->snd.recovery)
+		tcprxmit(s);
+
 	if(seq_gt(seg->ack, tcb->snd.urg))
 		tcb->snd.urg = seg->ack;
 
-	if(tcb->snd.una != tcb->snd.nxt)
-		tcpgo(tpriv, &tcb->timer);
+	if(tcb->snd.una != tcb->snd.nxt){
+		/* “impatient” variant */
+		if(!tcb->snd.recovery || tcb->snd.partialack == 1){
+			tcb->time = NOW;
+			tcb->timeuna = tcb->snd.una;
+			tcpgo(tpriv, &tcb->timer);
+		}
+	}
 	else
 		tcphalt(tpriv, &tcb->timer);
 
@@ -1904,12 +2054,13 @@
 	if(seq_lt(tcb->snd.ptr, tcb->snd.una))
 		tcb->snd.ptr = tcb->snd.una;
 
-	tcb->flags &= ~RETRAN;
+	if(!tcb->snd.recovery)
+		tcb->flags &= ~RETRAN;
 	tcb->backoff = 0;
 	tcb->backedoff = 0;
 }
 
-void
+static void
 tcpiput(Proto *tcp, Ipifc*, Block *bp)
 {
 	Tcp seg;
@@ -1917,7 +2068,7 @@
 	Tcp6hdr *h6;
 	int hdrlen;
 	Tcpctl *tcb;
-	ushort length;
+	ushort length, csum;
 	uchar source[IPaddrlen], dest[IPaddrlen];
 	Conv *s;
 	Fs *f;
@@ -1980,10 +2131,12 @@
 		h6->ttl = proto;
 		hnputl(h6->vcf, length);
 		if((h6->tcpcksum[0] || h6->tcpcksum[1]) &&
-			ptclcsum(bp, TCP6_IPLEN, length+TCP6_PHDRSIZE)) {
+		    (csum = ptclcsum(bp, TCP6_IPLEN, length+TCP6_PHDRSIZE)) != 0) {
 			tpriv->stats[CsumErrs]++;
 			tpriv->stats[InErrs]++;
-			netlog(f, Logtcp, "bad tcp proto cksum\n");
+			netlog(f, Logtcp,
+			    "bad tcpv6 proto cksum: got %#ux, computed %#ux\n",
+				h6->tcpcksum[0]<<8 | h6->tcpcksum[1], csum);
 			freeblist(bp);
 			return;
 		}
@@ -1995,7 +2148,7 @@
 		if(hdrlen < 0){
 			tpriv->stats[HlenErrs]++;
 			tpriv->stats[InErrs]++;
-			netlog(f, Logtcp, "bad tcp hdr len\n");
+			netlog(f, Logtcp, "bad tcpv6 hdr len\n");
 			return;
 		}
 
@@ -2005,7 +2158,7 @@
 		if(bp == nil){
 			tpriv->stats[LenErrs]++;
 			tpriv->stats[InErrs]++;
-			netlog(f, Logtcp, "tcp len < 0 after trim\n");
+			netlog(f, Logtcp, "tcpv6 len < 0 after trim\n");
 			return;
 		}
 	}
@@ -2016,7 +2169,8 @@
 	/* Look for a matching conversation */
 	s = iphtlook(&tpriv->ht, source, seg.source, dest, seg.dest);
 	if(s == nil){
-		netlog(f, Logtcp, "iphtlook failed");
+		netlog(f, Logtcp, "iphtlook(src %I!%d, dst %I!%d) failed\n",
+			source, seg.source, dest, seg.dest);
 reset:
 		qunlock(tcp);
 		sndrst(tcp, source, dest, length, &seg, version, "no conversation");
@@ -2136,8 +2290,12 @@
 	}
 
 	/* Cut the data to fit the receive window */
+	tcprcvwin(s);
 	if(tcptrim(tcb, &seg, &bp, &length) == -1) {
-		netlog(f, Logtcp, "tcp len < 0, %lud %d\n", seg.seq, length);
+		if(seg.seq+1 != tcb->rcv.nxt || length != 1)
+		netlog(f, Logtcp, "tcp: trim: !inwind: seq %lud-%lud win %lud-%lud l %d from %I\n", 
+			seg.seq, seg.seq + length - 1, 
+			tcb->rcv.nxt, tcb->rcv.nxt + tcb->rcv.wnd-1, length, s->raddr);
 		update(s, &seg);
 		if(qlen(s->wq)+tcb->flgcnt == 0 && tcb->state == Closing) {
 			tcphalt(tpriv, &tcb->rtt_timer);
@@ -2168,12 +2326,15 @@
 	if(seg.seq != tcb->rcv.nxt)
 	if(length != 0 || (seg.flags & (SYN|FIN))) {
 		update(s, &seg);
-		if(addreseq(tcb, tpriv, &seg, bp, length) < 0)
+		if(addreseq(f, tcb, tpriv, &seg, bp, length) < 0)
 			print("reseq %I.%d -> %I.%d\n", s->raddr, s->rport, s->laddr, s->lport);
-		tcb->flags |= FORCE;
+		tcb->flags |= FORCE;		/* force duplicate ack; RFC 5681 §3.2 */
 		goto output;
 	}
 
+	if(tcb->nreseq > 0)
+		tcb->flags |= FORCE;		/* filled hole in sequence space; RFC 5681 §3.2 */
+
 	/*
 	 *  keep looping till we've processed this packet plus any
 	 *  adjacent packets in the resequence queue
@@ -2238,7 +2399,8 @@
 				goto raise;
 			}
 		case Time_wait:
-			tcb->flags |= FORCE;
+			if(seg.flags & FIN)
+				tcb->flags |= FORCE;
 			if(tcb->timer.state != TcptimerON)
 				tcpgo(tpriv, &tcb->timer);
 		}
@@ -2272,34 +2434,12 @@
 				 * receive queue
 				 */
 				if(bp) {
-					bp = packblock(bp);
-					if(bp == nil)
-						panic("tcp packblock");
-					qpassnolim(s->rq, bp);
+					qpassnolim(s->rq, packblock(bp));
 					bp = nil;
-
-					/*
-					 *  Force an ack every 2 data messages.  This is
-					 *  a hack for rob to make his home system run
-					 *  faster.
-					 *
-					 *  this also keeps the standard TCP congestion
-					 *  control working since it needs an ack every
-					 *  2 max segs worth.  This is not quite that,
-					 *  but under a real stream is equivalent since
-					 *  every packet has a max seg in it.
-					 */
-					if(++(tcb->rcv.una) >= 2)
-						tcb->flags |= FORCE;
 				}
 				tcb->rcv.nxt += length;
 
 				/*
-				 *  update our rcv window
-				 */
-				tcprcvwin(s);
-
-				/*
 				 *  turn on the acktimer if there's something
 				 *  to ack
 				 */
@@ -2373,8 +2513,11 @@
 
 			getreseq(tcb, &seg, &bp, &length);
 
-			if(tcptrim(tcb, &seg, &bp, &length) == 0)
+			tcprcvwin(s);
+			if(tcptrim(tcb, &seg, &bp, &length) == 0){
+				tcb->flags |= FORCE;
 				break;
+			}
 		}
 	}
 output:
@@ -2394,15 +2537,15 @@
  *  the lock to ipoput the packet so some care has to be
  *  taken by callers.
  */
-void
+static void
 tcpoutput(Conv *s)
 {
 	Tcp seg;
-	int msgs;
+	uint msgs;
 	Tcpctl *tcb;
 	Block *hbp, *bp;
-	int sndcnt, n;
-	ulong ssize, dsize, usable, sent;
+	int sndcnt;
+	ulong ssize, dsize, sent;
 	Fs *f;
 	Tcppriv *tpriv;
 	uchar version;
@@ -2411,9 +2554,26 @@
 	tpriv = s->p->priv;
 	version = s->ipversion;
 
-	for(msgs = 0; msgs < 100; msgs++) {
-		tcb = (Tcpctl*)s->ptcl;
+	tcb = (Tcpctl*)s->ptcl;
 
+	/* force ack every 2*mss */
+	if((tcb->flags & FORCE) == 0)
+	if(tcb->rcv.nxt - tcb->rcv.ackptr >= 2*tcb->mss){
+		tpriv->stats[Delayack]++;
+		tcb->flags |= FORCE;
+	}
+
+	/* force ack if window opening */
+	if(0)
+	if((tcb->flags & FORCE) == 0){
+		tcprcvwin(s);
+		if((int)(tcb->rcv.wptr - tcb->rcv.wsnt) >= 2*tcb->mss){
+			tpriv->stats[Wopenack]++;
+			tcb->flags |= FORCE;
+		}
+	}
+
+	for(msgs = 0; msgs < 100; msgs++) {
 		switch(tcb->state) {
 		case Listen:
 		case Closed:
@@ -2421,7 +2581,12 @@
 			return;
 		}
 
+		/* Don't send anything else until our SYN has been acked */
+		if(tcb->snd.ptr != tcb->iss && (tcb->flags & SYNACK) == 0)
+			break;
+
 		/* force an ack when a window has opened up */
+		tcprcvwin(s);
 		if(tcb->rcv.blocked && tcb->rcv.wnd > 0){
 			tcb->rcv.blocked = 0;
 			tcb->flags |= FORCE;
@@ -2429,54 +2594,57 @@
 
 		sndcnt = qlen(s->wq)+tcb->flgcnt;
 		sent = tcb->snd.ptr - tcb->snd.una;
-
-		/* Don't send anything else until our SYN has been acked */
-		if(tcb->snd.ptr != tcb->iss && (tcb->flags & SYNACK) == 0)
-			break;
-
-		/* Compute usable segment based on offered window and limit
-		 * window probes to one
-		 */
+		ssize = sndcnt;
 		if(tcb->snd.wnd == 0){
-			if(sent != 0) {
-				if((tcb->flags&FORCE) == 0)
-					break;
-//				tcb->snd.ptr = tcb->snd.una;
+			/* zero window probe */
+			if(sent > 0)
+			if(!(tcb->flags & FORCE))
+				break;	/* already probing, rto re-probes */
+			if(ssize < sent)
+				ssize = 0;
+			else{
+				ssize -= sent;
+				if(ssize > 0)
+					ssize = 1;
 			}
-			usable = 1;
+		} else {
+			/* calculate usable segment size */
+			if(ssize > tcb->cwind)
+				ssize = tcb->cwind;
+			if(ssize > tcb->snd.wnd)
+				ssize = tcb->snd.wnd;
+
+			if(ssize < sent)
+				ssize = 0;
+			else {
+				ssize -= sent;
+				if(ssize > tcb->mss)
+					ssize = tcb->mss;
+			}
 		}
-		else {
-			usable = tcb->cwind;
-			if(tcb->snd.wnd < usable)
-				usable = tcb->snd.wnd;
-			usable -= sent;
-		}
-		ssize = sndcnt-sent;
-		if(ssize && usable < 2)
-			netlog(s->p->f, Logtcp, "throttled snd.wnd %lud cwind %lud\n",
-				tcb->snd.wnd, tcb->cwind);
-		if(usable < ssize)
-			ssize = usable;
-		if(tcb->mss < ssize)
-			ssize = tcb->mss;
+
 		dsize = ssize;
 		seg.urg = 0;
 
-		if(ssize == 0)
-		if((tcb->flags&FORCE) == 0)
-			break;
+		if(!(tcb->flags & FORCE)){
+			if(ssize == 0)
+				break;
+			if(ssize < tcb->mss)
+			if(tcb->snd.nxt == tcb->snd.ptr)
+			if(sent > TCPREXMTTHRESH*tcb->mss)
+				break;
+		}
 
 		tcb->flags &= ~FORCE;
-		tcprcvwin(s);
 
 		/* By default we will generate an ack */
 		tcphalt(tpriv, &tcb->acktimer);
-		tcb->rcv.una = 0;
 		seg.source = s->lport;
 		seg.dest = s->rport;
 		seg.flags = ACK;
 		seg.mss = 0;
 		seg.ws = 0;
+		seg.update = 0;
 		switch(tcb->state){
 		case Syn_sent:
 			seg.flags = 0;
@@ -2516,20 +2684,9 @@
 			}
 		}
 
-		if(sent+dsize == sndcnt)
+		if(sent+dsize == sndcnt && dsize)
 			seg.flags |= PSH;
 
-		/* keep track of balance of resent data */
-		if(seq_lt(tcb->snd.ptr, tcb->snd.nxt)) {
-			n = tcb->snd.nxt - tcb->snd.ptr;
-			if(ssize < n)
-				n = ssize;
-			tcb->resent += n;
-			netlog(f, Logtcp, "rexmit: %I.%d -> %I.%d ptr %lux nxt %lux\n",
-				s->raddr, s->rport, s->laddr, s->lport, tcb->snd.ptr, tcb->snd.nxt);
-			tpriv->stats[RetransSegs]++;
-		}
-
 		tcb->snd.ptr += ssize;
 
 		/* Pull up the send pointer so we can accept acks
@@ -2565,13 +2722,17 @@
 		 * expect acknowledges
 		 */
 		if(ssize != 0){
-			if(tcb->timer.state != TcptimerON)
+			if(tcb->timer.state != TcptimerON){
+				tcb->time = NOW;
+				tcb->timeuna = tcb->snd.una;
 				tcpgo(tpriv, &tcb->timer);
+			}
 
 			/*  If round trip timer isn't running, start it.
 			 *  measure the longest packet only in case the
 			 *  transmission time dominates RTT
 			 */
+			if(tcb->snd.retransmit == 0)
 			if(tcb->rtt_timer.state != TcptimerON)
 			if(ssize == tcb->mss) {
 				tcpgo(tpriv, &tcb->rtt_timer);
@@ -2580,6 +2741,10 @@
 		}
 
 		tpriv->stats[OutSegs]++;
+		if(tcb->snd.retransmit)
+			tpriv->stats[RetransSegsSent]++;
+		tcb->rcv.ackptr = seg.ack;
+		tcb->rcv.wsnt = tcb->rcv.wptr;
 
 		/* put off the next keep alive */
 		tcpgo(tpriv, &tcb->katimer);
@@ -2600,9 +2765,8 @@
 		default:
 			panic("tcpoutput2: version %d", version);
 		}
-		if((msgs%4) == 1){
+		if((msgs%4) == 3){
 			qunlock(s);
-			sched();
 			qlock(s);
 		}
 	}
@@ -2611,7 +2775,7 @@
 /*
  *  the BSD convention (hack?) for keep alives.  resend last uchar acked.
  */
-void
+static void
 tcpsendka(Conv *s)
 {
 	Tcp seg;
@@ -2621,6 +2785,7 @@
 	tcb = (Tcpctl*)s->ptcl;
 
 	dbp = nil;
+	memset(&seg, 0, sizeof seg);
 	seg.urg = 0;
 	seg.source = s->lport;
 	seg.dest = s->rport;
@@ -2632,7 +2797,8 @@
 	else
 		seg.seq = tcb->snd.una-1;
 	seg.ack = tcb->rcv.nxt;
-	tcb->rcv.una = 0;
+	tcb->rcv.ackptr = seg.ack;
+	tcprcvwin(s);
 	seg.wnd = tcb->rcv.wnd;
 	if(tcb->state == Finwait2){
 		seg.flags |= FIN;
@@ -2666,7 +2832,7 @@
 /*
  *  set connection to time out after 12 minutes
  */
-void
+static void
 tcpsetkacounter(Tcpctl *tcb)
 {
 	tcb->kacounter = (12 * 60 * 1000) / (tcb->katimer.start*MSPTICK);
@@ -2678,7 +2844,7 @@
  *  if we've timed out, close the connection
  *  otherwise, send a keepalive and restart the timer
  */
-void
+static void
 tcpkeepalive(void *v)
 {
 	Tcpctl *tcb;
@@ -2706,7 +2872,7 @@
 /*
  *  start keepalive timer
  */
-char*
+static char*
 tcpstartka(Conv *s, char **f, int n)
 {
 	Tcpctl *tcb;
@@ -2729,7 +2895,7 @@
 /*
  *  turn checksums on/off
  */
-char*
+static char*
 tcpsetchecksum(Conv *s, char **f, int)
 {
 	Tcpctl *tcb;
@@ -2740,30 +2906,38 @@
 	return nil;
 }
 
-void
+/*
+ *  retransmit (at most) one segment at snd.una.
+ *  preserve cwind & snd.ptr
+ */
+static void
 tcprxmit(Conv *s)
 {
 	Tcpctl *tcb;
+	Tcppriv *tpriv;
+	ulong tcwind, tptr;
 
 	tcb = (Tcpctl*)s->ptcl;
-
 	tcb->flags |= RETRAN|FORCE;
-	tcb->snd.ptr = tcb->snd.una;
 
-	/*
-	 *  We should be halving the slow start threshhold (down to one
-	 *  mss) but leaving it at mss seems to work well enough
-	 */
- 	tcb->ssthresh = tcb->mss;
-
-	/*
-	 *  pull window down to a single packet
-	 */
+	tptr = tcb->snd.ptr;
+	tcwind = tcb->cwind;
+	tcb->snd.ptr = tcb->snd.una;
 	tcb->cwind = tcb->mss;
+	tcb->snd.retransmit = 1;
 	tcpoutput(s);
+	tcb->snd.retransmit = 0;
+	tcb->cwind = tcwind;
+	tcb->snd.ptr = tptr;
+
+	tpriv = s->p->priv;
+	tpriv->stats[RetransSegs]++;
 }
 
-void
+/*
+ *  todo: RFC 4138 F-RTO
+ */
+static void
 tcptimeout(void *arg)
 {
 	Conv *s;
@@ -2792,11 +2966,29 @@
 			localclose(s, Etimedout);
 			break;
 		}
-		netlog(s->p->f, Logtcprxmt, "timeout rexmit 0x%lux %d/%d\n", tcb->snd.una, tcb->timer.start, NOW);
+		netlog(s->p->f, Logtcprxmt, "rxm %d/%d %ldms %lud rto %d %lud %s\n",
+			tcb->srtt, tcb->mdev, NOW-tcb->time,
+			tcb->snd.una-tcb->timeuna, tcb->snd.rto, tcb->snd.ptr,
+			tcpstates[tcb->state]);	/* name the TCP state (tcb), not the Conv's own state field */
 		tcpsettimer(tcb);
+		if(tcb->snd.rto == 0)
+			tcpcongestion(tcb);
 		tcprxmit(s);
+		tcb->snd.ptr = tcb->snd.una;
+		tcb->cwind = tcb->mss;
+		tcb->snd.rto = 1;
 		tpriv->stats[RetransTimeouts]++;
-		tcb->snd.dupacks = 0;
+
+		if(tcb->snd.recovery){
+			tcb->snd.dupacks = 0;			/* reno rto */
+			tcb->snd.recovery = 0;
+			tpriv->stats[RecoveryRTO]++;
+			tcb->snd.rxt = tcb->snd.nxt;
+			netlog(s->p->f, Logtcpwin,
+				"rto recovery rxt @%lud\n", tcb->snd.nxt);
+		}
+
+		tcb->abcbytes = 0;
 		break;
 	case Time_wait:
 		localclose(s, nil);
@@ -2808,7 +3000,7 @@
 	poperror();
 }
 
-int
+static int
 inwindow(Tcpctl *tcb, int seq)
 {
 	return seq_within(seq, tcb->rcv.nxt, tcb->rcv.nxt+tcb->rcv.wnd-1);
@@ -2817,36 +3009,83 @@
 /*
  *  set up state for a received SYN (or SYN ACK) packet
  */
-void
+static void
 procsyn(Conv *s, Tcp *seg)
 {
 	Tcpctl *tcb;
+	Tcppriv *tpriv;
 
 	tcb = (Tcpctl*)s->ptcl;
 	tcb->flags |= FORCE;
 
 	tcb->rcv.nxt = seg->seq + 1;
+	tcb->rcv.wptr = tcb->rcv.nxt;
+	tcb->rcv.wsnt = 0;
 	tcb->rcv.urg = tcb->rcv.nxt;
 	tcb->irs = seg->seq;
 
 	/* our sending max segment size cannot be bigger than what he asked for */
-	if(seg->mss != 0 && seg->mss < tcb->mss)
+	if(seg->mss != 0 && seg->mss < tcb->mss) {
 		tcb->mss = seg->mss;
+		tpriv = s->p->priv;
+		tpriv->stats[Mss] = tcb->mss;
+	}
 
-	/* the congestion window always starts out as a single segment */
+	/* if the server does not support ws option, disable window scaling */
+	if(seg->ws == 0){
+		tcb->scale = 0;
+		tcb->snd.scale = 0;
+	}
+
 	tcb->snd.wnd = seg->wnd;
-	tcb->cwind = tcb->mss;
+	initialwindow(tcb);
 }
 
-int
-addreseq(Tcpctl *tcb, Tcppriv *tpriv, Tcp *seg, Block *bp, ushort length)
+static int
+dumpreseq(Tcpctl *tcb)
 {
-	Reseq *rp, *rp1;
-	int i, rqlen, qmax;
+	Reseq *r, *next;
 
+	for(r = tcb->reseq; r != nil; r = next){
+		next = r->next;
+		freeblist(r->bp);
+		free(r);
+	}
+	tcb->reseq = nil;
+	tcb->nreseq = 0;
+	tcb->reseqlen = 0;
+	return -1;
+}
+
+static void
+logreseq(Fs *f, Reseq *r, ulong n)
+{
+	char *s;
+
+	for(; r != nil; r = r->next){
+		s = nil;
+		if(r->next == nil && r->seg.seq != n)
+			s = "hole/end";
+		else if(r->next == nil)
+			s = "end";
+		else if(r->seg.seq != n)
+			s = "hole";
+		if(s != nil)
+			netlog(f, Logtcp, "%s %lud-%lud (%ld) %#ux\n", s,
+				n, r->seg.seq, r->seg.seq-n, r->seg.flags);
+		n = r->seg.seq + r->seg.len;
+	}
+}
+
+static int
+addreseq(Fs *f, Tcpctl *tcb, Tcppriv *tpriv, Tcp *seg, Block *bp, ushort length)
+{
+	Reseq *rp, **rr;
+	int qmax;
+
 	rp = malloc(sizeof(Reseq));
 	if(rp == nil){
-		freeblist(bp);	/* bp always consumed by add_reseq */
+		freeblist(bp);	/* bp always consumed by addreseq */
 		return 0;
 	}
 
@@ -2854,56 +3093,39 @@
 	rp->bp = bp;
 	rp->length = length;
 
-	/* Place on reassembly list sorting by starting seq number */
-	rp1 = tcb->reseq;
-	if(rp1 == nil || seq_lt(seg->seq, rp1->seg.seq)) {
-		rp->next = rp1;
-		tcb->reseq = rp;
-		if(rp->next != nil)
-			tpriv->stats[OutOfOrder]++;
-		return 0;
-	}
+	tcb->reseqlen += length;
+	tcb->nreseq++;
 
-	rqlen = 0;
-	for(i = 0;; i++) {
-		rqlen += rp1->length;
-		if(rp1->next == nil || seq_lt(seg->seq, rp1->next->seg.seq)) {
-			rp->next = rp1->next;
-			rp1->next = rp;
+	/* Place on reassembly list sorting by starting seq number */
+	for(rr = &tcb->reseq;; rr = &(*rr)->next)
+		if(*rr == nil || seq_lt(seg->seq, (*rr)->seg.seq)){
+			rp->next = *rr;
+			*rr = rp;
+			tpriv->stats[Resequenced]++;
 			if(rp->next != nil)
 				tpriv->stats[OutOfOrder]++;
 			break;
 		}
-		rp1 = rp1->next;
-	}
-	qmax = QMAX<<tcb->rcv.scale;
-	if(rqlen > qmax){
-		print("resequence queue > window: %d > %d\n", rqlen, qmax);
-		i = 0;
-	  	for(rp1 = tcb->reseq; rp1 != nil; rp1 = rp1->next){
-	  		print("%#lux %#lux %#ux\n", rp1->seg.seq,
-	  			rp1->seg.ack, rp1->seg.flags);
-			if(i++ > 10){
-				print("...\n");
-				break;
-			}
-		}
 
-		// delete entire reassembly queue; wait for retransmit.
-		// - should we be smarter and only delete the tail?
-		for(rp = tcb->reseq; rp != nil; rp = rp1){
-			rp1 = rp->next;
-			freeblist(rp->bp);
-			free(rp);
-		}
-		tcb->reseq = nil;
-
-	  	return -1;
+	qmax = tcb->window;
+	if(tcb->reseqlen > qmax){
+		netlog(f, Logtcp, "tcp: reseq: queue > window: %d > %d; %d packets\n", tcb->reseqlen, qmax, tcb->nreseq);
+		logreseq(f, tcb->reseq, tcb->rcv.nxt);
+		tpriv->stats[ReseqBytelim]++;
+		return dumpreseq(tcb);
 	}
+	qmax = tcb->window / tcb->mss;		/* ~190 for qscale==2, 390 for qscale=3 */
+	if(tcb->nreseq > qmax){
+		netlog(f, Logtcp, "resequence queue > packets: %d %d; %d bytes\n", tcb->nreseq, qmax, tcb->reseqlen);
+		logreseq(f, tcb->reseq, tcb->rcv.nxt);
+		tpriv->stats[ReseqPktlim]++;
+		return dumpreseq(tcb);
+	}
+
 	return 0;
 }
 
-void
+static void
 getreseq(Tcpctl *tcb, Tcp *seg, Block **bp, ushort *length)
 {
 	Reseq *rp;
@@ -2918,10 +3140,13 @@
 	*bp = rp->bp;
 	*length = rp->length;
 
+	tcb->nreseq--;
+	tcb->reseqlen -= rp->length;
+
 	free(rp);
 }
 
-int
+static int
 tcptrim(Tcpctl *tcb, Tcp *seg, Block **bp, ushort *length)
 {
 	ushort len;
@@ -2992,7 +3217,7 @@
 	return 0;
 }
 
-void
+static void
 tcpadvise(Proto *tcp, Block *bp, char *msg)
 {
 	Tcp4hdr *h4;
@@ -3011,8 +3236,7 @@
 		v4tov6(source, h4->tcpsrc);
 		psource = nhgets(h4->tcpsport);
 		pdest = nhgets(h4->tcpdport);
-	}
-	else {
+	} else {
 		ipmove(dest, h6->tcpdst);
 		ipmove(source, h6->tcpsrc);
 		psource = nhgets(h6->tcpsport);
@@ -3021,8 +3245,7 @@
 
 	/* Look for a connection */
 	qlock(tcp);
-	for(p = tcp->conv; *p; p++) {
-		s = *p;
+	for(p = tcp->conv; (s = *p) != nil; p++) {
 		tcb = (Tcpctl*)s->ptcl;
 		if(s->rport == pdest)
 		if(s->lport == psource)
@@ -3029,6 +3252,8 @@
 		if(tcb->state != Closed)
 		if(ipcmp(s->raddr, dest) == 0)
 		if(ipcmp(s->laddr, source) == 0){
+			if(s->ignoreadvice)
+				break;
 			qlock(s);
 			qunlock(tcp);
 			switch(tcb->state){
@@ -3058,9 +3283,11 @@
 }
 
 /* called with c qlocked */
-char*
+static char*
 tcpctl(Conv* c, char** f, int n)
 {
+	if(n == 1 && strcmp(f[0], "close") == 0)
+		return tcpclose(c), nil;
 	if(n == 1 && strcmp(f[0], "hangup") == 0)
 		return tcphangup(c);
 	if(n >= 1 && strcmp(f[0], "keepalive") == 0)
@@ -3072,7 +3299,7 @@
 	return "unknown control request";
 }
 
-int
+static int
 tcpstats(Proto *tcp, char *buf, int len)
 {
 	Tcppriv *priv;
@@ -3083,7 +3310,7 @@
 	p = buf;
 	e = p+len;
 	for(i = 0; i < Nstats; i++)
-		p = seprint(p, e, "%s: %lud\n", statnames[i], priv->stats[i]);
+		p = seprint(p, e, "%s: %llud\n", statnames[i], priv->stats[i]);
 	return p - buf;
 }
 
@@ -3096,7 +3323,7 @@
  *  of questionable validity so we try to use them only when we're
  *  up against the wall.
  */
-int
+static int
 tcpgc(Proto *tcp)
 {
 	Conv *c, **pp, **ep;
@@ -3104,7 +3331,7 @@
 	Tcpctl *tcb;
 
 
-	n = natgc(tcp->ipproto);
+	n = 0;
 	ep = &tcp->conv[tcp->nc];
 	for(pp = tcp->conv; pp < ep; pp++) {
 		c = *pp;
@@ -3116,13 +3343,13 @@
 		switch(tcb->state){
 		case Syn_received:
 			if(NOW - tcb->time > 5000){
-				localclose(c, "timed out");
+				localclose(c, Etimedout);
 				n++;
 			}
 			break;
 		case Finwait2:
 			if(NOW - tcb->time > 5*60*1000){
-				localclose(c, "timed out");
+				localclose(c, Etimedout);
 				n++;
 			}
 			break;
@@ -3132,7 +3359,7 @@
 	return n;
 }
 
-void
+static void
 tcpsettimer(Tcpctl *tcb)
 {
 	int x;
@@ -3141,9 +3368,9 @@
 	x = backoff(tcb->backoff) *
 		(tcb->mdev + (tcb->srtt>>LOGAGAIN) + MSPTICK) / MSPTICK;
 
-	/* bounded twixt 1/2 and 64 seconds */
-	if(x < 500/MSPTICK)
-		x = 500/MSPTICK;
+	/* bounded twixt 0.3 and 64 seconds */
+	if(x < 300/MSPTICK)
+		x = 300/MSPTICK;
 	else if(x > (64000/MSPTICK))
 		x = 64000/MSPTICK;
 	tcb->timer.start = x;
@@ -3177,18 +3404,37 @@
 	Fsproto(fs, tcp);
 }
 
-void
+static void
 tcpsetscale(Conv *s, Tcpctl *tcb, ushort rcvscale, ushort sndscale)
 {
-	if(rcvscale){
-		tcb->rcv.scale = rcvscale & 0xff;
-		tcb->snd.scale = sndscale & 0xff;
-		tcb->window = QMAX<<tcb->snd.scale;
-		qsetlimit(s->rq, tcb->window);
-	} else {
-		tcb->rcv.scale = 0;
-		tcb->snd.scale = 0;
-		tcb->window = QMAX;
-		qsetlimit(s->rq, tcb->window);
-	}
+	/*
+	 * guess at reasonable queue sizes.  there's no current way 
+	 * to know how many nic receive buffers we can safely tie up in the
+	 * tcp stack, and we don't adjust our queues to maximize throughput
+	 * and minimize bufferbloat.  n.b. the offer (rcvscale) needs to be
+	 * respected, but we still control our own buffer commitment by
+	 * keeping a separate qscale.
+	 */
+	tcb->rcv.scale = rcvscale & 0xff;
+	tcb->snd.scale = sndscale & 0xff;
+	tcb->qscale = rcvscale & 0xff;
+	if(rcvscale > Maxqscale)
+		tcb->qscale = Maxqscale;
+
+	if(rcvscale != tcb->rcv.scale)
+		netlog(s->p->f, Logtcp, "tcpsetscale: window %lud qlen %d >> window %ud lport %d\n",
+			tcb->window, qlen(s->rq), QMAX<<tcb->qscale, s->lport);
+	tcb->window = QMAX<<tcb->qscale;
+	tcb->ssthresh = tcb->window;
+
+	/*
+	 * it's important to set wq large enough to cover the full
+	 * bandwidth-delay product.  it's possible to be in loss
+	 * recovery with a big window, and we need to keep sending
+	 * into the inflated window.  the difference can be huge
+	 * for even modest (70ms) ping times.
+	 */
+	qsetlimit(s->rq, QMAX<<tcb->qscale);
+	qsetlimit(s->wq, QMAX<<tcb->qscale);
+	tcprcvwin(s);
 }
--- a/os/ip/udp.c
+++ b/os/ip/udp.c
@@ -24,7 +24,6 @@
 
 	IP_UDPPROTO	= 17,
 	UDP_USEAD7	= 52,
-	UDP_USEAD6	= 36,
 
 	Udprxms		= 200,
 	Udptickms	= 100,
@@ -40,7 +39,7 @@
 	uchar	length[2];	/* packet length */
 	uchar	id[2];		/* Identification */
 	uchar	frag[2];	/* Fragment information */
-	uchar	Unused;	
+	uchar	Unused;
 	uchar	udpproto;	/* Protocol */
 	uchar	udpplen[2];	/* Header plus data length */
 	uchar	udpsrc[IPv4addrlen];	/* Ip source */
@@ -73,10 +72,10 @@
 typedef struct Udpstats Udpstats;
 struct Udpstats
 {
-	ulong	udpInDatagrams;
+	uvlong	udpInDatagrams;
 	ulong	udpNoPorts;
 	ulong	udpInErrors;
-	ulong	udpOutDatagrams;
+	uvlong	udpOutDatagrams;
 };
 
 typedef struct Udppriv Udppriv;
@@ -101,7 +100,6 @@
 typedef struct Udpcb Udpcb;
 struct Udpcb
 {
-	QLock;
 	uchar	headers;
 };
 
@@ -125,7 +123,7 @@
 static int
 udpstate(Conv *c, char *state, int n)
 {
-	return snprint(state, n, "%s qin %d qout %d",
+	return snprint(state, n, "%s qin %d qout %d\n",
 		c->inuse ? "Open" : "Closed",
 		c->rq ? qlen(c->rq) : 0,
 		c->wq ? qlen(c->wq) : 0
@@ -151,7 +149,7 @@
 static void
 udpcreate(Conv *c)
 {
-	c->rq = qopen(64*1024, Qmsg, 0, 0);
+	c->rq = qopen(512*1024, Qmsg, 0, 0);
 	c->wq = qbypass(udpkick, c);
 }
 
@@ -175,8 +173,6 @@
 
 	ucb = (Udpcb*)c->ptcl;
 	ucb->headers = 0;
-
-	qunlock(c);
 }
 
 void
@@ -192,12 +188,13 @@
 	Udppriv *upriv;
 	Fs *f;
 	int version;
-	Conv *rc;
+	Routehint *rh;
+	ushort csum;
 
 	upriv = c->p->priv;
 	f = c->p->f;
 
-	netlog(c->p->f, Logudp, "udp: kick\n");
+//	netlog(c->p->f, Logudp, "udp: kick\n");	/* frequent and uninteresting */
 	if(bp == nil)
 		return;
 
@@ -219,21 +216,6 @@
 		rport = nhgets(bp->rp);
 		bp->rp += 2+2;			/* Ignore local port */
 		break;
-	case 6:
-		/* get user specified addresses */
-		bp = pullupblock(bp, UDP_USEAD6);
-		if(bp == nil)
-			return;
-		ipmove(raddr, bp->rp);
-		bp->rp += IPaddrlen;
-		ipmove(laddr, bp->rp);
-		bp->rp += IPaddrlen;
-		/* pick interface closest to dest */
-		if(ipforme(f, laddr) != Runi)
-			findlocalip(f, laddr, raddr);
-		rport = nhgets(bp->rp);
-		bp->rp += 2+2;			/* Ignore local port */
-		break;
 	default:
 		rport = 0;
 		break;
@@ -240,18 +222,12 @@
 	}
 
 	if(ucb->headers) {
-		if(memcmp(laddr, v4prefix, IPv4off) == 0 ||
-		    ipcmp(laddr, IPnoaddr) == 0)
+		if(isv4(laddr) || ipcmp(laddr, IPnoaddr) == 0)
 			version = V4;
 		else
 			version = V6;
 	} else {
-		if( (memcmp(c->raddr, v4prefix, IPv4off) == 0 &&
-			memcmp(c->laddr, v4prefix, IPv4off) == 0)
-			|| ipcmp(c->raddr, IPnoaddr) == 0)
-			version = V4;
-		else
-			version = V6;
+		version = convipvers(c);
 	}
 
 	dlen = blocklen(bp);
@@ -260,9 +236,6 @@
 	switch(version){
 	case V4:
 		bp = padblock(bp, UDP4_IPHDR_SZ+UDP_UDPHDR_SZ);
-		if(bp == nil)
-			return;
-
 		uh4 = (Udp4hdr *)(bp->rp);
 		ptcllen = dlen + UDP_UDPHDR_SZ;
 		uh4->Unused = 0;
@@ -274,7 +247,7 @@
 			v6tov4(uh4->udpdst, raddr);
 			hnputs(uh4->udpdport, rport);
 			v6tov4(uh4->udpsrc, laddr);
-			rc = nil;
+			rh = nil;
 		} else {
 			v6tov4(uh4->udpdst, c->raddr);
 			hnputs(uh4->udpdport, c->rport);
@@ -281,25 +254,26 @@
 			if(ipcmp(c->laddr, IPnoaddr) == 0)
 				findlocalip(f, c->laddr, c->raddr);
 			v6tov4(uh4->udpsrc, c->laddr);
-			rc = c;
+			rh = c;
 		}
 		hnputs(uh4->udpsport, c->lport);
 		hnputs(uh4->udplen, ptcllen);
 		uh4->udpcksum[0] = 0;
 		uh4->udpcksum[1] = 0;
-		hnputs(uh4->udpcksum, 
-		       ptclcsum(bp, UDP4_PHDR_OFF, dlen+UDP_UDPHDR_SZ+UDP4_PHDR_SZ));
+		csum = ptclcsum(bp, UDP4_PHDR_OFF, dlen+UDP_UDPHDR_SZ+UDP4_PHDR_SZ);
+		if(csum == 0)
+			csum = 0xffff;	/* -0 */
+		hnputs(uh4->udpcksum, csum);
 		uh4->vihl = IP_VER4;
-		ipoput4(f, bp, 0, c->ttl, c->tos, rc);
+		ipoput4(f, bp, 0, c->ttl, c->tos, rh);
 		break;
 
 	case V6:
+		/*
+		 * using the v6 ip header to create pseudo header
+		 * first then reset it to the normal ip header
+		 */
 		bp = padblock(bp, UDP6_IPHDR_SZ+UDP_UDPHDR_SZ);
-		if(bp == nil)
-			return;
-
-		// using the v6 ip header to create pseudo header 
-		// first then reset it to the normal ip header
 		uh6 = (Udp6hdr *)(bp->rp);
 		memset(uh6, 0, 8);
 		ptcllen = dlen + UDP_UDPHDR_SZ;
@@ -309,7 +283,7 @@
 			ipmove(uh6->udpdst, raddr);
 			hnputs(uh6->udpdport, rport);
 			ipmove(uh6->udpsrc, laddr);
-			rc = nil;
+			rh = nil;
 		} else {
 			ipmove(uh6->udpdst, c->raddr);
 			hnputs(uh6->udpdport, c->rport);
@@ -316,19 +290,21 @@
 			if(ipcmp(c->laddr, IPnoaddr) == 0)
 				findlocalip(f, c->laddr, c->raddr);
 			ipmove(uh6->udpsrc, c->laddr);
-			rc = c;
+			rh = c;
 		}
 		hnputs(uh6->udpsport, c->lport);
 		hnputs(uh6->udplen, ptcllen);
 		uh6->udpcksum[0] = 0;
 		uh6->udpcksum[1] = 0;
-		hnputs(uh6->udpcksum, 
-		       ptclcsum(bp, UDP6_PHDR_OFF, dlen+UDP_UDPHDR_SZ+UDP6_PHDR_SZ));
+		csum = ptclcsum(bp, UDP6_PHDR_OFF, dlen+UDP_UDPHDR_SZ+UDP6_PHDR_SZ);
+		if(csum == 0)
+			csum = 0xffff;	/* -0 */
+		hnputs(uh6->udpcksum, csum);
 		memset(uh6, 0, 8);
 		uh6->viclfl[0] = IP_VER6;
 		hnputs(uh6->len, ptcllen);
 		uh6->nextheader = IP_UDPPROTO;
-		ipoput6(f, bp, 0, c->ttl, c->tos, rc);
+		ipoput6(f, bp, 0, c->ttl, c->tos, rh);
 		break;
 
 	default:
@@ -360,10 +336,8 @@
 	uh4 = (Udp4hdr*)(bp->rp);
 	version = ((uh4->vihl&0xF0)==IP_VER6) ? V6 : V4;
 
-	/*
-	 * Put back pseudo header for checksum 
-	 * (remember old values for icmpnoconv())
-	 */
+	/* Put back pseudo header for checksum
+	 * (remember old values for icmpnoconv()) */
 	switch(version) {
 	case V4:
 		ottl = uh4->Unused;
@@ -423,7 +397,7 @@
 
 	c = iphtlook(&upriv->ht, raddr, rport, laddr, lport);
 	if(c == nil){
-		/* no converstation found */
+		/* no conversation found */
 		upriv->ustats.udpNoPorts++;
 		qunlock(udp);
 		netlog(f, Logudp, "udp: no conv %I!%d -> %I!%d\n", raddr, rport,
@@ -434,7 +408,7 @@
 			icmpnoconv(f, bp);
 			break;
 		case V6:
-			icmphostunr(f, ifc, bp, icmp6_port_unreach, 0);
+			icmphostunr6(f, ifc, bp, Icmp6_port_unreach, 0);
 			break;
 		default:
 			panic("udpiput2: version %d", version);
@@ -448,18 +422,8 @@
 	if(c->state == Announced){
 		if(ucb->headers == 0){
 			/* create a new conversation */
-			if(ipforme(f, laddr) != Runi) {
-				switch(version){
-				case V4:
-					v4tov6(laddr, ifc->lifc->local);
-					break;
-				case V6:
-					ipmove(laddr, ifc->lifc->local);
-					break;
-				default:
-					panic("udpiput3: version %d", version);
-				}
-			}
+			if(ipforme(f, laddr) != Runi)
+				ipv6local(ifc, laddr, 0, raddr);
 			c = Fsnewcall(c, raddr, rport, laddr, lport, version);
 			if(c == nil){
 				qunlock(udp);
@@ -507,33 +471,21 @@
 		p = bp->rp;
 		ipmove(p, raddr); p += IPaddrlen;
 		ipmove(p, laddr); p += IPaddrlen;
-		ipmove(p, ifc->lifc->local); p += IPaddrlen;
+		if(!ipv6local(ifc, p, 0, raddr))
+			ipmove(p, ifc->lifc != nil ? ifc->lifc->local : IPnoaddr);
+		p += IPaddrlen;
 		hnputs(p, rport); p += 2;
 		hnputs(p, lport);
 		break;
-	case 6:
-		/* pass the src address */
-		bp = padblock(bp, UDP_USEAD6);
-		p = bp->rp;
-		ipmove(p, raddr); p += IPaddrlen;
-		ipmove(p, ipforme(f, laddr)==Runi ? laddr : ifc->lifc->local); p += IPaddrlen;
-		hnputs(p, rport); p += 2;
-		hnputs(p, lport);
-		break;
 	}
 
-	if(bp->next)
-		bp = concatblock(bp);
-
 	if(qfull(c->rq)){
-		qunlock(c);
-		netlog(f, Logudp, "udp: qfull %I.%d -> %I.%d\n", raddr, rport,
-		       laddr, lport);
+		netlog(f, Logudp, "udp: qfull %I.%d -> %I.%d\n",
+			raddr, rport, laddr, lport);
 		freeblist(bp);
-		return;
+	} else {
+		qpass(c->rq, concatblock(bp));
 	}
-
-	qpass(c->rq, bp);
 	qunlock(c);
 
 }
@@ -545,11 +497,13 @@
 
 	ucb = (Udpcb*)c->ptcl;
 	if(n == 1){
-		if(strcmp(f[0], "oldheaders") == 0){
-			ucb->headers = 6;
+		if(strcmp(f[0], "hangup") == 0){
+			qhangup(c->rq, nil);
+			qhangup(c->wq, nil);
 			return nil;
-		} else if(strcmp(f[0], "headers") == 0){
-			ucb->headers = 7;
+		}
+		if(strcmp(f[0], "headers") == 0){
+			ucb->headers = 7;	/* new headers format */
 			return nil;
 		}
 	}
@@ -564,34 +518,25 @@
 	uchar source[IPaddrlen], dest[IPaddrlen];
 	ushort psource, pdest;
 	Conv *s, **p;
-	int version;
 
 	h4 = (Udp4hdr*)(bp->rp);
-	version = ((h4->vihl&0xF0)==IP_VER6) ? V6 : V4;
+	h6 = (Udp6hdr*)(bp->rp);
 
-	switch(version) {
-	case V4:
+	if((h4->vihl&0xF0)==IP_VER4) {
 		v4tov6(dest, h4->udpdst);
 		v4tov6(source, h4->udpsrc);
 		psource = nhgets(h4->udpsport);
 		pdest = nhgets(h4->udpdport);
-		break;
-	case V6:
-		h6 = (Udp6hdr*)(bp->rp);
+	} else {
 		ipmove(dest, h6->udpdst);
 		ipmove(source, h6->udpsrc);
 		psource = nhgets(h6->udpsport);
 		pdest = nhgets(h6->udpdport);
-		break;
-	default:
-		panic("udpadvise: version %d", version);
-		return;  /* to avoid a warning */
 	}
 
 	/* Look for a connection */
 	qlock(udp);
-	for(p = udp->conv; *p; p++) {
-		s = *p;
+	for(p = udp->conv; (s = *p) != nil; p++) {
 		if(s->rport == pdest)
 		if(s->lport == psource)
 		if(ipcmp(s->raddr, dest) == 0)
@@ -617,7 +562,8 @@
 	Udppriv *upriv;
 
 	upriv = udp->priv;
-	return snprint(buf, len, "InDatagrams: %lud\nNoPorts: %lud\nInErrors: %lud\nOutDatagrams: %lud\n",
+	return snprint(buf, len, "InDatagrams: %llud\nNoPorts: %lud\n"
+		"InErrors: %lud\nOutDatagrams: %llud\n",
 		upriv->ustats.udpInDatagrams,
 		upriv->ustats.udpNoPorts,
 		upriv->ustats.udpInErrors,
@@ -624,12 +570,6 @@
 		upriv->ustats.udpOutDatagrams);
 }
 
-int
-udpgc(Proto *udp)
-{
-	return natgc(udp->ipproto);
-}
-
 void
 udpinit(Fs *fs)
 {
@@ -647,7 +587,6 @@
 	udp->rcv = udpiput;
 	udp->advise = udpadvise;
 	udp->stats = udpstats;
-	udp->gc = udpgc;
 	udp->ipproto = IP_UDPPROTO;
 	udp->nc = Nchans;
 	udp->ptclsize = sizeof(Udpcb);
diff -u a/os/ip//arp.c b/os/ip//arp.c
--- a/os/ip//arp.c
+++ b/os/ip//arp.c
@@ -47,7 +47,8 @@
 
 #define haship(s) ((s)[IPaddrlen-1]%NHASH)
 
-extern int 	ReTransTimer = RETRANS_TIMER;
+int 	ReTransTimer = RETRANS_TIMER;
+
 static void 	rxmitproc(void *v);
 
 void
@@ -57,145 +58,121 @@
 	f->arp->f = f;
 	f->arp->rxmt = nil;
 	f->arp->dropf = f->arp->dropl = nil;
-	kproc("rxmitproc", rxmitproc, f->arp, 0);
+	kproc("rxmitproc", rxmitproc, f->arp);
 }
 
-/*
- *  create a new arp entry for an ip address.
- */
-static Arpent*
-newarp6(Arp *arp, uchar *ip, Ipifc *ifc, int addrxt)
+static void
+freeblistchain(Block *bp)
 {
-	uint t;
-	Block *next, *xp;
-	Arpent *a, *e, *f, **l;
-	Medium *m = ifc->m;
-	int empty;
+	Block *next;
 
-	/* find oldest entry */
-	e = &arp->cache[NCACHE];
-	a = arp->cache;
-	t = a->utime;
-	for(f = a; f < e; f++){
-		if(f->utime < t){
-			t = f->utime;
-			a = f;
-		}
+	while(bp != nil){
+		next = bp->list;
+		freeblist(bp);
+		bp = next;
 	}
+}
 
-	/* dump waiting packets */
-	xp = a->hold;
-	a->hold = nil;
+/* take out of re-transmit chain */
+static Arpent**
+rxmtunchain(Arp *arp, Arpent *a)
+{
+	Arpent **l;
 
-	if(isv4(a->ip)){
-		while(xp){
-			next = xp->list;
-			freeblist(xp);
-			xp = next;
+	for(l = &arp->rxmt; *l != nil; l = &((*l)->nextrxt)){
+		if(*l == a){
+			*l = a->nextrxt;
+			break;
 		}
 	}
-	else {	// queue icmp unreachable for rxmitproc later on, w/o arp lock
-		if(xp){
-			if(arp->dropl == nil) 
-				arp->dropf = xp;
-			else
-				arp->dropl->list = xp;
+	a->nextrxt = nil;
+	return l;
+}
 
-			for(next = xp->list; next; next = next->list)
-				xp = next;
-			arp->dropl = xp;
-			wakeup(&arp->rxmtq);
-		}
-	}
+static void
+cleanarpent(Arp *arp, Arpent *a)
+{
+	Arpent **l;
+	Block *bp;
 
 	/* take out of current chain */
-	l = &arp->hash[haship(a->ip)];
-	for(f = *l; f; f = f->hash){
-		if(f == a){
+	for(l = &arp->hash[haship(a->ip)]; *l != nil; l = &((*l)->hash)){
+		if(*l == a){
 			*l = a->hash;
 			break;
 		}
-		l = &f->hash;
 	}
+	a->hash = nil;
 
-	/* insert into new chain */
-	l = &arp->hash[haship(ip)];
-	a->hash = *l;
-	*l = a;
+	/* dump waiting packets */
+	bp = a->hold;
+	a->hold = nil;
+	if(isv4(a->ip))
+		freeblistchain(bp);
+	else {
+		rxmtunchain(arp, a);
 
-	memmove(a->ip, ip, sizeof(a->ip));
-	a->utime = NOW;
-	a->ctime = 0;
-	a->type = m;
+		/* queue icmp unreachable for rxmitproc later on, w/o arp lock */
+		if(bp != nil){
+			if(arp->dropf == nil)
+				arp->dropf = bp;
+			else
+				arp->dropl->list = bp;
+			arp->dropl = a->last;
 
-	a->rtime = NOW + ReTransTimer;
-	a->rxtsrem = MAX_MULTICAST_SOLICIT;
-	a->ifc = ifc;
-	a->ifcid = ifc->ifcid;
-
-	/* put to the end of re-transmit chain; addrxt is 0 when isv4(a->ip) */
-	if(!ipismulticast(a->ip) && addrxt){
-		l = &arp->rxmt;
-		empty = (*l==nil);
-
-		for(f = *l; f; f = f->nextrxt){
-			if(f == a){
-				*l = a->nextrxt;
-				break;
-			}
-			l = &f->nextrxt;
+			if(bp == arp->dropf)
+				wakeup(&arp->rxmtq);
 		}
-		for(f = *l; f; f = f->nextrxt){
-			l = &f->nextrxt;
-		}
-		*l = a;
-		if(empty) 
-			wakeup(&arp->rxmtq);
 	}
+	a->last = nil;
 
-	a->nextrxt = nil;
+	a->ifc = nil;
+	a->ifcid = 0;
 
-	return a;
-}
+	a->state = 0;
+	a->rxtsrem = 0;
 
-/* called with arp qlocked */
+	a->utime = 0;
+	a->ctime = 0;
 
-void
-cleanarpent(Arp *arp, Arpent *a)
+	memset(a->ip, 0, sizeof(a->ip));
+	memset(a->mac, 0, sizeof(a->mac));
+}
+
+/*
+ *  create a new arp entry for an ip address on ifc.
+ */
+static Arpent*
+newarpent(Arp *arp, uchar *ip, Ipifc *ifc)
 {
-	Arpent *f, **l;
+	Arpent *a, *e, *f, **l;
+	ulong t;
 
-	a->utime = 0;
-	a->ctime = 0;
-	a->type = 0;
-	a->state = 0;
-	
-	/* take out of current chain */
-	l = &arp->hash[haship(a->ip)];
-	for(f = *l; f; f = f->hash){
-		if(f == a){
-			*l = a->hash;
-			break;
+	/* find oldest entry */
+	e = &arp->cache[NCACHE];
+	a = arp->cache;
+	t = a->utime;
+	for(f = a; f < e; f++){
+		if(f->utime < t){
+			t = f->utime;
+			a = f;
 		}
-		l = &f->hash;
 	}
+	cleanarpent(arp, a);
 
-	/* take out of re-transmit chain */
-	l = &arp->rxmt;
-	for(f = *l; f; f = f->nextrxt){
-		if(f == a){
-			*l = a->nextrxt;
-			break;
-		}
-		l = &f->nextrxt;
-	}
-	a->nextrxt = nil;
-	a->hash = nil;
-	a->hold = nil;
-	a->last = nil;
-	a->ifc = nil;
+	ipmove(a->ip, ip);
+	a->ifc = ifc;
+	a->ifcid = ifc->ifcid;
+
+	/* insert into new chain */
+	l = &arp->hash[haship(ip)];
+	a->hash = *l;
+	*l = a;
+
+	return a;
 }
 
+
 /*
  *  fill in the media address if we have it.  Otherwise return an
  *  Arpent that represents the state of the address resolution FSM
@@ -207,7 +184,6 @@
 {
 	int hash;
 	Arpent *a;
-	Medium *type = ifc->m;
 	uchar v6ip[IPaddrlen];
 
 	if(version == V4){
@@ -217,30 +193,28 @@
 
 	qlock(arp);
 	hash = haship(ip);
-	for(a = arp->hash[hash]; a; a = a->hash){
-		if(memcmp(ip, a->ip, sizeof(a->ip)) == 0)
-		if(type == a->type)
+	for(a = arp->hash[hash]; a != nil; a = a->hash){
+		if(a->ifc == ifc && a->ifcid == ifc->ifcid && ipcmp(ip, a->ip) == 0)
 			break;
 	}
-
 	if(a == nil){
-		a = newarp6(arp, ip, ifc, (version != V4));
+		a = newarpent(arp, ip, ifc);
 		a->state = AWAIT;
 	}
 	a->utime = NOW;
 	if(a->state == AWAIT){
 		if(bp != nil){
-			if(a->hold)
-				a->last->list = bp;
-			else
+			bp->list = nil; 
+			if(a->hold == nil)
 				a->hold = bp;
+			else
+				a->last->list = bp;
 			a->last = bp;
-			bp->list = nil; 
 		}
 		return a;		/* return with arp qlocked */
 	}
 
-	memmove(mac, a->mac, a->type->maclen);
+	memmove(mac, a->mac, ifc->m->maclen);
 
 	/* remove old entries */
 	if(NOW - a->ctime > 15*60*1000)
@@ -269,132 +243,82 @@
 arpresolve(Arp *arp, Arpent *a, Medium *type, uchar *mac)
 {
 	Block *bp;
-	Arpent *f, **l;
 
-	if(!isv4(a->ip)){
-		l = &arp->rxmt;
-		for(f = *l; f; f = f->nextrxt){
-			if(f == a){
-				*l = a->nextrxt;
-				break;
-			}
-			l = &f->nextrxt;
-		}
-	}
-
 	memmove(a->mac, mac, type->maclen);
-	a->type = type;
+	if(a->state == AWAIT && !isv4(a->ip)){
+		rxmtunchain(arp, a);
+		a->rxtsrem = 0;
+	}
 	a->state = AOK;
-	a->utime = NOW;
+	a->ctime = a->utime = NOW;
 	bp = a->hold;
-	a->hold = nil;
+	a->hold = a->last = nil;
 	qunlock(arp);
 
 	return bp;
 }
 
-void
-arpenter(Fs *fs, int version, uchar *ip, uchar *mac, int n, int refresh)
+int
+arpenter(Fs *fs, int version, uchar *ip, uchar *mac, int n, uchar *ia, Ipifc *ifc, int refresh)
 {
-	Arp *arp;
-	Route *r;
-	Arpent *a, *f, **l;
-	Ipifc *ifc;
-	Medium *type;
-	Block *bp, *next;
 	uchar v6ip[IPaddrlen];
+	Block *bp, *next;
+	Arpent *a;
+	Route *r;
+	Arp *arp;
 
-	arp = fs->arp;
+	if(ifc->m == nil || ifc->m->maclen != n || ifc->m->maclen == 0)
+		return -1;
 
-	if(n != 6){
-//		print("arp: len = %d\n", n);
-		return;
-	}
-
 	switch(version){
 	case V4:
-		r = v4lookup(fs, ip, nil);
+		r = v4lookup(fs, ip, ia, nil);
 		v4tov6(v6ip, ip);
 		ip = v6ip;
 		break;
 	case V6:
-		r = v6lookup(fs, ip, nil);
+		r = v6lookup(fs, ip, ia, nil);
 		break;
 	default:
 		panic("arpenter: version %d", version);
-		return;	/* to supress warnings */
+		return -1;	/* to suppress warnings */
 	}
 
-	if(r == nil){
-//		print("arp: no route for entry\n");
-		return;
-	}
+	if(r == nil || r->ifc != ifc || (r->type & (Rbcast|Rmulti)) != 0)
+		return -1;
 
-	ifc = r->ifc;
-	type = ifc->m;
-
+	arp = fs->arp;
 	qlock(arp);
-	for(a = arp->hash[haship(ip)]; a; a = a->hash){
-		if(a->type != type || (a->state != AWAIT && a->state != AOK))
+	for(a = arp->hash[haship(ip)]; a != nil; a = a->hash){
+		if(a->ifc != ifc || a->ifcid != ifc->ifcid)
 			continue;
-
 		if(ipcmp(a->ip, ip) == 0){
-			a->state = AOK;
-			memmove(a->mac, mac, type->maclen);
-
-			if(version == V6){
-				/* take out of re-transmit chain */
-				l = &arp->rxmt;
-				for(f = *l; f; f = f->nextrxt){
-					if(f == a){
-						*l = a->nextrxt;
-						break;
-					}
-					l = &f->nextrxt;
-				}
-			}
-
-			a->ifc = ifc;
-			a->ifcid = ifc->ifcid;
-			bp = a->hold;
-			a->hold = nil;
 			if(version == V4)
 				ip += IPv4off;
-			a->utime = NOW;
-			a->ctime = a->utime;
-			qunlock(arp);
-
-			while(bp){
+			bp = arpresolve(arp, a, ifc->m, mac);	/* unlocks arp */
+			for(; bp != nil; bp = next){
 				next = bp->list;
-				if(ifc != nil){
-					if(waserror()){
-						runlock(ifc);
-						nexterror();
-					}
-					rlock(ifc);
-					if(ifc->m != nil)
-						ifc->m->bwrite(ifc, bp, version, ip);
-					else
-						freeb(bp);
-					runlock(ifc);
-					poperror();
-				} else
-					freeb(bp);
-				bp = next;
+				bp->list = nil;
+				if(waserror()){
+					freeblistchain(next);
+					break;
+				}
+				ipifcoput(ifc, bp, version, ip);
+				poperror();
 			}
-			return;
+			return 1;
 		}
 	}
 
 	if(refresh == 0){
-		a = newarp6(arp, ip, ifc, 0);
+		a = newarpent(arp, ip, ifc);
 		a->state = AOK;
-		a->type = type;
-		a->ctime = NOW;
-		memmove(a->mac, mac, type->maclen);
+		a->ctime = a->utime = NOW;
+		memmove(a->mac, mac, n);
 	}
-
 	qunlock(arp);
+
+	return refresh == 0;
 }
 
 int
@@ -401,13 +325,12 @@
 arpwrite(Fs *fs, char *s, int len)
 {
 	int n;
-	Route *r;
 	Arp *arp;
-	Block *bp;
-	Arpent *a, *fl, **l;
+	Arpent *a, *x;
 	Medium *m;
-	char *f[4], buf[256];
-	uchar ip[IPaddrlen], mac[MAClen];
+	Ipifc *ifc;
+	char *f[5], buf[256];
+	uchar ip[IPaddrlen], ia[IPaddrlen], mac[MAClen];
 
 	arp = fs->arp;
 
@@ -420,7 +343,7 @@
 	if(len > 0 && buf[len-1] == '\n')
 		buf[len-1] = 0;
 
-	n = getfields(buf, f, 4, 1, " ");
+	n = getfields(buf, f, nelem(f), 1, " ");
 	if(strcmp(f[0], "flush") == 0){
 		qlock(arp);
 		for(a = arp->cache; a < &arp->cache[NCACHE]; a++){
@@ -427,19 +350,20 @@
 			memset(a->ip, 0, sizeof(a->ip));
 			memset(a->mac, 0, sizeof(a->mac));
 			a->hash = nil;
+			a->nextrxt = nil;
+			a->ifc = nil;
+			a->ifcid = 0;
 			a->state = 0;
+			a->rxtsrem = 0;
+			a->ctime = 0;
 			a->utime = 0;
-			while(a->hold != nil){
-				bp = a->hold->list;
-				freeblist(a->hold);
-				a->hold = bp;
-			}
+			freeblistchain(a->hold);
+			a->hold = a->last = nil;
 		}
 		memset(arp->hash, 0, sizeof(arp->hash));
-// clear all pkts on these lists (rxmt, dropf/l)
+		freeblistchain(arp->dropf);
+		arp->dropf = arp->dropl = nil;
 		arp->rxmt = nil;
-		arp->dropf = nil;
-		arp->dropl = nil;
 		qunlock(arp);
 	} else if(strcmp(f[0], "add") == 0){
 		switch(n){
@@ -446,64 +370,53 @@
 		default:
 			error(Ebadarg);
 		case 3:
-			parseip(ip, f[1]);
-			if(isv4(ip))
-				r = v4lookup(fs, ip+IPv4off, nil);
-			else
-				r = v6lookup(fs, ip, nil);
-			if(r == nil)
-				error("Destination unreachable");
-			m = r->ifc->m;
-			n = parsemac(mac, f[2], m->maclen);
+			if(parseip(ip, f[1]) == -1)
+				error(Ebadip);
+			if((n = parsemac(mac, f[2], sizeof(mac))) <= 0)
+				error(Ebadarp);
+			findlocalip(fs, ia, ip);
 			break;
 		case 4:
 			m = ipfindmedium(f[1]);
-			if(m == nil)
+			if(m == nil || m->maclen == 0)
 				error(Ebadarp);
-			parseip(ip, f[2]);
-			n = parsemac(mac, f[3], m->maclen);
+			if(parseip(ip, f[2]) == -1)
+				error(Ebadip);
+			if((n = parsemac(mac, f[3], sizeof(mac))) != m->maclen)
+				error(Ebadarp);
+			findlocalip(fs, ia, ip);
 			break;
+		case 5:
+			m = ipfindmedium(f[1]);
+			if(m == nil || m->maclen == 0)
+				error(Ebadarp);
+			if(parseip(ip, f[2]) == -1)
+				error(Ebadip);
+			if((n = parsemac(mac, f[3], sizeof(mac))) != m->maclen)
+				error(Ebadarp);
+			if(parseip(ia, f[4]) == -1)
+				error(Ebadip);
+			break;
 		}
-
-		if(m->ares == nil)
-			error(Ebadarp);
-
-		m->ares(fs, V6, ip, mac, n, 0);
+		if((ifc = findipifc(fs, ia, ia, Runi)) == nil)
+			error("no interface");
+		rlock(ifc);
+		if(!ipv6local(ifc, ia, 0, ip) || arpenter(fs, V6, ip, mac, n, ia, ifc, 0) < 0){
+			runlock(ifc);
+			error("destination unreachable");
+		}
+		runlock(ifc);
 	} else if(strcmp(f[0], "del") == 0){
-		if(n != 2)
+		if (n != 2)
 			error(Ebadarg);
-
-		parseip(ip, f[1]);
+		if (parseip(ip, f[1]) == -1)
+			error(Ebadip);
 		qlock(arp);
-
-		l = &arp->hash[haship(ip)];
-		for(a = *l; a; a = a->hash){
-			if(memcmp(ip, a->ip, sizeof(a->ip)) == 0){
-				*l = a->hash;
-				break;
-			}
-			l = &a->hash;
+		for(a = arp->hash[haship(ip)]; a != nil; a = x){
+			x = a->hash;
+			if(ipcmp(ip, a->ip) == 0)
+				cleanarpent(arp, a);
 		}
-	
-		if(a){
-			/* take out of re-transmit chain */
-			l = &arp->rxmt;
-			for(fl = *l; fl; fl = fl->nextrxt){
-				if(fl == a){
-					*l = a->nextrxt;
-					break;
-				}
-				l = &fl->nextrxt;
-			}
-
-			a->nextrxt = nil;
-			a->hash = nil;
-			a->hold = nil;
-			a->last = nil;
-			a->ifc = nil;
-			memset(a->ip, 0, sizeof(a->ip));
-			memset(a->mac, 0, sizeof(a->mac));
-		}
 		qunlock(arp);
 	} else
 		error(Ebadarp);
@@ -511,13 +424,6 @@
 	return len;
 }
 
-enum
-{
-	Alinelen=	90,
-};
-
-char *aformat = "%-6.6s %-8.8s %-40.40I %-32.32s\n";
-
 static void
 convmac(char *p, uchar *mac, int n)
 {
@@ -526,136 +432,136 @@
 }
 
 int
-arpread(Arp *arp, char *p, ulong offset, int len)
+arpread(Arp *arp, char *s, ulong offset, int len)
 {
+	char mac[2*MAClen+1], *state, *mname, *p;
+	uchar ip[IPaddrlen], ia[IPaddrlen];
+	Ipifc *ifc;
 	Arpent *a;
-	int n;
-	char mac[2*MAClen+1];
+	long n, o;
 
-	if(offset % Alinelen)
-		return 0;
-
-	offset = offset/Alinelen;
-	len = len/Alinelen;
-
-	n = 0;
+	p = s;
+	o = -offset;
 	for(a = arp->cache; len > 0 && a < &arp->cache[NCACHE]; a++){
-		if(a->state == 0)
+		if(a->state == 0 || (ifc = a->ifc) == nil)
 			continue;
-		if(offset > 0){
-			offset--;
+
+		rlock(ifc);
+		qlock(arp);
+		state = arpstate[a->state];
+		ipmove(ip, a->ip);
+		if(ifc->m == nil || a->ifcid != ifc->ifcid || !ipv6local(ifc, ia, 0, ip)){
+			qunlock(arp);
+			runlock(ifc);
 			continue;
 		}
-		len--;
-		qlock(arp);
-		convmac(mac, a->mac, a->type->maclen);
-		n += sprint(p+n, aformat, a->type->name, arpstate[a->state], a->ip, mac);
+		mname = ifc->m->name;
+		convmac(mac, a->mac, ifc->m->maclen);
 		qunlock(arp);
+		runlock(ifc);
+
+		n = snprint(up->genbuf, sizeof up->genbuf,
+			"%-6.6s %-4.4s %-40.40I %-16.16s %I\n",
+			mname, state, ip, mac, ia);
+		o += n;
+		if(o <= 0)
+			continue;
+		if(n > len)
+			break;
+		memmove(p, up->genbuf, n);
+		len -= n;
+		p += n;
 	}
 
-	return n;
+	return p - s;
 }
 
-extern int
-rxmitsols(Arp *arp)
+void
+ndpsendsol(Fs *f, Ipifc *ifc, Arpent *a)
 {
-	uint sflag;
-	Block *next, *xp;
-	Arpent *a, *b, **l;
-	Fs *f;
-	uchar ipsrc[IPaddrlen];
-	Ipifc *ifc = nil;
-	long nrxt;
+	uchar targ[IPaddrlen], src[IPaddrlen];
+	Arpent **l;
 
-	qlock(arp);
-	f = arp->f;
+	a->ctime = NOW;
+	if(a->rxtsrem == 0)
+		a->rxtsrem = MAX_MULTICAST_SOLICIT;
+	else
+		a->rxtsrem--;
 
-	a = arp->rxmt;
-	if(a==nil){
-		nrxt = 0;
-		goto dodrops; 		//return nrxt;
-	}
-	nrxt = a->rtime - NOW;
-	if(nrxt > 3*ReTransTimer/4) 
-		goto dodrops; 		//return nrxt;
+	/* put on end of re-transmit chain */
+	for(l = rxmtunchain(f->arp, a); *l != nil; l = &(*l)->nextrxt)
+		;
+	*l = a;
 
-	for(; a; a = a->nextrxt){
-		ifc = a->ifc;
-		assert(ifc != nil);
-		if((a->rxtsrem <= 0) || !(canrlock(ifc)) || (a->ifcid != ifc->ifcid)){
-			xp = a->hold;
-			a->hold = nil;
+	if(l == &f->arp->rxmt)
+		wakeup(&f->arp->rxmtq);
 
-			if(xp){
-				if(arp->dropl == nil) 
-					arp->dropf = xp;
-				else
-					arp->dropl->list = xp;
-			}
+	/* try to use source address of original packet */
+	ipmove(targ, a->ip);
+	if(a->last != nil){
+		ipmove(src, ((Ip6hdr*)a->last->rp)->src);
+		arprelease(f->arp, a);
 
-			cleanarpent(arp, a);
-		}
-		else
-			break;
+		if(iplocalonifc(ifc, src) != nil || ipproxyifc(f, ifc, src))
+			goto send;
+	} else {
+		arprelease(f->arp, a);
 	}
-	if(a == nil)
-		goto dodrops;
+	if(!ipv6local(ifc, src, 0, targ))
+		return;
+send:
+	if(!waserror()){
+		icmpns(f, src, SRC_UNI, targ, TARG_MULTI, ifc->mac);
+		poperror();
+	}
+}
 
+static void
+rxmitsols(Arp *arp)
+{
+	Block *next, *bp;
+	Arpent *a;
+	Ipifc *ifc;
+	Route *r;
 
-	qunlock(arp);	/* for icmpns */
-	if((sflag = ipv6anylocal(ifc, ipsrc)) != SRC_UNSPEC) 
-		icmpns(f, ipsrc, sflag, a->ip, TARG_MULTI, ifc->mac); 
-
-	runlock(ifc);
-	qlock(arp);	
-
-	/* put to the end of re-transmit chain */
-	l = &arp->rxmt;
-	for(b = *l; b; b = b->nextrxt){
-		if(b == a){
-			*l = a->nextrxt;
-			break;
+	qlock(arp);
+	while((a = arp->rxmt) != nil && NOW - a->ctime > 3*ReTransTimer/4){
+		if(a->rxtsrem > 0 && (ifc = a->ifc) != nil && canrlock(ifc)){
+			if(a->ifcid == ifc->ifcid){
+				ndpsendsol(arp->f, ifc, a);	/* unlocks arp */
+				runlock(ifc);
+				qlock(arp);
+				continue;
+			}
+			runlock(ifc);
 		}
-		l = &b->nextrxt;
+		cleanarpent(arp, a);
 	}
-	for(b = *l; b; b = b->nextrxt){
-		l = &b->nextrxt;
-	}
-	*l = a;
-	a->rxtsrem--;
-	a->nextrxt = nil;
-	a->rtime = NOW + ReTransTimer;
-
-	a = arp->rxmt;
-	if(a==nil)
-		nrxt = 0;
-	else 
-		nrxt = a->rtime - NOW;
-
-dodrops:
-	xp = arp->dropf;
-	arp->dropf = nil;
-	arp->dropl = nil;
+	bp = arp->dropf;
+	arp->dropf = arp->dropl = nil;
 	qunlock(arp);
 
-	for(; xp; xp = next){
-		next = xp->list;
-		icmphostunr(f, ifc, xp, icmp6_adr_unreach, 1);
+	for(; bp != nil; bp = next){
+		next = bp->list;
+		bp->list = nil;
+		r = v6lookup(arp->f, ((Ip6hdr*)bp->rp)->src, ((Ip6hdr*)bp->rp)->dst, nil);
+		if(r != nil && (ifc = r->ifc) != nil && canrlock(ifc)){
+			if(!waserror()){
+				icmphostunr6(arp->f, ifc, bp, Icmp6_adr_unreach, (r->type & Runi) != 0);
+				poperror();
+			}
+			runlock(ifc);
+		}
+		freeblist(bp);
 	}
-
-	return nrxt;
-
 }
 
 static int
 rxready(void *v)
 {
-	Arp *arp = (Arp *) v;
-	int x;
+	Arp *arp = (Arp *)v;
 
-	x = ((arp->rxmt != nil) || (arp->dropf != nil));
-
-	return x;
+	return arp->rxmt != nil || arp->dropf != nil;
 }
 
 static void
@@ -662,20 +568,15 @@
 rxmitproc(void *v)
 {
 	Arp *arp = v;
-	long wakeupat;
 
 	arp->rxmitp = up;
-	//print("arp rxmitproc started\n");
 	if(waserror()){
-		arp->rxmitp = 0;
+		arp->rxmitp = nil;
 		pexit("hangup", 1);
 	}
 	for(;;){
-		wakeupat = rxmitsols(arp);
-		if(wakeupat == 0) 
-			sleep(&arp->rxmtq, rxready, v); 
-		else if(wakeupat > ReTransTimer/4) 
-			tsleep(&arp->rxmtq, return0, 0, wakeupat); 
+		sleep(&arp->rxmtq, rxready, v);
+		rxmitsols(arp);
+		tsleep(&arp->rxmtq, return0, nil, ReTransTimer/4);
 	}
 }
-
diff -u a/os/ip//devip.c b/os/ip//devip.c
--- a/os/ip//devip.c
+++ b/os/ip//devip.c
@@ -14,7 +14,6 @@
 	Qbootp,
 	Qndb,
 	Qiproute,
-	Qiprouter,
 	Qipselftab,
 	Qlog,
 
@@ -43,11 +42,11 @@
 	Maskproto=	(1<<Logproto)-1,
 	Shiftproto=	Logtype + Logconv,
 
-	Nfs=		32,
+	Nfs=		128,
 };
-#define TYPE(x) 	( ((u32)(x).path) & Masktype )
-#define CONV(x) 	( (((u32)(x).path) >> Shiftconv) & Maskconv )
-#define PROTO(x) 	( (((u32)(x).path) >> Shiftproto) & Maskproto )
+#define TYPE(x) 	( ((ulong)(x).path) & Masktype )
+#define CONV(x) 	( (((ulong)(x).path) >> Shiftconv) & Maskconv )
+#define PROTO(x) 	( (((ulong)(x).path) >> Shiftproto) & Maskproto )
 #define QID(p, c, y) 	( ((p)<<(Shiftproto)) | ((c)<<Shiftconv) | (y) )
 
 static char network[] = "network";
@@ -58,8 +57,7 @@
 
 extern	void nullmediumlink(void);
 extern	void pktmediumlink(void);
-static	long ndbwrite(Fs*, char*, ulong, int);
-extern void    closeconv(Conv*);
+	long ndbwrite(Fs *f, char *a, ulong off, int n);
 
 static int
 ip3gen(Chan *c, int i, Dir *dp)
@@ -121,7 +119,7 @@
 		mkqid(&q, QID(PROTO(c->qid), 0, Qstats), 0, QTFILE);
 		devdir(c, q, "stats", 0, network, 0444, dp);
 		return 1;
-	}	
+	}
 	return -1;
 }
 
@@ -144,11 +142,10 @@
 		return -1;
 	case Qarp:
 		p = "arp";
+		prot = 0664;
 		break;
 	case Qbootp:
 		p = "bootp";
-		if(bootp == nil)
-			return 0;
 		break;
 	case Qndb:
 		p = "ndb";
@@ -157,14 +154,12 @@
 		break;
 	case Qiproute:
 		p = "iproute";
+		prot = 0664;
 		break;
 	case Qipselftab:
 		p = "ipselftab";
 		prot = 0444;
 		break;
-	case Qiprouter:
-		p = "iprouter";
-		break;
 	case Qlog:
 		p = "log";
 		break;
@@ -188,7 +183,7 @@
 	case Qtopdir:
 		if(s == DEVDOTDOT){
 			mkqid(&q, QID(0, 0, Qtopdir), 0, QTDIR);
-			sprint(up->genbuf, "#I%ud", c->dev);
+			snprint(up->genbuf, sizeof up->genbuf, "#I%lud", c->dev);
 			devdir(c, q, up->genbuf, 0, network, 0555, dp);
 			return 1;
 		}
@@ -206,19 +201,18 @@
 	case Qndb:
 	case Qlog:
 	case Qiproute:
-	case Qiprouter:
 	case Qipselftab:
 		return ip1gen(c, TYPE(c->qid), dp);
 	case Qprotodir:
 		if(s == DEVDOTDOT){
 			mkqid(&q, QID(0, 0, Qtopdir), 0, QTDIR);
-			sprint(up->genbuf, "#I%ud", c->dev);
+			snprint(up->genbuf, sizeof up->genbuf, "#I%lud", c->dev);
 			devdir(c, q, up->genbuf, 0, network, 0555, dp);
 			return 1;
 		}
 		if(s < f->p[PROTO(c->qid)]->ac) {
 			cv = f->p[PROTO(c->qid)]->conv[s];
-			sprint(up->genbuf, "%d", s);
+			snprint(up->genbuf, sizeof up->genbuf, "%d", s);
 			mkqid(&q, QID(PROTO(c->qid), s, Qconvdir), 0, QTDIR);
 			devdir(c, q, up->genbuf, 0, cv->owner, 0555, dp);
 			return 1;
@@ -262,45 +256,14 @@
 	fmtinstall('M', eipfmt);
 }
 
-static Fs*
-ipgetfs(int dev)
-{
-	extern void (*ipprotoinit[])(Fs*);
-	Fs *f;
-	int i;
-
-	if(dev >= Nfs)
-		return nil;
-
-	qlock(&fslock);
-	if(ipfs[dev] == nil){
-		f = smalloc(sizeof(Fs));
-		ip_init(f);
-		arpinit(f);
-		netloginit(f);
-		for(i = 0; ipprotoinit[i]; i++)
-			ipprotoinit[i](f);
-		f->dev = dev;
-		ipfs[dev] = f;
-	}
-	qunlock(&fslock);
-
-	return ipfs[dev];
-}
-
 IPaux*
 newipaux(char *owner, char *tag)
 {
 	IPaux *a;
-	int n;
 
 	a = smalloc(sizeof(*a));
 	kstrdup(&a->owner, owner);
-	memset(a->tag, ' ', sizeof(a->tag));
-	n = strlen(tag);
-	if(n > sizeof(a->tag))
-		n = sizeof(a->tag);
-	memmove(a->tag, tag, n);
+	strncpy(a->tag, tag, sizeof(a->tag));
 	return a;
 }
 
@@ -310,13 +273,29 @@
 ipattach(char* spec)
 {
 	Chan *c;
-	int dev;
+	ulong dev;
 
-	dev = atoi(spec);
+	dev = strtoul(spec, nil, 10);
 	if(dev >= Nfs)
-		error("bad specification");
+		error(Enodev);
 
-	ipgetfs(dev);
+	qlock(&fslock);
+	if(ipfs[dev] == nil){
+		extern void (*ipprotoinit[])(Fs*);
+		Fs *f;
+		int i;
+
+		f = smalloc(sizeof(Fs));
+		ip_init(f);
+		arpinit(f);
+		netloginit(f);
+		for(i = 0; ipprotoinit[i]; i++)
+			ipprotoinit[i](f);
+		f->dev = dev;
+		ipfs[dev] = f;
+	}
+	qunlock(&fslock);
+
 	c = devattach('I', spec);
 	mkqid(&c->qid, QID(0, 0, Qtopdir), 0, QTDIR);
 	c->dev = dev;
@@ -327,7 +306,7 @@
 }
 
 static Walkqid*
-ipwalk(Chan* c, Chan *nc, char **name, s32 nname)
+ipwalk(Chan* c, Chan *nc, char **name, int nname)
 {
 	IPaux *a = c->aux;
 	Walkqid* w;
@@ -338,8 +317,9 @@
 	return w;
 }
 
-static s32
-ipstat(Chan* c, uchar* db, s32 n)
+
+static int
+ipstat(Chan* c, uchar* db, int n)
 {
 	return devstat(c, db, n, nil, 0, ipgen);
 }
@@ -360,7 +340,7 @@
 };
 
 static Chan*
-ipopen(Chan* c, u32 omode)
+ipopen(Chan* c, int omode)
 {
 	Conv *cv, *nc;
 	Proto *p;
@@ -375,7 +355,7 @@
 	default:
 		break;
 	case Qndb:
-		if(omode & (OWRITE|OTRUNC) && !iseve())
+		if((omode & (OWRITE|OTRUNC)) != 0 && !iseve())
 			error(Eperm);
 		if((omode & (OWRITE|OTRUNC)) == (OWRITE|OTRUNC))
 			f->ndb[0] = 0;
@@ -383,10 +363,10 @@
 	case Qlog:
 		netlogopen(f);
 		break;
-	case Qiprouter:
-		iprouteropen(f);
-		break;
 	case Qiproute:
+	case Qarp:
+		if(omode != OREAD && !iseve())
+			error(Eperm);
 		break;
 	case Qtopdir:
 	case Qprotodir:
@@ -412,13 +392,8 @@
 	case Qclone:
 		p = f->p[PROTO(c->qid)];
 		qlock(p);
-		if(waserror()){
-			qunlock(p);
-			nexterror();
-		}
 		cv = Fsprotoclone(p, ATTACHER(c));
 		qunlock(p);
-		poperror();
 		if(cv == nil) {
 			error(Enodev);
 			break;
@@ -437,15 +412,12 @@
 			qunlock(p);
 			nexterror();
 		}
-		if((perm & (cv->perm>>6)) != perm) {
-			if(strcmp(ATTACHER(c), cv->owner) != 0)
-				error(Eperm);
-		 	if((perm & cv->perm) != perm)
-				error(Eperm); 
+		if(strcmp(ATTACHER(c), cv->owner) == 0)
+			perm <<= 6;
+		if((perm & cv->perm) != perm && !iseve())
+			error(Eperm);
 
-		}
-		cv->inuse++;
-		if(cv->inuse == 1){
+		if(++cv->inuse == 1){
 			kstrdup(&cv->owner, ATTACHER(c));
 			cv->perm = 0660;
 		}
@@ -455,24 +427,26 @@
 		break;
 	case Qlisten:
 		cv = f->p[PROTO(c->qid)]->conv[CONV(c->qid)];
-		if((perm & (cv->perm>>6)) != perm) {
-			if(strcmp(ATTACHER(c), cv->owner) != 0)
-				error(Eperm);
-		 	if((perm & cv->perm) != perm)
-				error(Eperm); 
-
+		qlock(cv);
+		if(waserror()){
+			qunlock(cv);
+			nexterror();
 		}
+		if(strcmp(ATTACHER(c), cv->owner) == 0)
+			perm <<= 6;
+		if((perm & cv->perm) != perm && !iseve())
+			error(Eperm);
 
 		if(cv->state != Announced)
 			error("not announced");
 
+		cv->inuse++;
+		qunlock(cv);
+		poperror();
 		if(waserror()){
 			closeconv(cv);
 			nexterror();
 		}
-		qlock(cv);
-		cv->inuse++;
-		qunlock(cv);
 
 		nc = nil;
 		while(nc == nil) {
@@ -494,7 +468,6 @@
 			if(nc != nil){
 				cv->incall = nc->next;
 				mkqid(&c->qid, QID(PROTO(c->qid), nc->x, Qctl), 0, QTFILE);
-				kstrdup(&cv->owner, ATTACHER(c));
 			}
 			qunlock(cv);
 
@@ -511,13 +484,25 @@
 	return c;
 }
 
-static s32
-ipwstat(Chan *c, uchar *dp, s32 n)
+static Chan*
+ipcreate(Chan*, char*, int, ulong)
 {
-	Dir *d;
+	error(Eperm);
+	return 0;
+}
+
+static void
+ipremove(Chan*)
+{
+	error(Eperm);
+}
+
+static int
+ipwstat(Chan *c, uchar *dp, int n)
+{
+	Dir *dir;
 	Conv *cv;
 	Fs *f;
-	Proto *p;
 
 	f = ipfs[c->dev];
 	switch(TYPE(c->qid)) {
@@ -529,28 +514,40 @@
 		break;
 	}
 
-	d = smalloc(sizeof(*d)+n);
+	dir = smalloc(sizeof(Dir)+n);
 	if(waserror()){
-		free(d);
+		free(dir);
 		nexterror();
 	}
-	n = convM2D(dp, n, d, (char*)&d[1]);
+	n = convM2D(dp, n, &dir[0], (char*)&dir[1]);
 	if(n == 0)
 		error(Eshortstat);
-	p = f->p[PROTO(c->qid)];
-	cv = p->conv[CONV(c->qid)];
-	if(!iseve() && strcmp(ATTACHER(c), cv->owner) != 0)
+
+	cv = f->p[PROTO(c->qid)]->conv[CONV(c->qid)];
+	qlock(cv);
+	if(waserror()){
+		qunlock(cv);
+		nexterror();
+	}
+	if(strcmp(ATTACHER(c), cv->owner) != 0 && !iseve())
 		error(Eperm);
-	if(!emptystr(d->uid))
-		kstrdup(&cv->owner, d->uid);
-	if(d->mode != ~0UL)
-		cv->perm = d->mode & 0777;
+	if(!emptystr(dir->uid)){
+		if(strcmp(dir->uid, commonuser()) != 0 && !iseve())
+			error(Eperm);
+		kstrdup(&cv->owner, dir->uid);
+	}
+	if(dir->mode != ~0UL)
+		cv->perm = dir->mode & 0666;
+	qunlock(cv);
 	poperror();
-	free(d);
+
+	free(dir);
+	poperror();
+
 	return n;
 }
 
-extern void
+void
 closeconv(Conv *cv)
 {
 	Conv *nc;
@@ -564,7 +561,7 @@
 	}
 
 	/* close all incoming calls since no listen will ever happen */
-	for(nc = cv->incall; nc; nc = cv->incall){
+	for(nc = cv->incall; nc != nil; nc = cv->incall){
 		cv->incall = nc->next;
 		closeconv(nc);
 	}
@@ -576,9 +573,9 @@
 	while((mp = cv->multi) != nil)
 		ipifcremmulti(cv, mp->ma, mp->ia);
 
-	cv->r = nil;
-	cv->rgen = 0;
-	cv->p->close(cv);
+	if(cv->p->close != nil)
+		(*cv->p->close)(cv);
+
 	cv->state = Idle;
 	qunlock(cv);
 }
@@ -596,10 +593,6 @@
 		if(c->flag & COPEN)
 			netlogclose(f);
 		break;
-	case Qiprouter:
-		if(c->flag & COPEN)
-			iprouterclose(f);
-		break;
 	case Qdata:
 	case Qctl:
 	case Qerr:
@@ -620,13 +613,13 @@
 	Statelen=	32*1024,
 };
 
-static s32
-ipread(Chan *ch, void *a, s32 n, s64 off)
+static long
+ipread(Chan *ch, void *a, long n, vlong off)
 {
 	Conv *c;
 	Proto *x;
 	char *buf, *p;
-	s32 rv;
+	long rv;
 	Fs *f;
 	ulong offset = off;
 
@@ -648,21 +641,22 @@
 		return readstr(offset, a, n, f->ndb);
 	case Qiproute:
 		return routeread(f, a, offset, n);
-	case Qiprouter:
-		return iprouterread(f, a, n);
 	case Qipselftab:
 		return ipselftabread(f, a, offset, n);
 	case Qlog:
 		return netlogread(f, a, offset, n);
 	case Qctl:
-		sprint(up->genbuf, "%ud", CONV(ch->qid));
-		return readstr(offset, p, n, up->genbuf);
+		buf = smalloc(16);
+		snprint(buf, 16, "%lud", CONV(ch->qid));
+		rv = readstr(offset, p, n, buf);
+		free(buf);
+		return rv;
 	case Qremote:
 		buf = smalloc(Statelen);
 		x = f->p[PROTO(ch->qid)];
 		c = x->conv[CONV(ch->qid)];
 		if(x->remote == nil) {
-			sprint(buf, "%I!%d\n", c->raddr, c->rport);
+			snprint(buf, Statelen, "%I!%d\n", c->raddr, c->rport);
 		} else {
 			(*x->remote)(c, buf, Statelen-2);
 		}
@@ -674,7 +668,7 @@
 		x = f->p[PROTO(ch->qid)];
 		c = x->conv[CONV(ch->qid)];
 		if(x->local == nil) {
-			sprint(buf, "%I!%d\n", c->laddr, c->lport);
+			snprint(buf, Statelen, "%I!%d\n", c->laddr, c->lport);
 		} else {
 			(*x->local)(c, buf, Statelen-2);
 		}
@@ -711,7 +705,7 @@
 }
 
 static Block*
-ipbread(Chan* ch, s32 n, u32 offset)
+ipbread(Chan* ch, long n, ulong offset)
 {
 	Conv *c;
 	Proto *x;
@@ -740,7 +734,7 @@
 /*
  *  set a local port making sure the quad of raddr,rport,laddr,lport is unique
  */
-static char*
+char*
 setluniqueport(Conv* c, int lport)
 {
 	Proto *p;
@@ -771,51 +765,63 @@
 }
 
 /*
+ * is lport in use by anyone?
+ */
+static int
+lportinuse(Proto *p, ushort lport)	/* 1 if a conversation of p has local port lport, else 0 */
+{
+	Conv **cp, **ep;
+
+	for(cp = p->conv, ep = cp + p->nc; cp < ep && *cp != nil; cp++)	/* scan ends at first nil slot */
+		if((*cp)->lport == lport)
+			return 1;
+	return 0;
+}
+
+/*
  *  pick a local port and set it
  */
-extern void
+char *
 setlport(Conv* c)
 {
 	Proto *p;
-	ushort *pp;
-	int x, found;
+	int i, port;
 
 	p = c->p;
-	if(c->restricted)
-		pp = &p->nextrport;
-	else
-		pp = &p->nextport;
 	qlock(p);
-	for(;;(*pp)++){
+	if(c->restricted){
+		/* Restricted ports cycle between 600 and 1024. */
+		for(i=0; i<1024-600; i++){
+			if(p->nextrport >= 1024 || p->nextrport < 600)
+				p->nextrport = 600;
+			port = p->nextrport++;
+			if(!lportinuse(p, port))
+				goto chosen;
+		}
+	}else{
 		/*
-		 * Fsproto initialises p->nextport to 0 and the restricted
-		 * ports (p->nextrport) to 600.
-		 * Restricted ports must lie between 600 and 1024.
-		 * For the initial condition or if the unrestricted port number
-		 * has wrapped round, select a random port between 5000 and 1<<15
-		 * to start at.
+		 * Unrestricted ports are chosen randomly
+		 * between 2^15 and 2^16.  There are at most
+		 * 4*Nchan = 4096 ports in use at any given time,
+		 * so even in the worst case, a random probe has a
+		 * 1 - 4096/2^15 = 87% chance of success.
+		 * If 64 successive probes fail, there is a bug somewhere
+		 * (or a once in 10^58 event has happened, but that's
+		 * less likely than a venti collision).
 		 */
-		if(c->restricted){
-			if(*pp >= 1024)
-				*pp = 600;
+		for(i=0; i<64; i++){
+			port = (1<<15) + nrand(1<<15);
+			if(!lportinuse(p, port))
+				goto chosen;
 		}
-		else while(*pp < 5000)
-			*pp = nrand(1<<15);
-
-		found = 0;
-		for(x = 0; x < p->nc; x++){
-			if(p->conv[x] == nil)
-				break;
-			if(p->conv[x]->lport == *pp){
-				found = 1;
-				break;
-			}
-		}
-		if(found == 0)
-			break;
 	}
-	c->lport = (*pp)++;
 	qunlock(p);
+	return "no ports available";
+
+chosen:
+	c->lport = port;
+	qunlock(p);
+	return nil;
 }
 
 /*
@@ -822,7 +828,7 @@
  *  set a local address and port from a string of the form
  *	[address!]port[!r]
  */
-static char*
+char*
 setladdrport(Conv* c, char* str, int announcing)
 {
 	char *p;
@@ -830,8 +836,6 @@
 	ushort lport;
 	uchar addr[IPaddrlen];
 
-	rv = nil;
-
 	/*
 	 *  ignore restricted part if it exists.  it's
 	 *  meaningless on local ports.
@@ -854,8 +858,9 @@
 		if(strcmp(str, "*") == 0)
 			ipmove(c->laddr, IPnoaddr);
 		else {
-			parseip(addr, str);
-			if(ipforme(c->p->f, addr))
+			if(parseip(addr, str) == -1)
+				return Ebadip;
+			if(ipforme(c->p->f, addr) != 0 || ipismulticast(addr))
 				ipmove(c->laddr, addr);
 			else
 				return "not a local IP address";
@@ -869,9 +874,13 @@
 		return setluniqueport(c, 0);
 	}
 
-	lport = atoi(p);
+	str = p;
+	lport = strtol(str, &p, 10);
+	if(p <= str || strchr("!", *p) == nil)
+		return "bad numeric port";
+
 	if(lport <= 0)
-		setlport(c);
+		rv = setlport(c);
 	else
 		rv = setluniqueport(c, lport);
 	return rv;
@@ -886,13 +895,17 @@
 	if(p == nil)
 		return "malformed address";
 	*p++ = 0;
-	parseip(c->raddr, str);
-	c->rport = atoi(p);
-	p = strchr(p, '!');
-	if(p){
-		if(strstr(p, "!r") != nil)
-			c->restricted = 1;
-	}
+	if(parseip(c->raddr, str) == -1)
+		return Ebadip;
+
+	str = p;
+	c->rport = strtol(str, &p, 10);
+	if(p <= str || strchr("!", *p) == nil)
+		return "bad numeric port";
+
+	if(strstr(p, "!r") != nil)
+		c->restricted = 1;
+
 	return nil;
 }
 
@@ -912,7 +925,9 @@
 		if(p != nil)
 			return p;
 		setladdr(c);
-		setlport(c);
+		p = setlport(c);
+		if (p != nil)
+			return p;
 		break;
 	case 3:
 		p = setraddrport(c, argv[1]);
@@ -923,12 +938,7 @@
 			return p;
 	}
 
-	if((memcmp(c->raddr, v4prefix, IPv4off) == 0 &&
-		memcmp(c->laddr, v4prefix, IPv4off) == 0)
-		|| ipcmp(c->raddr, IPnoaddr) == 0)
-		c->ipversion = V4;
-	else
-		c->ipversion = V6;
+	c->ipversion = convipvers(c);
 
 	return nil;
 }
@@ -978,10 +988,11 @@
 	c->rport = 0;
 	switch(argc){
 	default:
-		return "bad args to announce";
+		break;
 	case 2:
 		return setladdrport(c, argv[1], 1);
 	}
+	return "bad args to announce";
 }
 
 /*
@@ -1028,10 +1039,11 @@
 {
 	switch(argc){
 	default:
-		return "bad args to bind";
+		break;
 	case 2:
 		return setladdrport(c, argv[1], 0);
 	}
+	return "bad args to bind";
 }
 
 static void
@@ -1042,7 +1054,7 @@
 	if(x->bind == nil)
 		p = Fsstdbind(c, cb->f, cb->nf);
 	else
-		p = x->bind(c, cb->f, cb->nf);
+		p = (*x->bind)(c, cb->f, cb->nf);
 	if(p != nil)
 		error(p);
 }
@@ -1065,8 +1077,8 @@
 		c->ttl = atoi(cb->f[1]);
 }
 
-static s32
-ipwrite(Chan* ch, void *v, s32 n, s64 off)
+static long
+ipwrite(Chan* ch, void *v, long n, vlong off)
 {
 	Conv *c;
 	Proto *x;
@@ -1075,6 +1087,7 @@
 	uchar ia[IPaddrlen], ma[IPaddrlen];
 	Fs *f;
 	char *a;
+	ulong offset = off;
 
 	a = v;
 	f = ipfs[ch->dev];
@@ -1099,7 +1112,8 @@
 		netlogctl(f, a, n);
 		return n;
 	case Qndb:
-		return ndbwrite(f, a, off, n);
+		return ndbwrite(f, a, offset, n);
+		break;
 	case Qctl:
 		x = f->p[PROTO(ch->qid)];
 		c = x->conv[CONV(ch->qid)];
@@ -1131,13 +1145,15 @@
 			if(cb->nf == 2){
 				if(!ipismulticast(c->raddr))
 					error("addmulti for a non multicast address");
-				parseip(ia, cb->f[1]);
+				if (parseip(ia, cb->f[1]) == -1)
+					error(Ebadip);
 				ipifcaddmulti(c, c->raddr, ia);
 			} else {
-				parseip(ma, cb->f[2]);
+				if (parseip(ia, cb->f[1]) == -1 ||
+				    parseip(ma, cb->f[2]) == -1)
+					error(Ebadip);
 				if(!ipismulticast(ma))
 					error("addmulti for a non multicast address");
-				parseip(ia, cb->f[1]);
 				ipifcaddmulti(c, ma, ia);
 			}
 		} else if(strcmp(cb->f[0], "remmulti") == 0){
@@ -1145,10 +1161,11 @@
 				error("remmulti needs interface address");
 			if(!ipismulticast(c->raddr))
 				error("remmulti for a non multicast address");
-			parseip(ia, cb->f[1]);
+			if (parseip(ia, cb->f[1]) == -1)
+				error(Ebadip);
 			ipifcremmulti(c, c->raddr, ia);
 		} else if(x->ctl != nil) {
-			p = x->ctl(c, cb->f, cb->nf);
+			p = (*x->ctl)(c, cb->f, cb->nf);
 			if(p != nil)
 				error(p);
 		} else
@@ -1160,13 +1177,12 @@
 	return n;
 }
 
-static s32
-ipbwrite(Chan* ch, Block* bp, u32 offset)
+static long
+ipbwrite(Chan* ch, Block* bp, ulong offset)
 {
 	Conv *c;
 	Proto *x;
 	Fs *f;
-	int n;
 
 	switch(TYPE(ch->qid)){
 	case Qdata:
@@ -1177,11 +1193,7 @@
 		if(c->wq == nil)
 			error(Eperm);
 
-		if(bp->next)
-			bp = concatblock(bp);
-		n = BLEN(bp);
-		qbwrite(c->wq, bp);
-		return n;
+		return qbwrite(c->wq, bp);
 	default:
 		return devbwrite(ch, bp, offset);
 	}
@@ -1198,13 +1210,13 @@
 	ipwalk,
 	ipstat,
 	ipopen,
-	devcreate,
+	ipcreate,
 	ipclose,
 	ipread,
 	ipbread,
 	ipwrite,
 	ipbwrite,
-	devremove,
+	ipremove,
 	ipwstat,
 };
 
@@ -1224,12 +1236,15 @@
 
 	p->qid.type = QTDIR;
 	p->qid.path = QID(f->np, 0, Qprotodir);
+	if(p->nc > Maskconv+1){
+		print("Fsproto: %s nc %d > %d\n", p->name, p->nc, Maskconv+1);
+		p->nc = Maskconv+1;
+	}
 	p->conv = malloc(sizeof(Conv*)*(p->nc+1));
 	if(p->conv == nil)
 		panic("Fsproto");
 
 	p->x = f->np;
-	p->nextport = 0;
 	p->nextrport = 600;
 	f->p[f->np++] = p;
 
@@ -1262,21 +1277,33 @@
 		if(c == nil){
 			c = malloc(sizeof(Conv));
 			if(c == nil)
-				error(Enomem);
-			qlock(c);
+				return nil;
+			if(waserror()){
+				qfree(c->rq);
+				qfree(c->wq);
+				qfree(c->eq);
+				qfree(c->sq);
+				free(c->ptcl);
+				free(c);
+				return nil;
+			}
 			c->p = p;
 			c->x = pp - p->conv;
 			if(p->ptclsize != 0){
 				c->ptcl = malloc(p->ptclsize);
-				if(c->ptcl == nil) {
-					free(c);
+				if(c->ptcl == nil)
 					error(Enomem);
-				}
 			}
-			*pp = c;
-			p->ac++;
 			c->eq = qopen(1024, Qmsg, 0, 0);
+			if(c->eq == nil)
+				error(Enomem);
 			(*p->create)(c);
+			if(c->rq == nil || c->wq == nil)
+				error(Enomem);
+			poperror();
+			qlock(c);
+			*pp = c;
+			p->ac++;
 			break;
 		}
 		if(canqlock(c)){
@@ -1291,8 +1318,11 @@
 		}
 	}
 	if(pp >= ep) {
-		if(p->gc != nil && (*p->gc)(p))
-			goto retry;
+		if(p->gc != nil){
+			print("Fsprotoclone: garbage collecting %s Convs\n", p->name);
+			if((*p->gc)(p))
+				goto retry;
+		}
 		return nil;
 	}
 
@@ -1307,8 +1337,9 @@
 	c->lport = 0;
 	c->rport = 0;
 	c->restricted = 0;
+	c->ignoreadvice = 0;
 	c->ttl = MAXTTL;
-	c->tos = DFLTTOS;
+	c->tos = 0;
 	qreopen(c->rq);
 	qreopen(c->wq);
 	qreopen(c->eq);
@@ -1321,7 +1352,7 @@
 Fsconnected(Conv* c, char* msg)
 {
 	if(msg != nil && *msg != '\0')
-		kstrcpy(c->cerr, msg, sizeof(c->cerr));
+		strncpy(c->cerr, msg, ERRMAX-1);
 
 	switch(c->state){
 
@@ -1368,12 +1399,19 @@
 	for(l = &c->incall; *l; l = &(*l)->next)
 		i++;
 	if(i >= Maxincall) {
+		static int beenhere;
+
 		qunlock(c);
+		if (!beenhere) {
+			beenhere = 1;
+			print("Fsnewcall: incall queue full (%d) on port %d\n",
+				i, c->lport);
+		}
 		return nil;
 	}
 
 	/* find a free conversation */
-	nc = Fsprotoclone(c->p, network);
+	nc = Fsprotoclone(c->p, c->owner);
 	if(nc == nil) {
 		qunlock(c);
 		return nil;
@@ -1394,12 +1432,12 @@
 	return nc;
 }
 
-static long
+long
 ndbwrite(Fs *f, char *a, ulong off, int n)
 {
 	if(off > strlen(f->ndb))
 		error(Eio);
-	if(off+n >= sizeof(f->ndb)-1)
+	if(off+n >= sizeof(f->ndb))
 		error(Eio);
 	memmove(f->ndb+off, a, n);
 	f->ndb[off+n] = 0;
@@ -1411,7 +1449,7 @@
 ulong
 scalednconv(void)
 {
-	if(conf.npage*BY2PG >= 128*MB)
+	if(cpuserver && conf.npage*BY2PG >= 128*MB)
 		return Nchans*4;
 	return Nchans;
 }
diff -u a/os/ip//esp.c b/os/ip//esp.c
--- a/os/ip//esp.c
+++ b/os/ip//esp.c
@@ -1,3 +1,11 @@
+/*
+ * Encapsulating Security Payload for IPsec for IPv4, rfc1827.
+ * extended to IPv6.
+ * rfc2104 defines hmac computation.
+ *	currently only implements tunnel mode.
+ * TODO: verify aes algorithms;
+ *	transport mode (host-to-host)
+ */
 #include	"u.h"
 #include	"../port/lib.h"
 #include	"mem.h"
@@ -6,47 +14,79 @@
 #include	"../port/error.h"
 
 #include	"ip.h"
+#include	"ipv6.h"
+#include	<libsec.h>
 
-#include	"libsec.h"
+#define BITS2BYTES(bi) (((bi) + BI2BY - 1) / BI2BY)
+#define BYTES2BITS(by)  ((by) * BI2BY)
 
+typedef struct Algorithm Algorithm;
+typedef struct Esp4hdr Esp4hdr;
+typedef struct Esp6hdr Esp6hdr;
+typedef struct Espcb Espcb;
 typedef struct Esphdr Esphdr;
+typedef struct Esppriv Esppriv;
 typedef struct Esptail Esptail;
 typedef struct Userhdr Userhdr;
-typedef struct Esppriv Esppriv;
-typedef struct Espcb Espcb;
-typedef struct Algorithm Algorithm;
-typedef struct Esprc4 Esprc4;
 
-#define DPRINT if(0)print
+enum {
+	Encrypt,
+	Decrypt,
 
-enum
-{
-	IP_ESPPROTO	= 50,
-	EsphdrSize	= 28,	// includes IP header
-	IphdrSize	= 20,	// options have been striped
-	EsptailSize	= 2,	// does not include pad or auth data
-	UserhdrSize	= 4,	// user visable header size - if enabled
+	IP_ESPPROTO	= 50,	/* IP v4 and v6 protocol number */
+	Esp4hdrlen	= IP4HDR + 8,
+	Esp6hdrlen	= IP6HDR + 8,
+
+	Esptaillen	= 2,	/* does not include pad or auth data */
+	Userhdrlen	= 4,	/* user-visible header size - if enabled */
+
+	Desblk	 = BITS2BYTES(64),
+	Des3keysz = BITS2BYTES(192),
+
+	Aesblk	 = BITS2BYTES(128),
+	Aeskeysz = BITS2BYTES(128),
 };
 
 struct Esphdr
 {
-	/* ip header */
+	uchar	espspi[4];	/* Security parameter index */
+	uchar	espseq[4];	/* Sequence number */
+	uchar	payload[];
+};
+
+/*
+ * tunnel-mode (network-to-network, etc.) layout is:
+ * new IP hdrs | ESP hdr |
+ *	 enc { orig IP hdrs | TCP/UDP hdr | user data | ESP trailer } | ESP ICV
+ *
+ * transport-mode (host-to-host) layout would be:
+ *	orig IP hdrs | ESP hdr |
+ *			enc { TCP/UDP hdr | user data | ESP trailer } | ESP ICV
+ */
+struct Esp4hdr
+{
+	/* ipv4 header */
 	uchar	vihl;		/* Version and header length */
 	uchar	tos;		/* Type of service */
 	uchar	length[2];	/* packet length */
 	uchar	id[2];		/* Identification */
 	uchar	frag[2];	/* Fragment information */
-	uchar	Unused;	
+	uchar	Unused;
 	uchar	espproto;	/* Protocol */
 	uchar	espplen[2];	/* Header plus data length */
 	uchar	espsrc[4];	/* Ip source */
 	uchar	espdst[4];	/* Ip destination */
 
-	/* esp header */
-	uchar	espspi[4];	/* Security parameter index */
-	uchar	espseq[4];	/* Sequence number */
+	Esphdr;
 };
 
+/* tunnel-mode layout */
+struct Esp6hdr
+{
+	IPV6HDR;
+	Esphdr;
+};
+
 struct Esptail
 {
 	uchar	pad;
@@ -53,16 +93,28 @@
 	uchar	nexthdr;
 };
 
+/* IP-version-dependent data */
+typedef struct Versdep Versdep;
+struct Versdep
+{
+	ulong	version;
+	ulong	iphdrlen;
+	ulong	hdrlen;		/* iphdrlen + esp hdr len */
+	ulong	spi;
+	uchar	laddr[IPaddrlen];
+	uchar	raddr[IPaddrlen];
+};
+
 /* header as seen by the user */
 struct Userhdr
 {
-	uchar	nexthdr;	// next protocol
+	uchar	nexthdr;	/* next protocol */
 	uchar	unused[3];
 };
 
 struct Esppriv
 {
-	ulong	in;
+	uvlong	in;
 	ulong	inerrors;
 };
 
@@ -72,77 +124,68 @@
 struct Espcb
 {
 	int	incoming;
-	int	header;		// user user level header
+	int	header;		/* user-level header */
 	ulong	spi;
-	ulong	seq;		// last seq sent
-	ulong	window;		// for replay attacks
+	ulong	seq;		/* last seq sent */
+	ulong	window;		/* for replay attacks */
+
 	char	*espalg;
-	void	*espstate;	// other state for esp
-	int	espivlen;	// in bytes
+	void	*espstate;	/* other state for esp */
+	int	espivlen;	/* in bytes */
 	int	espblklen;
 	int	(*cipher)(Espcb*, uchar *buf, int len);
+
 	char	*ahalg;
-	void	*ahstate;	// other state for esp
-	int	ahlen;		// auth data length in bytes
+	void	*ahstate;	/* other state for ah (auth) */
+	int	ahlen;		/* auth data length in bytes */
 	int	ahblklen;
 	int	(*auth)(Espcb*, uchar *buf, int len, uchar *hash);
+	DigestState *ds;
 };
 
 struct Algorithm
 {
 	char 	*name;
-	int	keylen;		// in bits
-	void	(*init)(Espcb*, char* name, uchar *key, int keylen);
+	int	keylen;		/* in bits */
+	void	(*init)(Espcb*, char* name, uchar *key, unsigned keylen);
 };
 
-
-enum {
-	RC4forward	= 10*1024*1024,	// maximum skip forward
-	RC4back = 100*1024,		// maximum look back
-};
-
-struct Esprc4
-{
-	ulong cseq;	// current byte sequence number
-	RC4state current;
-
-	int ovalid;	// old is valid
-	ulong lgseq; // last good sequence
-	ulong oseq;	// old byte sequence number
-	RC4state old;
-};
-
 static	Conv* convlookup(Proto *esp, ulong spi);
 static	char *setalg(Espcb *ecb, char **f, int n, Algorithm *alg);
-static	void nullespinit(Espcb*, char*, uchar *key, int keylen);
-static	void nullahinit(Espcb*, char*, uchar *key, int keylen);
-static	void shaahinit(Espcb*, char*, uchar *key, int keylen);
-static	void md5ahinit(Espcb*, char*, uchar *key, int keylen);
-static	void desespinit(Espcb *ecb, char *name, uchar *k, int n);
-static	void rc4espinit(Espcb *ecb, char *name, uchar *k, int n);
 static	void espkick(void *x);
 
+static	void nullespinit(Espcb*, char*, uchar *key, unsigned keylen);
+static	void des3espinit(Espcb*, char*, uchar *key, unsigned keylen);
+static	void aescbcespinit(Espcb*, char*, uchar *key, unsigned keylen);
+static	void aesctrespinit(Espcb*, char*, uchar *key, unsigned keylen);
+static	void desespinit(Espcb *ecb, char *name, uchar *k, unsigned n);
+
+static	void nullahinit(Espcb*, char*, uchar *key, unsigned keylen);
+static	void shaahinit(Espcb*, char*, uchar *key, unsigned keylen);
+static	void md5ahinit(Espcb*, char*, uchar *key, unsigned keylen);
+
 static Algorithm espalg[] =
 {
-	"null",			0,	nullespinit,
-	"des_56_cbc",		64,	desespinit,
-	"rc4_128",		128,	rc4espinit,
-	nil,			0,	nil,
+	"null",		0,	nullespinit,
+	"des3_cbc",	192,	des3espinit,	/* new rfc2451, des-ede3 */
+	"aes_128_cbc",	128,	aescbcespinit,	/* new rfc3602 */
+	"aes_ctr",	128,	aesctrespinit,	/* new rfc3686 */
+	"des_56_cbc",	64,	desespinit,	/* rfc2405, deprecated */
+	nil,		0,	nil,
 };
 
 static Algorithm ahalg[] =
 {
-	"null",			0,	nullahinit,
-	"hmac_sha1_96",		128,	shaahinit,
-	"hmac_md5_96",		128,	md5ahinit,
-	nil,			0,	nil,
+	"null",		0,	nullahinit,
+	"hmac_sha1_96",	128,	shaahinit,	/* rfc2404 */
+	"hmac_md5_96",	128,	md5ahinit,	/* rfc2403 */
+	nil,		0,	nil,
 };
 
 static char*
 espconnect(Conv *c, char **argv, int argc)
 {
-	char *p, *pp;
-	char *e = nil;
+	char *p, *pp, *e = nil;
 	ulong spi;
 	Espcb *ecb = (Espcb*)c->ptcl;
 
@@ -157,7 +200,10 @@
 			break;
 		}
 		*p++ = 0;
-		parseip(c->raddr, argv[1]);
+		if (parseip(c->raddr, argv[1]) == -1) {
+			e = Ebadip;
+			break;
+		}
 		findlocalip(c->p->f, c->laddr, c->raddr);
 		ecb->incoming = 0;
 		ecb->seq = 0;
@@ -215,26 +261,86 @@
 	ipmove(c->raddr, IPnoaddr);
 
 	ecb = (Espcb*)c->ptcl;
-	free(ecb->espstate);
-	free(ecb->ahstate);
+	secfree(ecb->espstate);
+	secfree(ecb->ahstate);
 	memset(ecb, 0, sizeof(Espcb));
 }
 
+static int	/* V4 or V6 for the packet in *bpp; 0 if it is too short to tell */
+pktipvers(Fs *f, Block **bpp)
+{
+	if (*bpp == nil || BLEN(*bpp) == 0) {
+		/* get enough to identify the IP version; pullupblock's result replaces *bpp */
+		*bpp = pullupblock(*bpp, IP4HDR);
+		if(*bpp == nil) {
+			netlog(f, Logesp, "esp: short packet\n");
+			return 0;	/* NOTE(review): espiput/espadvise pass this straight to getverslens(), whose default case panics -- confirm 0 is handled */
+		}
+	}
+	return (((Esp4hdr*)(*bpp)->rp)->vihl & 0xf0) == IP_VER4? V4: V6;	/* only the high nibble of vihl is tested; anything not IP_VER4 is reported as V6 */
+}
+
 static void
+getverslens(int version, Versdep *vp)	/* record version in vp and set the matching header lengths */
+{
+	vp->version = version;
+	switch(vp->version) {
+	case V4:
+		vp->iphdrlen = IP4HDR;
+		vp->hdrlen   = Esp4hdrlen;	/* espiput skips hdrlen bytes to drop the IP & ESP hdrs */
+		break;
+	case V6:
+		vp->iphdrlen = IP6HDR;
+		vp->hdrlen   = Esp6hdrlen;	/* same role as Esp4hdrlen, for IPv6 */
+		break;
+	default:	/* neither V4 nor V6 */
+		panic("esp: getverslens version %d wrong", version);
+	}
+}
+
+static void	/* copy the addresses and SPI from the header at pkt into vp */
+getpktspiaddrs(uchar *pkt, Versdep *vp)
+{
+	Esp4hdr *eh4;
+	Esp6hdr *eh6;
+
+	switch(vp->version) {	/* vp->version must already be set, e.g. by getverslens() */
+	case V4:
+		eh4 = (Esp4hdr*)pkt;
+		v4tov6(vp->raddr, eh4->espsrc);	/* packet source is recorded as raddr */
+		v4tov6(vp->laddr, eh4->espdst);	/* packet destination is recorded as laddr */
+		vp->spi = nhgetl(eh4->espspi);
+		break;
+	case V6:
+		eh6 = (Esp6hdr*)pkt;
+		ipmove(vp->raddr, eh6->src);	/* packet source is recorded as raddr */
+		ipmove(vp->laddr, eh6->dst);	/* packet destination is recorded as laddr */
+		vp->spi = nhgetl(eh6->espspi);
+		break;
+	default:	/* neither V4 nor V6 */
+		panic("esp: getpktspiaddrs vp->version %ld wrong", vp->version);
+	}
+}
+
+/*
+ * encapsulate next IP packet on x's write queue in IP/ESP packet
+ * and initiate output of the result.
+ */
+static void
 espkick(void *x)
 {
+	int nexthdr, payload, pad, align;
+	uchar *auth;
+	Block *bp;
 	Conv *c = x;
-	Esphdr *eh;
+	Esp4hdr *eh4;
+	Esp6hdr *eh6;
+	Espcb *ecb;
 	Esptail *et;
 	Userhdr *uh;
-	Espcb *ecb;
-	Block *bp;
-	int nexthdr;
-	int payload;
-	int pad;
-	int align;
-	uchar *auth;
+	Versdep vers;
 
+	getverslens(convipvers(c), &vers);
 	bp = qget(c->wq);
 	if(bp == nil)
 		return;
@@ -244,7 +350,7 @@
 
 	if(ecb->header) {
 		/* make sure the message has a User header */
-		bp = pullupblock(bp, UserhdrSize);
+		bp = pullupblock(bp, Userhdrlen);
 		if(bp == nil) {
 			qunlock(c);
 			return;
@@ -251,15 +357,16 @@
 		}
 		uh = (Userhdr*)bp->rp;
 		nexthdr = uh->nexthdr;
-		bp->rp += UserhdrSize;
+		bp->rp += Userhdrlen;
 	} else {
-		nexthdr = 0;  // what should this be?
+		nexthdr = 0;	/* what should this be? */
 	}
 
 	payload = BLEN(bp) + ecb->espivlen;
 
 	/* Make space to fit ip header */
-	bp = padblock(bp, EsphdrSize + ecb->espivlen);
+	bp = padblock(bp, vers.hdrlen + ecb->espivlen);
+	getpktspiaddrs(bp->rp, &vers);
 
 	align = 4;
 	if(ecb->espblklen > align)
@@ -266,7 +373,7 @@
 		align = ecb->espblklen;
 	if(align % ecb->ahblklen != 0)
 		panic("espkick: ahblklen is important after all");
-	pad = (align-1) - (payload + EsptailSize-1)%align;
+	pad = (align-1) - (payload + Esptaillen-1)%align;
 
 	/*
 	 * Make space for tail
@@ -273,70 +380,88 @@
 	 * this is done by calling padblock with a negative size
 	 * Padblock does not change bp->wp!
 	 */
-	bp = padblock(bp, -(pad+EsptailSize+ecb->ahlen));
-	bp->wp += pad+EsptailSize+ecb->ahlen;
+	bp = padblock(bp, -(pad+Esptaillen+ecb->ahlen));
+	bp->wp += pad+Esptaillen+ecb->ahlen;
 
-	eh = (Esphdr *)(bp->rp);
-	et = (Esptail*)(bp->rp + EsphdrSize + payload + pad);
+	et = (Esptail*)(bp->rp + vers.hdrlen + payload + pad);
 
-	// fill in tail
+	/* fill in tail */
 	et->pad = pad;
 	et->nexthdr = nexthdr;
 
-	ecb->cipher(ecb, bp->rp+EsphdrSize, payload+pad+EsptailSize);
-	auth = bp->rp + EsphdrSize + payload + pad + EsptailSize;
+	/* encrypt the payload */
+	ecb->cipher(ecb, bp->rp + vers.hdrlen, payload + pad + Esptaillen);
+	auth = bp->rp + vers.hdrlen + payload + pad + Esptaillen;
 
-	// fill in head
-	eh->vihl = IP_VER4;
-	hnputl(eh->espspi, ecb->spi);
-	hnputl(eh->espseq, ++ecb->seq);
-	v6tov4(eh->espsrc, c->laddr);
-	v6tov4(eh->espdst, c->raddr);
-	eh->espproto = IP_ESPPROTO;
-	eh->frag[0] = 0;
-	eh->frag[1] = 0;
+	/* fill in head; construct a new IP header and an ESP header */
+	if (vers.version == V4) {
+		eh4 = (Esp4hdr *)bp->rp;
+		eh4->vihl = IP_VER4;
+		v6tov4(eh4->espsrc, c->laddr);
+		v6tov4(eh4->espdst, c->raddr);
+		eh4->espproto = IP_ESPPROTO;
+		eh4->frag[0] = 0;
+		eh4->frag[1] = 0;
 
-	ecb->auth(ecb, bp->rp+IphdrSize, (EsphdrSize-IphdrSize)+payload+pad+EsptailSize, auth);
+		hnputl(eh4->espspi, ecb->spi);
+		hnputl(eh4->espseq, ++ecb->seq);
+	} else {
+		eh6 = (Esp6hdr *)bp->rp;
+		eh6->vcf[0] = IP_VER6;
+		ipmove(eh6->src, c->laddr);
+		ipmove(eh6->dst, c->raddr);
+		eh6->proto = IP_ESPPROTO;
 
+		hnputl(eh6->espspi, ecb->spi);
+		hnputl(eh6->espseq, ++ecb->seq);
+	}
+
+	/* compute secure hash */
+	ecb->auth(ecb, bp->rp + vers.iphdrlen, (vers.hdrlen - vers.iphdrlen) +
+		payload + pad + Esptaillen, auth);
+
 	qunlock(c);
-	//print("esp: pass down: %uld\n", BLEN(bp));
-	ipoput4(c->p->f, bp, 0, c->ttl, c->tos, c);
+	/* print("esp: pass down: %uld\n", BLEN(bp)); */
+	if (vers.version == V4)
+		ipoput4(c->p->f, bp, 0, c->ttl, c->tos, c);
+	else
+		ipoput6(c->p->f, bp, 0, c->ttl, c->tos, c);
 }
 
+/*
+ * decapsulate IP packet from IP/ESP packet in bp and
+ * pass the result up the spi's Conv's read queue.
+ */
 void
 espiput(Proto *esp, Ipifc*, Block *bp)
 {
-	Esphdr *eh;
-	Esptail *et;
-	Userhdr *uh;
+	int payload, nexthdr;
+	uchar *auth, *espspi;
 	Conv *c;
 	Espcb *ecb;
-	uchar raddr[IPaddrlen], laddr[IPaddrlen];
+	Esptail *et;
 	Fs *f;
-	uchar *auth;
-	ulong spi;
-	int payload, nexthdr;
+	Userhdr *uh;
+	Versdep vers;
 
 	f = esp->f;
 
-	bp = pullupblock(bp, EsphdrSize+EsptailSize);
+	getverslens(pktipvers(f, &bp), &vers);
+
+	bp = pullupblock(bp, vers.hdrlen + Esptaillen);
 	if(bp == nil) {
 		netlog(f, Logesp, "esp: short packet\n");
 		return;
 	}
+	getpktspiaddrs(bp->rp, &vers);
 
-	eh = (Esphdr*)(bp->rp);
-	spi = nhgetl(eh->espspi);
-	v4tov6(raddr, eh->espsrc);
-	v4tov6(laddr, eh->espdst);
-
 	qlock(esp);
 	/* Look for a conversation structure for this port */
-	c = convlookup(esp, spi);
+	c = convlookup(esp, vers.spi);
 	if(c == nil) {
 		qunlock(esp);
-		netlog(f, Logesp, "esp: no conv %I -> %I!%d\n", raddr,
-			laddr, spi);
+		netlog(f, Logesp, "esp: no conv %I -> %I!%lud\n", vers.raddr,
+			vers.laddr, vers.spi);
 		icmpnoconv(f, bp);
 		freeblist(bp);
 		return;
@@ -346,76 +471,83 @@
 	qunlock(esp);
 
 	ecb = c->ptcl;
-	// too hard to do decryption/authentication on block lists
-	if(bp->next)
+	/* too hard to do decryption/authentication on block lists */
+	if(bp->next != nil)
 		bp = concatblock(bp);
 
-	if(BLEN(bp) < EsphdrSize + ecb->espivlen + EsptailSize + ecb->ahlen) {
+	if(BLEN(bp) < vers.hdrlen + ecb->espivlen + Esptaillen + ecb->ahlen) {
 		qunlock(c);
-		netlog(f, Logesp, "esp: short block %I -> %I!%d\n", raddr,
-			laddr, spi);
+		netlog(f, Logesp, "esp: short block %I -> %I!%lud\n", vers.raddr,
+			vers.laddr, vers.spi);
 		freeb(bp);
 		return;
 	}
 
-	eh = (Esphdr*)(bp->rp);
 	auth = bp->wp - ecb->ahlen;
-	if(!ecb->auth(ecb, eh->espspi, auth-eh->espspi, auth)) {
+	espspi = vers.version == V4?	((Esp4hdr*)bp->rp)->espspi:
+					((Esp6hdr*)bp->rp)->espspi;
+
+	/* compute secure hash and authenticate */
+	if(!ecb->auth(ecb, espspi, auth - espspi, auth)) {
 		qunlock(c);
-print("esp: bad auth %I -> %I!%ld\n", raddr, laddr, spi);
-		netlog(f, Logesp, "esp: bad auth %I -> %I!%d\n", raddr,
-			laddr, spi);
+print("esp: bad auth %I -> %I!%ld\n", vers.raddr, vers.laddr, vers.spi);
+		netlog(f, Logesp, "esp: bad auth %I -> %I!%lud\n", vers.raddr,
+			vers.laddr, vers.spi);
 		freeb(bp);
 		return;
 	}
 
-	payload = BLEN(bp)-EsphdrSize-ecb->ahlen;
-	if(payload<=0 || payload%4 != 0 || payload%ecb->espblklen!=0) {
+	payload = BLEN(bp) - vers.hdrlen - ecb->ahlen;
+	if(payload <= 0 || payload % 4 != 0 || payload % ecb->espblklen != 0) {
 		qunlock(c);
-		netlog(f, Logesp, "esp: bad length %I -> %I!%d payload=%d BLEN=%d\n", raddr,
-			laddr, spi, payload, BLEN(bp));
+		netlog(f, Logesp, "esp: bad length %I -> %I!%lud payload=%d BLEN=%zd\n",
+			vers.raddr, vers.laddr, vers.spi, payload, BLEN(bp));
 		freeb(bp);
 		return;
 	}
-	if(!ecb->cipher(ecb, bp->rp+EsphdrSize, payload)) {
+
+	/* decrypt payload */
+	if(!ecb->cipher(ecb, bp->rp + vers.hdrlen, payload)) {
 		qunlock(c);
-print("esp: cipher failed %I -> %I!%ld: %r\n", raddr, laddr, spi);
-		netlog(f, Logesp, "esp: cipher failed %I -> %I!%d: %r\n", raddr,
-			laddr, spi);
+print("esp: cipher failed %I -> %I!%ld: %s\n", vers.raddr, vers.laddr, vers.spi, up->errstr);
+		netlog(f, Logesp, "esp: cipher failed %I -> %I!%lud: %s\n",
+			vers.raddr, vers.laddr, vers.spi, up->errstr);
 		freeb(bp);
 		return;
 	}
 
-	payload -= EsptailSize;
-	et = (Esptail*)(bp->rp + EsphdrSize + payload);
+	payload -= Esptaillen;
+	et = (Esptail*)(bp->rp + vers.hdrlen + payload);
 	payload -= et->pad + ecb->espivlen;
 	nexthdr = et->nexthdr;
 	if(payload <= 0) {
 		qunlock(c);
-		netlog(f, Logesp, "esp: short packet after decrypt %I -> %I!%d\n", raddr,
-			laddr, spi);
+		netlog(f, Logesp, "esp: short packet after decrypt %I -> %I!%lud\n",
+			vers.raddr, vers.laddr, vers.spi);
 		freeb(bp);
 		return;
 	}
 
-	// trim packet
-	bp->rp += EsphdrSize + ecb->espivlen;
+	/* trim packet */
+	bp->rp += vers.hdrlen + ecb->espivlen; /* toss original IP & ESP hdrs */
 	bp->wp = bp->rp + payload;
 	if(ecb->header) {
-		// assume UserhdrSize < EsphdrSize
-		bp->rp -= UserhdrSize;
+		/* assume Userhdrlen < Esp4hdrlen < Esp6hdrlen */
+		bp->rp -= Userhdrlen;
 		uh = (Userhdr*)bp->rp;
-		memset(uh, 0, UserhdrSize);
+		memset(uh, 0, Userhdrlen);
 		uh->nexthdr = nexthdr;
 	}
 
+	/* ingress filtering here? */
+
 	if(qfull(c->rq)){
-		netlog(f, Logesp, "esp: qfull %I -> %I.%uld\n", raddr,
-			laddr, spi);
+		netlog(f, Logesp, "esp: qfull %I -> %I.%uld\n", vers.raddr,
+			vers.laddr, vers.spi);
 		freeblist(bp);
 	}else {
-//print("esp: pass up: %uld\n", BLEN(bp));
-		qpass(c->rq, bp);
+//		print("esp: pass up: %uld\n", BLEN(bp));
+		qpass(c->rq, bp);	/* pass packet up the read queue */
 	}
 
 	qunlock(c);
@@ -440,19 +572,19 @@
 	return e;
 }
 
+/* called from icmp(v6) for unreachable hosts, time exceeded, etc. */
 void
 espadvise(Proto *esp, Block *bp, char *msg)
 {
-	Esphdr *h;
 	Conv *c;
-	ulong spi;
+	Versdep vers;
 
-	h = (Esphdr*)(bp->rp);
+	getverslens(pktipvers(esp->f, &bp), &vers);
+	getpktspiaddrs(bp->rp, &vers);
 
-	spi = nhgets(h->espspi);
 	qlock(esp);
-	c = convlookup(esp, spi);
-	if(c != nil) {
+	c = convlookup(esp, vers.spi);
+	if(c != nil && !c->ignoreadvice) {
 		qhangup(c->rq, msg);
 		qhangup(c->wq, msg);
 	}
@@ -466,7 +598,7 @@
 	Esppriv *upriv;
 
 	upriv = esp->priv;
-	return snprint(buf, len, "%lud %lud\n",
+	return snprint(buf, len, "%llud %lud\n",
 		upriv->in,
 		upriv->inerrors);
 }
@@ -520,10 +652,10 @@
 setalg(Espcb *ecb, char **f, int n, Algorithm *alg)
 {
 	uchar *key;
-	int i, nbyte, nchar;
-	int c;
+	int c, nbyte, nchar;
+	uint i;
 
-	if(n < 2)
+	if(n < 2 || n > 3)
 		return "bad format";
 	for(; alg->name; alg++)
 		if(strcmp(f[1], alg->name) == 0)
@@ -531,10 +663,14 @@
 	if(alg->name == nil)
 		return "unknown algorithm";
 
-	if(n != 3)
-		return "bad format";
 	nbyte = (alg->keylen + 7) >> 3;
-	nchar = strlen(f[2]);
+	if (n == 2)
+		nchar = 0;
+	else
+		nchar = strlen(f[2]);
+	if(nchar != 2 * nbyte)			/* TODO: maybe < is ok */
+		return "key not required length";
+	/* convert hex digits from ascii, in place */
 	for(i=0; i<nchar; i++) {
 		c = f[2][i];
 		if(c >= '0' && c <= '9')
@@ -544,21 +680,27 @@
 		else if(c >= 'A' && c <= 'F')
 			f[2][i] -= 'A'-10;
 		else
-			return "bad character in key";
+			return "non-hex character in key";
 	}
-	key = smalloc(nbyte);
-	for(i=0; i<nchar && i*2<nbyte; i++) {
+	/* collapse hex digits into complete bytes in reverse order in key */
+	key = secalloc(nbyte);
+	for(i = 0; i < nchar && i/2 < nbyte; i++) {
 		c = f[2][nchar-i-1];
 		if(i&1)
 			c <<= 4;
-		key[i>>1] |= c;
+		key[i/2] |= c;
 	}
-
+	memset(f[2], 0, nchar);
 	alg->init(ecb, alg->name, key, alg->keylen);
-	free(key);
+	secfree(key);
 	return nil;
 }
 
+
+/*
+ * null encryption
+ */
+
 static int
 nullcipher(Espcb*, uchar*, int)
 {
@@ -566,7 +708,7 @@
 }
 
 static void
-nullespinit(Espcb *ecb, char *name, uchar*, int)
+nullespinit(Espcb *ecb, char *name, uchar*, unsigned)
 {
 	ecb->espalg = name;
 	ecb->espblklen = 1;
@@ -581,7 +723,7 @@
 }
 
 static void
-nullahinit(Espcb *ecb, char *name, uchar*, int)
+nullahinit(Espcb *ecb, char *name, uchar*, unsigned)
 {
 	ecb->ahalg = name;
 	ecb->ahblklen = 1;
@@ -589,26 +731,28 @@
 	ecb->auth = nullauth;
 }
 
-void
+
+/*
+ * sha1
+ */
+
+static void
 seanq_hmac_sha1(uchar hash[SHA1dlen], uchar *t, long tlen, uchar *key, long klen)
 {
-	uchar ipad[65], opad[65];
 	int i;
+	uchar ipad[Hmacblksz+1], opad[Hmacblksz+1], innerhash[SHA1dlen];
 	DigestState *digest;
-	uchar innerhash[SHA1dlen];
 
-	for(i=0; i<64; i++){
-		ipad[i] = 0x36;
-		opad[i] = 0x5c;
-	}
-	ipad[64] = opad[64] = 0;
-	for(i=0; i<klen; i++){
+	memset(ipad, 0x36, Hmacblksz);
+	memset(opad, 0x5c, Hmacblksz);
+	ipad[Hmacblksz] = opad[Hmacblksz] = 0;
+	for(i = 0; i < klen; i++){
 		ipad[i] ^= key[i];
 		opad[i] ^= key[i];
 	}
-	digest = sha1(ipad, 64, nil, nil);
+	digest = sha1(ipad, Hmacblksz, nil, nil);
 	sha1(t, tlen, innerhash, digest);
-	digest = sha1(opad, 64, nil, nil);
+	digest = sha1(opad, Hmacblksz, nil, nil);
 	sha1(innerhash, SHA1dlen, hash, digest);
 }
 
@@ -615,11 +759,11 @@
 static int
 shaauth(Espcb *ecb, uchar *t, int tlen, uchar *auth)
 {
-	uchar hash[SHA1dlen];
 	int r;
+	uchar hash[SHA1dlen];
 
 	memset(hash, 0, SHA1dlen);
-	seanq_hmac_sha1(hash, t, tlen, (uchar*)ecb->ahstate, 16);
+	seanq_hmac_sha1(hash, t, tlen, (uchar*)ecb->ahstate, BITS2BYTES(128));
 	r = memcmp(auth, hash, ecb->ahlen) == 0;
 	memmove(auth, hash, ecb->ahlen);
 	return r;
@@ -626,40 +770,162 @@
 }
 
 static void
-shaahinit(Espcb *ecb, char *name, uchar *key, int klen)
+shaahinit(Espcb *ecb, char *name, uchar *key, unsigned klen)
 {
 	if(klen != 128)
 		panic("shaahinit: bad keylen");
-	klen >>= 8;	// convert to bytes
+	klen /= BI2BY;
 
 	ecb->ahalg = name;
 	ecb->ahblklen = 1;
-	ecb->ahlen = 12;
+	ecb->ahlen = BITS2BYTES(96);
 	ecb->auth = shaauth;
-	ecb->ahstate = smalloc(klen);
+	ecb->ahstate = secalloc(klen);
 	memmove(ecb->ahstate, key, klen);
 }
 
-void
+
+/*
+ * aes
+ */
+static int
+aescbccipher(Espcb *ecb, uchar *p, int n)	/* 128-bit blocks; p[0..AESbsize) carries the IV, the rest is processed in place */
+{
+	uchar tmp[AESbsize], q[AESbsize];
+	uchar *pp, *tp, *ip, *eip, *ep;
+	AESstate *ds = ecb->espstate;
+
+	ep = p + n;	/* assumes n is a multiple of AESbsize: the loops below step whole blocks -- TODO confirm callers guarantee it */
+	if(ecb->incoming) {
+		memmove(ds->ivec, p, AESbsize);	/* take the IV from the front of the buffer */
+		p += AESbsize;
+		while(p < ep){
+			memmove(tmp, p, AESbsize);	/* keep the ciphertext block; it chains into the next one */
+			aes_decrypt(ds->dkey, ds->rounds, p, q);
+			memmove(p, q, AESbsize);
+			tp = tmp;
+			ip = ds->ivec;
+			for(eip = ip + AESbsize; ip < eip; ){
+				*p++ ^= *ip;	/* decrypted block ^ chaining value; also advances p to the next block */
+				*ip++ = *tp++;	/* saved ciphertext becomes the next chaining value */
+			}
+		}
+	} else {
+		memmove(p, ds->ivec, AESbsize);	/* write the current chaining value at the front as the IV */
+		for(p += AESbsize; p < ep; p += AESbsize){
+			pp = p;
+			ip = ds->ivec;
+			for(eip = ip + AESbsize; ip < eip; )
+				*pp++ ^= *ip++;	/* xor the block with the chaining value before encrypting */
+			aes_encrypt(ds->ekey, ds->rounds, p, q);
+			memmove(ds->ivec, q, AESbsize);	/* encrypted block is the next chaining value */
+			memmove(p, q, AESbsize);
+		}
+	}
+	return 1;	/* never reports failure */
+}
+
+static void	/* install aescbccipher on ecb with key k (n bits) and an IV filled by prng() */
+aescbcespinit(Espcb *ecb, char *name, uchar *k, unsigned n)
+{
+	uchar key[Aeskeysz], ivec[Aeskeysz];
+
+	n = BITS2BYTES(n);
+	if(n > Aeskeysz)	/* clamp to what key[] can hold */
+		n = Aeskeysz;
+	memset(key, 0, sizeof(key));	/* zero first so a shorter key is zero padded */
+	memmove(key, k, n);
+	prng(ivec, Aeskeysz);
+	ecb->espalg = name;
+	ecb->espblklen = Aesblk;
+	ecb->espivlen = Aesblk;
+	ecb->cipher = aescbccipher;
+	ecb->espstate = secalloc(sizeof(AESstate));
+	setupAESstate(ecb->espstate, key, n /* keybytes */, ivec);
+	memset(ivec, 0, sizeof(ivec));	/* clear the stack copies once the state is set up */
+	memset(key, 0, sizeof(key));
+}
+
+static int
+aesctrcipher(Espcb *ecb, uchar *p, int n)	/* 128-bit blocks; NOTE(review): body is identical to aescbccipher (chained blocks, no counter) -- confirm this is intended for "aes_ctr" */
+{
+	uchar tmp[AESbsize], q[AESbsize];
+	uchar *pp, *tp, *ip, *eip, *ep;
+	AESstate *ds = ecb->espstate;
+
+	ep = p + n;	/* assumes n is a multiple of AESbsize: the loops below step whole blocks -- TODO confirm callers guarantee it */
+	if(ecb->incoming) {
+		memmove(ds->ivec, p, AESbsize);	/* take the IV from the front of the buffer */
+		p += AESbsize;
+		while(p < ep){
+			memmove(tmp, p, AESbsize);	/* keep the ciphertext block; it chains into the next one */
+			aes_decrypt(ds->dkey, ds->rounds, p, q);
+			memmove(p, q, AESbsize);
+			tp = tmp;
+			ip = ds->ivec;
+			for(eip = ip + AESbsize; ip < eip; ){
+				*p++ ^= *ip;	/* decrypted block ^ chaining value; also advances p to the next block */
+				*ip++ = *tp++;	/* saved ciphertext becomes the next chaining value */
+			}
+		}
+	} else {
+		memmove(p, ds->ivec, AESbsize);	/* write the current chaining value at the front as the IV */
+		for(p += AESbsize; p < ep; p += AESbsize){
+			pp = p;
+			ip = ds->ivec;
+			for(eip = ip + AESbsize; ip < eip; )
+				*pp++ ^= *ip++;	/* xor the block with the chaining value before encrypting */
+			aes_encrypt(ds->ekey, ds->rounds, p, q);
+			memmove(ds->ivec, q, AESbsize);	/* encrypted block is the next chaining value */
+			memmove(p, q, AESbsize);
+		}
+	}
+	return 1;	/* never reports failure */
+}
+
+static void	/* install aesctrcipher on ecb with key k (n bits) and an IV filled by prng() */
+aesctrespinit(Espcb *ecb, char *name, uchar *k, unsigned n)
+{
+	uchar key[Aeskeysz], ivec[Aesblk];	/* key[] sized by Aeskeysz, the bound n is clamped to below */
+
+	n = BITS2BYTES(n);
+	if(n > Aeskeysz)	/* clamp to what key[] can hold */
+		n = Aeskeysz;
+	memset(key, 0, sizeof(key));	/* zero first so a shorter key is zero padded */
+	memmove(key, k, n);
+	prng(ivec, Aesblk);
+	ecb->espalg = name;
+	ecb->espblklen = Aesblk;
+	ecb->espivlen = Aesblk;
+	ecb->cipher = aesctrcipher;
+	ecb->espstate = secalloc(sizeof(AESstate));
+	setupAESstate(ecb->espstate, key, n /* keybytes */, ivec);
+	memset(ivec, 0, sizeof(ivec));	/* clear the stack copies once the state is set up */
+	memset(key, 0, sizeof(key));
+}
+
+
+/*
+ * md5
+ */
+
+static void
 seanq_hmac_md5(uchar hash[MD5dlen], uchar *t, long tlen, uchar *key, long klen)
 {
-	uchar ipad[65], opad[65];
 	int i;
+	uchar ipad[Hmacblksz+1], opad[Hmacblksz+1], innerhash[MD5dlen];
 	DigestState *digest;
-	uchar innerhash[MD5dlen];
 
-	for(i=0; i<64; i++){
-		ipad[i] = 0x36;
-		opad[i] = 0x5c;
-	}
-	ipad[64] = opad[64] = 0;
-	for(i=0; i<klen; i++){
+	memset(ipad, 0x36, Hmacblksz);
+	memset(opad, 0x5c, Hmacblksz);
+	ipad[Hmacblksz] = opad[Hmacblksz] = 0;
+	for(i = 0; i < klen; i++){
 		ipad[i] ^= key[i];
 		opad[i] ^= key[i];
 	}
-	digest = md5(ipad, 64, nil, nil);
+	digest = md5(ipad, Hmacblksz, nil, nil);
 	md5(t, tlen, innerhash, digest);
-	digest = md5(opad, 64, nil, nil);
+	digest = md5(opad, Hmacblksz, nil, nil);
 	md5(innerhash, MD5dlen, hash, digest);
 }
 
@@ -670,7 +936,7 @@
 	int r;
 
 	memset(hash, 0, MD5dlen);
-	seanq_hmac_md5(hash, t, tlen, (uchar*)ecb->ahstate, 16);
+	seanq_hmac_md5(hash, t, tlen, (uchar*)ecb->ahstate, BITS2BYTES(128));
 	r = memcmp(auth, hash, ecb->ahlen) == 0;
 	memmove(auth, hash, ecb->ahlen);
 	return r;
@@ -677,168 +943,102 @@
 }
 
 static void
-md5ahinit(Espcb *ecb, char *name, uchar *key, int klen)
+md5ahinit(Espcb *ecb, char *name, uchar *key, unsigned klen)
 {
 	if(klen != 128)
 		panic("md5ahinit: bad keylen");
-	klen >>= 3;	// convert to bytes
-
-
+	klen = BITS2BYTES(klen);
 	ecb->ahalg = name;
 	ecb->ahblklen = 1;
-	ecb->ahlen = 12;
+	ecb->ahlen = BITS2BYTES(96);
 	ecb->auth = md5auth;
-	ecb->ahstate = smalloc(klen);
+	ecb->ahstate = secalloc(klen);
 	memmove(ecb->ahstate, key, klen);
 }
 
+
+/*
+ * des, single and triple
+ */
+
 static int
 descipher(Espcb *ecb, uchar *p, int n)
 {
-	uchar tmp[8];
-	uchar *pp, *tp, *ip, *eip, *ep;
 	DESstate *ds = ecb->espstate;
 
-	ep = p + n;
 	if(ecb->incoming) {
-		memmove(ds->ivec, p, 8);
-		p += 8;
-		while(p < ep){
-			memmove(tmp, p, 8);
-			block_cipher(ds->expanded, p, 1);
-			tp = tmp;
-			ip = ds->ivec;
-			for(eip = ip+8; ip < eip; ){
-				*p++ ^= *ip;
-				*ip++ = *tp++;
-			}
-		}
+		memmove(ds->ivec, p, Desblk);
+		desCBCdecrypt(p + Desblk, n - Desblk, ds);
 	} else {
-		memmove(p, ds->ivec, 8);
-		for(p += 8; p < ep; p += 8){
-			pp = p;
-			ip = ds->ivec;
-			for(eip = ip+8; ip < eip; )
-				*pp++ ^= *ip++;
-			block_cipher(ds->expanded, p, 0);
-			memmove(ds->ivec, p, 8);
-		}
+		memmove(p, ds->ivec, Desblk);
+		desCBCencrypt(p + Desblk, n - Desblk, ds);
 	}
 	return 1;
 }
-	
+
+static int
+des3cipher(Espcb *ecb, uchar *p, int n)	/* p: Desblk bytes of IV followed by n-Desblk bytes processed in place */
+{
+	DES3state *ds = ecb->espstate;
+
+	if(ecb->incoming) {
+		memmove(ds->ivec, p, Desblk);	/* take the IV from the front of the buffer */
+		des3CBCdecrypt(p + Desblk, n - Desblk, ds);
+	} else {
+		memmove(p, ds->ivec, Desblk);	/* write the state's current IV at the front of the buffer */
+		des3CBCencrypt(p + Desblk, n - Desblk, ds);
+	}
+	return 1;	/* never reports failure */
+}
+
 static void
-desespinit(Espcb *ecb, char *name, uchar *k, int n)
+desespinit(Espcb *ecb, char *name, uchar *k, unsigned n)
 {
-	uchar key[8];
-	uchar ivec[8];
-	int i;
-	
-	// bits to bytes
-	n = (n+7)>>3;
-	if(n > 8)
-		n = 8;
+	uchar key[Desblk], ivec[Desblk];
+
+	n = BITS2BYTES(n);
+	if(n > Desblk)
+		n = Desblk;
 	memset(key, 0, sizeof(key));
 	memmove(key, k, n);
-	for(i=0; i<8; i++)
-		ivec[i] = nrand(256);
+	prng(ivec, Desblk);
 	ecb->espalg = name;
-	ecb->espblklen = 8;
-	ecb->espivlen = 8;
+	ecb->espblklen = Desblk;
+	ecb->espivlen = Desblk;
+
 	ecb->cipher = descipher;
-	ecb->espstate = smalloc(sizeof(DESstate));
+	ecb->espstate = secalloc(sizeof(DESstate));
 	setupDESstate(ecb->espstate, key, ivec);
+	memset(ivec, 0, sizeof(ivec));
+	memset(key, 0, sizeof(key));
 }
 
-static int
-rc4cipher(Espcb *ecb, uchar *p, int n)
+static void
+des3espinit(Espcb *ecb, char *name, uchar *k, unsigned n)
 {
-	Esprc4 *esprc4;
-	RC4state tmpstate;
-	ulong seq;
-	long d, dd;
+	uchar key[3][Desblk], ivec[Desblk];
 
-	if(n < 4)
-		return 0;
+	n = BITS2BYTES(n);
+	if(n > Des3keysz)
+		n = Des3keysz;
+	memset(key, 0, sizeof(key));
+	memmove(key, k, n);
+	prng(ivec, Desblk);
+	ecb->espalg = name;
+	ecb->espblklen = Desblk;
+	ecb->espivlen = Desblk;
 
-	esprc4 = ecb->espstate;
-	if(ecb->incoming) {
-		seq = nhgetl(p);
-		p += 4;
-		n -= 4;
-		d = seq-esprc4->cseq;
-		if(d == 0) {
-			rc4(&esprc4->current, p, n);
-			esprc4->cseq += n;
-			if(esprc4->ovalid) {
-				dd = esprc4->cseq - esprc4->lgseq;
-				if(dd > RC4back)
-					esprc4->ovalid = 0;
-			}
-		} else if(d > 0) {
-print("missing packet: %uld %ld\n", seq, d);
-			// this link is hosed
-			if(d > RC4forward) {
-				strcpy(up->errstr, "rc4cipher: skipped too much");
-				return 0;
-			}
-			esprc4->lgseq = seq;
-			if(!esprc4->ovalid) {
-				esprc4->ovalid = 1;
-				esprc4->oseq = esprc4->cseq;
-				memmove(&esprc4->old, &esprc4->current, sizeof(RC4state));
-			}
-			rc4skip(&esprc4->current, d);
-			rc4(&esprc4->current, p, n);
-			esprc4->cseq = seq+n;
-		} else {
-print("reordered packet: %uld %ld\n", seq, d);
-			dd = seq - esprc4->oseq;
-			if(!esprc4->ovalid || -d > RC4back || dd < 0) {
-				strcpy(up->errstr, "rc4cipher: too far back");
-				return 0;
-			}
-			memmove(&tmpstate, &esprc4->old, sizeof(RC4state));
-			rc4skip(&tmpstate, dd);
-			rc4(&tmpstate, p, n);
-			return 1;
-		}
-
-		// move old state up
-		if(esprc4->ovalid) {
-			dd = esprc4->cseq - RC4back - esprc4->oseq;
-			if(dd > 0) {
-				rc4skip(&esprc4->old, dd);
-				esprc4->oseq += dd;
-			}
-		}
-	} else {
-		hnputl(p, esprc4->cseq);
-		p += 4;
-		n -= 4;
-		rc4(&esprc4->current, p, n);
-		esprc4->cseq += n;
-	}
-	return 1;
+	ecb->cipher = des3cipher;
+	ecb->espstate = secalloc(sizeof(DES3state));
+	setupDES3state(ecb->espstate, key, ivec);
+	memset(ivec, 0, sizeof(ivec));
+	memset(key, 0, sizeof(key));
 }
 
-static void
-rc4espinit(Espcb *ecb, char *name, uchar *k, int n)
-{	
-	Esprc4 *esprc4;
 
-	// bits to bytes
-	n = (n+7)>>3;
-	esprc4 = smalloc(sizeof(Esprc4));
-	memset(esprc4, 0, sizeof(Esprc4));
-	setupRC4state(&esprc4->current, k, n);
-	ecb->espalg = name;
-	ecb->espblklen = 4;
-	ecb->espivlen = 4;
-	ecb->cipher = rc4cipher;
-	ecb->espstate = esprc4;
-}
-	
+/*
+ * interfacing to devip
+ */
 void
 espinit(Fs *fs)
 {
diff -u a/os/ip//ethermedium.c b/os/ip//ethermedium.c
--- a/os/ip//ethermedium.c
+++ b/os/ip//ethermedium.c
@@ -5,9 +5,9 @@
 #include "fns.h"
 #include "../port/error.h"
 
+#include "../port/netif.h"
 #include "ip.h"
 #include "ipv6.h"
-#include "kernel.h"
 
 typedef struct Etherhdr Etherhdr;
 struct Etherhdr
@@ -18,10 +18,10 @@
 };
 
 static uchar ipbroadcast[IPaddrlen] = {
-	0xff,0xff,0xff,0xff,  
-	0xff,0xff,0xff,0xff,  
-	0xff,0xff,0xff,0xff,  
 	0xff,0xff,0xff,0xff,
+	0xff,0xff,0xff,0xff,
+	0xff,0xff,0xff,0xff,
+	0xff,0xff,0xff,0xff,
 };
 
 static uchar etherbroadcast[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
@@ -33,12 +33,12 @@
 static void	etherbwrite(Ipifc *ifc, Block *bp, int version, uchar *ip);
 static void	etheraddmulti(Ipifc *ifc, uchar *a, uchar *ia);
 static void	etherremmulti(Ipifc *ifc, uchar *a, uchar *ia);
+static void	etherareg(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *ip);
 static Block*	multicastarp(Fs *f, Arpent *a, Medium*, uchar *mac);
 static void	sendarp(Ipifc *ifc, Arpent *a);
-static void	sendgarp(Ipifc *ifc, uchar*);
+static void	sendndp(Ipifc *ifc, Arpent *a);
 static int	multicastea(uchar *ea, uchar *ip);
 static void	recvarpproc(void*);
-static void	resolveaddr6(Ipifc *ifc, Arpent *a);
 static void	etherpref2addr(uchar *pref, uchar *ea);
 
 Medium ethermedium =
@@ -53,8 +53,7 @@
 .bwrite=	etherbwrite,
 .addmulti=	etheraddmulti,
 .remmulti=	etherremmulti,
-.ares=		arpenter,
-.areg=		sendgarp,
+.areg=		etherareg,
 .pref2addr=	etherpref2addr,
 };
 
@@ -70,8 +69,7 @@
 .bwrite=	etherbwrite,
 .addmulti=	etheraddmulti,
 .remmulti=	etherremmulti,
-.ares=		arpenter,
-.areg=		sendgarp,
+.areg=		etherareg,
 .pref2addr=	etherpref2addr,
 };
 
@@ -94,9 +92,6 @@
  */
 enum
 {
-	ETARP		= 0x0806,
-	ETIP4		= 0x0800,
-	ETIP6		= 0x86DD,
 	ARPREQUEST	= 1,
 	ARPREPLY	= 2,
 };
@@ -127,128 +122,92 @@
 static void
 etherbind(Ipifc *ifc, int argc, char **argv)
 {
-	Chan *mchan4, *cchan4, *achan, *mchan6, *cchan6;
-	char addr[Maxpath];	//char addr[2*KNAMELEN];
-	char dir[Maxpath];	//char dir[2*KNAMELEN];
-	char *buf;
-	int fd, cfd, n;
-	char *ptr;
+	char addr[Maxpath], dir[Maxpath];
 	Etherrock *er;
+	Chan *c;
+	int n;
 
 	if(argc < 2)
 		error(Ebadarg);
 
-	mchan4 = cchan4 = achan = mchan6 = cchan6 = nil;
-	buf = nil;
+	/*
+	 *  get mac address
+	 */
+	snprint(addr, sizeof(addr), "%s/addr", argv[2]);
+	c = namec(addr, Aopen, OREAD, 0);
 	if(waserror()){
-		if(mchan4 != nil)
-			cclose(mchan4);
-		if(cchan4 != nil)
-			cclose(cchan4);
-		if(achan != nil)
-			cclose(achan);
-		if(mchan6 != nil)
-			cclose(mchan6);
-		if(cchan6 != nil)
-			cclose(cchan6);
-		if(buf != nil)
-			free(buf);
-		nexterror(); 
+		cclose(c);
+		nexterror();
 	}
+	n = devtab[c->type]->read(c, addr, sizeof(addr)-1, 0);
+	if(n < 0)
+		error(Eio);
+	addr[n] = 0;
+	if(parsemac(ifc->mac, addr, sizeof(ifc->mac)) != 6)
+		error("could not find mac address");
+	cclose(c);
+	poperror();
 
+	er = smalloc(sizeof(*er));
+	er->read4p = er->read6p = er->arpp = (void*)-1;
+	er->mchan4 = er->cchan4 = er->mchan6 = er->cchan6 = er->achan = nil;
+	er->f = ifc->conv->p->f;
+
+	if(waserror()){
+		if(er->mchan4 != nil)
+			cclose(er->mchan4);
+		if(er->cchan4 != nil)
+			cclose(er->cchan4);
+		if(er->mchan6 != nil)
+			cclose(er->mchan6);
+		if(er->cchan6 != nil)
+			cclose(er->cchan6);
+		if(er->achan != nil)
+			cclose(er->achan);
+		free(er);
+		nexterror();
+	}
+
 	/*
-	 *  open ip converstation
+	 *  open ipv4 conversation
 	 *
 	 *  the dial will fail if the type is already open on
 	 *  this device.
 	 */
-	snprint(addr, sizeof(addr), "%s!0x800", argv[2]);
-	fd = kdial(addr, nil, dir, &cfd);
-	if(fd < 0)
-		errorf("dial 0x800 failed: %s", up->env->errstr);
-	mchan4 = commonfdtochan(fd, ORDWR, 0, 1);
-	cchan4 = commonfdtochan(cfd, ORDWR, 0, 1);
-	kclose(fd);
-	kclose(cfd);
+	snprint(addr, sizeof(addr), "%s!0x800", argv[2]);	/* ETIP4 */
+	er->mchan4 = chandial(addr, nil, dir, &er->cchan4);
 
 	/*
 	 *  make it non-blocking
 	 */
-	devtab[cchan4->type]->write(cchan4, nbmsg, strlen(nbmsg), 0);
+	devtab[er->cchan4->type]->write(er->cchan4, nbmsg, strlen(nbmsg), 0);
 
 	/*
-	 *  get mac address and speed
-	 */
-	snprint(addr, sizeof(addr), "%s/stats", dir);
-	fd = kopen(addr, OREAD);
-	if(fd < 0)
-		errorf("can't open ether stats: %s", up->env->errstr);
-
-	buf = smalloc(512);
-	n = kread(fd, buf, 511);
-	kclose(fd);
-	if(n <= 0)
-		error(Eio);
-	buf[n] = 0;
-
-	ptr = strstr(buf, "addr: ");
-	if(!ptr)
-		error(Eio);
-	ptr += 6;
-	parsemac(ifc->mac, ptr, 6);
-
-	ptr = strstr(buf, "mbps: ");
-	if(ptr){
-		ptr += 6;
-		ifc->mbps = atoi(ptr);
-	} else
-		ifc->mbps = 100;
-
-	/*
- 	 *  open arp conversation
-	 */
-	snprint(addr, sizeof(addr), "%s!0x806", argv[2]);
-	fd = kdial(addr, nil, nil, nil);
-	if(fd < 0)
-		errorf("dial 0x806 failed: %s", up->env->errstr);
-	achan = commonfdtochan(fd, ORDWR, 0, 1);
-	kclose(fd);
-
-	/*
-	 *  open ip conversation
+	 *  open ipv6 conversation
 	 *
 	 *  the dial will fail if the type is already open on
 	 *  this device.
 	 */
-	snprint(addr, sizeof(addr), "%s!0x86DD", argv[2]);
-	fd = kdial(addr, nil, dir, &cfd);
-	if(fd < 0)
-		errorf("dial 0x86DD failed: %s", up->env->errstr);
-	mchan6 = commonfdtochan(fd, ORDWR, 0, 1);
-	cchan6 = commonfdtochan(cfd, ORDWR, 0, 1);
-	kclose(fd);
-	kclose(cfd);
+	snprint(addr, sizeof(addr), "%s!0x86DD", argv[2]);	/* ETIP6 */
+	er->mchan6 = chandial(addr, nil, dir, &er->cchan6);
 
 	/*
 	 *  make it non-blocking
 	 */
-	devtab[cchan6->type]->write(cchan6, nbmsg, strlen(nbmsg), 0);
+	devtab[er->cchan6->type]->write(er->cchan6, nbmsg, strlen(nbmsg), 0);
 
-	er = smalloc(sizeof(*er));
-	er->mchan4 = mchan4;
-	er->cchan4 = cchan4;
-	er->achan = achan;
-	er->mchan6 = mchan6;
-	er->cchan6 = cchan6;
-	er->f = ifc->conv->p->f;
-	ifc->arg = er;
-
-	free(buf);
+	/*
+ 	 *  open arp conversation
+	 */
+	snprint(addr, sizeof(addr), "%s!0x806", argv[2]);	/* ETARP */
+	er->achan = chandial(addr, nil, nil, nil);
 	poperror();
 
-	kproc("etherread4", etherread4, ifc, 0);
-	kproc("recvarpproc", recvarpproc, ifc, 0);
-	kproc("etherread6", etherread6, ifc, 0);
+	ifc->arg = er;
+
+	kproc("etherread4", etherread4, ifc);
+	kproc("etherread6", etherread6, ifc);
+	kproc("recvarpproc", recvarpproc, ifc);
 }
 
 /*
@@ -259,21 +218,35 @@
 {
 	Etherrock *er = ifc->arg;
 
-	if(er->read4p)
+	while(waserror())
+		;
+
+	/* wait for readers to start */
+	while(er->arpp == (void*)-1 || er->read4p == (void*)-1 || er->read6p == (void*)-1)
+		tsleep(&up->sleep, return0, 0, 300);
+
+	if(er->read4p != nil)
 		postnote(er->read4p, 1, "unbind", 0);
-	if(er->read6p)
+	if(er->read6p != nil)
 		postnote(er->read6p, 1, "unbind", 0);
-	if(er->arpp)
+	if(er->arpp != nil)
 		postnote(er->arpp, 1, "unbind", 0);
 
+	poperror();
+
+	wunlock(ifc);
+	while(waserror())
+		;
+
 	/* wait for readers to die */
-	while(er->arpp != 0 || er->read4p != 0 || er->read6p != 0)
+	while(er->arpp != nil || er->read4p != nil || er->read6p != nil)
 		tsleep(&up->sleep, return0, 0, 300);
 
+	poperror();
+	wlock(ifc);
+
 	if(er->mchan4 != nil)
 		cclose(er->mchan4);
-	if(er->achan != nil)
-		cclose(er->achan);
 	if(er->cchan4 != nil)
 		cclose(er->cchan4);
 	if(er->mchan6 != nil)
@@ -280,6 +253,8 @@
 		cclose(er->mchan6);
 	if(er->cchan6 != nil)
 		cclose(er->cchan6);
+	if(er->achan != nil)
+		cclose(er->achan);
 
 	free(er);
 }
@@ -297,16 +272,16 @@
 
 	/* get mac address of destination */
 	a = arpget(er->f->arp, bp, version, ifc, ip, mac);
-	if(a){
+	if(a != nil){
 		/* check for broadcast or multicast */
 		bp = multicastarp(er->f, a, ifc->m, mac);
-		if(bp==nil){
+		if(bp == nil){
 			switch(version){
 			case V4:
 				sendarp(ifc, a);
 				break;
-			case V6: 
-				resolveaddr6(ifc, a);
+			case V6:
+				sendndp(ifc, a);
 				break;
 			default:
 				panic("etherbwrite: version %d", version);
@@ -317,8 +292,6 @@
 
 	/* make it a single block with space for the ether header */
 	bp = padblock(bp, ifc->m->hsize);
-	if(bp->next)
-		bp = concatblock(bp);
 	if(BLEN(bp) < ifc->mintu)
 		bp = adjustblock(bp, ifc->mintu);
 	eh = (Etherhdr*)bp->rp;
@@ -358,29 +331,28 @@
 	ifc = a;
 	er = ifc->arg;
 	er->read4p = up;	/* hide identity under a rock for unbind */
-	if(waserror()){
-		er->read4p = 0;
-		pexit("hangup", 1);
-	}
+	if(!waserror())
 	for(;;){
 		bp = devtab[er->mchan4->type]->bread(er->mchan4, ifc->maxtu, 0);
-		if(!canrlock(ifc)){
-			freeb(bp);
-			continue;
-		}
+		if(bp == nil)
+			break;
+		rlock(ifc);
 		if(waserror()){
 			runlock(ifc);
 			nexterror();
 		}
 		ifc->in++;
-		bp->rp += ifc->m->hsize;
-		if(ifc->lifc == nil)
+		if(ifc->lifc == nil || BLEN(bp) <= ifc->m->hsize)
 			freeb(bp);
-		else
+		else {
+			bp->rp += ifc->m->hsize;
 			ipiput4(er->f, ifc, bp);
+		}
 		runlock(ifc);
 		poperror();
 	}
+	er->read4p = nil;
+	pexit("hangup", 1);
 }
 
 
@@ -397,29 +369,28 @@
 	ifc = a;
 	er = ifc->arg;
 	er->read6p = up;	/* hide identity under a rock for unbind */
-	if(waserror()){
-		er->read6p = 0;
-		pexit("hangup", 1);
-	}
+	if(!waserror())
 	for(;;){
 		bp = devtab[er->mchan6->type]->bread(er->mchan6, ifc->maxtu, 0);
-		if(!canrlock(ifc)){
-			freeb(bp);
-			continue;
-		}
+		if(bp == nil)
+			break;
+		rlock(ifc);
 		if(waserror()){
 			runlock(ifc);
 			nexterror();
 		}
 		ifc->in++;
-		bp->rp += ifc->m->hsize;
-		if(ifc->lifc == nil)
+		if(ifc->lifc == nil || BLEN(bp) <= ifc->m->hsize)
 			freeb(bp);
-		else
+		else {
+			bp->rp += ifc->m->hsize;
 			ipiput6(er->f, ifc, bp);
+		}
 		runlock(ifc);
 		poperror();
 	}
+	er->read6p = nil;
+	pexit("hangup", 1);
 }
 
 static void
@@ -477,6 +448,7 @@
 	Block *bp;
 	Etherarp *e;
 	Etherrock *er = ifc->arg;
+	uchar targ[IPv4addrlen], src[IPv4addrlen];
 
 	/* don't do anything if it's been less than a second since the last */
 	if(NOW - a->ctime < 1000){
@@ -484,6 +456,9 @@
 		return;
 	}
 
+	/* try to keep it around for a second more */
+	a->ctime = NOW;
+
 	/* remove all but the last message */
 	while((bp = a->hold) != nil){
 		if(bp == a->last)
@@ -492,18 +467,20 @@
 		freeblist(bp);
 	}
 
-	/* try to keep it around for a second more */
-	a->ctime = NOW;
+	memmove(targ, a->ip+IPv4off, IPv4addrlen);
 	arprelease(er->f->arp, a);
 
+	if(!ipv4local(ifc, src, 0, targ))
+		return;
+
 	n = sizeof(Etherarp);
-	if(n < a->type->mintu)
-		n = a->type->mintu;
+	if(n < ifc->m->mintu)
+		n = ifc->m->mintu;
 	bp = allocb(n);
 	memset(bp->rp, 0, n);
 	e = (Etherarp*)bp->rp;
-	memmove(e->tpa, a->ip+IPv4off, sizeof(e->tpa));
-	ipv4local(ifc, e->spa);
+	memmove(e->tpa, targ, sizeof(e->tpa));
+	memmove(e->spa, src, sizeof(e->spa));
 	memmove(e->sha, ifc->mac, sizeof(e->sha));
 	memset(e->d, 0xff, sizeof(e->d));		/* ethernet broadcast */
 	memmove(e->s, ifc->mac, sizeof(e->s));
@@ -516,18 +493,14 @@
 	hnputs(e->op, ARPREQUEST);
 	bp->wp += n;
 
-	n = devtab[er->achan->type]->bwrite(er->achan, bp, 0);
-	if(n < 0)
-		print("arp: send: %r\n");
+	devtab[er->achan->type]->bwrite(er->achan, bp, 0);
 }
 
 static void
-resolveaddr6(Ipifc *ifc, Arpent *a)
+sendndp(Ipifc *ifc, Arpent *a)
 {
-	int sflag;
 	Block *bp;
 	Etherrock *er = ifc->arg;
-	uchar ipsrc[IPaddrlen];
 
 	/* don't do anything if it's been less than a second since the last */
 	if(NOW - a->ctime < ReTransTimer){
@@ -543,19 +516,7 @@
 		freeblist(bp);
 	}
 
-	/* try to keep it around for a second more */
-	a->ctime = NOW;
-	a->rtime = NOW + ReTransTimer;
-	if(a->rxtsrem <= 0) {
-		arprelease(er->f->arp, a);
-		return;
-	}
-
-	a->rxtsrem--;
-	arprelease(er->f->arp, a);
-
-	if(sflag = ipv6anylocal(ifc, ipsrc)) 
-		icmpns(er->f, ipsrc, sflag, a->ip, TARG_MULTI, ifc->mac);
+	ndpsendsol(er->f, ifc, a);	/* unlocks arp */
 }
 
 /*
@@ -569,10 +530,6 @@
 	Etherarp *e;
 	Etherrock *er = ifc->arg;
 
-	/* don't arp for our initial non address */
-	if(ipcmp(ip, IPnoaddr) == 0)
-		return;
-
 	n = sizeof(Etherarp);
 	if(n < ifc->m->mintu)
 		n = ifc->m->mintu;
@@ -593,15 +550,13 @@
 	hnputs(e->op, ARPREQUEST);
 	bp->wp += n;
 
-	n = devtab[er->achan->type]->bwrite(er->achan, bp, 0);
-	if(n < 0)
-		print("garp: send: %r\n");
+	devtab[er->achan->type]->bwrite(er->achan, bp, 0);
 }
 
 static void
 recvarp(Ipifc *ifc)
 {
-	int n;
+	int n, forme;
 	Block *ebp, *rbp;
 	Etherarp *e, *r;
 	uchar ip[IPaddrlen];
@@ -609,11 +564,11 @@
 	Etherrock *er = ifc->arg;
 
 	ebp = devtab[er->achan->type]->bread(er->achan, ifc->maxtu, 0);
-	if(ebp == nil) {
-		print("arp: rcv: %r\n");
+	if(ebp == nil)
 		return;
-	}
 
+	rlock(ifc);
+
 	e = (Etherarp*)ebp->rp;
 	switch(nhgets(e->op)) {
 	default:
@@ -620,9 +575,13 @@
 		break;
 
 	case ARPREPLY:
+		/* make sure not to enter multi/broadcast address */
+		if(e->sha[0] & 1)
+			break;
+
 		/* check for machine using my ip address */
 		v4tov6(ip, e->spa);
-		if(iplocalonifc(ifc, ip) || ipproxyifc(er->f, ifc, ip)){
+		if(iplocalonifc(ifc, ip) != nil || ipproxyifc(er->f, ifc, ip)){
 			if(memcmp(e->sha, ifc->mac, sizeof(e->sha)) != 0){
 				print("arprep: 0x%E/0x%E also has ip addr %V\n",
 					e->s, e->sha, e->spa);
@@ -630,46 +589,47 @@
 			}
 		}
 
-		/* make sure we're not entering broadcast addresses */
-		if(ipcmp(ip, ipbroadcast) == 0 ||
-			!memcmp(e->sha, etherbroadcast, sizeof(e->sha))){
-			print("arprep: 0x%E/0x%E cannot register broadcast address %I\n",
-				e->s, e->sha, e->spa);
-			break;
-		}
-
-		arpenter(er->f, V4, e->spa, e->sha, sizeof(e->sha), 0);
+		/* refresh what we know about sender */
+		arpenter(er->f, V4, e->spa, e->sha, sizeof(e->sha), e->tpa, ifc, 1);
 		break;
 
 	case ARPREQUEST:
+		/* don't reply to multi/broadcast addresses */
+		if(e->sha[0] & 1)
+			break;
+
 		/* don't answer arps till we know who we are */
-		if(ifc->lifc == 0)
+		if(ifc->lifc == nil)
 			break;
 
 		/* check for machine using my ip or ether address */
 		v4tov6(ip, e->spa);
-		if(iplocalonifc(ifc, ip) || ipproxyifc(er->f, ifc, ip)){
+		if(iplocalonifc(ifc, ip) != nil || ipproxyifc(er->f, ifc, ip)){
 			if(memcmp(e->sha, ifc->mac, sizeof(e->sha)) != 0){
-				if (memcmp(eprinted, e->spa, sizeof(e->spa))){
+				if(memcmp(eprinted, e->spa, sizeof(e->spa)) != 0){
 					/* print only once */
-					print("arpreq: 0x%E also has ip addr %V\n", e->sha, e->spa);
+					print("arpreq: 0x%E also has ip addr %V\n",
+						e->sha, e->spa);
 					memmove(eprinted, e->spa, sizeof(e->spa));
 				}
+				break;
 			}
 		} else {
 			if(memcmp(e->sha, ifc->mac, sizeof(e->sha)) == 0){
-				print("arpreq: %V also has ether addr %E\n", e->spa, e->sha);
+				print("arpreq: %V also has ether addr %E\n",
+					e->spa, e->sha);
 				break;
 			}
 		}
 
-		/* refresh what we know about sender */
-		arpenter(er->f, V4, e->spa, e->sha, sizeof(e->sha), 1);
-
-		/* answer only requests for our address or systems we're proxying for */
+		/*
+		 * when request is for our address or systems we're proxying for,
+		 * enter senders address into arp table and reply, otherwise just
+		 * refresh the senders address.
+		 */
 		v4tov6(ip, e->tpa);
-		if(!iplocalonifc(ifc, ip))
-		if(!ipproxyifc(er->f, ifc, ip))
+		forme = iplocalonifc(ifc, ip) != nil || ipproxyifc(er->f, ifc, ip);
+		if(arpenter(er->f, V4, e->spa, e->sha, sizeof(e->sha), e->tpa, ifc, !forme) < 0 || !forme)
 			break;
 
 		n = sizeof(Etherarp);
@@ -692,10 +652,14 @@
 		memmove(r->s, ifc->mac, sizeof(r->s));
 		rbp->wp += n;
 
-		n = devtab[er->achan->type]->bwrite(er->achan, rbp, 0);
-		if(n < 0)
-			print("arp: write: %r\n");
+		runlock(ifc);
+		freeb(ebp);
+
+		devtab[er->achan->type]->bwrite(er->achan, rbp, 0);
+		return;
 	}
+
+	runlock(ifc);
 	freeb(ebp);
 }
 
@@ -707,7 +671,7 @@
 
 	er->arpp = up;
 	if(waserror()){
-		er->arpp = 0;
+		er->arpp = nil;
 		pexit("hangup", 1);
 	}
 	for(;;)
@@ -749,14 +713,9 @@
 multicastarp(Fs *f, Arpent *a, Medium *medium, uchar *mac)
 {
 	/* is it broadcast? */
-	switch(ipforme(f, a->ip)){
-	case Runi:
-		return nil;
-	case Rbcast:
-		memset(mac, 0xff, 6);
+	if(ipforme(f, a->ip) == Rbcast){
+		memset(mac, 0xff, medium->maclen);
 		return arpresolve(f->arp, a, medium, mac);
-	default:
-		break;
 	}
 
 	/* if multicast, fill in mac */
@@ -778,11 +737,11 @@
 }
 
 
-static void 
+static void
 etherpref2addr(uchar *pref, uchar *ea)
 {
-	pref[8]  = ea[0] | 0x2;
-	pref[9]  = ea[1];
+	pref[8] = ea[0] ^ 0x2;
+	pref[9] = ea[1];
 	pref[10] = ea[2];
 	pref[11] = 0xFF;
 	pref[12] = 0xFE;
@@ -789,4 +748,41 @@
 	pref[13] = ea[3];
 	pref[14] = ea[4];
 	pref[15] = ea[5];
+}
+
+static void
+etherareg(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *ip)
+{
+	static char tdad[] = "dad6";
+	uchar a[IPaddrlen];
+
+	if(ipcmp(ip, IPnoaddr) == 0 || ipcmp(ip, v4prefix) == 0)
+		return;
+
+	if(isv4(ip)){
+		sendgarp(ifc, ip);
+		return;
+	}
+
+	if((lifc->type&Rv4) != 0)
+		return;
+
+	if(!lifc->tentative){
+		icmpna(f, lifc->local, v6allnodesL, ip, ifc->mac, 1<<5);
+		return;
+	}
+
+	if(ipcmp(lifc->local, ip) != 0)
+		return;
+
+	/* temporarily add route for duplicate address detection */
+	ipv62smcast(a, ip);
+	addroute(f, a, IPallbits, v6Unspecified, IPallbits, ip, Rmulti, ifc, tdad);
+	if(waserror()){
+		remroute(f, a, IPallbits, v6Unspecified, IPallbits, ip, Rmulti, ifc, tdad);
+		nexterror();
+	}
+	icmpns(f, 0, SRC_UNSPEC, ip, TARG_MULTI, ifc->mac);
+	poperror();
+	remroute(f, a, IPallbits, v6Unspecified, IPallbits, ip, Rmulti, ifc, tdad);
 }
diff -u a/os/ip//gre.c b/os/ip//gre.c
--- a/os/ip//gre.c
+++ b/os/ip//gre.c
@@ -1,3 +1,6 @@
+/*
+ * Generic Routing Encapsulation over IPv4, rfc1702
+ */
 #include "u.h"
 #include "../port/lib.h"
 #include "mem.h"
@@ -7,10 +10,7 @@
 
 #include "ip.h"
 
-#define DPRINT if(0)print
-
-enum
-{
+enum {
 	GRE_IPONLY	= 12,		/* size of ip header */
 	GRE_IPPLUSGRE	= 12,		/* minimum size of GRE header */
 	IP_GREPROTO	= 47,
@@ -18,10 +18,33 @@
 	GRErxms		= 200,
 	GREtickms	= 100,
 	GREmaxxmit	= 10,
+
+	K		= 1024,
+	GREqlen		= 256 * K,
+
+	GRE_cksum	= 0x8000,
+	GRE_routing	= 0x4000,
+	GRE_key		= 0x2000,
+	GRE_seq		= 0x1000,
+
+	Nring		= 1 << 10,	/* power of two, please */
+	Ringmask	= Nring - 1,
+
+	GREctlraw	= 0,
+	GREctlcooked,
+	GREctlretunnel,
+	GREctlreport,
+	GREctldlsuspend,
+	GREctlulsuspend,
+	GREctldlresume,
+	GREctlulresume,
+	GREctlforward,
+	GREctlulkey,
+	Ncmds,
 };
 
-typedef struct GREhdr
-{
+typedef struct GREhdr GREhdr;
+struct GREhdr{
 	/* ip header */
 	uchar	vihl;		/* Version and header length */
 	uchar	tos;		/* Type of service */
@@ -28,7 +51,7 @@
 	uchar	len[2];		/* packet length (including headers) */
 	uchar	id[2];		/* Identification */
 	uchar	frag[2];	/* Fragment information */
-	uchar	Unused;	
+	uchar	ttl;
 	uchar	proto;		/* Protocol */
 	uchar	cksum[2];	/* checksum */
 	uchar	src[4];		/* Ip source */
@@ -37,21 +60,115 @@
 	/* gre header */
 	uchar	flags[2];
 	uchar	eproto[2];	/* encapsulation protocol */
-} GREhdr;
+};
 
 typedef struct GREpriv GREpriv;
-struct GREpriv
-{
-	int		raw;			/* Raw GRE mode */
-
+struct GREpriv{
 	/* non-MIB stats */
-	ulong		csumerr;		/* checksum errors */
-	ulong		lenerr;			/* short packet */
+	uvlong	lenerr;			/* short packet */
 };
 
+typedef struct Bring	Bring;
+struct Bring{
+	Block	*ring[Nring];
+	long	produced;
+	long	consumed;
+};
+
+typedef struct GREconv	GREconv;
+struct GREconv{
+	int	raw;
+
+	/* Retunnelling information.  v4 only */
+	uchar	north[4];			/* HA */
+	uchar	south[4];			/* Base station */
+	uchar	hoa[4];				/* Home address */
+	uchar	coa[4];				/* Careof address */
+	ulong	seq;				/* Current sequence # */
+	int	dlsusp;				/* Downlink suspended? */
+	int	ulsusp;				/* Uplink suspended? */
+	ulong	ulkey;				/* GRE key */
+
+	QLock	lock;				/* Lock for rings */
+	Bring	dlpending;			/* Ring of pending packets */
+	Bring	dlbuffered;			/* Received while suspended */
+	Bring	ulbuffered;			/* Received while suspended */
+};
+
+typedef struct Metablock Metablock;
+struct Metablock{
+	uchar	*rp;
+	ulong	seq;
+};
+
+static char *grectlcooked(Conv *, int, char **);
+static char *grectldlresume(Conv *, int, char **);
+static char *grectldlsuspend(Conv *, int, char **);
+static char *grectlforward(Conv *, int, char **);
+static char *grectlraw(Conv *, int, char **);
+static char *grectlreport(Conv *, int, char **);
+static char *grectlretunnel(Conv *, int, char **);
+static char *grectlulkey(Conv *, int, char **);
+static char *grectlulresume(Conv *, int, char **);
+static char *grectlulsuspend(Conv *, int, char **);
+
+static struct{
+	char	*cmd;
+	int	argc;
+	char	*(*f)(Conv *, int, char **);
+} grectls[Ncmds] = {
+[GREctlraw]	=	{	"raw",		1,	grectlraw,	},
+[GREctlcooked]	=	{	"cooked",	1,	grectlcooked,	},
+[GREctlretunnel]=	{	"retunnel",	5,	grectlretunnel,	},
+[GREctlreport]	=	{	"report",	2,	grectlreport,	},
+[GREctldlsuspend]=	{	"dlsuspend",	1,	grectldlsuspend,},
+[GREctlulsuspend]=	{	"ulsuspend",	1,	grectlulsuspend,},
+[GREctldlresume]=	{	"dlresume",	1,	grectldlresume,	},
+[GREctlulresume]=	{	"ulresume",	1,	grectlulresume,	},
+[GREctlforward]	=	{	"forward",	2,	grectlforward,	},
+[GREctlulkey]	=	{	"ulkey",	2,	grectlulkey,	},
+};
+
+static uchar nulladdr[4];
+static char *sessend = "session end";
+
 static void grekick(void *x, Block *bp);
+static char *gresetup(Conv *, char *, char *, char *);
 
-static char*
+uvlong grepdin, grepdout, grebdin, grebdout;
+uvlong grepuin, grepuout, grebuin, grebuout;
+
+/* Remove and return the oldest block queued in ring r, or nil if r is empty. */
+static Block *
+getring(Bring *r)
+{
+	Block **slot, *bp;
+
+	if(r->produced == r->consumed)
+		return nil;
+	slot = &r->ring[r->consumed++ & Ringmask];
+	bp = *slot;
+	*slot = nil;
+	return bp;
+}
+
+/* Append bp to ring r; a full ring frees its oldest block to make room. */
+static void
+addring(Bring *r, Block *bp)
+{
+	Block *tbp;
+
+	if(r->produced - r->consumed > Ringmask){	/* Full! */
+		tbp = r->ring[r->produced & Ringmask];	/* oldest entry lives in the slot we reuse */
+		assert(tbp);
+		freeb(tbp);
+		r->consumed++;
+	}
+	r->ring[r->produced & Ringmask] = bp;
+	r->produced++;
+}
+
+static char *
 greconnect(Conv *c, char **argv, int argc)
 {
 	Proto *p;
@@ -91,7 +208,7 @@
 static void
 grecreate(Conv *c)
 {
-	c->rq = qopen(64*1024, Qmsg, 0, c);
+	c->rq = qopen(GREqlen, Qmsg, 0, c);
 	c->wq = qbypass(grekick, c);
 }
 
@@ -98,44 +215,88 @@
 static int
 grestate(Conv *c, char *state, int n)
 {
-	USED(c);
-	return snprint(state, n, "%s", "Datagram");
+	GREconv *grec;
+	char *ep, *p;
+
+	grec = c->ptcl;
+	p    = state;
+	ep   = p + n;
+	p    = seprint(p, ep, "%s%s%s%shoa %V north %V south %V seq %ulx "
+	 "pending %uld  %uld buffered dl %uld %uld ul %uld %uld ulkey %.8ulx\n",
+			c->inuse? "Open ": "Closed ",
+			grec->raw? "raw ": "",
+			grec->dlsusp? "DL suspended ": "",
+			grec->ulsusp? "UL suspended ": "",
+			grec->hoa, grec->north, grec->south, grec->seq,
+			grec->dlpending.consumed, grec->dlpending.produced,
+			grec->dlbuffered.consumed, grec->dlbuffered.produced,
+			grec->ulbuffered.consumed, grec->ulbuffered.produced,
+			grec->ulkey);
+	return p - state;
 }
 
 static char*
 greannounce(Conv*, char**, int)
 {
-	return "pktifc does not support announce";
+	return "gre does not support announce";
 }
 
 static void
 greclose(Conv *c)
 {
-	qclose(c->rq);
-	qclose(c->wq);
-	qclose(c->eq);
+	GREconv *grec;
+	Block *bp;
+
+	grec = c->ptcl;
+
+	/* Make sure we don't forward any more packets */
+	memset(grec->hoa, 0, sizeof grec->hoa);
+	memset(grec->north, 0, sizeof grec->north);
+	memset(grec->south, 0, sizeof grec->south);
+
+	qlock(&grec->lock);
+	while((bp = getring(&grec->dlpending)) != nil)
+		freeb(bp);
+
+	while((bp = getring(&grec->dlbuffered)) != nil)
+		freeb(bp);
+
+	while((bp = getring(&grec->ulbuffered)) != nil)
+		freeb(bp);
+
+	grec->dlpending.produced = grec->dlpending.consumed = 0;
+	grec->dlbuffered.produced = grec->dlbuffered.consumed = 0;
+	grec->ulbuffered.produced = grec->ulbuffered.consumed = 0;
+	qunlock(&grec->lock);
+
+	grec->raw = 0;
+	grec->seq = 0;
+	grec->dlsusp = grec->ulsusp = 1;
+
+	qhangup(c->rq, sessend);
+	qhangup(c->wq, sessend);
+	qhangup(c->eq, sessend);
 	ipmove(c->laddr, IPnoaddr);
 	ipmove(c->raddr, IPnoaddr);
-	c->lport = 0;
-	c->rport = 0;
+	c->lport = c->rport = 0;
 }
 
-int drop;
-
 static void
 grekick(void *x, Block *bp)
 {
-	Conv *c = x;
-	GREhdr *ghp;
+	Conv *c;
+	GREconv *grec;
+	GREhdr *gre;
 	uchar laddr[IPaddrlen], raddr[IPaddrlen];
 
 	if(bp == nil)
 		return;
 
+	c    = x;
+	grec = c->ptcl;
+
 	/* Make space to fit ip header (gre header already there) */
 	bp = padblock(bp, GRE_IPONLY);
-	if(bp == nil)
-		return;
 
 	/* make sure the message has a GRE header */
 	bp = pullupblock(bp, GRE_IPONLY+GRE_IPPLUSGRE);
@@ -142,90 +303,344 @@
 	if(bp == nil)
 		return;
 
-	ghp = (GREhdr *)(bp->rp);
-	ghp->vihl = IP_VER4;
+	gre = (GREhdr *)bp->rp;
+	gre->vihl = IP_VER4;
 
-	if(!((GREpriv*)c->p->priv)->raw){
-		v4tov6(raddr, ghp->dst);
+	if(grec->raw == 0){
+		v4tov6(raddr, gre->dst);
 		if(ipcmp(raddr, v4prefix) == 0)
-			memmove(ghp->dst, c->raddr + IPv4off, IPv4addrlen);
-		v4tov6(laddr, ghp->src);
+			memmove(gre->dst, c->raddr + IPv4off, IPv4addrlen);
+		v4tov6(laddr, gre->src);
 		if(ipcmp(laddr, v4prefix) == 0){
 			if(ipcmp(c->laddr, IPnoaddr) == 0)
-				findlocalip(c->p->f, c->laddr, raddr); /* pick interface closest to dest */
-			memmove(ghp->src, c->laddr + IPv4off, IPv4addrlen);
+				/* pick interface closest to dest */
+				findlocalip(c->p->f, c->laddr, raddr);
+			memmove(gre->src, c->laddr + IPv4off, sizeof gre->src);
 		}
-		hnputs(ghp->eproto, c->rport);
+		hnputs(gre->eproto, c->rport);
 	}
 
-	ghp->proto = IP_GREPROTO;
-	ghp->frag[0] = 0;
-	ghp->frag[1] = 0;
+	gre->proto = IP_GREPROTO;
+	gre->frag[0] = gre->frag[1] = 0;
 
+	grepdout++;
+	grebdout += BLEN(bp);
 	ipoput4(c->p->f, bp, 0, c->ttl, c->tos, nil);
 }
 
 static void
-greiput(Proto *gre, Ipifc*, Block *bp)
+gredownlink(Conv *c, Block *bp)
 {
-	int len;
-	GREhdr *ghp;
-	Conv *c, **p;
-	ushort eproto;
+	Metablock *m;
+	GREconv *grec;
+	GREhdr *gre;
+	int hdrlen, suspended, extra;
+	ushort flags;
+	ulong seq;
+
+	gre = (GREhdr *)bp->rp;
+	if(gre->ttl == 1){
+		freeb(bp);
+		return;
+	}
+
+	/*
+	 * We've received a packet with a GRE header and we need to
+	 * re-adjust the packet header to strip all unwanted parts
+	 * but leave room for only a sequence number.
+	 */
+	grec   = c->ptcl;
+	flags  = nhgets(gre->flags);
+	hdrlen = 0;
+	if(flags & GRE_cksum)
+		hdrlen += 2;
+	if(flags & GRE_routing){
+		print("%V routing info present.  Discarding packet", gre->src);
+		freeb(bp);
+		return;
+	}
+	if(flags & (GRE_cksum|GRE_routing))
+		hdrlen += 2;			/* Offset field */
+	if(flags & GRE_key)
+		hdrlen += 4;
+	if(flags & GRE_seq)
+		hdrlen += 4;
+
+	/*
+	 * The outgoing packet only has the sequence number set.  Make room
+	 * for the sequence number.
+	 */
+	if(hdrlen != sizeof(ulong)){
+		extra = hdrlen - sizeof(ulong);
+		if(extra < 0 && bp->rp - bp->base < -extra){
+			print("gredownlink: cannot add sequence number\n");
+			freeb(bp);
+			return;
+		}
+		memmove(bp->rp + extra, bp->rp, sizeof(GREhdr));
+		bp->rp += extra;
+		assert(BLEN(bp) >= sizeof(GREhdr) + sizeof(ulong));
+		gre = (GREhdr *)bp->rp;
+	}
+	seq = grec->seq++;
+	hnputs(gre->flags, GRE_seq);
+	hnputl(bp->rp + sizeof(GREhdr), seq);
+
+	/*
+	 * Keep rp and seq at the base.  ipoput4 consumes rp for
+	 * refragmentation.
+	 */
+	assert(bp->rp - bp->base >= sizeof(Metablock));
+	m = (Metablock *)bp->base;
+	m->rp  = bp->rp;
+	m->seq = seq;
+
+	/*
+	 * Here we make a decision what we're doing with the packet.  We're
+	 * doing this w/o holding a lock which means that later on in the
+	 * process we may discover we've done the wrong thing.  I don't want
+	 * to call ipoput with the lock held.
+	 */
+restart:
+	suspended = grec->dlsusp;
+	if(suspended){
+		if(!canqlock(&grec->lock)){
+			/*
+			 * just give up.  too bad, we lose a packet.  this
+			 * is just too hard and my brain already hurts.
+			 */
+			freeb(bp);
+			return;
+		}
+
+		if(!grec->dlsusp){
+			/*
+			 * suspend race.  We though we were suspended, but
+			 * we really weren't.
+			 */
+			qunlock(&grec->lock);
+			goto restart;
+		}
+
+		/* Undo the incorrect ref count addition */
+		addring(&grec->dlbuffered, bp);
+		qunlock(&grec->lock);
+		return;
+	}
+
+	/*
+	 * When we get here, we're not suspended.  Proceed to send the
+	 * packet.
+	 */
+	memmove(gre->src, grec->coa, sizeof gre->dst);
+	memmove(gre->dst, grec->south, sizeof gre->dst);
+
+	ipoput4(c->p->f, copyblock(bp, BLEN(bp)), 0, gre->ttl - 1, gre->tos, nil);
+	grepdout++;
+	grebdout += BLEN(bp);
+
+	/*
+	 * Now make sure we didn't do the wrong thing.
+	 */
+	if(!canqlock(&grec->lock)){
+		freeb(bp);		/* The packet just goes away */
+		return;
+	}
+
+	/* We did the right thing */
+	addring(&grec->dlpending, bp);
+	qunlock(&grec->lock);
+}
+
+/*
+ * Retunnel an uplink packet to grec->north, inserting the uplink key
+ * when one is configured.  Consumes bp on every path: it is freed,
+ * buffered while the uplink is suspended, or handed to ipoput4.
+ */
+static void
+greuplink(Conv *c, Block *bp)
+{
+	GREconv *grec;
+	GREhdr *gre;
+	ushort flags;
+
+	gre = (GREhdr *)bp->rp;
+	if(gre->ttl == 1){
+		freeb(bp);		/* expired; don't leak the block */
+		return;
+	}
+
+	grec = c->ptcl;
+	memmove(gre->src, grec->coa, sizeof gre->src);
+	memmove(gre->dst, grec->north, sizeof gre->dst);
+
+	/* Add a key, if needed. */
+	if(grec->ulkey){
+		flags = nhgets(gre->flags);
+		if(flags & (GRE_cksum|GRE_routing)){
+			print("%V routing info present.  Discarding packet\n", gre->src);
+			freeb(bp);
+			return;
+		}
+		if((flags & GRE_key) == 0){
+			/* Make room for the key */
+			if(bp->rp - bp->base < sizeof(ulong)){
+				print("%V can't add key\n", gre->src);
+				freeb(bp);
+				return;
+			}
+			bp->rp -= 4;
+			memmove(bp->rp, bp->rp + 4, sizeof(GREhdr));
+			gre = (GREhdr *)bp->rp;
+			hnputs(gre->flags, flags | GRE_key);
+		}
+		/* Add the key */
+		hnputl(bp->rp + sizeof(GREhdr), grec->ulkey);
+	}
+
+	if(!canqlock(&grec->lock)){
+		freeb(bp);
+		return;
+	}
+	if(grec->ulsusp)
+		addring(&grec->ulbuffered, bp);
+	else{
+		/* count before ipoput4: bp is no longer ours after the call */
+		grepuout++;
+		grebuout += BLEN(bp);
+		ipoput4(c->p->f, bp, 0, gre->ttl - 1, gre->tos, nil);
+	}
+	qunlock(&grec->lock);
+}
+
+static void
+greiput(Proto *proto, Ipifc *, Block *bp)
+{
+	int len, hdrlen;
+	ushort eproto, flags;
 	uchar raddr[IPaddrlen];
+	Conv *c, **p;
+	GREconv *grec;
+	GREhdr *gre;
 	GREpriv *gpriv;
+	Ip4hdr *ip;
 
-	gpriv = gre->priv;
-	ghp = (GREhdr*)(bp->rp);
+	/*
+	 * We don't want to deal with block lists.  Ever.  The problem is
+	 * that when the block is forwarded, devether.c puts the block into
+	 * a queue that also uses ->next.  Just do not use ->next here!
+	 */
+	if(bp->next != nil)
+		bp = pullupblock(bp, blocklen(bp));
 
-	v4tov6(raddr, ghp->src);
-	eproto = nhgets(ghp->eproto);
-	qlock(gre);
+	gre = (GREhdr *)bp->rp;
+	if(BLEN(bp) < sizeof(GREhdr) || gre->proto != IP_GREPROTO){
+		freeb(bp);
+		return;
+	}
 
-	/* Look for a conversation structure for this port and address */
-	c = nil;
-	for(p = gre->conv; *p; p++) {
+	v4tov6(raddr, gre->src);
+	eproto = nhgets(gre->eproto);
+	flags  = nhgets(gre->flags);
+	hdrlen = sizeof(GREhdr);
+
+	if(flags & GRE_cksum)
+		hdrlen += 2;
+	if(flags & GRE_routing){
+		print("%I routing info present.  Discarding packet\n", raddr);
+		freeb(bp);
+		return;
+	}
+	if(flags & (GRE_cksum|GRE_routing))
+		hdrlen += 2;			/* Offset field */
+	if(flags & GRE_key)
+		hdrlen += 4;
+	if(flags & GRE_seq)
+		hdrlen += 4;
+
+	qlock(proto);
+
+	if(eproto != 0x880B && BLEN(bp) - hdrlen >= sizeof(Ip4hdr)){
+		ip = (Ip4hdr *)(bp->rp + hdrlen);
+
+		/*
+		 * Look for a conversation structure for this port and address, or
+		 * match the retunnel part, or match on the raw flag.
+		 */
+		for(p = proto->conv; *p; p++) {
+			c = *p;
+
+			if(c->inuse == 0)
+				continue;
+
+			/*
+			 * Do not stop this session - blocking here
+			 * implies that etherread is blocked.
+			 */
+			grec = c->ptcl;
+			if(memcmp(ip->dst, grec->hoa, sizeof ip->dst) == 0){
+				grepdin++;
+				grebdin += BLEN(bp);
+				gredownlink(c, bp);
+				qunlock(proto);
+				return;
+			}
+
+			if(memcmp(ip->src, grec->hoa, sizeof ip->src) == 0){
+				grepuin++;
+				grebuin += BLEN(bp);
+				greuplink(c, bp);
+				qunlock(proto);
+				return;
+			}
+		}
+	}
+
+
+	/*
+	 * when we get here, none of the forwarding tunnels matched.  now
+	 * try to match on raw and conversational sessions.
+	 */
+	for(c = nil, p = proto->conv; *p; p++) {
 		c = *p;
+
 		if(c->inuse == 0)
 			continue;
-		if(c->rport == eproto && 
-			(gpriv->raw || ipcmp(c->raddr, raddr) == 0))
+
+		/*
+		 * Do not stop this session - blocking here
+		 * implies that etherread is blocked.
+		 */
+		grec = c->ptcl;
+		if(c->rport == eproto &&
+		    (grec->raw || ipcmp(c->raddr, raddr) == 0))
 			break;
 	}
 
-	if(*p == nil) {
-		qunlock(gre);
-		freeblist(bp);
+	qunlock(proto);
+
+	if(*p == nil){
+		freeb(bp);
 		return;
 	}
 
-	qunlock(gre);
-
 	/*
 	 * Trim the packet down to data size
 	 */
-	len = nhgets(ghp->len) - GRE_IPONLY;
+	len = nhgets(gre->len) - GRE_IPONLY;
 	if(len < GRE_IPPLUSGRE){
-		freeblist(bp);
+		freeb(bp);
 		return;
 	}
+
 	bp = trimblock(bp, GRE_IPONLY, len);
 	if(bp == nil){
+		gpriv = proto->priv;
 		gpriv->lenerr++;
 		return;
 	}
 
-	/*
-	 *  Can't delimit packet so pull it all into one block.
-	 */
-	if(qlen(c->rq) > 64*1024)
-		freeblist(bp);
-	else{
-		bp = concatblock(bp);
-		if(bp == 0)
-			panic("greiput");
-		qpass(c->rq, bp);
-	}
+	qpass(c->rq, bp);
 }
 
 int
@@ -234,29 +649,258 @@
 	GREpriv *gpriv;
 
 	gpriv = gre->priv;
+	return snprint(buf, len,
+		"gre: %llud %llud %llud %llud %llud %llud %llud %llud, lenerrs %llud\n",
+		grepdin, grepdout, grepuin, grepuout,
+		grebdin, grebdout, grebuin, grebuout, gpriv->lenerr);
+}
 
-	return snprint(buf, len, "gre: len %lud\n", gpriv->lenerr);
+static char *
+grectlraw(Conv *c, int, char **)
+{
+	GREconv *grec;
+
+	grec = c->ptcl;
+	grec->raw = 1;
+	return nil;
 }
 
-char*
-grectl(Conv *c, char **f, int n)
+static char *
+grectlcooked(Conv *c, int, char **)
 {
-	GREpriv *gpriv;
+	GREconv *grec;
 
-	gpriv = c->p->priv;
-	if(n == 1){
-		if(strcmp(f[0], "raw") == 0){
-			gpriv->raw = 1;
-			return nil;
-		}
-		else if(strcmp(f[0], "cooked") == 0){
-			gpriv->raw = 0;
-			return nil;
-		}
+	grec = c->ptcl;
+	grec->raw = 0;
+	return nil;
+}
+
+static char *
+grectlretunnel(Conv *c, int, char **argv)
+{
+	GREconv *grec;
+	uchar ipaddr[4];
+
+	grec = c->ptcl;
+	if(memcmp(grec->hoa, nulladdr, sizeof grec->hoa))
+		return "tunnel already set up";
+
+	v4parseip(ipaddr, argv[1]);
+	if(memcmp(ipaddr, nulladdr, sizeof ipaddr) == 0)
+		return "bad hoa";
+	memmove(grec->hoa, ipaddr, sizeof grec->hoa);
+	v4parseip(ipaddr, argv[2]);
+	memmove(grec->north, ipaddr, sizeof grec->north);
+	v4parseip(ipaddr, argv[3]);
+	memmove(grec->south, ipaddr, sizeof grec->south);
+	v4parseip(ipaddr, argv[4]);
+	memmove(grec->coa, ipaddr, sizeof grec->coa);
+	grec->ulsusp = 1;
+	grec->dlsusp = 0;
+
+	return nil;
+}
+
+static char *
+grectlreport(Conv *c, int, char **argv)
+{
+	ulong seq;
+	Block *bp;
+	Bring *r;
+	GREconv *grec;
+	Metablock *m;
+
+	grec = c->ptcl;
+	seq  = strtoul(argv[1], nil, 0);
+
+	qlock(&grec->lock);
+	r = &grec->dlpending;
+	while(r->produced - r->consumed > 0){
+		bp = r->ring[r->consumed & Ringmask];
+
+		assert(bp && bp->rp - bp->base >= sizeof(Metablock));
+		m = (Metablock *)bp->base;
+		if((long)(seq - m->seq) <= 0)
+			break;
+
+		r->ring[r->consumed & Ringmask] = nil;
+		r->consumed++;
+
+		freeb(bp);
 	}
-	return "unknown control request";
+	qunlock(&grec->lock);
+	return nil;
 }
 
+static char *
+grectldlsuspend(Conv *c, int, char **)
+{
+	GREconv *grec;
+
+	grec = c->ptcl;
+	if(grec->dlsusp)
+		return "already suspended";
+
+	grec->dlsusp = 1;
+	return nil;
+}
+
+static char *
+grectlulsuspend(Conv *c, int, char **)
+{
+	GREconv *grec;
+
+	grec = c->ptcl;
+	if(grec->ulsusp)
+		return "already suspended";
+
+	grec->ulsusp = 1;
+	return nil;
+}
+
+static char *
+grectldlresume(Conv *c, int, char **)
+{
+	GREconv *grec;
+	GREhdr *gre;
+	Block *bp;
+
+	grec = c->ptcl;
+
+	qlock(&grec->lock);
+	if(!grec->dlsusp){
+		qunlock(&grec->lock);
+		return "not suspended";
+	}
+
+	while((bp = getring(&grec->dlbuffered)) != nil){
+		gre = (GREhdr *)bp->rp;
+		qunlock(&grec->lock);
+
+		ipoput4(c->p->f, copyblock(bp, BLEN(bp)), 0, gre->ttl - 1, gre->tos, nil);
+
+		qlock(&grec->lock);
+		addring(&grec->dlpending, bp);
+	}
+	grec->dlsusp = 0;
+	qunlock(&grec->lock);
+	return nil;
+}
+
+static char *
+grectlulresume(Conv *c, int, char **)
+{
+	GREconv *grec;
+	GREhdr *gre;
+	Block *bp;
+
+	grec = c->ptcl;
+
+	qlock(&grec->lock);
+	while((bp = getring(&grec->ulbuffered)) != nil){
+		gre = (GREhdr *)bp->rp;
+
+		qunlock(&grec->lock);
+		ipoput4(c->p->f, bp, 0, gre->ttl - 1, gre->tos, nil);
+		qlock(&grec->lock);
+	}
+	grec->ulsusp = 0;
+	qunlock(&grec->lock);
+	return nil;
+}
+
+static char *
+grectlforward(Conv *c, int, char **argv)
+{
+	Block *bp;
+	GREconv *grec;
+	GREhdr *gre;
+	Metablock *m;
+
+	grec = c->ptcl;
+
+	v4parseip(grec->south, argv[1]);
+	memmove(grec->north, grec->south, sizeof grec->north);
+
+	qlock(&grec->lock);
+	if(!grec->dlsusp){
+		qunlock(&grec->lock);
+		return "not suspended";
+	}
+	grec->dlsusp = 0;
+	grec->ulsusp = 0;
+
+	while((bp = getring(&grec->dlpending)) != nil){
+
+		assert(bp->rp - bp->base >= sizeof(Metablock));
+		m = (Metablock *)bp->base;
+		assert(m->rp >= bp->base && m->rp < bp->lim);
+
+		bp->rp = m->rp;
+
+		gre = (GREhdr *)bp->rp;
+		memmove(gre->src, grec->coa, sizeof gre->dst);
+		memmove(gre->dst, grec->south, sizeof gre->dst);
+
+		qunlock(&grec->lock);
+		ipoput4(c->p->f, bp, 0, gre->ttl - 1, gre->tos, nil);
+		qlock(&grec->lock);
+	}
+
+	while((bp = getring(&grec->dlbuffered)) != nil){
+		gre = (GREhdr *)bp->rp;
+		memmove(gre->src, grec->coa, sizeof gre->dst);
+		memmove(gre->dst, grec->south, sizeof gre->dst);
+
+		qunlock(&grec->lock);
+		ipoput4(c->p->f, bp, 0, gre->ttl - 1, gre->tos, nil);
+		qlock(&grec->lock);
+	}
+
+	while((bp = getring(&grec->ulbuffered)) != nil){
+		gre = (GREhdr *)bp->rp;
+
+		memmove(gre->src, grec->coa, sizeof gre->dst);
+		memmove(gre->dst, grec->south, sizeof gre->dst);
+
+		qunlock(&grec->lock);
+		ipoput4(c->p->f, bp, 0, gre->ttl - 1, gre->tos, nil);
+		qlock(&grec->lock);
+	}
+	qunlock(&grec->lock);
+	return nil;
+}
+
+static char *
+grectlulkey(Conv *c, int, char **argv)
+{
+	GREconv *grec;
+
+	grec = c->ptcl;
+	grec->ulkey = strtoul(argv[1], nil, 0);
+	return nil;
+}
+
+/* Look up f[0] in grectls, check the argument count and run the handler. */
+char *
+grectl(Conv *c, char **f, int n)
+{
+	int i;
+
+	if(n < 1)
+		return "too few arguments";
+
+	for(i = 0; i < Ncmds; i++){
+		if(strcmp(f[0], grectls[i].cmd) != 0)
+			continue;
+		/* a table argc of 0 accepts any number of arguments */
+		if(grectls[i].argc != 0 && grectls[i].argc != n)
+			return "incorrect number of arguments";
+		return grectls[i].f(c, n, f);
+	}
+	return "no such command";
+}
+
 void
 greinit(Fs *fs)
 {
@@ -276,7 +920,7 @@
 	gre->stats = grestats;
 	gre->ipproto = IP_GREPROTO;
 	gre->nc = 64;
-	gre->ptclsize = 0;
+	gre->ptclsize = sizeof(GREconv);
 
 	Fsproto(fs, gre);
 }
diff -u a/os/ip//icmp.c b/os/ip//icmp.c
--- a/os/ip//icmp.c
+++ b/os/ip//icmp.c
@@ -44,11 +44,6 @@
 	Maxtype		= 18,
 };
 
-enum
-{
-	MinAdvise	= 24,	/* minimum needed for us to advise another protocol */ 
-};
-
 char *icmpnames[Maxtype+1] =
 {
 [EchoReply]		"EchoReply",
@@ -70,6 +65,8 @@
 	IP_ICMPPROTO	= 1,
 	ICMP_IPSIZE	= 20,
 	ICMP_HDRSIZE	= 8,
+
+	MinAdvise	= ICMP_IPSIZE+4,	/* minimum needed for us to advise another protocol */ 
 };
 
 enum
@@ -113,7 +110,7 @@
 	c->wq = qbypass(icmpkick, c);
 }
 
-extern char*
+char*
 icmpconnect(Conv *c, char **argv, int argc)
 {
 	char *e;
@@ -126,11 +123,11 @@
 	return nil;
 }
 
-extern int
+int
 icmpstate(Conv *c, char *state, int n)
 {
 	USED(c);
-	return snprint(state, n, "%s qin %d qout %d",
+	return snprint(state, n, "%s qin %d qout %d\n",
 		"Datagram",
 		c->rq ? qlen(c->rq) : 0,
 		c->wq ? qlen(c->wq) : 0
@@ -137,7 +134,7 @@
 	);
 }
 
-extern char*
+char*
 icmpannounce(Conv *c, char **argv, int argc)
 {
 	char *e;
@@ -150,7 +147,7 @@
 	return nil;
 }
 
-extern void
+void
 icmpclose(Conv *c)
 {
 	qclose(c->rq);
@@ -169,8 +166,7 @@
 
 	if(bp == nil)
 		return;
-
-	if(blocklen(bp) < ICMP_IPSIZE + ICMP_HDRSIZE){
+	if(BLEN(bp) < ICMP_IPSIZE + ICMP_HDRSIZE){
 		freeblist(bp);
 		return;
 	}
@@ -190,21 +186,50 @@
 	ipoput4(c->p->f, bp, 0, c->ttl, c->tos, nil);
 }
 
-extern void
-icmpttlexceeded(Fs *f, uchar *ia, Block *bp)
+static int
+ip4reply(Fs *f, uchar ip4[4])
 {
+	uchar addr[IPaddrlen];
+	int i;
+
+	v4tov6(addr, ip4);
+	if(ipismulticast(addr))
+		return 0;
+	i = ipforme(f, addr);
+	return i == 0 || i == Runi;
+}
+
+static int
+ip4me(Fs *f, uchar ip4[4])
+{
+	uchar addr[IPaddrlen];
+
+	v4tov6(addr, ip4);
+	if(ipismulticast(addr))
+		return 0;
+	return ipforme(f, addr) == Runi;
+}
+
+void
+icmpttlexceeded(Fs *f, Ipifc *ifc, Block *bp)
+{
 	Block	*nbp;
 	Icmp	*p, *np;
+	uchar	ia[IPv4addrlen];
 
 	p = (Icmp *)bp->rp;
+	if(!ip4reply(f, p->src) || !ipv4local(ifc, ia, 0, p->src))
+		return;
 
-	netlog(f, Logicmp, "sending icmpttlexceeded -> %V\n", p->src);
+	netlog(f, Logicmp, "sending icmpttlexceeded %V -> src %V dst %V\n",
+		ia, p->src, p->dst);
+
 	nbp = allocb(ICMP_IPSIZE + ICMP_HDRSIZE + ICMP_IPSIZE + 8);
 	nbp->wp += ICMP_IPSIZE + ICMP_HDRSIZE + ICMP_IPSIZE + 8;
 	np = (Icmp *)nbp->rp;
 	np->vihl = IP_VER4;
+	memmove(np->src, ia, sizeof(np->src));
 	memmove(np->dst, p->src, sizeof(np->dst));
-	v6tov4(np->src, ia);
 	memmove(np->data, bp->rp, ICMP_IPSIZE + 8);
 	np->type = TimeExceed;
 	np->code = 0;
@@ -214,7 +239,6 @@
 	memset(np->cksum, 0, sizeof(np->cksum));
 	hnputs(np->cksum, ptclcsum(nbp, ICMP_IPSIZE, blocklen(nbp) - ICMP_IPSIZE));
 	ipoput4(f, nbp, 0, MAXTTL, DFLTTOS, nil);
-
 }
 
 static void
@@ -222,20 +246,10 @@
 {
 	Block	*nbp;
 	Icmp	*p, *np;
-	int	i;
-	uchar	addr[IPaddrlen];
 
 	p = (Icmp *)bp->rp;
-
-	/* only do this for unicast sources and destinations */
-	v4tov6(addr, p->dst);
-	i = ipforme(f, addr);
-	if((i&Runi) == 0)
+	if(!ip4me(f, p->dst) || !ip4reply(f, p->src))
 		return;
-	v4tov6(addr, p->src);
-	i = ipforme(f, addr);
-	if(i != 0 && (i&Runi) == 0)
-		return;
 
 	netlog(f, Logicmp, "sending icmpnoconv -> %V\n", p->src);
 	nbp = allocb(ICMP_IPSIZE + ICMP_HDRSIZE + ICMP_IPSIZE + 8);
@@ -255,13 +269,13 @@
 	ipoput4(f, nbp, 0, MAXTTL, DFLTTOS, nil);
 }
 
-extern void
+void
 icmpnoconv(Fs *f, Block *bp)
 {
 	icmpunreachable(f, bp, 3, 0);
 }
 
-extern void
+void
 icmpcantfrag(Fs *f, Block *bp, int mtu)
 {
 	icmpunreachable(f, bp, 4, mtu);
@@ -270,35 +284,34 @@
 static void
 goticmpkt(Proto *icmp, Block *bp)
 {
+	ushort	recid;
+	uchar	dst[IPaddrlen], src[IPaddrlen];
 	Conv	**c, *s;
 	Icmp	*p;
-	uchar	dst[IPaddrlen];
-	ushort	recid;
 
 	p = (Icmp *) bp->rp;
-	v4tov6(dst, p->src);
+	v4tov6(dst, p->dst);
+	v4tov6(src, p->src);
 	recid = nhgets(p->icmpid);
 
-	for(c = icmp->conv; *c; c++) {
-		s = *c;
+	for(c = icmp->conv; (s = *c) != nil; c++){
 		if(s->lport == recid)
-		if(ipcmp(s->raddr, dst) == 0){
-			bp = concatblock(bp);
-			if(bp != nil)
-				qpass(s->rq, bp);
-			return;
-		}
+		if(ipcmp(s->laddr, dst) == 0 || ipcmp(s->raddr, src) == 0)
+			qpass(s->rq, copyblock(bp, blocklen(bp)));
 	}
 	freeblist(bp);
 }
 
 static Block *
-mkechoreply(Block *bp)
+mkechoreply(Block *bp, Fs *f)
 {
 	Icmp	*q;
 	uchar	ip[4];
 
 	q = (Icmp *)bp->rp;
+	if(!ip4me(f, q->dst) || !ip4reply(f, q->src))
+		return nil;
+
 	q->vihl = IP_VER4;
 	memmove(ip, q->src, sizeof(q->dst));
 	memmove(q->src, q->dst, sizeof(q->src));
@@ -318,12 +331,22 @@
 [3]	"port unreachable",
 [4]	"fragmentation needed and DF set",
 [5]	"source route failed",
+[6]	"destination network unknown",
+[7]	"destination host unknown",
+[8]	"source host isolated",
+[9]	"network administratively prohibited",
+[10]	"host administratively prohibited",
+[11]	"network unreachable for tos",
+[12]	"host unreachable for tos",
+[13]	"communication administratively prohibited",
+[14]	"host precedence violation",
+[15]	"precedence cutoff in effect",
 };
 
 static void
 icmpiput(Proto *icmp, Ipifc*, Block *bp)
 {
-	int	n, iplen;
+	int	n;
 	Icmp	*p;
 	Block	*r;
 	Proto	*pr;
@@ -332,12 +355,10 @@
 	Icmppriv *ipriv;
 
 	ipriv = icmp->priv;
-	
 	ipriv->stats[InMsgs]++;
 
-	p = (Icmp *)bp->rp;
-	netlog(icmp->f, Logicmp, "icmpiput %d %d\n", p->type, p->code);
-	n = blocklen(bp);
+	bp = concatblock(bp);
+	n = BLEN(bp);
 	if(n < ICMP_IPSIZE+ICMP_HDRSIZE){
 		ipriv->stats[InErrors]++;
 		ipriv->stats[HlenErrs]++;
@@ -344,69 +365,57 @@
 		netlog(icmp->f, Logicmp, "icmp hlen %d\n", n);
 		goto raise;
 	}
-	iplen = nhgets(p->length);
-	if(iplen > n || (iplen % 1)){
-		ipriv->stats[LenErrs]++;
+	if(ptclcsum(bp, ICMP_IPSIZE, n - ICMP_IPSIZE)){
 		ipriv->stats[InErrors]++;
-		netlog(icmp->f, Logicmp, "icmp length %d\n", iplen);
-		goto raise;
-	}
-	if(ptclcsum(bp, ICMP_IPSIZE, iplen - ICMP_IPSIZE)){
-		ipriv->stats[InErrors]++;
 		ipriv->stats[CsumErrs]++;
 		netlog(icmp->f, Logicmp, "icmp checksum error\n");
 		goto raise;
 	}
+	p = (Icmp *)bp->rp;
+	netlog(icmp->f, Logicmp, "icmpiput %s (%d) %d\n",
+		(p->type < nelem(icmpnames)? icmpnames[p->type]: ""),
+		p->type, p->code);
 	if(p->type <= Maxtype)
 		ipriv->in[p->type]++;
 
 	switch(p->type) {
 	case EchoRequest:
-		if (iplen < n)
-			bp = trimblock(bp, 0, iplen);
-		r = mkechoreply(bp);
+		r = mkechoreply(bp, icmp->f);
+		if(r == nil)
+			goto raise;
 		ipriv->out[EchoReply]++;
 		ipoput4(icmp->f, r, 0, MAXTTL, DFLTTOS, nil);
 		break;
 	case Unreachable:
-		if(p->code > 5)
-			msg = unreachcode[1];
-		else
+		if(p->code >= nelem(unreachcode)) {
+			snprint(m2, sizeof m2, "unreachable %V -> %V code %d",
+				p->src, p->dst, p->code);
+			msg = m2;
+		} else
 			msg = unreachcode[p->code];
 
+	Advise:
 		bp->rp += ICMP_IPSIZE+ICMP_HDRSIZE;
-		if(blocklen(bp) < MinAdvise){
+		if(BLEN(bp) < MinAdvise){
 			ipriv->stats[LenErrs]++;
 			goto raise;
 		}
 		p = (Icmp *)bp->rp;
-		pr = Fsrcvpcolx(icmp->f, p->proto);
-		if(pr != nil && pr->advise != nil) {
-			(*pr->advise)(pr, bp, msg);
-			return;
+		if((nhgets(p->frag) & IP_FO) == 0){
+			pr = Fsrcvpcolx(icmp->f, p->proto);
+			if(pr != nil && pr->advise != nil) {
+				(*pr->advise)(pr, bp, msg);
+				return;
+			}
 		}
-
 		bp->rp -= ICMP_IPSIZE+ICMP_HDRSIZE;
 		goticmpkt(icmp, bp);
 		break;
 	case TimeExceed:
 		if(p->code == 0){
-			sprint(m2, "ttl exceeded at %V", p->src);
-
-			bp->rp += ICMP_IPSIZE+ICMP_HDRSIZE;
-			if(blocklen(bp) < MinAdvise){
-				ipriv->stats[LenErrs]++;
-				goto raise;
-			}
-			p = (Icmp *)bp->rp;
-			pr = Fsrcvpcolx(icmp->f, p->proto);
-			if(pr != nil && pr->advise != nil) {
-				(*pr->advise)(pr, bp, m2);
-				return;
-			}
-			bp->rp -= ICMP_IPSIZE+ICMP_HDRSIZE;
+			snprint(msg = m2, sizeof m2, "ttl exceeded at %V", p->src);
+			goto Advise;
 		}
-
 		goticmpkt(icmp, bp);
 		break;
 	default:
@@ -419,22 +428,25 @@
 	freeblist(bp);
 }
 
-void
+static void
 icmpadvise(Proto *icmp, Block *bp, char *msg)
 {
+	ushort	recid;
+	uchar	dst[IPaddrlen], src[IPaddrlen];
 	Conv	**c, *s;
 	Icmp	*p;
-	uchar	dst[IPaddrlen];
-	ushort	recid;
 
 	p = (Icmp *) bp->rp;
 	v4tov6(dst, p->dst);
+	v4tov6(src, p->src);
 	recid = nhgets(p->icmpid);
 
-	for(c = icmp->conv; *c; c++) {
-		s = *c;
+	for(c = icmp->conv; (s = *c) != nil; c++){
 		if(s->lport == recid)
+		if(ipcmp(s->laddr, src) == 0)
 		if(ipcmp(s->raddr, dst) == 0){
+			if(s->ignoreadvice)
+				break;
 			qhangup(s->rq, msg);
 			qhangup(s->wq, msg);
 			break;
@@ -443,7 +455,7 @@
 	freeblist(bp);
 }
 
-int
+static int
 icmpstats(Proto *icmp, char *buf, int len)
 {
 	Icmppriv *priv;
@@ -456,7 +468,7 @@
 	for(i = 0; i < Nstats; i++)
 		p = seprint(p, e, "%s: %lud\n", statnames[i], priv->stats[i]);
 	for(i = 0; i <= Maxtype; i++){
-		if(icmpnames[i])
+		if(icmpnames[i] != nil)
 			p = seprint(p, e, "%s: %lud %lud\n", icmpnames[i], priv->in[i], priv->out[i]);
 		else
 			p = seprint(p, e, "%d: %lud %lud\n", i, priv->in[i], priv->out[i]);
@@ -463,13 +475,7 @@
 	}
 	return p - buf;
 }
-
-int
-icmpgc(Proto *icmp)
-{
-	return natgc(icmp->ipproto);
-}
-
+	
 void
 icmpinit(Fs *fs)
 {
@@ -487,7 +493,7 @@
 	icmp->stats = icmpstats;
 	icmp->ctl = nil;
 	icmp->advise = icmpadvise;
-	icmp->gc = icmpgc;
+	icmp->gc = nil;
 	icmp->ipproto = IP_ICMPPROTO;
 	icmp->nc = 128;
 	icmp->ptclsize = 0;
diff -u a/os/ip//icmp6.c b/os/ip//icmp6.c
--- a/os/ip//icmp6.c
+++ b/os/ip//icmp6.c
@@ -1,3 +1,6 @@
+/*
+ * Internet Control Message Protocol for IPv6
+ */
 #include "u.h"
 #include "../port/lib.h"
 #include "mem.h"
@@ -7,41 +10,36 @@
 #include "ip.h"
 #include "ipv6.h"
 
-typedef struct ICMPpkt ICMPpkt;
-typedef struct IPICMP IPICMP;
-typedef struct Ndpkt Ndpkt;
-typedef struct NdiscC NdiscC;
+enum
+{
+	InMsgs6,
+	InErrors6,
+	OutMsgs6,
+	CsumErrs6,
+	LenErrs6,
+	HlenErrs6,
+	HoplimErrs6,
+	IcmpCodeErrs6,
+	TargetErrs6,
+	OptlenErrs6,
+	AddrmxpErrs6,
+	RouterAddrErrs6,
 
-struct ICMPpkt {
-	uchar	type;
-	uchar	code;
-	uchar	cksum[2];
-	uchar	icmpid[2];
-	uchar	seq[2];
+	Nstats6,
 };
 
-struct IPICMP {
-	Ip6hdr;
-	ICMPpkt;
+enum {
+	ICMP_USEAD6	= 40,
 };
 
-struct NdiscC
-{
-	IPICMP;
-	uchar target[IPaddrlen];
+enum {
+	Oflag	= 1<<5,
+	Sflag	= 1<<6,
+	Rflag	= 1<<7,
 };
 
-struct Ndpkt
-{
-	NdiscC;
-	uchar otype;
-	uchar olen;	// length in units of 8 octets(incl type, code),
-				// 1 for IEEE 802 addresses
-	uchar lnaddr[6];	// link-layer address
-};
-
-enum {	
-	// ICMPv6 types
+enum {
+	/* ICMPv6 types */
 	EchoReply	= 0,
 	UnreachableV6	= 1,
 	PacketTooBigV6	= 2,
@@ -69,6 +67,65 @@
 	Maxtype6	= 137,
 };
 
+enum {
+	MinAdvise	= IP6HDR+4,	/* minimum needed for us to advise another protocol */ 
+};
+
+/* on-the-wire packet formats */
+typedef struct IPICMP IPICMP;
+typedef struct Ndpkt Ndpkt;
+typedef struct NdiscC NdiscC;
+
+/* we do this to avoid possible struct padding  */
+#define ICMPHDR \
+	IPV6HDR; \
+	uchar	type; \
+	uchar	code; \
+	uchar	cksum[2]; \
+	uchar	icmpid[2]; \
+	uchar	seq[2]
+
+struct IPICMP {
+	ICMPHDR;
+	uchar	payload[];
+};
+
+#define IPICMPSZ offsetof(IPICMP, payload[0])
+
+struct NdiscC {
+	ICMPHDR;
+	uchar	target[IPaddrlen];
+	uchar	payload[];
+};
+
+#define NDISCSZ offsetof(NdiscC, payload[0])
+
+struct Ndpkt {
+	ICMPHDR;
+	uchar	target[IPaddrlen];
+	uchar	otype;
+	uchar	olen;		/* length in units of 8 octets(incl type, code),
+				 * 1 for IEEE 802 addresses */
+	uchar	lnaddr[6];	/* link-layer address */
+	uchar	payload[];
+};
+
+#define NDPKTSZ offsetof(Ndpkt, payload[0])
+
+typedef struct Icmppriv6
+{
+	ulong	stats[Nstats6];
+
+	/* message counts */
+	ulong	in[Maxtype6+1];
+	ulong	out[Maxtype6+1];
+} Icmppriv6;
+
+typedef struct Icmpcb6
+{
+	uchar	headers;
+} Icmpcb6;
+
 char *icmpnames6[Maxtype6+1] =
 {
 [EchoReply]		"EchoReply",
@@ -95,24 +152,6 @@
 [RedirectV6]		"RedirectV6",
 };
 
-enum
-{
-	InMsgs6,
-	InErrors6,
-	OutMsgs6,
-	CsumErrs6,
-	LenErrs6,
-	HlenErrs6,
-	HoplimErrs6,
-	IcmpCodeErrs6,
-	TargetErrs6,
-	OptlenErrs6,
-	AddrmxpErrs6,
-	RouterAddrErrs6,
-
-	Nstats6,
-};
-
 static char *statnames6[Nstats6] =
 {
 [InMsgs6]	"InMsgs",
@@ -129,49 +168,18 @@
 [RouterAddrErrs6]	"RouterAddrErrs",
 };
 
-typedef struct Icmppriv6
-{
-	ulong	stats[Nstats6];
-
-	/* message counts */
-	ulong	in[Maxtype6+1];
-	ulong	out[Maxtype6+1];
-} Icmppriv6;
-
-typedef struct Icmpcb6 
-{
-	QLock;
-	uchar headers;
-} Icmpcb6;
-
 static char *unreachcode[] =
 {
-[icmp6_no_route]	"no route to destination",
-[icmp6_ad_prohib]	"comm with destination administratively prohibited",
-[icmp6_unassigned]	"icmp unreachable: unassigned error code (2)",
-[icmp6_adr_unreach]	"address unreachable",
-[icmp6_port_unreach]	"port unreachable",
-[icmp6_unkn_code]	"icmp unreachable: unknown code",
+[Icmp6_no_route]	"no route to destination",
+[Icmp6_ad_prohib]	"comm with destination administratively prohibited",
+[Icmp6_out_src_scope]	"beyond scope of source address",
+[Icmp6_adr_unreach]	"address unreachable",
+[Icmp6_port_unreach]	"port unreachable",
+[Icmp6_gress_src_fail]	"source address failed ingress/egress policy",
+[Icmp6_rej_route]	"reject route to destination",
+[Icmp6_unknown]		"icmp unreachable: unknown code",
 };
 
-enum {
-	ICMP_USEAD6	= 40,
-};
-
-enum {
-	Oflag	= 1<<5,
-	Sflag	= 1<<6,
-	Rflag	= 1<<7,
-};
-
-enum {
-	slladd	= 1,
-	tlladd	= 2,
-	prfinfo	= 3,
-	redhdr	= 4,
-	mtuopt	= 5,
-};
-
 static void icmpkick6(void *x, Block *bp);
 
 static void
@@ -185,13 +193,14 @@
 set_cksum(Block *bp)
 {
 	IPICMP *p = (IPICMP *)(bp->rp);
+	int n = blocklen(bp);
 
-	hnputl(p->vcf, 0);  // borrow IP header as pseudoheader
-	hnputs(p->ploadlen, blocklen(bp)-IPV6HDR_LEN);
+	hnputl(p->vcf, 0);  	/* borrow IP header as pseudoheader */
+	hnputs(p->ploadlen, n - IP6HDR);
 	p->proto = 0;
-	p->ttl = ICMPv6;	// ttl gets set later
+	p->ttl = ICMPv6;	/* ttl gets set later */
 	hnputs(p->cksum, 0);
-	hnputs(p->cksum, ptclcsum(bp, 0, blocklen(bp)));
+	hnputs(p->cksum, ptclcsum(bp, 0, n));
 	p->proto = ICMPv6;
 }
 
@@ -198,7 +207,8 @@
 static Block *
 newIPICMP(int packetlen)
 {
-	Block	*nbp;
+	Block *nbp;
+
 	nbp = allocb(packetlen);
 	nbp->wp += packetlen;
 	memset(nbp->rp, 0, packetlen);
@@ -205,20 +215,22 @@
 	return nbp;
 }
 
-void
+static void
 icmpadvise6(Proto *icmp, Block *bp, char *msg)
 {
-	Conv	**c, *s;
-	IPICMP	*p;
-	ushort	recid;
+	ushort recid;
+	Conv **c, *s;
+	IPICMP *p;
 
-	p = (IPICMP *) bp->rp;
+	p = (IPICMP *)bp->rp;
 	recid = nhgets(p->icmpid);
 
-	for(c = icmp->conv; *c; c++) {
-		s = *c;
+	for(c = icmp->conv; (s = *c) != nil; c++){
 		if(s->lport == recid)
+		if(ipcmp(s->laddr, p->src) == 0)
 		if(ipcmp(s->raddr, p->dst) == 0){
+			if(s->ignoreadvice)
+				break;
 			qhangup(s->rq, msg);
 			qhangup(s->wq, msg);
 			break;
@@ -230,9 +242,9 @@
 static void
 icmpkick6(void *x, Block *bp)
 {
+	uchar laddr[IPaddrlen], raddr[IPaddrlen];
 	Conv *c = x;
 	IPICMP *p;
-	uchar laddr[IPaddrlen], raddr[IPaddrlen];
 	Icmppriv6 *ipriv = c->p->priv;
 	Icmpcb6 *icb = (Icmpcb6*)c->ptcl;
 
@@ -249,10 +261,10 @@
 		bp->rp += IPaddrlen;
 		ipmove(raddr, bp->rp);
 		bp->rp += IPaddrlen;
-		bp = padblock(bp, sizeof(Ip6hdr));
+		bp = padblock(bp, IP6HDR);
 	}
 
-	if(blocklen(bp) < sizeof(IPICMP)){
+	if(BLEN(bp) < IPICMPSZ){
 		freeblist(bp);
 		return;
 	}
@@ -268,23 +280,20 @@
 
 	set_cksum(bp);
 	p->vcf[0] = 0x06 << 4;
-	if(p->type <= Maxtype6)	
+	if(p->type <= Maxtype6)
 		ipriv->out[p->type]++;
 	ipoput6(c->p->f, bp, 0, c->ttl, c->tos, nil);
 }
 
-char*
+static char*
 icmpctl6(Conv *c, char **argv, int argc)
 {
 	Icmpcb6 *icb;
 
 	icb = (Icmpcb6*) c->ptcl;
-
-	if(argc==1) {
-		if(strcmp(argv[0], "headers")==0) {
-			icb->headers = 6;
-			return nil;
-		}
+	if(argc==1 && strcmp(argv[0], "headers")==0) {
+		icb->headers = 6;
+		return nil;
 	}
 	return "unknown control request";
 }
@@ -292,41 +301,39 @@
 static void
 goticmpkt6(Proto *icmp, Block *bp, int muxkey)
 {
-	Conv	**c, *s;
-	IPICMP	*p = (IPICMP *)bp->rp;
-	ushort	recid; 
-	uchar 	*addr;
+	ushort recid;
+	uchar *addr;
+	Conv **c, *s;
+	IPICMP *p = (IPICMP *)bp->rp;
 
 	if(muxkey == 0) {
 		recid = nhgets(p->icmpid);
 		addr = p->src;
-	}
-	else {
+	} else {
 		recid = muxkey;
 		addr = p->dst;
 	}
-
-	for(c = icmp->conv; *c; c++){
-		s = *c;
-		if(s->lport == recid && ipcmp(s->raddr, addr) == 0){
-			bp = concatblock(bp);
-			if(bp != nil)
-				qpass(s->rq, bp);
-			return;
-		}
+	for(c = icmp->conv; (s = *c) != nil; c++){
+		if(s->lport == recid)
+		if(ipcmp(s->laddr, p->dst) == 0 || ipcmp(s->raddr, addr) == 0)
+			qpass(s->rq, copyblock(bp, blocklen(bp)));
 	}
-
 	freeblist(bp);
 }
 
 static Block *
-mkechoreply6(Block *bp)
+mkechoreply6(Block *bp, Ipifc *ifc)
 {
+	uchar addr[IPaddrlen];
 	IPICMP *p = (IPICMP *)(bp->rp);
-	uchar	addr[IPaddrlen];
 
+	if(isv6mcast(p->src))
+		return nil;
 	ipmove(addr, p->src);
-	ipmove(p->src, p->dst);
+	if(!isv6mcast(p->dst))
+		ipmove(p->src, p->dst);
+	else if (!ipv6local(ifc, p->src, 0, addr))
+		return nil;
 	ipmove(p->dst, addr);
 	p->type = EchoReplyV6;
 	set_cksum(bp);
@@ -335,49 +342,43 @@
 
 /*
  * sends out an ICMPv6 neighbor solicitation
- * 	suni == SRC_UNSPEC or SRC_UNI, 
+ * 	suni == SRC_UNSPEC or SRC_UNI,
  *	tuni == TARG_MULTI => multicast for address resolution,
  * 	and tuni == TARG_UNI => neighbor reachability.
  */
-
-extern void
+void
 icmpns(Fs *f, uchar* src, int suni, uchar* targ, int tuni, uchar* mac)
 {
-	Block	*nbp;
+	Block *nbp;
 	Ndpkt *np;
 	Proto *icmp = f->t2p[ICMPv6];
 	Icmppriv6 *ipriv = icmp->priv;
 
-
-	nbp = newIPICMP(sizeof(Ndpkt));
+	nbp = newIPICMP(NDPKTSZ);
 	np = (Ndpkt*) nbp->rp;
 
+	if(suni == SRC_UNSPEC)
+		ipmove(np->src, v6Unspecified);
+	else
+		ipmove(np->src, src);
 
-	if(suni == SRC_UNSPEC) 
-		memmove(np->src, v6Unspecified, IPaddrlen);
-	else 
-		memmove(np->src, src, IPaddrlen);
-
 	if(tuni == TARG_UNI)
-		memmove(np->dst, targ, IPaddrlen);
+		ipmove(np->dst, targ);
 	else
 		ipv62smcast(np->dst, targ);
 
 	np->type = NbrSolicit;
 	np->code = 0;
-	memmove(np->target, targ, IPaddrlen);
+	ipmove(np->target, targ);
 	if(suni != SRC_UNSPEC) {
-		np->otype = SRC_LLADDRESS;
-		np->olen = 1;	/* 1+1+6 = 8 = 1 8-octet */
+		np->otype = SRC_LLADDR;
+		np->olen = 1;		/* 1+1+6 = 8 = 1 8-octet */
 		memmove(np->lnaddr, mac, sizeof(np->lnaddr));
-	}
-	else {
-		int r = sizeof(Ndpkt)-sizeof(NdiscC);
-		nbp->wp -= r;
-	}
+	} else
+		nbp->wp -= NDPKTSZ - NDISCSZ;
 
 	set_cksum(nbp);
-	np = (Ndpkt*) nbp->rp;
+	np = (Ndpkt*)nbp->rp;
 	np->ttl = HOP_LIMIT;
 	np->vcf[0] = 0x06 << 4;
 	ipriv->out[NbrSolicit]++;
@@ -388,122 +389,101 @@
 /*
  * sends out an ICMPv6 neighbor advertisement. pktflags == RSO flags.
  */
-extern void
+void
 icmpna(Fs *f, uchar* src, uchar* dst, uchar* targ, uchar* mac, uchar flags)
 {
-	Block	*nbp;
+	Block *nbp;
 	Ndpkt *np;
 	Proto *icmp = f->t2p[ICMPv6];
 	Icmppriv6 *ipriv = icmp->priv;
 
-	nbp = newIPICMP(sizeof(Ndpkt));
-	np = (Ndpkt*) nbp->rp;
+	nbp = newIPICMP(NDPKTSZ);
+	np = (Ndpkt*)nbp->rp;
 
-	memmove(np->src, src, IPaddrlen);
-	memmove(np->dst, dst, IPaddrlen);
+	ipmove(np->src, src);
+	ipmove(np->dst, dst);
 
 	np->type = NbrAdvert;
 	np->code = 0;
 	np->icmpid[0] = flags;
-	memmove(np->target, targ, IPaddrlen);
+	ipmove(np->target, targ);
 
-	np->otype = TARGET_LLADDRESS;
-	np->olen = 1;	
+	np->otype = TARGET_LLADDR;
+	np->olen = 1;
 	memmove(np->lnaddr, mac, sizeof(np->lnaddr));
 
 	set_cksum(nbp);
-	np = (Ndpkt*) nbp->rp;
+	np = (Ndpkt*)nbp->rp;
 	np->ttl = HOP_LIMIT;
 	np->vcf[0] = 0x06 << 4;
 	ipriv->out[NbrAdvert]++;
-	netlog(f, Logicmp, "sending neighbor advertisement %I\n", src);
+	netlog(f, Logicmp, "sending neighbor advertisement %I\n", targ);
 	ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil);
 }
 
-extern void
-icmphostunr(Fs *f, Ipifc *ifc, Block *bp, int code, int free)
+void
+icmphostunr6(Fs *f, Ipifc *ifc, Block *bp, int code, int tome)
 {
+	int osz = BLEN(bp);
+	int sz = MIN(IPICMPSZ + osz, v6MINTU);
 	Block *nbp;
 	IPICMP *np;
-	Ip6hdr	*p;
-	int osz = BLEN(bp);
-	int sz = MIN(sizeof(IPICMP) + osz, v6MINTU);
-	Proto	*icmp = f->t2p[ICMPv6];
+	Ip6hdr *p;
+	Proto *icmp = f->t2p[ICMPv6];
 	Icmppriv6 *ipriv = icmp->priv;
+	uchar ia[IPaddrlen];
 
-	p = (Ip6hdr *) bp->rp;
+	p = (Ip6hdr *)bp->rp;
+	if(isv6mcast(p->dst) || isv6mcast(p->src) || !ipv6local(ifc, ia, 0, p->src))
+		return;
 
-	if(isv6mcast(p->src)) 
-		goto clean;
+	netlog(f, Logicmp, "send icmphostunr %I -> src %I dst %I\n",
+		ia, p->src, p->dst);
 
 	nbp = newIPICMP(sz);
-	np = (IPICMP *) nbp->rp;
-
-	rlock(ifc);
-	if(ipv6anylocal(ifc, np->src)) {
-		netlog(f, Logicmp, "send icmphostunr -> s%I d%I\n", p->src, p->dst);
-	}
-	else {
-		netlog(f, Logicmp, "icmphostunr fail -> s%I d%I\n", p->src, p->dst);
-		freeblist(nbp);
-		if(free) 
-			goto clean;
-		else
-			return;
-	}
-
-	memmove(np->dst, p->src, IPaddrlen);
+	np = (IPICMP *)nbp->rp;
+	ipmove(np->src, ia);
+	ipmove(np->dst, p->src);
 	np->type = UnreachableV6;
 	np->code = code;
-	memmove(nbp->rp + sizeof(IPICMP), bp->rp, sz - sizeof(IPICMP));
+	memmove(nbp->rp + IPICMPSZ, bp->rp, sz - IPICMPSZ);
 	set_cksum(nbp);
 	np->ttl = HOP_LIMIT;
 	np->vcf[0] = 0x06 << 4;
 	ipriv->out[UnreachableV6]++;
 
-	if(free)
+	if(tome)
 		ipiput6(f, ifc, nbp);
-	else {
+	else 
 		ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil);
-		return;
-	}
-
-clean:
-	runlock(ifc);
-	freeblist(bp);
 }
 
-extern void
+void
 icmpttlexceeded6(Fs *f, Ipifc *ifc, Block *bp)
 {
+	int osz = BLEN(bp);
+	int sz = MIN(IPICMPSZ + osz, v6MINTU);
 	Block *nbp;
 	IPICMP *np;
-	Ip6hdr	*p;
-	int osz = BLEN(bp);
-	int sz = MIN(sizeof(IPICMP) + osz, v6MINTU);
-	Proto	*icmp = f->t2p[ICMPv6];
+	Ip6hdr *p;
+	Proto *icmp = f->t2p[ICMPv6];
 	Icmppriv6 *ipriv = icmp->priv;
+	uchar ia[IPaddrlen];
 
-	p = (Ip6hdr *) bp->rp;
-
-	if(isv6mcast(p->src)) 
+	p = (Ip6hdr *)bp->rp;
+	if(isv6mcast(p->dst) || isv6mcast(p->src) || !ipv6local(ifc, ia, 0, p->src))
 		return;
 
+	netlog(f, Logicmp, "send icmpttlexceeded6 %I -> src %I dst %I\n",
+		ia, p->src, p->dst);
+
 	nbp = newIPICMP(sz);
 	np = (IPICMP *) nbp->rp;
-
-	if(ipv6anylocal(ifc, np->src)) {
-		netlog(f, Logicmp, "send icmpttlexceeded6 -> s%I d%I\n", p->src, p->dst);
-	}
-	else {
-		netlog(f, Logicmp, "icmpttlexceeded6 fail -> s%I d%I\n", p->src, p->dst);
-		return;
-	}
-
-	memmove(np->dst, p->src, IPaddrlen);
+	ipmove(np->src, ia);
+	ipmove(np->dst, p->src);
 	np->type = TimeExceedV6;
 	np->code = 0;
-	memmove(nbp->rp + sizeof(IPICMP), bp->rp, sz - sizeof(IPICMP));
+	memmove(nbp->rp + IPICMPSZ, bp->rp, sz - IPICMPSZ);
 	set_cksum(nbp);
 	np->ttl = HOP_LIMIT;
 	np->vcf[0] = 0x06 << 4;
@@ -511,38 +491,33 @@
 	ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil);
 }
 
-extern void
+void
 icmppkttoobig6(Fs *f, Ipifc *ifc, Block *bp)
 {
+	int osz = BLEN(bp);
+	int sz = MIN(IPICMPSZ + osz, v6MINTU);
 	Block *nbp;
 	IPICMP *np;
-	Ip6hdr	*p;
-	int osz = BLEN(bp);
-	int sz = MIN(sizeof(IPICMP) + osz, v6MINTU);
-	Proto	*icmp = f->t2p[ICMPv6];
+	Ip6hdr *p;
+	Proto *icmp = f->t2p[ICMPv6];
 	Icmppriv6 *ipriv = icmp->priv;
+	uchar ia[IPaddrlen];
 
-	p = (Ip6hdr *) bp->rp;
-
-	if(isv6mcast(p->src)) 
+	p = (Ip6hdr *)bp->rp;
+	if(isv6mcast(p->dst) || isv6mcast(p->src) || !ipv6local(ifc, ia, 0, p->src))
 		return;
 
-	nbp = newIPICMP(sz);
-	np = (IPICMP *) nbp->rp;
+	netlog(f, Logicmp, "send icmppkttoobig6 %I -> src %I dst %I\n",
+		ia, p->src, p->dst);
 
-	if(ipv6anylocal(ifc, np->src)) {
-		netlog(f, Logicmp, "send icmppkttoobig6 -> s%I d%I\n", p->src, p->dst);
-	}
-	else {
-		netlog(f, Logicmp, "icmppkttoobig6 fail -> s%I d%I\n", p->src, p->dst);
-		return;
-	}
-
-	memmove(np->dst, p->src, IPaddrlen);
+	nbp = newIPICMP(sz);
+	np = (IPICMP *)nbp->rp;
+	ipmove(np->src, ia);
+	ipmove(np->dst, p->src);
 	np->type = PacketTooBigV6;
 	np->code = 0;
 	hnputl(np->icmpid, ifc->maxtu - ifc->m->hsize);
-	memmove(nbp->rp + sizeof(IPICMP), bp->rp, sz - sizeof(IPICMP));
+	memmove(nbp->rp + IPICMPSZ, bp->rp, sz - IPICMPSZ);
 	set_cksum(nbp);
 	np->ttl = HOP_LIMIT;
 	np->vcf[0] = 0x06 << 4;
@@ -554,31 +529,23 @@
  * RFC 2461, pages 39-40, pages 57-58.
  */
 static int
-valid(Proto *icmp, Ipifc *ifc, Block *bp, Icmppriv6 *ipriv) {
-	int 	sz, osz, unsp, n, ttl, iplen;
-	int 	pktsz = BLEN(bp);
-	uchar	*packet = bp->rp;
-	IPICMP	*p = (IPICMP *) packet;
-	Ndpkt	*np;
+valid(Proto *icmp, Ipifc *, Block *bp, Icmppriv6 *ipriv)
+{
+	int sz, osz, unsp, ttl;
+	int pktsz = BLEN(bp);
+	uchar *packet = bp->rp;
+	IPICMP *p = (IPICMP *) packet;
+	Ndpkt *np;
 
-	USED(ifc);
-	n = blocklen(bp);
-	if(n < sizeof(IPICMP)) {
+	if(pktsz < IPICMPSZ) {
 		ipriv->stats[HlenErrs6]++;
-		netlog(icmp->f, Logicmp, "icmp hlen %d\n", n);
+		netlog(icmp->f, Logicmp, "icmp hlen %d\n", pktsz);
 		goto err;
 	}
 
-	iplen = nhgets(p->ploadlen);
-	if(iplen > n-IPV6HDR_LEN || (iplen % 1)) {
-		ipriv->stats[LenErrs6]++;
-		netlog(icmp->f, Logicmp, "icmp length %d\n", iplen);
-		goto err;
-	}
-
-	// Rather than construct explicit pseudoheader, overwrite IPv6 header
+	/* Rather than construct explicit pseudoheader, overwrite IPv6 header */
 	if(p->proto != ICMPv6) {
-		// This code assumes no extension headers!!!
+		/* This code assumes no extension headers!!! */
 		netlog(icmp->f, Logicmp, "icmp error: extension header\n");
 		goto err;
 	}
@@ -586,7 +553,7 @@
 	ttl = p->ttl;
 	p->ttl = p->proto;
 	p->proto = 0;
-	if(ptclcsum(bp, 0, iplen + IPV6HDR_LEN)) {
+	if(ptclcsum(bp, 0, pktsz)) {
 		ipriv->stats[CsumErrs6]++;
 		netlog(icmp->f, Logicmp, "icmp checksum error\n");
 		goto err;
@@ -595,19 +562,16 @@
 	p->ttl = ttl;
 
 	/* additional tests for some pkt types */
-	if( (p->type == NbrSolicit) ||
-		(p->type == NbrAdvert) ||
-		(p->type == RouterAdvert) ||
-		(p->type == RouterSolicit) ||
-		(p->type == RedirectV6) ) {
-
+	if (p->type == NbrSolicit   || p->type == NbrAdvert ||
+	    p->type == RouterAdvert || p->type == RouterSolicit ||
+	    p->type == RedirectV6) {
 		if(p->ttl != HOP_LIMIT) {
-			ipriv->stats[HoplimErrs6]++; 
-			goto err; 
+			ipriv->stats[HoplimErrs6]++;
+			goto err;
 		}
 		if(p->code != 0) {
-			ipriv->stats[IcmpCodeErrs6]++; 
-			goto err; 
+			ipriv->stats[IcmpCodeErrs6]++;
+			goto err;
 		}
 
 		switch (p->type) {
@@ -615,82 +579,78 @@
 		case NbrAdvert:
 			np = (Ndpkt*) p;
 			if(isv6mcast(np->target)) {
-				ipriv->stats[TargetErrs6]++; 
-				goto err; 
+				ipriv->stats[TargetErrs6]++;
+				goto err;
 			}
-			if(optexsts(np) && (np->olen == 0)) {
-				ipriv->stats[OptlenErrs6]++; 
-				goto err; 
+			if(optexsts(np) && np->olen == 0) {
+				ipriv->stats[OptlenErrs6]++;
+				goto err;
 			}
-		
-			if(p->type == NbrSolicit) {
-				if(ipcmp(np->src, v6Unspecified) == 0) { 
-					if(!issmcast(np->dst) || optexsts(np))  {
-						ipriv->stats[AddrmxpErrs6]++; 
-						goto err;
-					}
+
+			if (p->type == NbrSolicit &&
+			    ipcmp(np->src, v6Unspecified) == 0)
+				if(!issmcast(np->dst) || optexsts(np)) {
+					ipriv->stats[AddrmxpErrs6]++;
+					goto err;
 				}
-			}
-		
-			if(p->type == NbrAdvert) {
-				if((isv6mcast(np->dst))&&(nhgets(np->icmpid) & Sflag)){
-					ipriv->stats[AddrmxpErrs6]++; 
-					goto err; 
+
+			if(p->type == NbrAdvert)
+				if(isv6mcast(np->dst) &&
+				    (nhgets(np->icmpid) & Sflag)){
+					ipriv->stats[AddrmxpErrs6]++;
+					goto err;
 				}
-			}
 			break;
-	
+
 		case RouterAdvert:
-			if(pktsz - sizeof(Ip6hdr) < 16) {
-				ipriv->stats[HlenErrs6]++; 
-				goto err; 
+			if(pktsz - IP6HDR < 16) {
+				ipriv->stats[HlenErrs6]++;
+				goto err;
 			}
 			if(!islinklocal(p->src)) {
-				ipriv->stats[RouterAddrErrs6]++; 
-				goto err; 
+				ipriv->stats[RouterAddrErrs6]++;
+				goto err;
 			}
-			sz = sizeof(IPICMP) + 8;
-			while ((sz+1) < pktsz) {
-				osz = *(packet+sz+1);
+			sz = IPICMPSZ + 8;
+			while (sz+8 <= pktsz) {
+				osz = packet[sz+1];
 				if(osz <= 0) {
-					ipriv->stats[OptlenErrs6]++; 
-					goto err; 
-				}	
+					ipriv->stats[OptlenErrs6]++;
+					goto err;
+				}
 				sz += 8*osz;
 			}
 			break;
-	
+
 		case RouterSolicit:
-			if(pktsz - sizeof(Ip6hdr) < 8) {
-				ipriv->stats[HlenErrs6]++; 
-				goto err; 
+			if(pktsz - IP6HDR < 8) {
+				ipriv->stats[HlenErrs6]++;
+				goto err;
 			}
 			unsp = (ipcmp(p->src, v6Unspecified) == 0);
-			sz = sizeof(IPICMP) + 8;
-			while ((sz+1) < pktsz) {
-				osz = *(packet+sz+1);
-				if((osz <= 0) ||
-					(unsp && (*(packet+sz) == slladd)) ) {
-					ipriv->stats[OptlenErrs6]++; 
-					goto err; 
+			sz = IPICMPSZ + 8;
+			while (sz+8 <= pktsz) {
+				osz = packet[sz+1];
+				if(osz <= 0 ||
+				    (unsp && packet[sz] == SRC_LLADDR)) {
+					ipriv->stats[OptlenErrs6]++;
+					goto err;
 				}
 				sz += 8*osz;
 			}
 			break;
-	
+
 		case RedirectV6:
-			//to be filled in
+			/* to be filled in */
 			break;
-	
+
 		default:
 			goto err;
 		}
 	}
-
 	return 1;
-
 err:
-	ipriv->stats[InErrors6]++; 
+	ipriv->stats[InErrors6]++;
 	return 0;
 }
 
@@ -700,169 +660,162 @@
 	Iplifc *lifc;
 	int t;
 
-	rlock(ifc);
-	if(ipproxyifc(f, ifc, target)) {
-		runlock(ifc);
-		return t_uniproxy;
-	}
-
-	for(lifc = ifc->lifc; lifc; lifc = lifc->next) {
-		if(ipcmp(lifc->local, target) == 0) {
-			t = (lifc->tentative) ? t_unitent : t_unirany; 
-			runlock(ifc);
-			return t;
-		}
-	}
-
-	runlock(ifc);
-	return 0;
+	if((lifc = iplocalonifc(ifc, target)) != nil)
+		t = lifc->tentative? Tunitent: Tunirany;
+	else if(ipproxyifc(f, ifc, target))
+		t = Tuniproxy;
+	else
+		t = 0;
+	return t;
 }
 
 static void
-icmpiput6(Proto *icmp, Ipifc *ipifc, Block *bp)
+icmpiput6(Proto *icmp, Ipifc *ifc, Block *bp)
 {
-	uchar	*packet = bp->rp;
-	IPICMP	*p = (IPICMP *)packet;
-	Icmppriv6 *ipriv = icmp->priv;
-	Block	*r;
-	Proto	*pr;
-	char	*msg, m2[128];
-	Ndpkt* np;
+	char *msg, m2[128];
 	uchar pktflags;
-	uchar lsrc[IPaddrlen];
-	int refresh = 1;
+	uchar ia[IPaddrlen];
+	Block *r;
+	IPICMP *p;
+	Icmppriv6 *ipriv = icmp->priv;
 	Iplifc *lifc;
+	Ndpkt* np;
+	Proto *pr;
 
-	if(!valid(icmp, ipifc, bp, ipriv)) 
-		goto raise;
+	bp = concatblock(bp);
+	p = (IPICMP*)bp->rp;
 
-	if(p->type <= Maxtype6)
-		ipriv->in[p->type]++;
-	else
+	if(!valid(icmp, ifc, bp, ipriv) || p->type > Maxtype6)
 		goto raise;
 
+	ipriv->in[p->type]++;
+
 	switch(p->type) {
 	case EchoRequestV6:
-		r = mkechoreply6(bp);
+		r = mkechoreply6(bp, ifc);
+		if(r == nil)
+			goto raise;
 		ipriv->out[EchoReply]++;
 		ipoput6(icmp->f, r, 0, MAXTTL, DFLTTOS, nil);
 		break;
 
 	case UnreachableV6:
-		if(p->code > 4)
-			msg = unreachcode[icmp6_unkn_code];
+		if(p->code >= nelem(unreachcode))
+			msg = unreachcode[Icmp6_unknown];
 		else
 			msg = unreachcode[p->code];
-
-		bp->rp += sizeof(IPICMP);
-		if(blocklen(bp) < 8){
+	Advise:
+		bp->rp += IPICMPSZ;
+		if(BLEN(bp) < MinAdvise){
 			ipriv->stats[LenErrs6]++;
 			goto raise;
 		}
 		p = (IPICMP *)bp->rp;
-		pr = Fsrcvpcolx(icmp->f, p->proto);
-		if(pr != nil && pr->advise != nil) {
-			(*pr->advise)(pr, bp, msg);
-			return;
-		}
 
-		bp->rp -= sizeof(IPICMP);
-		goticmpkt6(icmp, bp, 0);
-		break;
-
-	case TimeExceedV6:
-		if(p->code == 0){
-			sprint(m2, "ttl exceeded at %I", p->src);
-
-			bp->rp += sizeof(IPICMP);
-			if(blocklen(bp) < 8){
-				ipriv->stats[LenErrs6]++;
-				goto raise;
+		/* get rid of fragment header if this is the first fragment */
+		if(p->proto == FH && BLEN(bp) >= MinAdvise+IP6FHDR && MinAdvise > IP6HDR){
+			Fraghdr6 *fh = (Fraghdr6*)(bp->rp + IP6HDR);
+			if((nhgets(fh->offsetRM) & ~7) == 0){	/* first fragment */
+				p->proto = fh->nexthdr;
+				/* copy down payload over fragment header */
+				bp->rp += IP6HDR;
+				bp->wp -= IP6FHDR;
+				memmove(bp->rp, bp->rp+IP6FHDR, BLEN(bp));
+				hnputs(p->ploadlen, BLEN(bp));
+				bp->rp -= IP6HDR;
 			}
-			p = (IPICMP *)bp->rp;
+		}
+		if(p->proto != FH){
 			pr = Fsrcvpcolx(icmp->f, p->proto);
 			if(pr != nil && pr->advise != nil) {
-				(*pr->advise)(pr, bp, m2);
+				(*pr->advise)(pr, bp, msg);
 				return;
 			}
-			bp->rp -= sizeof(IPICMP);
 		}
+		bp->rp -= IPICMPSZ;
+		goticmpkt6(icmp, bp, 0);
+		break;
 
+	case TimeExceedV6:
+		if(p->code == 0){
+			snprint(msg = m2, sizeof m2, "ttl exceeded at %I", p->src);
+			goto Advise;
+		}
+		if(p->code == 1){
+			snprint(msg = m2, sizeof m2, "frag time exceeded at %I", p->src);
+			goto Advise;
+		}
 		goticmpkt6(icmp, bp, 0);
 		break;
 
+	case PacketTooBigV6:
+		snprint(msg = m2, sizeof(m2), "packet too big for %lud mtu at %I",
+			(ulong)nhgetl(p->icmpid), p->src);
+		goto Advise;
+
 	case RouterAdvert:
 	case RouterSolicit:
-		/* using lsrc as a temp, munge hdr for goticmp6 
-		memmove(lsrc, p->src, IPaddrlen);
-		memmove(p->src, p->dst, IPaddrlen);
-		memmove(p->dst, lsrc, IPaddrlen); */
-
 		goticmpkt6(icmp, bp, p->type);
 		break;
 
 	case NbrSolicit:
-		np = (Ndpkt*) p;
+		np = (Ndpkt*)p;
 		pktflags = 0;
-		switch (targettype(icmp->f, ipifc, np->target)) {
-		case t_unirany:
+		if(ifc->sendra6)
+			pktflags |= Rflag;
+		switch (targettype(icmp->f, ifc, np->target)) {
+		case Tunirany:
 			pktflags |= Oflag;
 			/* fall through */
 
-		case t_uniproxy: 
-			if(ipcmp(np->src, v6Unspecified) != 0) {
-				arpenter(icmp->f, V6, np->src, np->lnaddr, 8*np->olen-2, 0);
+		case Tuniproxy:
+			if(ipv6local(ifc, ia, 0, np->src)) {
+				if(arpenter(icmp->f, V6, np->src, np->lnaddr, 8*np->olen-2, ia, ifc, 0) < 0)
+					break;
 				pktflags |= Sflag;
-			}
-			if(ipv6local(ipifc, lsrc)) {
-				icmpna(icmp->f, lsrc, 
-				   (ipcmp(np->src, v6Unspecified)==0)?v6allnodesL:np->src,
-				   np->target, ipifc->mac, pktflags); 
-			}
-			else
-				freeblist(bp);
+			} else
+				ipmove(ia, np->target);
+			icmpna(icmp->f, ia, (pktflags & Sflag)? np->src: v6allnodesL,
+				np->target, ifc->mac, pktflags);
 			break;
-
-		case t_unitent:
-			/* not clear what needs to be done. send up
-			 * an icmp mesg saying don't use this address? */
-
-		default:
-			freeblist(bp);
+		case Tunitent:
+			/*
+			 * not clear what needs to be done. send up
+			 * an icmp mesg saying don't use this address?
+			 */
+			break;
 		}
-
+		freeblist(bp);
 		break;
 
 	case NbrAdvert:
-		np = (Ndpkt*) p;
+		np = (Ndpkt*)p;
 
-		/* if the target address matches one of the local interface 
-		 * address and the local interface address has tentative bit set, 
-		 * then insert into ARP table. this is so the duplication address 
-		 * detection part of ipconfig can discover duplication through 
-		 * the arp table
+		/*
+		 * if the target address matches one of the local interface
+		 * addresses and the local interface address has tentative bit
+		 * set, insert into ARP table. this is so the duplicate address
+		 * detection part of ipconfig can discover duplication through
+		 * the arp table.
 		 */
-		lifc = iplocalonifc(ipifc, np->target);
-		if(lifc && lifc->tentative)
-			refresh = 0;
-		arpenter(icmp->f, V6, np->target, np->lnaddr, 8*np->olen-2, refresh);
+		lifc = iplocalonifc(ifc, np->target);
+		if(lifc != nil && lifc->tentative)
+			arpenter(icmp->f, V6, np->target, np->lnaddr, 8*np->olen-2, np->target, ifc, 0);
+		else if(ipv6local(ifc, ia, 0, np->target))
+			arpenter(icmp->f, V6, np->target, np->lnaddr, 8*np->olen-2, ia, ifc, 1);
 		freeblist(bp);
 		break;
 
-	case PacketTooBigV6:
-
 	default:
 		goticmpkt6(icmp, bp, 0);
 		break;
 	}
 	return;
-
 raise:
 	freeblist(bp);
-
 }
 
-int
+static int
 icmpstats6(Proto *icmp6, char *buf, int len)
 {
 	Icmppriv6 *priv;
@@ -874,23 +827,28 @@
 	e = p+len;
 	for(i = 0; i < Nstats6; i++)
 		p = seprint(p, e, "%s: %lud\n", statnames6[i], priv->stats[i]);
-	for(i = 0; i <= Maxtype6; i++){
+	for(i = 0; i <= Maxtype6; i++)
 		if(icmpnames6[i])
-			p = seprint(p, e, "%s: %lud %lud\n", icmpnames6[i], priv->in[i], priv->out[i]);
-/*		else
-			p = seprint(p, e, "%d: %lud %lud\n", i, priv->in[i], priv->out[i]);
-*/
-	}
+			p = seprint(p, e, "%s: %lud %lud\n", icmpnames6[i],
+				priv->in[i], priv->out[i]);
 	return p - buf;
 }
 
 
-// need to import from icmp.c
+/* import from icmp.c */
 extern int	icmpstate(Conv *c, char *state, int n);
 extern char*	icmpannounce(Conv *c, char **argv, int argc);
 extern char*	icmpconnect(Conv *c, char **argv, int argc);
 extern void	icmpclose(Conv *c);
 
+static void
+icmpclose6(Conv *c)
+{
+	Icmpcb6 *icb = (Icmpcb6*)c->ptcl;	/* icmp6 per-conversation state */
+	icb->headers = 0;			/* clear the headers setting before closing */
+	icmpclose(c);				/* remaining teardown is the icmp.c routine */
+}
+
 void
 icmp6init(Fs *fs)
 {
@@ -902,7 +860,7 @@
 	icmp6->announce = icmpannounce;
 	icmp6->state = icmpstate;
 	icmp6->create = icmpcreate6;
-	icmp6->close = icmpclose;
+	icmp6->close = icmpclose6;
 	icmp6->rcv = icmpiput6;
 	icmp6->stats = icmpstats6;
 	icmp6->ctl = icmpctl6;
@@ -914,4 +872,3 @@
 
 	Fsproto(fs, icmp6);
 }
-
diff -u a/os/ip//igmp.c b/os/ip//igmp.c
--- a/os/ip//igmp.c
+++ b/os/ip//igmp.c
@@ -1,3 +1,7 @@
+/*
+ * igmp - internet group management protocol
+ * unfinished.
+ */
 #include "u.h"
 #include "../port/lib.h"
 #include "mem.h"
@@ -40,8 +44,12 @@
 	uchar	unused;
 	uchar	igmpcksum[2];		/* checksum of igmp portion */
 	uchar	group[IPaddrlen];	/* multicast group */
+
+	uchar	payload[];
 };
 
+#define IGMPPKTSZ offsetof(IGMPpkt, payload[0])
+
 /*
  *  lists for group reports
  */
@@ -49,7 +57,7 @@
 struct IGMPrep
 {
 	IGMPrep		*next;
-	Media		*m;
+	Medium		*m;
 	int		ticks;
 	Multicast	*multi;
 };
@@ -76,19 +84,17 @@
 } stats;
 
 void
-igmpsendreport(Media *m, uchar *addr)
+igmpsendreport(Medium *m, uchar *addr)
 {
 	IGMPpkt *p;
 	Block *bp;
 
 	bp = allocb(sizeof(IGMPpkt));
-	if(bp == nil)
-		return;
 	p = (IGMPpkt*)bp->wp;
 	p->vihl = IP_VER4;
-	bp->wp += sizeof(IGMPpkt);
-	memset(bp->rp, 0, sizeof(IGMPpkt));
-	hnputl(p->src, Mediagetaddr(m));
+	bp->wp += IGMPPKTSZ;
+	memset(bp->rp, 0, IGMPPKTSZ);
+	hnputl(p->src, Mediumgetaddr(m));
 	hnputl(p->dst, Ipallsys);
 	p->vertype = (1<<4) | IGMPreport;
 	p->proto = IP_IGMPPROTO;
@@ -166,7 +172,7 @@
 }
 
 void
-igmpiput(Media *m, Ipifc *, Block *bp)
+igmpiput(Medium *m, Ipifc *, Block *bp)
 {
 	int n;
 	IGMPpkt *ghp;
@@ -206,7 +212,7 @@
 		if(rp != nil)
 			break;	/* already reporting */
 
-		mp = Mediacopymulti(m);
+		mp = Mediumcopymulti(m);
 		if(mp == nil)
 			break;
 
@@ -285,7 +291,7 @@
 	igmp.ptclsize = 0;
 
 	igmpreportfn = igmpsendreport;
-	kproc("igmpproc", igmpproc, 0, 0);
+	kproc("igmpproc", igmpproc, 0);
 
 	Fsproto(fs, &igmp);
 }
diff -u a/os/ip//il.c b/os/ip//il.c
--- a/os/ip//il.c
+++ b/os/ip//il.c
@@ -189,7 +189,7 @@
 {
 	Ipht	ht;
 
-	ulong	stats[Nstats];
+	uvlong	stats[Nstats];
 
 	ulong	csumerr;		/* checksum errors */
 	ulong	hlenerr;		/* header length error */
@@ -208,7 +208,7 @@
 
 
 void	ilrcvmsg(Conv*, Block*);
-void	ilsendctl(Conv*, Ilhdr*, int, ulong, ulong, int);
+int	ilsendctl(Conv*, Ilhdr*, int, ulong, ulong, int);
 void	ilackq(Ilcb*, Block*);
 void	ilprocess(Conv*, Ilhdr*, Block*);
 void	ilpullup(Conv*);
@@ -251,6 +251,9 @@
 	e = Fsstdconnect(c, argv, argc);
 	if(e != nil)
 		return e;
+	if(c->ipversion != V4)
+		return "only IP version 4 supported";
+		
 	return ilstart(c, IL_CONNECT, fast);
 }
 
@@ -260,7 +263,7 @@
 	Ilcb *ic;
 
 	ic = (Ilcb*)(c->ptcl);
-	return snprint(state, n, "%s qin %d qout %d del %5.5d Br %5.5d md %5.5d una %5.5lud rex %5.5d rxq %5.5d max %5.5d",
+	return snprint(state, n, "%s qin %d qout %d del %5.5d Br %5.5d md %5.5d una %5.5lud rex %5.5d rxq %5.5d max %5.5d\n",
 		ilstates[ic->state],
 		c->rq ? qlen(c->rq) : 0,
 		c->wq ? qlen(c->wq) : 0,
@@ -434,7 +437,7 @@
 	p = buf;
 	e = p+len;
 	for(i = 0; i < Nstats; i++)
-		p = seprint(p, e, "%s: %lud\n", statnames[i], priv->stats[i]);
+		p = seprint(p, e, "%s: %llud\n", statnames[i], priv->stats[i]);
 	return p - buf;
 }
 
@@ -548,6 +551,9 @@
 
 	ih = (Ilhdr *)bp->rp;
 	plen = blocklen(bp);
+	if(plen > 0 && (ih->vihl&0xF0)!=IP_VER4)
+		goto raise;	/* ignore non V4 packets */
+
 	if(plen < IL_IPSIZE+IL_HDRSIZE){
 		netlog(il->f, Logil, "il: hlenerr\n");
 		ipriv->stats[HlenErrs]++;
@@ -572,7 +578,7 @@
 		else
 			st = iltype[ih->iltype];
 		ipriv->stats[CsumErrs]++;
-		netlog(il->f, Logil, "il: cksum %ux %ux, pkt(%s id %lud ack %lud %I/%d->%d)\n",
+		netlog(il->f, Logil, "il: cksum %ux, pkt(%s id %ud ack %ud %I/%d->%d)\n",
 			csum, st, nhgetl(ih->ilid), nhgetl(ih->ilack), raddr, sp, dp);
 		goto raise;
 	}
@@ -595,7 +601,7 @@
 			else
 				st = iltype[ih->iltype];
 			ilreject(il->f, ih);		/* no channel and not sync */
-			netlog(il->f, Logil, "il: no channel, pkt(%s id %lud ack %lud %I/%ud->%ud)\n",
+			netlog(il->f, Logil, "il: no channel, pkt(%s id %ud ack %ud %I/%ud->%ud)\n",
 				st, nhgetl(ih->ilid), nhgetl(ih->ilack), raddr, sp, dp); 
 			goto raise;
 		}
@@ -829,7 +835,7 @@
 
 	c = ic->conv;
 	id = nhgetl(h->ilid);
-	netlog(c->p->f, Logil, "il: rexmit %d %ud: %d %d: %i %d/%d\n", id, ic->recvd,
+	netlog(c->p->f, Logil, "il: rexmit %lud %lud: %d %lud: %I %d/%d\n", id, ic->recvd,
 		ic->rexmit, ic->timeout,
 		c->raddr, c->lport, c->rport);
 
@@ -852,7 +858,7 @@
 	ic = (Ilcb*)s->ptcl;
 
 	USED(ic);
-	netlog(s->p->f, Logilmsg, "%11s rcv %d/%d snt %d/%d pkt(%s id %d ack %d %d->%d) ",
+	netlog(s->p->f, Logilmsg, "%11s rcv %lud/%lud snt %lud/%lud pkt(%s id %ud ack %ud %ud->%ud) ",
 		ilstates[ic->state],  ic->rstart, ic->recvd, ic->start, 
 		ic->next, iltype[h->iltype], nhgetl(h->ilid), 
 		nhgetl(h->ilack), nhgets(h->ilsrc), nhgets(h->ildst));
@@ -859,7 +865,7 @@
 
 	_ilprocess(s, h, bp);
 
-	netlog(s->p->f, Logilmsg, "%11s rcv %d snt %d\n", ilstates[ic->state], ic->recvd, ic->next);
+	netlog(s->p->f, Logilmsg, "%11s rcv %lud snt %lud\n", ilstates[ic->state], ic->recvd, ic->next);
 }
 
 void
@@ -917,17 +923,12 @@
 		bp->list = nil;
 		dlen = nhgets(oh->illen)-IL_HDRSIZE;
 		bp = trimblock(bp, IL_IPSIZE+IL_HDRSIZE, dlen);
+			
 		/*
 		 * Upper levels don't know about multiple-block
 		 * messages so copy all into one (yick).
 		 */
-		bp = concatblock(bp);
-		if(bp == 0)
-			panic("ilpullup");
-		bp = packblock(bp);
-		if(bp == 0)
-			panic("ilpullup2");
-		qpass(s->rq, bp);
+		qpass(s->rq, packblock(concatblock(bp)));
 	}
 	qunlock(&ic->outo);
 }
@@ -948,7 +949,7 @@
 	id = nhgetl(h->ilid);
 	/* Window checks */
 	if(id <= ic->recvd || id > ic->recvd+ic->window) {
-		netlog(s->p->f, Logil, "il: message outside window %ud <%ud-%ud>: %i %d/%d\n",
+		netlog(s->p->f, Logil, "il: message outside window %lud <%lud-%lud>: %I %d/%d\n",
 			id, ic->recvd, ic->recvd+ic->window, s->raddr, s->lport, s->rport);
 		freeblist(bp);
 		return;
@@ -983,7 +984,7 @@
 	qunlock(&ic->outo);
 }
 
-void
+int
 ilsendctl(Conv *ipc, Ilhdr *inih, int type, ulong id, ulong ack, int ilspec)
 {
 	Ilhdr *ih;
@@ -1034,7 +1035,7 @@
 		hnputs(ih->ilsum, ptclcsum(bp, IL_IPSIZE, IL_HDRSIZE));
 
 if(ipc==nil)
-	panic("ipc is nil caller is %.8lux", getcallerpc(&ipc));
+	panic("ipc is nil caller is %#p", getcallerpc(&ipc));
 if(ipc->p==nil)
 	panic("ipc->p is nil");
 
@@ -1042,7 +1043,7 @@
 		iltype[ih->iltype], nhgetl(ih->ilid), nhgetl(ih->ilack), 
 		nhgets(ih->ilsrc), nhgets(ih->ildst));
 
-	ipoput4(ipc->p->f, bp, 0, ttl, tos, ipc);
+	return ipoput4(ipc->p->f, bp, 0, ttl, tos, ipc);
 }
 
 void
@@ -1145,6 +1146,8 @@
 
 	il = x;
 
+	while(waserror())
+		;
 loop:
 	tsleep(&up->sleep, return0, 0, Iltickms);
 	for(s = il->conv; s && *s; s++) {
@@ -1248,7 +1251,7 @@
 		qlock(&ipriv->apl);
 		if(ipriv->ackprocstarted == 0){
 			sprint(kpname, "#I%dilack", c->p->f->dev);
-			kproc(kpname, ilackproc, c->p, 0);
+			kproc(kpname, ilackproc, c->p);
 			ipriv->ackprocstarted = 1;
 		}
 		qunlock(&ipriv->apl);
@@ -1280,7 +1283,8 @@
 	case IL_CONNECT:
 		ic->state = Ilsyncer;
 		iphtadd(&ipriv->ht, c);
-		ilsendctl(c, nil, Ilsync, ic->start, ic->recvd, 0);
+		if(ilsendctl(c, nil, Ilsync, ic->start, ic->recvd, 0) < 0)
+			ilhangup(c, "no route");
 		break;
 	}
 
@@ -1332,6 +1336,8 @@
 		if(s->lport == psource)
 		if(ipcmp(s->laddr, source) == 0)
 		if(ipcmp(s->raddr, dest) == 0){
+			if(s->ignoreadvice)
+				break;
 			qunlock(il);
 			ic = (Ilcb*)s->ptcl;
 			switch(ic->state){
@@ -1380,12 +1386,6 @@
 	}
 }
 
-int
-ilgc(Proto *il)
-{
-	return natgc(il->ipproto);
-}
-
 void
 ilinit(Fs *f)
 {
@@ -1406,7 +1406,7 @@
 	il->advise = iladvise;
 	il->stats = ilxstats;
 	il->inuse = ilinuse;
-	il->gc = ilgc;
+	il->gc = nil;
 	il->ipproto = IP_ILPROTO;
 	il->nc = scalednconv();
 	il->ptclsize = sizeof(Ilcb);
diff -u a/os/ip//ip.c b/os/ip//ip.c
--- a/os/ip//ip.c
+++ b/os/ip//ip.c
@@ -7,94 +7,6 @@
 
 #include	"ip.h"
 
-typedef struct IP		IP;
-typedef struct Fragment4	Fragment4;
-typedef struct Fragment6	Fragment6;
-typedef struct Ipfrag		Ipfrag;
-
-enum
-{
-	IP4HDR		= 20,		/* sizeof(Ip4hdr) */
-	IP6HDR		= 40,		/* sizeof(Ip6hdr) */
-	IP_HLEN4	= 0x05,		/* Header length in words */
-	IP_DF		= 0x4000,	/* Don't fragment */
-	IP_MF		= 0x2000,	/* More fragments */
-	IP6FHDR		= 8, 		/* sizeof(Fraghdr6) */
-	IP_MAX		= 64*1024,	/* Maximum Internet packet size */
-};
-
-#define BLKIPVER(xp)	(((Ip4hdr*)((xp)->rp))->vihl&0xF0)
-
-/* MIB II counters */
-enum
-{
-	Forwarding,
-	DefaultTTL,
-	InReceives,
-	InHdrErrors,
-	InAddrErrors,
-	ForwDatagrams,
-	InUnknownProtos,
-	InDiscards,
-	InDelivers,
-	OutRequests,
-	OutDiscards,
-	OutNoRoutes,
-	ReasmTimeout,
-	ReasmReqds,
-	ReasmOKs,
-	ReasmFails,
-	FragOKs,
-	FragFails,
-	FragCreates,
-
-	Nstats,
-};
-
-struct Fragment4
-{
-	Block*	blist;
-	Fragment4*	next;
-	ulong 	src;
-	ulong 	dst;
-	ushort	id;
-	ulong 	age;
-};
-
-struct Fragment6
-{
-	Block*	blist;
-	Fragment6*	next;
-	uchar 	src[IPaddrlen];
-	uchar 	dst[IPaddrlen];
-	uint	id;
-	ulong 	age;
-};
-
-struct Ipfrag
-{
-	ushort	foff;
-	ushort	flen;
-};
-
-/* an instance of IP */
-struct IP
-{
-	ulong		stats[Nstats];
-
-	QLock		fraglock4;
-	Fragment4*	flisthead4;
-	Fragment4*	fragfree4;
-	Ref		id4;
-
-	QLock		fraglock6;
-	Fragment6*	flisthead6;
-	Fragment6*	fragfree6;
-	Ref		id6;
-
-	int		iprouting;	/* true if we route like a gateway */
-};
-
 static char *statnames[] =
 {
 [Forwarding]	"Forwarding",
@@ -118,45 +30,11 @@
 [FragCreates]	"FragCreates",
 };
 
-#define BLKIP(xp)	((Ip4hdr*)((xp)->rp))
-/*
- * This sleazy macro relies on the media header size being
- * larger than sizeof(Ipfrag). ipreassemble checks this is true
- */
-#define BKFG(xp)	((Ipfrag*)((xp)->base))
+static Block*		ip4reassemble(IP*, int, Block*);
+static void		ipfragfree4(IP*, Fragment4*);
+static Fragment4*	ipfragallo4(IP*);
 
-ushort		ipcsum(uchar*);
-Block*		ip4reassemble(IP*, int, Block*, Ip4hdr*);
-void		ipfragfree4(IP*, Fragment4*);
-Fragment4*	ipfragallo4(IP*);
-
-
-void
-ip_init_6(Fs *f)
-{
-	V6params *v6p;
-
-	v6p = smalloc(sizeof(V6params));
-	
-	v6p->rp.mflag		= 0;		// default not managed
-	v6p->rp.oflag		= 0;
-	v6p->rp.maxraint	= 600000;	// millisecs
-	v6p->rp.minraint	= 200000;
-	v6p->rp.linkmtu		= 0;		// no mtu sent
-	v6p->rp.reachtime	= 0;
-	v6p->rp.rxmitra		= 0;
-	v6p->rp.ttl		= MAXTTL;
-	v6p->rp.routerlt	= 3*(v6p->rp.maxraint);	
-
-	v6p->hp.rxmithost	= 1000;		// v6 RETRANS_TIMER
-
-	v6p->cdrouter 		= -1;
-
-	f->v6p			= v6p;
-
-}
-
-void
+static void
 initfrag(IP *ip, int size)
 {
 	Fragment4 *fq4, *eq4;
@@ -189,6 +67,7 @@
 	IP *ip;
 
 	ip = smalloc(sizeof(IP));
+	ip->stats[DefaultTTL] = MAXTTL;
 	initfrag(ip, 100);
 	f->ip = ip;
 
@@ -202,11 +81,11 @@
 	if(f->ip->iprouting==0)
 		f->ip->stats[Forwarding] = 2;
 	else
-		f->ip->stats[Forwarding] = 1;	
+		f->ip->stats[Forwarding] = 1;
 }
 
 int
-ipoput4(Fs *f, Block *bp, int gating, int ttl, int tos, Conv *c)
+ipoput4(Fs *f, Block *bp, int gating, int ttl, int tos, Routehint *rh)
 {
 	Ipifc *ifc;
 	uchar *gate;
@@ -213,66 +92,41 @@
 	ulong fragoff;
 	Block *xp, *nb;
 	Ip4hdr *eh, *feh;
-	int lid, len, seglen, chunk, dlen, blklen, offset, medialen;
-	Route *r, *sr;
+	int lid, len, seglen, chunk, hlen, dlen, blklen, offset, medialen;
+	Route *r;
 	IP *ip;
 	int rv = 0;
 
 	ip = f->ip;
-
-	/* Fill out the ip header */
-	eh = (Ip4hdr*)(bp->rp);
-
 	ip->stats[OutRequests]++;
 
-	/* Number of uchars in data and ip header to write */
+	/* Fill out the ip header */
+	eh = (Ip4hdr*)bp->rp;
+	assert(BLEN(bp) >= IP4HDR);
 	len = blocklen(bp);
-
-	if(gating){
-		chunk = nhgets(eh->length);
-		if(chunk > len){
-			ip->stats[OutDiscards]++;
-			netlog(f, Logip, "short gated packet\n");
-			goto free;
-		}
-		if(chunk < len)
-			len = chunk;
-	}
 	if(len >= IP_MAX){
 		ip->stats[OutDiscards]++;
-		netlog(f, Logip, "exceeded ip max size %V\n", eh->dst);
+		netlog(f, Logip, "%V -> %V: exceeded ip max size: %d\n", eh->src, eh->dst, len);
 		goto free;
 	}
 
-	r = v4lookup(f, eh->dst, c);
-	if(r == nil){
+	r = v4lookup(f, eh->dst, eh->src, rh);
+	if(r == nil || (ifc = r->ifc) == nil){
 		ip->stats[OutNoRoutes]++;
-		netlog(f, Logip, "no interface %V\n", eh->dst);
+		netlog(f, Logip, "%V -> %V: no interface\n", eh->src, eh->dst);
 		rv = -1;
 		goto free;
 	}
 
-	ifc = r->ifc;
-	if(r->type & (Rifc|Runi))
+	if(r->type & (Rifc|Runi|Rbcast|Rmulti))
 		gate = eh->dst;
 	else
-	if(r->type & (Rbcast|Rmulti)) {
-		gate = eh->dst;
-		sr = v4lookup(f, eh->src, nil);
-		if(sr != nil && (sr->type & Runi))
-			ifc = sr->ifc;
-	}
-	else
 		gate = r->v4.gate;
 
-	if(!gating)
-		eh->vihl = IP_VER4|IP_HLEN4;
-	eh->ttl = ttl;
-	if(!gating)
-		eh->tos = tos;
-
-	if(!canrlock(ifc))
+	if(!canrlock(ifc)){
+		ip->stats[OutDiscards]++;
 		goto free;
+	}
 	if(waserror()){
 		runlock(ifc);
 		nexterror();
@@ -280,17 +134,18 @@
 	if(ifc->m == nil)
 		goto raise;
 
-	/* Output NAT */
-	if(nato(bp, ifc, f) != 0)
-		goto raise;
+	if(!gating){
+		eh->vihl = IP_VER4|IP_HLEN4;
+		eh->tos = tos;
+	}
+	eh->ttl = ttl;
 
 	/* If we dont need to fragment just send it */
 	medialen = ifc->maxtu - ifc->m->hsize;
 	if(len <= medialen) {
-		if(!gating)
-			hnputs(eh->id, incref(&ip->id4));
 		hnputs(eh->length, len);
 		if(!gating){
+			hnputs(eh->id, incref(&ip->id4));
 			eh->frag[0] = 0;
 			eh->frag[1] = 0;
 		}
@@ -297,31 +152,31 @@
 		eh->cksum[0] = 0;
 		eh->cksum[1] = 0;
 		hnputs(eh->cksum, ipcsum(&eh->vihl));
-		ifc->m->bwrite(ifc, bp, V4, gate);
+
+		ipifcoput(ifc, bp, V4, gate);
 		runlock(ifc);
 		poperror();
 		return 0;
 	}
 
-if((eh->frag[0] & (IP_DF>>8)) && !gating) print("%V: DF set\n", eh->dst);
-
 	if(eh->frag[0] & (IP_DF>>8)){
 		ip->stats[FragFails]++;
 		ip->stats[OutDiscards]++;
 		icmpcantfrag(f, bp, medialen);
-		netlog(f, Logip, "%V: eh->frag[0] & (IP_DF>>8)\n", eh->dst);
+		netlog(f, Logip, "%V -> %V: can't fragment with DF flag set\n", eh->src, eh->dst);
 		goto raise;
 	}
 
-	seglen = (medialen - IP4HDR) & ~7;
+	hlen = (eh->vihl & 0xF)<<2;
+	seglen = (medialen - hlen) & ~7;
 	if(seglen < 8){
 		ip->stats[FragFails]++;
 		ip->stats[OutDiscards]++;
-		netlog(f, Logip, "%V seglen < 8\n", eh->dst);
+		netlog(f, Logip, "%V -> %V: can't fragment with seglen < 8\n", eh->src, eh->dst);
 		goto raise;
 	}
 
-	dlen = len - IP4HDR;
+	dlen = len - hlen;
 	xp = bp;
 	if(gating)
 		lid = nhgets(eh->id);
@@ -328,8 +183,8 @@
 	else
 		lid = incref(&ip->id4);
 
-	offset = IP4HDR;
-	while(xp != nil && offset && offset >= BLEN(xp)) {
+	offset = hlen;
+	while(offset && offset >= BLEN(xp)) {
 		offset -= BLEN(xp);
 		xp = xp->next;
 	}
@@ -341,30 +196,30 @@
 		fragoff = 0;
 	dlen += fragoff;
 	for(; fragoff < dlen; fragoff += seglen) {
-		nb = allocb(IP4HDR+seglen);
-		feh = (Ip4hdr*)(nb->rp);
+		nb = allocb(hlen+seglen);
+		feh = (Ip4hdr*)nb->rp;
 
-		memmove(nb->wp, eh, IP4HDR);
-		nb->wp += IP4HDR;
+		memmove(nb->wp, eh, hlen);
+		nb->wp += hlen;
 
 		if((fragoff + seglen) >= dlen) {
 			seglen = dlen - fragoff;
 			hnputs(feh->frag, fragoff>>3);
 		}
-		else	
+		else
 			hnputs(feh->frag, (fragoff>>3)|IP_MF);
 
-		hnputs(feh->length, seglen + IP4HDR);
+		hnputs(feh->length, seglen + hlen);
 		hnputs(feh->id, lid);
 
 		/* Copy up the data area */
 		chunk = seglen;
 		while(chunk) {
-			if(!xp) {
+			if(xp == nil) {
 				ip->stats[OutDiscards]++;
 				ip->stats[FragFails]++;
 				freeblist(nb);
-				netlog(f, Logip, "!xp: chunk %d\n", chunk);
+				netlog(f, Logip, "xp == nil: chunk %d\n", chunk);
 				goto raise;
 			}
 			blklen = chunk;
@@ -376,12 +231,13 @@
 			chunk -= blklen;
 			if(xp->rp == xp->wp)
 				xp = xp->next;
-		} 
+		}
 
 		feh->cksum[0] = 0;
 		feh->cksum[1] = 0;
 		hnputs(feh->cksum, ipcsum(&feh->vihl));
-		ifc->m->bwrite(ifc, nb, V4, gate);
+
+		ipifcoput(ifc, nb, V4, gate);
 		ip->stats[FragCreates]++;
 	}
 	ip->stats[FragOKs]++;
@@ -396,17 +252,14 @@
 void
 ipiput4(Fs *f, Ipifc *ifc, Block *bp)
 {
-	int hl;
-	int hop, tos, proto, olen;
+	int hl, len, hop, tos;
+	uchar v6dst[IPaddrlen];
+	ushort frag;
 	Ip4hdr *h;
 	Proto *p;
-	ushort frag;
-	int notforme;
-	uchar *dp, v6dst[IPaddrlen];
 	IP *ip;
-	Route *r;
 
-	if(BLKIPVER(bp) != IP_VER4) {
+	if((bp->rp[0]&0xF0) != IP_VER4) {
 		ipiput6(f, ifc, bp);
 		return;
 	}
@@ -430,58 +283,45 @@
 			return;
 	}
 
-	h = (Ip4hdr*)(bp->rp);
-
-	/* Input NAT */
-	nati(bp, ifc);
-
-	/* dump anything that whose header doesn't checksum */
+	h = (Ip4hdr*)bp->rp;
+	hl = (h->vihl & 0xF)<<2;
+	if(hl < IP4HDR || hl > BLEN(bp)) {
+		ip->stats[InHdrErrors]++;
+		netlog(f, Logip, "%V -> %V: bad ip header length: %d\n", h->src, h->dst, hl);
+		goto drop;
+	}
 	if((bp->flag & Bipck) == 0 && ipcsum(&h->vihl)) {
 		ip->stats[InHdrErrors]++;
-		netlog(f, Logip, "ip: checksum error %V\n", h->src);
-		freeblist(bp);
+		netlog(f, Logip, "%V -> %V: bad ip header checksum\n", h->src, h->dst);
+		goto drop;
+	}
+	len = nhgets(h->length);
+	if(len < hl || (bp = trimblock(bp, 0, len)) == nil){
+		ip->stats[InHdrErrors]++;
+		netlog(f, Logip, "%V -> %V: bogus packet length: %d\n", h->src, h->dst, len);
+		if(bp != nil)
+			goto drop;
 		return;
 	}
-	v4tov6(v6dst, h->dst);
-	notforme = ipforme(f, v6dst) == 0;
+	h = (Ip4hdr*)bp->rp;
 
-	/* Check header length and version */
-	if((h->vihl&0x0F) != IP_HLEN4) {
-		hl = (h->vihl&0xF)<<2;
-		if(hl < (IP_HLEN4<<2)) {
-			ip->stats[InHdrErrors]++;
-			netlog(f, Logip, "ip: %V bad hivl %ux\n", h->src, h->vihl);
-			freeblist(bp);
-			return;
-		}
-	  /* If this is not routed strip off the options */
-		if(notforme == 0) {
-			olen = nhgets(h->length);
-			dp = bp->rp + (hl - (IP_HLEN4<<2));
-			memmove(dp, h, IP_HLEN4<<2);
-			bp->rp = dp;
-			h = (Ip4hdr*)(bp->rp);
-			h->vihl = (IP_VER4|IP_HLEN4);
-			hnputs(h->length, olen-hl+(IP_HLEN4<<2));
-		}
-	}
-
 	/* route */
-	if(notforme) {
-		Conv conv;
+	v4tov6(v6dst, h->dst);
+	if(!ipforme(f, v6dst)) {
+		Route *r;
+		Routehint rh;
+		Ipifc *nifc;
 
-		if(!ip->iprouting){
-			freeb(bp);
-			return;
-		}
+		if(!ip->iprouting)
+			goto drop;
 
 		/* don't forward to source's network */
-		conv.r = nil;
-		r = v4lookup(f, h->dst, &conv);
-		if(r == nil || r->ifc == ifc){
+		rh.r = nil;
+		r = v4lookup(f, h->dst, h->src, &rh);
+		if(r == nil || (nifc = r->ifc) == nil
+		|| (nifc == ifc && !ifc->reflect)){
 			ip->stats[OutDiscards]++;
-			freeblist(bp);
-			return;
+			goto drop;
 		}
 
 		/* don't forward if packet has timed out */
@@ -488,23 +328,18 @@
 		hop = h->ttl;
 		if(hop < 1) {
 			ip->stats[InHdrErrors]++;
-			icmpttlexceeded(f, ifc->lifc->local, bp);
-			freeblist(bp);
-			return;
+			icmpttlexceeded(f, ifc, bp);
+			goto drop;
 		}
 
 		/* reassemble if the interface expects it */
-if(r->ifc == nil) panic("nil route rfc");
-		if(r->ifc->reassemble){
+		if(nifc->reassemble){
 			frag = nhgets(h->frag);
-			if(frag) {
-				h->tos = 0;
-				if(frag & IP_MF)
-					h->tos = 1;
-				bp = ip4reassemble(ip, frag, bp, h);
+			if(frag & (IP_MF|IP_FO)) {
+				bp = ip4reassemble(ip, frag, bp);
 				if(bp == nil)
 					return;
-				h = (Ip4hdr*)(bp->rp);
+				h = (Ip4hdr*)bp->rp;
 			}
 		}
 
@@ -511,27 +346,30 @@
 		ip->stats[ForwDatagrams]++;
 		tos = h->tos;
 		hop = h->ttl;
-		ipoput4(f, bp, 1, hop - 1, tos, &conv);
+		ipoput4(f, bp, 1, hop - 1, tos, &rh);
 		return;
 	}
 
+	/* If this is not routed strip off the options */
+	if(hl > IP4HDR) {
+		hl -= IP4HDR;
+		len -= hl;
+		bp->rp += hl;
+		memmove(bp->rp, h, IP4HDR);
+		h = (Ip4hdr*)bp->rp;
+		h->vihl = IP_VER4|IP_HLEN4;
+		hnputs(h->length, len);
+	}
+
 	frag = nhgets(h->frag);
-	if(frag) {
-		h->tos = 0;
-		if(frag & IP_MF)
-			h->tos = 1;
-		bp = ip4reassemble(ip, frag, bp, h);
+	if(frag & (IP_MF|IP_FO)) {
+		bp = ip4reassemble(ip, frag, bp);
 		if(bp == nil)
 			return;
-		h = (Ip4hdr*)(bp->rp);
+		h = (Ip4hdr*)bp->rp;
 	}
 
-	/* don't let any frag info go up the stack */
-	h->frag[0] = 0;
-	h->frag[1] = 0;
-
-	proto = h->proto;
-	p = Fsrcvpcol(f, proto);
+	p = Fsrcvpcol(f, h->proto);
 	if(p != nil && p->rcv != nil) {
 		ip->stats[InDelivers]++;
 		(*p->rcv)(p, ifc, bp);
@@ -539,6 +377,7 @@
 	}
 	ip->stats[InDiscards]++;
 	ip->stats[InUnknownProtos]++;
+drop:
 	freeblist(bp);
 }
 
@@ -550,45 +389,43 @@
 	int i;
 
 	ip = f->ip;
-	ip->stats[DefaultTTL] = MAXTTL;
-
 	p = buf;
 	e = p+len;
-	for(i = 0; i < Nstats; i++)
-		p = seprint(p, e, "%s: %lud\n", statnames[i], ip->stats[i]);
+	for(i = 0; i < Nipstats; i++)
+		p = seprint(p, e, "%s: %llud\n", statnames[i], ip->stats[i]);
 	return p - buf;
 }
 
-Block*
-ip4reassemble(IP *ip, int offset, Block *bp, Ip4hdr *ih)
+static Block*
+ip4reassemble(IP *ip, int offset, Block *bp)
 {
-	int fend;
+	int ovlap, fragsize, len;
+	ulong src, dst;
 	ushort id;
+	Block *bl, **l, *prev;
 	Fragment4 *f, *fnext;
-	ulong src, dst;
-	Block *bl, **l, *last, *prev;
-	int ovlap, len, fragsize, pktposn;
+	Ipfrag *fp, *fq;
+	Ip4hdr *ih;
 
+	/*
+	 *  block lists are too hard, concatblock into a single block
+	 */
+	bp = concatblock(bp);
+
+	ih = (Ip4hdr*)bp->rp;
 	src = nhgetl(ih->src);
 	dst = nhgetl(ih->dst);
 	id = nhgets(ih->id);
+	fragsize = BLEN(bp) - ((ih->vihl&0xF)<<2);
 
-	/*
-	 *  block lists are too hard, pullupblock into a single block
-	 */
-	if(bp->next){
-		bp = pullupblock(bp, blocklen(bp));
-		ih = (Ip4hdr*)(bp->rp);
-	}
-
 	qlock(&ip->fraglock4);
 
 	/*
 	 *  find a reassembly queue for this fragment
 	 */
-	for(f = ip->flisthead4; f; f = fnext){
+	for(f = ip->flisthead4; f != nil; f = fnext){
 		fnext = f->next;	/* because ipfragfree4 changes the list */
-		if(f->src == src && f->dst == dst && f->id == id)
+		if(f->id == id && f->src == src && f->dst == dst)
 			break;
 		if(f->age < NOW){
 			ip->stats[ReasmTimeout]++;
@@ -601,22 +438,23 @@
 	 *  and get rid of any fragments that might go
 	 *  with it.
 	 */
-	if(!ih->tos && (offset & ~(IP_MF|IP_DF)) == 0) {
+	if((offset & (IP_MF|IP_FO)) == 0) {
 		if(f != nil) {
-			ipfragfree4(ip, f);
 			ip->stats[ReasmFails]++;
+			ipfragfree4(ip, f);
 		}
 		qunlock(&ip->fraglock4);
 		return bp;
 	}
 
-	if(bp->base+sizeof(Ipfrag) >= bp->rp){
-		bp = padblock(bp, sizeof(Ipfrag));
-		bp->rp += sizeof(Ipfrag);
+	if(bp->base+IPFRAGSZ > bp->rp){
+		bp = padblock(bp, IPFRAGSZ);
+		bp->rp += IPFRAGSZ;
 	}
 
-	BKFG(bp)->foff = offset<<3;
-	BKFG(bp)->flen = nhgets(ih->length)-IP4HDR;
+	fp = (Ipfrag*)bp->base;
+	fp->foff = (offset & IP_FO)<<3;
+	fp->flen = fragsize;
 
 	/* First fragment allocates a reassembly queue */
 	if(f == nil) {
@@ -627,8 +465,9 @@
 
 		f->blist = bp;
 
-		qunlock(&ip->fraglock4);
 		ip->stats[ReasmReqds]++;
+		qunlock(&ip->fraglock4);
+
 		return nil;
 	}
 
@@ -638,7 +477,7 @@
 	prev = nil;
 	l = &f->blist;
 	bl = f->blist;
-	while(bl != nil && BKFG(bp)->foff > BKFG(bl)->foff) {
+	while(bl != nil && fp->foff > ((Ipfrag*)bl->base)->foff) {
 		prev = bl;
 		l = &bl->next;
 		bl = bl->next;
@@ -645,15 +484,16 @@
 	}
 
 	/* Check overlap of a previous fragment - trim away as necessary */
-	if(prev) {
-		ovlap = BKFG(prev)->foff + BKFG(prev)->flen - BKFG(bp)->foff;
+	if(prev != nil) {
+		fq = (Ipfrag*)prev->base;
+		ovlap = fq->foff + fq->flen - fp->foff;
 		if(ovlap > 0) {
-			if(ovlap >= BKFG(bp)->flen) {
-				freeblist(bp);
+			if(ovlap >= fp->flen) {
 				qunlock(&ip->fraglock4);
+				freeb(bp);
 				return nil;
 			}
-			BKFG(prev)->flen -= ovlap;
+			fq->flen -= ovlap;
 		}
 	}
 
@@ -662,26 +502,26 @@
 	*l = bp;
 
 	/* Check to see if succeeding segments overlap */
-	if(bp->next) {
+	if(bp->next != nil) {
 		l = &bp->next;
-		fend = BKFG(bp)->foff + BKFG(bp)->flen;
+		offset = fp->foff + fp->flen;
 		/* Take completely covered segments out */
-		while(*l) {
-			ovlap = fend - BKFG(*l)->foff;
+		while((bl = *l) != nil) {
+			fq = (Ipfrag*)bl->base;
+			ovlap = offset - fq->foff;
 			if(ovlap <= 0)
 				break;
-			if(ovlap < BKFG(*l)->flen) {
-				BKFG(*l)->flen -= ovlap;
-				BKFG(*l)->foff += ovlap;
-				/* move up ih hdrs */
-				memmove((*l)->rp + ovlap, (*l)->rp, IP4HDR);
-				(*l)->rp += ovlap;
+			if(ovlap < fq->flen) {
+				/* move up ip header */
+				memmove(bl->rp + ovlap, bl->rp, BLEN(bl) - fq->flen);
+				bl->rp += ovlap;
+				fq->flen -= ovlap;
+				fq->foff += ovlap;
 				break;
 			}
-			last = (*l)->next;
-			(*l)->next = nil;
-			freeblist(*l);
-			*l = last;
+			*l = bl->next;
+			bl->next = nil;
+			freeb(bl);
 		}
 	}
 
@@ -689,35 +529,50 @@
 	 *  look for a complete packet.  if we get to a fragment
 	 *  without IP_MF set, we're done.
 	 */
-	pktposn = 0;
-	for(bl = f->blist; bl; bl = bl->next) {
-		if(BKFG(bl)->foff != pktposn)
+	offset = 0;
+	for(bl = f->blist; bl != nil; bl = bl->next, offset += fp->flen) {
+		fp = (Ipfrag*)bl->base;
+		if(fp->foff != offset)
 			break;
-		if((BLKIP(bl)->frag[0]&(IP_MF>>8)) == 0) {
-			bl = f->blist;
-			len = nhgets(BLKIP(bl)->length);
-			bl->wp = bl->rp + len;
 
-			/* Pullup all the fragment headers and
-			 * return a complete packet
-			 */
-			for(bl = bl->next; bl; bl = bl->next) {
-				fragsize = BKFG(bl)->flen;
-				len += fragsize;
-				bl->rp += IP4HDR;
-				bl->wp = bl->rp + fragsize;
-			}
+		ih = (Ip4hdr*)bl->rp;
+		if(ih->frag[0]&(IP_MF>>8))
+			continue;
 
-			bl = f->blist;
-			f->blist = nil;
+		bl = f->blist;
+		len = BLEN(bl);
+
+		/*
+		 * Pullup all the fragment headers and
+		 * return a complete packet
+		 */
+		for(bl = bl->next; bl != nil && len < IP_MAX; bl = bl->next) {
+			fq = (Ipfrag*)bl->base;
+			fragsize = fq->flen;
+			bl->rp = bl->wp - fragsize;
+			len += fragsize;
+		}
+
+		if(len >= IP_MAX){
 			ipfragfree4(ip, f);
-			ih = BLKIP(bl);
-			hnputs(ih->length, len);
+			ip->stats[ReasmFails]++;
 			qunlock(&ip->fraglock4);
-			ip->stats[ReasmOKs]++;
-			return bl;		
+			return nil;
 		}
-		pktposn += BKFG(bl)->flen;
+
+		bl = f->blist;
+		f->blist = nil;
+		ipfragfree4(ip, f);
+
+		ih = (Ip4hdr*)bl->rp;
+		ih->frag[0] = 0;
+		ih->frag[1] = 0;
+		hnputs(ih->length, len);
+
+		ip->stats[ReasmOKs]++;
+		qunlock(&ip->fraglock4);
+
+		return bl;
 	}
 	qunlock(&ip->fraglock4);
 	return nil;
@@ -726,20 +581,20 @@
 /*
  * ipfragfree4 - Free a list of fragments - assume hold fraglock4
  */
-void
+static void
 ipfragfree4(IP *ip, Fragment4 *frag)
 {
 	Fragment4 *fl, **l;
 
-	if(frag->blist)
+	if(frag->blist != nil)
 		freeblist(frag->blist);
-
-	frag->src = 0;
-	frag->id = 0;
 	frag->blist = nil;
+	frag->id = 0;
+	frag->src = 0;
+	frag->dst = 0;
 
 	l = &ip->flisthead4;
-	for(fl = *l; fl; fl = fl->next) {
+	for(fl = *l; fl != nil; fl = fl->next) {
 		if(fl == frag) {
 			*l = frag->next;
 			break;
@@ -755,7 +610,7 @@
 /*
  * ipfragallo4 - allocate a reassembly queue - assume hold fraglock4
  */
-Fragment4 *
+static Fragment4*
 ipfragallo4(IP *ip)
 {
 	Fragment4 *f;
@@ -762,7 +617,7 @@
 
 	while(ip->fragfree4 == nil) {
 		/* free last entry on fraglist */
-		for(f = ip->flisthead4; f->next; f = f->next)
+		for(f = ip->flisthead4; f->next != nil; f = f->next)
 			;
 		ipfragfree4(ip, f);
 	}
diff -u a/os/ip//ip.h b/os/ip//ip.h
--- a/os/ip//ip.h
+++ b/os/ip//ip.h
@@ -1,35 +1,33 @@
 typedef struct	Conv	Conv;
+typedef struct	Fragment4 Fragment4;
+typedef struct	Fragment6 Fragment6;
 typedef struct	Fs	Fs;
 typedef union	Hwaddr	Hwaddr;
 typedef struct	IP	IP;
 typedef struct	IPaux	IPaux;
+typedef struct	Ip4hdr	Ip4hdr;
+typedef struct	Ipfrag	Ipfrag;
 typedef struct	Ipself	Ipself;
 typedef struct	Ipselftab	Ipselftab;
 typedef struct	Iplink	Iplink;
 typedef struct	Iplifc	Iplifc;
 typedef struct	Ipmulti	Ipmulti;
-typedef struct	IProuter IProuter;
 typedef struct	Ipifc	Ipifc;
 typedef struct	Iphash	Iphash;
 typedef struct	Ipht	Ipht;
 typedef struct	Netlog	Netlog;
-typedef struct	Ifclog	Ifclog;
 typedef struct	Medium	Medium;
 typedef struct	Proto	Proto;
 typedef struct	Arpent	Arpent;
 typedef struct	Arp Arp;
 typedef struct	Route	Route;
+typedef struct	Routehint Routehint;
 
 typedef struct	Routerparams	Routerparams;
 typedef struct 	Hostparams	Hostparams;
-typedef struct 	V6router	V6router;
-typedef struct	V6params	V6params;
+typedef struct	v6params	v6params;
 
-typedef struct Ip4hdr     Ip4hdr;
-typedef struct Nat	Nat;
-
 #pragma incomplete Arp
-#pragma	incomplete Ifclog
 #pragma incomplete Ipself
 #pragma incomplete Ipselftab
 #pragma incomplete IP
@@ -39,10 +37,9 @@
 {
 	Addrlen=	64,
 	Maxproto=	20,
-	Nhash=		64,
-	Maxincall=	5,
-	Nchans=		16383,
-	MAClen=		16,		/* longest mac address */
+	Maxincall=	10,
+	Nchans=		1024,
+	MAClen=		8,		/* longest mac address */
 
 	MAXTTL=		255,
 	DFLTTOS=	0,
@@ -57,6 +54,12 @@
 	V6=		6,
 	IP_VER4= 	0x40,
 	IP_VER6=	0x60,
+	IP_HLEN4=	5,		/* v4: Header length in words */
+	IP_DF=		0x4000,		/* v4: Don't fragment */
+	IP_MF=		0x2000,		/* v4: More fragments */
+	IP_FO=		0x1fff,		/* v4: Fragment offset */
+	IP4HDR=		IP_HLEN4<<2,	/* sizeof(Ip4hdr) */
+	IP_MAX=		64*1024,	/* Max. Internet packet size, v4 & v6 */
 
 	/* 2^Lroot trees in the root table */
 	Lroot=		10,
@@ -73,6 +76,79 @@
 	Connected=	4,
 };
 
+/* MIB II counters */
+enum
+{
+	Forwarding,
+	DefaultTTL,
+	InReceives,
+	InHdrErrors,
+	InAddrErrors,
+	ForwDatagrams,
+	InUnknownProtos,
+	InDiscards,
+	InDelivers,
+	OutRequests,
+	OutDiscards,
+	OutNoRoutes,
+	ReasmTimeout,
+	ReasmReqds,
+	ReasmOKs,
+	ReasmFails,
+	FragOKs,
+	FragFails,
+	FragCreates,
+
+	Nipstats,
+};
+
+struct Fragment4
+{
+	Block*	blist;
+	Fragment4*	next;
+	ulong 	src;
+	ulong 	dst;
+	ushort	id;
+	ulong 	age;
+};
+
+struct Fragment6
+{
+	Block*	blist;
+	Fragment6*	next;
+	uchar 	src[IPaddrlen];
+	uchar 	dst[IPaddrlen];
+	uint	id;
+	ulong 	age;
+};
+
+struct Ipfrag
+{
+	ushort	foff;		/* byte offset of this fragment's data in the datagram */
+	ushort	flen;		/* data bytes this fragment contributes; reduced when overlaps are trimmed */
+	uchar	payload[];	/* flexible member; IPFRAGSZ is its offset */
+};
+
+#define IPFRAGSZ offsetof(Ipfrag, payload[0])
+
+/* an instance of IP */
+struct IP
+{
+	uvlong		stats[Nipstats];
+
+	QLock		fraglock4;
+	Fragment4*	flisthead4;
+	Fragment4*	fragfree4;
+	Ref		id4;
+
+	QLock		fraglock6;
+	Fragment6*	flisthead6;
+	Fragment6*	fragfree6;
+	Ref		id6;
+
+	int		iprouting;	/* true if we route like a gateway */
+};
+
 /* on the wire packet header */
 struct Ip4hdr
 {
@@ -86,9 +162,14 @@
 	uchar	cksum[2];	/* Header checksum */
 	uchar	src[4];		/* IP source */
 	uchar	dst[4];		/* IP destination */
-	uchar	data[1];	/* start of data */
 };
 
+struct Routehint
+{
+	Route	*r;			/* last route used */
+	ulong	rgen;			/* routetable generation for *r */
+};
+
 /*
  *  one per conversation directory
  */
@@ -100,9 +181,9 @@
 	Proto*	p;
 
 	int	restricted;		/* remote port is restricted */
+	int	ignoreadvice;		/* don't terminate connection on icmp errors */
 	uint	ttl;			/* max time to live */
 	uint	tos;			/* type of service */
-	int	ignoreadvice;		/* don't terminate connection on icmp errors */
 
 	uchar	ipversion;
 	uchar	laddr[IPaddrlen];	/* local IP address */
@@ -139,8 +220,7 @@
 
 	void*	ptcl;			/* protocol specific stuff */
 
-	Route	*r;			/* last route used */
-	ulong	rgen;			/* routetable generation for *r */
+	Routehint;
 };
 
 struct Medium
@@ -161,18 +241,8 @@
 	/* process packets written to 'data' */
 	void	(*pktin)(Fs *f, Ipifc *ifc, Block *bp);
 
-	/* routes for router boards */
-	void	(*addroute)(Ipifc *ifc, int, uchar*, uchar*, uchar*, int);
-	void	(*remroute)(Ipifc *ifc, int, uchar*, uchar*);
-	void	(*flushroutes)(Ipifc *ifc);
-
-	/* for routing multicast groups */
-	void	(*joinmulti)(Ipifc *ifc, uchar *a, uchar *ia);
-	void	(*leavemulti)(Ipifc *ifc, uchar *a, uchar *ia);
-
 	/* address resolution */
-	void	(*ares)(Fs*, int, uchar*, uchar*, int, int);	/* resolve */
-	void	(*areg)(Ipifc*, uchar*);			/* register */
+	void	(*areg)(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *ip);
 
 	/* v6 address generation */
 	void	(*pref2addr)(uchar *pref, uchar *ea);
@@ -187,12 +257,13 @@
 	uchar	mask[IPaddrlen];
 	uchar	remote[IPaddrlen];
 	uchar	net[IPaddrlen];
+	uchar	type;		/* route type */
 	uchar	tentative;	/* =1 => v6 dup disc on, =0 => confirmed unique */
 	uchar	onlink;		/* =1 => onlink, =0 offlink. */
 	uchar	autoflag;	/* v6 autonomous flag */
-	long 	validlt;	/* v6 valid lifetime */
-	long 	preflt;		/* v6 preferred lifetime */
-	long	origint;	/* time when addr was added */
+	ulong 	validlt;	/* v6 valid lifetime */
+	ulong 	preflt;		/* v6 preferred lifetime */
+	ulong	origint;	/* time when addr was added */
 	Iplink	*link;		/* addresses linked to this lifc */
 	Iplifc	*next;
 };
@@ -203,25 +274,25 @@
 	Ipself	*self;
 	Iplifc	*lifc;
 	Iplink	*selflink;	/* next link for this local address */
-	Iplink	*lifclink;	/* next link for this ifc */
-	ulong	expire;
+	Iplink	*lifclink;	/* next link for this lifc */
 	Iplink	*next;		/* free list */
+	ulong	expire;
 	int	ref;
 };
 
-/* rfc 2461, pp.40--43. */
+/* rfc 2461, pp.40—43. */
 
 /* default values, one per stack */
 struct Routerparams {
-	int	mflag;
-	int	oflag;
-	int 	maxraint;
-	int	minraint;
-	int	linkmtu;
-	int	reachtime;
-	int	rxmitra;
-	int	ttl;
-	int	routerlt;	
+	int	mflag;		/* flag: managed address configuration */
+	int	oflag;		/* flag: other stateful configuration */
+	int 	maxraint;	/* max. router adv interval (ms) */
+	int	minraint;	/* min. router adv interval (ms) */
+	int	linkmtu;	/* mtu options */
+	int	reachtime;	/* reachable time */
+	int	rxmitra;	/* retransmit interval */
+	int	ttl;		/* cur hop count limit */
+	int	routerlt;	/* router lifetime */
 };
 
 struct Hostparams {
@@ -231,22 +302,18 @@
 struct Ipifc
 {
 	RWlock;
-	
+
 	Conv	*conv;		/* link to its conversation structure */
 	char	dev[64];	/* device we're attached to */
 	Medium	*m;		/* Media pointer */
 	int	maxtu;		/* Maximum transfer unit */
 	int	mintu;		/* Minumum tranfer unit */
-	int	mbps;		/* megabits per second */
 	void	*arg;		/* medium specific */
-	int	reassemble;	/* reassemble IP packets before forwarding */
 
-	/* these are used so that we can unbind on the fly */
-	Lock	idlock;
+	uchar	reflect;	/* allow forwarded packets to go out the same interface */
+	uchar	reassemble;	/* reassemble IP packets before forwarding to this interface */
+	
 	uchar	ifcid;		/* incremented each 'bind/unbind/add/remove' */
-	int	ref;		/* number of proc's using this ipifc */
-	Rendez	wait;		/* where unbinder waits for ref == 0 */
-	int	unbinding;
 
 	uchar	mac[MAClen];	/* MAC address */
 
@@ -255,10 +322,16 @@
 	ulong	in, out;	/* message statistics */
 	ulong	inerr, outerr;	/* ... */
 
-	uchar	sendra6;	/* == 1 => send router advs on this ifc	*/
-	uchar	recvra6;	/* == 1 => recv router advs on this ifc */
-	Routerparams rp;	/* router parameters as in RFC 2461, pp.40--43. 
+	uchar	sendra6;	/* flag: send router advs on this ifc */
+	uchar	recvra6;	/* flag: recv router advs on this ifc */
+	Routerparams rp;	/* router parameters as in RFC 2461, pp.40—43.
 					used only if node is router */
+
+	int	speed;		/* link speed in bits per second */
+	int	delay;		/* burst delay in ms */
+	int	burst;		/* burst size in bytes */
+	int	load;		/* bytes in flight */
+	ulong	ticks;
 };
 
 /*
@@ -330,20 +403,11 @@
 	int		nc;		/* number of conversations */
 	int		ac;
 	Qid		qid;		/* qid for protocol directory */
-	ushort		nextport;
 	ushort		nextrport;
 
 	void		*priv;
 };
 
-/*
- *  Stream for sending packets to user level
- */
-struct IProuter {
-	QLock;
-	int	opens;
-	Queue	*q;
-};
 
 /*
  *  one per IP protocol stack
@@ -362,8 +426,7 @@
 	IP	*ip;
 	Ipselftab	*self;
 	Arp	*arp;
-	V6params	*v6p;
-	IProuter iprouter;
+	v6params	*v6p;
 
 	Route	*v4root[1<<Lroot];	/* v4 routing forest */
 	Route	*v6root[1<<Lroot];	/* v6 routing forest */
@@ -370,7 +433,6 @@
 	Route	*queue;			/* used as temp when reinjecting routes */
 
 	Netlog	*alog;
-	Ifclog	*ilog;
 
 	char	ndb[1024];		/* an ndb entry for this interface */
 	int	ndbvers;
@@ -377,23 +439,10 @@
 	long	ndbmtime;
 };
 
-/* one per default router known to host */
-struct V6router {
-	uchar	inuse;
-	Ipifc	*ifc;
-	int	ifcid;
-	uchar	routeraddr[IPaddrlen];
-	long	ltorigin;
-	Routerparams	rp;
-};
-
-struct V6params
+struct v6params
 {
 	Routerparams	rp;		/* v6 params, one copy per node now */
 	Hostparams	hp;
-	V6router	v6rlist[3];	/* max 3 default routers, currently */
-	int		cdrouter;	/* uses only v6rlist[cdrouter] if   */ 
-					/* cdrouter >= 0. */
 };
 
 
@@ -410,8 +459,7 @@
 char*	Fsstdbind(Conv*, char**, int);
 ulong	scalednconv(void);
 void	closeconv(Conv*);
-
-/* 
+/*
  *  logging
  */
 enum
@@ -434,7 +482,6 @@
 	Logrudpmsg=	1<<16,
 	Logesp=		1<<17,
 	Logtcpwin=	1<<18,
-	Lognat=		1<<19,
 };
 
 void	netloginit(Fs*);
@@ -449,17 +496,17 @@
 void	ifclogopen(Fs*, Chan*);
 void	ifclogclose(Fs*, Chan*);
 
+#pragma varargck argpos netlog	3
+
 /*
  *  iproute.c
  */
 typedef	struct RouteTree RouteTree;
-typedef struct Routewalk Routewalk;
 typedef struct V4route V4route;
 typedef struct V6route V6route;
 
 enum
 {
-
 	/* type bits */
 	Rv4=		(1<<0),		/* this is a version 4 route */
 	Rifc=		(1<<1),		/* this route is a directly connected interface */
@@ -468,27 +515,18 @@
 	Rbcast=		(1<<4),		/* a broadcast self address */
 	Rmulti=		(1<<5),		/* a multicast self address */
 	Rproxy=		(1<<6),		/* this route should be proxied */
+	Rsrc=		(1<<7),		/* source specific route */
 };
 
-struct Routewalk
-{
-	int	o;
-	int	h;
-	char*	p;
-	char*	e;
-	void*	state;
-	void	(*walk)(Route*, Routewalk*);
-};
-
 struct	RouteTree
 {
-	Route*	right;
-	Route*	left;
-	Route*	mid;
+	Route	*mid;
+	Route	*left;
+	Route	*right;
+	Ipifc	*ifc;
+	uchar	ifcid;		/* must match ifc->id */
 	uchar	depth;
 	uchar	type;
-	uchar	ifcid;		/* must match ifc->id */
-	Ipifc	*ifc;
 	char	tag[4];
 	int	ref;
 };
@@ -497,6 +535,10 @@
 {
 	ulong	address;
 	ulong	endaddress;
+
+	ulong	source;
+	ulong	endsource;
+
 	uchar	gate[IPv4addrlen];
 };
 
@@ -504,6 +546,10 @@
 {
 	ulong	address[IPllen];
 	ulong	endaddress[IPllen];
+
+	ulong	source[IPllen];
+	ulong	endsource[IPllen];
+
 	uchar	gate[IPaddrlen];
 };
 
@@ -516,17 +562,16 @@
 		V4route v4;
 	};
 };
-extern void	v4addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type);
-extern void	v6addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type);
-extern void	v4delroute(Fs *f, uchar *a, uchar *mask, int dolock);
-extern void	v6delroute(Fs *f, uchar *a, uchar *mask, int dolock);
-extern Route*	v4lookup(Fs *f, uchar *a, Conv *c);
-extern Route*	v6lookup(Fs *f, uchar *a, Conv *c);
+
+extern void	addroute(Fs *f, uchar *a, uchar *mask, uchar *s, uchar *smask, uchar *gate, int type, Ipifc *ifc, char *tag);
+extern void	remroute(Fs *f, uchar *a, uchar *mask, uchar *s, uchar *smask, uchar *gate, int type, Ipifc *ifc, char *tag);
+extern Route*	v4lookup(Fs *f, uchar *a, uchar *s, Routehint *h);
+extern Route*	v6lookup(Fs *f, uchar *a, uchar *s, Routehint *h);
+extern Route*	v4source(Fs *f, uchar *a, uchar *s);
+extern Route*	v6source(Fs *f, uchar *a, uchar *s);
 extern long	routeread(Fs *f, char*, ulong, int);
 extern long	routewrite(Fs *f, Chan*, char*, int);
-extern void	routetype(int, char*);
-extern void	ipwalkroutes(Fs*, Routewalk*);
-extern void	convroute(Route*, uchar*, uchar*, uchar*, char*, int*);
+extern void	routetype(int type, char p[8]);
 
 /*
  *  devip.c
@@ -543,7 +588,6 @@
 };
 
 extern IPaux*	newipaux(char*, char*);
-extern void	setlport(Conv*);
 
 /*
  *  arp.c
@@ -552,18 +596,16 @@
 {
 	uchar	ip[IPaddrlen];
 	uchar	mac[MAClen];
-	Medium	*type;			/* media type */
-	Arpent*	hash;
-	Block*	hold;
-	Block*	last;
-	uint	ctime;			/* time entry was created or refreshed */
-	uint	utime;			/* time entry was last used */
-	uchar	state;
+	Arpent	*hash;
 	Arpent	*nextrxt;		/* re-transmit chain */
-	uint	rtime;			/* time for next retransmission */
-	uchar	rxtsrem;
+	Block	*hold;
+	Block	*last;
 	Ipifc	*ifc;
 	uchar	ifcid;			/* must match ifc->id */
+	uchar	state;
+	uchar	rxtsrem;		/* re-transmissions remaining */
+	ulong	ctime;			/* time entry was created or refreshed */
+	ulong	utime;			/* time entry was last used */
 };
 
 extern void	arpinit(Fs*);
@@ -572,15 +614,17 @@
 extern Arpent*	arpget(Arp*, Block *bp, int version, Ipifc *ifc, uchar *ip, uchar *h);
 extern void	arprelease(Arp*, Arpent *a);
 extern Block*	arpresolve(Arp*, Arpent *a, Medium *type, uchar *mac);
-extern void	arpenter(Fs*, int version, uchar *ip, uchar *mac, int len, int norefresh);
+extern int	arpenter(Fs*, int version, uchar *ip, uchar *mac, int n, uchar *ia, Ipifc *ifc, int refresh);
+extern void	ndpsendsol(Fs*, Ipifc*, Arpent*);
 
 /*
  * ipaux.c
  */
 
-extern int	myetheraddr(uchar*, char*);
-extern ulong	parseip(uchar*, char*);
-extern ulong	parseipmask(uchar*, char*);
+extern int	parseether(uchar*, char*);
+extern vlong	parseip(uchar*, char*);
+extern vlong	parseipmask(uchar*, char*, int);
+extern vlong	parseipandmask(uchar*, uchar*, char*, char*);
 extern char*	v4parseip(uchar*, char*);
 extern void	maskip(uchar *from, uchar *mask, uchar *to);
 extern int	parsemac(uchar *to, char *from, int len);
@@ -589,12 +633,10 @@
 extern void	v4tov6(uchar *v6, uchar *v4);
 extern int	v6tov4(uchar *v4, uchar *v6);
 extern int	eipfmt(Fmt*);
+extern int	convipvers(Conv *c);
 
 #define	ipmove(x, y) memmove(x, y, IPaddrlen)
 #define	ipcmp(x, y) ( (x)[IPaddrlen-1] != (y)[IPaddrlen-1] || memcmp(x, y, IPaddrlen) )
- 
-#define	ip4move(x, y) memmove(x, y, IPv4addrlen)
-#define	ip4cmp(x, y) ( (x)[IPv4addrlen-1] != (y)[IPv4addrlen-1] || memcmp(x, y, IPv4addrlen) )
 
 extern uchar IPv4bcast[IPaddrlen];
 extern uchar IPv4bcastobs[IPaddrlen];
@@ -612,7 +654,6 @@
 extern Medium	ethermedium;
 extern Medium	nullmedium;
 extern Medium	pktmedium;
-extern Medium	tripmedium;
 
 /*
  *  ipifc.c
@@ -619,33 +660,24 @@
  */
 extern Medium*	ipfindmedium(char *name);
 extern void	addipmedium(Medium *med);
+extern void	ipifcoput(Ipifc *ifc, Block *bp, int version, uchar *ip);
 extern int	ipforme(Fs*, uchar *addr);
-extern int	iptentative(Fs*, uchar *addr);
-extern int	ipisbm(uchar *);
-extern int	ipismulticast(uchar *);
-extern Ipifc*	findipifc(Fs*, uchar *remote, int type);
-extern void	findprimaryip(Fs*, uchar*);
+extern int	ipismulticast(uchar *ip);
+extern Ipifc*	findipifc(Fs*, uchar *local, uchar *remote, int type);
+extern Ipifc*	findipifcstr(Fs *f, char *s);
 extern void	findlocalip(Fs*, uchar *local, uchar *remote);
-extern int	ipv4local(Ipifc *ifc, uchar *addr);
-extern int	ipv6local(Ipifc *ifc, uchar *addr);
-extern int	ipv6anylocal(Ipifc *ifc, uchar *addr);
+extern int	ipv4local(Ipifc *ifc, uchar *local, int prefixlen, uchar *remote);
+extern int	ipv6local(Ipifc *ifc, uchar *local, int prefixlen, uchar *remote);
 extern Iplifc*	iplocalonifc(Ipifc *ifc, uchar *ip);
+extern Iplifc*	ipremoteonifc(Ipifc *ifc, uchar *ip);
 extern int	ipproxyifc(Fs *f, Ipifc *ifc, uchar *ip);
-extern int	ipismulticast(uchar *ip);
-extern int	ipisbooting(void);
-extern int	ipifccheckin(Ipifc *ifc, Medium *med);
-extern void	ipifccheckout(Ipifc *ifc);
-extern int	ipifcgrab(Ipifc *ifc);
-extern void	ipifcaddroute(Fs*, int, uchar*, uchar*, uchar*, int);
-extern void	ipifcremroute(Fs*, int, uchar*, uchar*);
 extern void	ipifcremmulti(Conv *c, uchar *ma, uchar *ia);
 extern void	ipifcaddmulti(Conv *c, uchar *ma, uchar *ia);
 extern char*	ipifcrem(Ipifc *ifc, char **argv, int argc);
 extern char*	ipifcadd(Ipifc *ifc, char **argv, int argc, int tentative, Iplifc *lifcp);
 extern long	ipselftabread(Fs*, char *a, ulong offset, int n);
-extern char*	ipifcaddpref6(Ipifc *ifc, char**argv, int argc);
-extern void	ipsendra6(Fs *f, int on);
-
+extern char*	ipifcadd6(Ipifc *ifc, char**argv, int argc);
+extern char*	ipifcremove6(Ipifc *ifc, char**argv, int argc);
 /*
  *  ip.c
  */
@@ -652,37 +684,26 @@
 extern void	iprouting(Fs*, int);
 extern void	icmpnoconv(Fs*, Block*);
 extern void	icmpcantfrag(Fs*, Block*, int);
-extern void	icmpttlexceeded(Fs*, uchar*, Block*);
+extern void	icmpttlexceeded(Fs*, Ipifc*, Block*);
 extern ushort	ipcsum(uchar*);
 extern void	ipiput4(Fs*, Ipifc*, Block*);
 extern void	ipiput6(Fs*, Ipifc*, Block*);
-extern int	ipoput4(Fs*, Block*, int, int, int, Conv*);
-extern int	ipoput6(Fs*, Block*, int, int, int, Conv*);
+extern int	ipoput4(Fs*, Block*, int, int, int, Routehint*);
+extern int	ipoput6(Fs*, Block*, int, int, int, Routehint*);
 extern int	ipstats(Fs*, char*, int);
 extern ushort	ptclbsum(uchar*, int);
 extern ushort	ptclcsum(Block*, int, int);
 extern void	ip_init(Fs*);
-extern void	update_mtucache(uchar*, ulong);
-extern ulong	restrict_mtu(uchar*, ulong);
+extern void	ip_init_6(Fs*);
 
 /*
  * bootp.c
  */
-char*	(*bootp)(Ipifc*);
-int	(*bootpread)(char*, ulong, int);
+extern int	bootpread(char*, ulong, int);
 
 /*
- *  iprouter.c
- */
-void	useriprouter(Fs*, Ipifc*, Block*);
-void	iprouteropen(Fs*);
-void	iprouterclose(Fs*);
-long	iprouterread(Fs*, void*, int);
-
-/*
  *  resolving inferno/plan9 differences
  */
-Chan*		commonfdtochan(int, int, int, int);
 char*		commonuser(void);
 char*		commonerror(void);
 
@@ -695,15 +716,3 @@
  *  global to all of the stack
  */
 extern void	(*igmpreportfn)(Ipifc*, uchar*);
-
-/*
- * nat.c
- */
-extern int	nato(Block*, Ipifc*, Fs*);
-extern void	nati(Block*, Ipifc*);
-extern int	natgc(uchar);
-
-extern int	addnataddr(uchar*, uchar*, Iplifc*);
-extern int	removenataddr(uchar*, uchar*, Iplifc*);
-extern void	shownataddr(void);
-extern void flushnataddr(void);
diff -u a/os/ip//ipaux.c b/os/ip//ipaux.c
--- a/os/ip//ipaux.c
+++ b/os/ip//ipaux.c
@@ -5,49 +5,8 @@
 #include	"fns.h"
 #include	"../port/error.h"
 #include	"ip.h"
-#include  "ipv6.h"
+#include	"ipv6.h"
 
-/*
- *  well known IP addresses
- */
-uchar IPv4bcast[IPaddrlen] = {
-	0, 0, 0, 0,
-	0, 0, 0, 0,
-	0, 0, 0xff, 0xff,
-	0xff, 0xff, 0xff, 0xff
-};
-uchar IPv4allsys[IPaddrlen] = {
-	0, 0, 0, 0,
-	0, 0, 0, 0,
-	0, 0, 0xff, 0xff,
-	0xe0, 0, 0, 0x01
-};
-uchar IPv4allrouter[IPaddrlen] = {
-	0, 0, 0, 0,
-	0, 0, 0, 0,
-	0, 0, 0xff, 0xff,
-	0xe0, 0, 0, 0x02
-};
-uchar IPallbits[IPaddrlen] = {
-	0xff, 0xff, 0xff, 0xff,
-	0xff, 0xff, 0xff, 0xff,
-	0xff, 0xff, 0xff, 0xff,
-	0xff, 0xff, 0xff, 0xff
-};
-
-uchar IPnoaddr[IPaddrlen];
-
-/*
- *  prefix of all v4 addresses
- */
-uchar v4prefix[IPaddrlen] = {
-	0, 0, 0, 0,
-	0, 0, 0, 0,
-	0, 0, 0xff, 0xff,
-	0, 0, 0, 0
-};
-
-
 char *v6hdrtypes[Maxhdrtype] =
 {
 	[HBH]		"HopbyHop",
@@ -54,7 +13,7 @@
 	[ICMP]		"ICMP",
 	[IGMP]		"IGMP",
 	[GGP]		"GGP",
-	[IPINIP]		"IP",
+	[IPINIP]	"IP",
 	[ST]		"ST",
 	[TCP]		"TCP",
 	[UDP]		"UDP",
@@ -87,6 +46,7 @@
 	0, 0, 0, 0,
 	0, 0, 0, 0x01
 };
+
 uchar v6linklocal[IPaddrlen] = {
 	0xfe, 0x80, 0, 0,
 	0, 0, 0, 0,
@@ -99,26 +59,8 @@
 	0, 0, 0, 0,
 	0, 0, 0, 0
 };
-int v6llpreflen = 8;	// link-local prefix length
-uchar v6sitelocal[IPaddrlen] = {
-	0xfe, 0xc0, 0, 0,
-	0, 0, 0, 0,
-	0, 0, 0, 0,
-	0, 0, 0, 0
-};
-uchar v6sitelocalmask[IPaddrlen] = {
-	0xff, 0xff, 0xff, 0xff,
-	0xff, 0xff, 0xff, 0xff,
-	0, 0, 0, 0,
-	0, 0, 0, 0
-};
-int v6slpreflen = 6;	// site-local prefix length
-uchar v6glunicast[IPaddrlen] = {
-	0x08, 0, 0, 0,
-	0, 0, 0, 0,
-	0, 0, 0, 0,
-	0, 0, 0, 0
-};
+int v6llpreflen = 8;	/* link-local prefix length in bytes */
+
 uchar v6multicast[IPaddrlen] = {
 	0xff, 0, 0, 0,
 	0, 0, 0, 0,
@@ -131,7 +73,8 @@
 	0, 0, 0, 0,
 	0, 0, 0, 0
 };
-int v6mcpreflen = 1;	// multicast prefix length
+int v6mcpreflen = 1;	/* multicast prefix length */
+
 uchar v6allnodesN[IPaddrlen] = {
 	0xff, 0x01, 0, 0,
 	0, 0, 0, 0,
@@ -138,6 +81,12 @@
 	0, 0, 0, 0,
 	0, 0, 0, 0x01
 };
+uchar v6allroutersN[IPaddrlen] = {
+	0xff, 0x01, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0, 0x02
+};
 uchar v6allnodesNmask[IPaddrlen] = {
 	0xff, 0xff, 0, 0,
 	0, 0, 0, 0,
@@ -144,7 +93,8 @@
 	0, 0, 0, 0,
 	0, 0, 0, 0
 };
-int v6aNpreflen = 2;	// all nodes (N) prefix
+int v6aNpreflen = 2;	/* all nodes (N) prefix */
+
 uchar v6allnodesL[IPaddrlen] = {
 	0xff, 0x02, 0, 0,
 	0, 0, 0, 0,
@@ -151,19 +101,6 @@
 	0, 0, 0, 0,
 	0, 0, 0, 0x01
 };
-uchar v6allnodesLmask[IPaddrlen] = {
-	0xff, 0xff, 0, 0,
-	0, 0, 0, 0,
-	0, 0, 0, 0,
-	0, 0, 0, 0
-};
-int v6aLpreflen = 2;	// all nodes (L) prefix
-uchar v6allroutersN[IPaddrlen] = {
-	0xff, 0x01, 0, 0,
-	0, 0, 0, 0,
-	0, 0, 0, 0,
-	0, 0, 0, 0x02
-};
 uchar v6allroutersL[IPaddrlen] = {
 	0xff, 0x02, 0, 0,
 	0, 0, 0, 0,
@@ -170,12 +107,14 @@
 	0, 0, 0, 0,
 	0, 0, 0, 0x02
 };
-uchar v6allroutersS[IPaddrlen] = {
-	0xff, 0x05, 0, 0,
+uchar v6allnodesLmask[IPaddrlen] = {
+	0xff, 0xff, 0, 0,
 	0, 0, 0, 0,
 	0, 0, 0, 0,
-	0, 0, 0, 0x02
+	0, 0, 0, 0
 };
+int v6aLpreflen = 2;	/* all nodes (L) prefix */
+
 uchar v6solicitednode[IPaddrlen] = {
 	0xff, 0x02, 0, 0,
 	0, 0, 0, 0,
@@ -190,9 +129,6 @@
 };
 int v6snpreflen = 13;
 
-
-
-
 ushort
 ptclcsum(Block *bp, int offset, int len)
 {
@@ -215,7 +151,7 @@
 	if(bp->next == nil) {
 		if(blocklen < len)
 			len = blocklen;
-		return ~ptclbsum(addr, len) & 0xffff;
+		return ptclbsum(addr, len) ^ 0xffff;
 	}
 
 	losum = 0;
@@ -247,7 +183,7 @@
 	while((csum = losum>>16) != 0)
 		losum = csum + (losum & 0xffff);
 
-	return ~losum & 0xffff;
+	return losum ^ 0xffff;
 }
 
 enum
@@ -255,306 +191,9 @@
 	Isprefix= 16,
 };
 
-static uchar prefixvals[256] =
-{
-[0x00] 0 | Isprefix,
-[0x80] 1 | Isprefix,
-[0xC0] 2 | Isprefix,
-[0xE0] 3 | Isprefix,
-[0xF0] 4 | Isprefix,
-[0xF8] 5 | Isprefix,
-[0xFC] 6 | Isprefix,
-[0xFE] 7 | Isprefix,
-[0xFF] 8 | Isprefix,
-};
-
-int
-eipfmt(Fmt *f)
-{
-	char buf[5*8];
-	static char *efmt = "%.2lux%.2lux%.2lux%.2lux%.2lux%.2lux";
-	static char *ifmt = "%d.%d.%d.%d";
-	uchar *p, ip[16];
-	ulong *lp;
-	ushort s;
-	int i, j, n, eln, eli;
-
-	switch(f->r) {
-	case 'E':		/* Ethernet address */
-		p = va_arg(f->args, uchar*);
-		return fmtprint(f, efmt, p[0], p[1], p[2], p[3], p[4], p[5]);
-
-	case 'I':		/* Ip address */
-		p = va_arg(f->args, uchar*);
-common:
-		if(memcmp(p, v4prefix, 12) == 0)
-			return fmtprint(f, ifmt, p[12], p[13], p[14], p[15]);
-
-		/* find longest elision */
-		eln = eli = -1;
-		for(i = 0; i < 16; i += 2){
-			for(j = i; j < 16; j += 2)
-				if(p[j] != 0 || p[j+1] != 0)
-					break;
-			if(j > i && j - i > eln){
-				eli = i;
-				eln = j - i;
-			}
-		}
-
-		/* print with possible elision */
-		n = 0;
-		for(i = 0; i < 16; i += 2){
-			if(i == eli){
-				n += sprint(buf+n, "::");
-				i += eln;
-				if(i >= 16)
-					break;
-			} else if(i != 0)
-				n += sprint(buf+n, ":");
-			s = (p[i]<<8) + p[i+1];
-			n += sprint(buf+n, "%ux", s);
-		}
-		return fmtstrcpy(f, buf);
-
-	case 'i':		/* v6 address as 4 longs */
-		lp = va_arg(f->args, ulong*);
-		for(i = 0; i < 4; i++)
-			hnputl(ip+4*i, *lp++);
-		p = ip;
-		goto common;
-
-	case 'V':		/* v4 ip address */
-		p = va_arg(f->args, uchar*);
-		return fmtprint(f, ifmt, p[0], p[1], p[2], p[3]);
-
-	case 'M':		/* ip mask */
-		p = va_arg(f->args, uchar*);
-
-		/* look for a prefix mask */
-		for(i = 0; i < 16; i++)
-			if(p[i] != 0xff)
-				break;
-		if(i < 16){
-			if((prefixvals[p[i]] & Isprefix) == 0)
-				goto common;
-			for(j = i+1; j < 16; j++)
-				if(p[j] != 0)
-					goto common;
-			n = 8*i + (prefixvals[p[i]] & ~Isprefix);
-		} else
-			n = 8*16;
-
-		/* got one, use /xx format */
-		return fmtprint(f, "/%d", n);
-	}
-	return fmtstrcpy(f, "(eipfmt)");
-}
-
 #define CLASS(p) ((*(uchar*)(p))>>6)
 
-extern char*
-v4parseip(uchar *to, char *from)
-{
-	int i;
-	char *p;
-
-	p = from;
-	for(i = 0; i < 4 && *p; i++){
-		to[i] = strtoul(p, &p, 0);
-		if(*p == '.')
-			p++;
-	}
-	switch(CLASS(to)){
-	case 0:	/* class A - 1 uchar net */
-	case 1:
-		if(i == 3){
-			to[3] = to[2];
-			to[2] = to[1];
-			to[1] = 0;
-		} else if(i == 2){
-			to[3] = to[1];
-			to[1] = 0;
-		}
-		break;
-	case 2:	/* class B - 2 uchar net */
-		if(i == 3){
-			to[3] = to[2];
-			to[2] = 0;
-		}
-		break;
-	}
-	return p;
-}
-
-int
-isv4(uchar *ip)
-{
-	return memcmp(ip, v4prefix, IPv4off) == 0;
-}
-
-
-/*
- *  the following routines are unrolled with no memset's to speed
- *  up the usual case
- */
 void
-v4tov6(uchar *v6, uchar *v4)
-{
-	v6[0] = 0;
-	v6[1] = 0;
-	v6[2] = 0;
-	v6[3] = 0;
-	v6[4] = 0;
-	v6[5] = 0;
-	v6[6] = 0;
-	v6[7] = 0;
-	v6[8] = 0;
-	v6[9] = 0;
-	v6[10] = 0xff;
-	v6[11] = 0xff;
-	v6[12] = v4[0];
-	v6[13] = v4[1];
-	v6[14] = v4[2];
-	v6[15] = v4[3];
-}
-
-int
-v6tov4(uchar *v4, uchar *v6)
-{
-	if(v6[0] == 0
-	&& v6[1] == 0
-	&& v6[2] == 0
-	&& v6[3] == 0
-	&& v6[4] == 0
-	&& v6[5] == 0
-	&& v6[6] == 0
-	&& v6[7] == 0
-	&& v6[8] == 0
-	&& v6[9] == 0
-	&& v6[10] == 0xff
-	&& v6[11] == 0xff)
-	{
-		v4[0] = v6[12];
-		v4[1] = v6[13];
-		v4[2] = v6[14];
-		v4[3] = v6[15];
-		return 0;
-	} else {
-		memset(v4, 0, 4);
-		return -1;
-	}
-}
-
-ulong
-parseip(uchar *to, char *from)
-{
-	int i, elipsis = 0, v4 = 1;
-	ulong x;
-	char *p, *op;
-
-	memset(to, 0, IPaddrlen);
-	p = from;
-	for(i = 0; i < 16 && *p; i+=2){
-		op = p;
-		x = strtoul(p, &p, 16);
-		if(*p == '.' || (*p == 0 && i == 0)){
-			p = v4parseip(to+i, op);
-			i += 4;
-			break;
-		} else {
-			to[i] = x>>8;
-			to[i+1] = x;
-		}
-		if(*p == ':'){
-			v4 = 0;
-			if(*++p == ':'){
-				elipsis = i+2;
-				p++;
-			}
-		}
-	}
-	if(i < 16){
-		memmove(&to[elipsis+16-i], &to[elipsis], i-elipsis);
-		memset(&to[elipsis], 0, 16-i);
-	}
-	if(v4){
-		to[10] = to[11] = 0xff;
-		return nhgetl(to+12);
-	} else
-		return 6;
-}
-
-/*
- *  hack to allow ip v4 masks to be entered in the old
- *  style
- */
-ulong
-parseipmask(uchar *to, char *from)
-{
-	ulong x;
-	int i;
-	uchar *p;
-
-	if(*from == '/'){
-		/* as a number of prefix bits */
-		i = atoi(from+1);
-		if(i < 0)
-			i = 0;
-		if(i > 128)
-			i = 128;
-		memset(to, 0, IPaddrlen);
-		for(p = to; i >= 8; i -= 8)
-			*p++ = 0xff;
-		if(i > 0)
-			*p = ~((1<<(8-i))-1);
-		x = nhgetl(to+IPv4off);
-	} else {
-		/* as a straight bit mask */
-		x = parseip(to, from);
-		if(memcmp(to, v4prefix, IPv4off) == 0)
-			memset(to, 0xff, IPv4off);
-	}
-	return x;
-}
-
-void
-maskip(uchar *from, uchar *mask, uchar *to)
-{
-	int i;
-
-	for(i = 0; i < IPaddrlen; i++)
-		to[i] = from[i] & mask[i];
-}
-
-uchar classmask[4][16] = {
-	0xff,0xff,0xff,0xff,  0xff,0xff,0xff,0xff,  0xff,0xff,0xff,0xff,  0xff,0x00,0x00,0x00,
-	0xff,0xff,0xff,0xff,  0xff,0xff,0xff,0xff,  0xff,0xff,0xff,0xff,  0xff,0x00,0x00,0x00,
-	0xff,0xff,0xff,0xff,  0xff,0xff,0xff,0xff,  0xff,0xff,0xff,0xff,  0xff,0xff,0x00,0x00,
-	0xff,0xff,0xff,0xff,  0xff,0xff,0xff,0xff,  0xff,0xff,0xff,0xff,  0xff,0xff,0xff,0x00,
-};
-
-uchar*
-defmask(uchar *ip)
-{
-	if(isv4(ip))
-		return classmask[ip[IPv4off]>>6];
-	else {
-		if(ipcmp(ip, v6loopback) == 0)
-			return IPallbits;
-		else if(memcmp(ip, v6linklocal, v6llpreflen) == 0)
-			return v6linklocalmask;
-		else if(memcmp(ip, v6sitelocal, v6slpreflen) == 0)
-			return v6sitelocalmask;
-		else if(memcmp(ip, v6solicitednode, v6snpreflen) == 0)
-			return v6solicitednodemask;
-		else if(memcmp(ip, v6multicast, v6mcpreflen) == 0)
-			return v6multicastmask;
-		return IPallbits;
-	}
-}
-
-void
 ipv62smcast(uchar *smcast, uchar *a)
 {
 	assert(IPaddrlen == 16);
@@ -599,7 +238,7 @@
 ulong
 iphash(uchar *sa, ushort sp, uchar *da, ushort dp)
 {
-	return ((sa[IPaddrlen-1]<<24) ^ (sp << 16) ^ (da[IPaddrlen-1]<<8) ^ dp ) % Nhash;
+	return ((sa[IPaddrlen-1]<<24) ^ (sp << 16) ^ (da[IPaddrlen-1]<<8) ^ dp ) % Nipht;
 }
 
 void
@@ -678,7 +317,7 @@
 			return c;
 		}
 	}
-	
+
 	/* match local address and port */
 	hv = iphash(IPnoaddr, 0, da, dp);
 	for(h = ht->tab[hv]; h != nil; h = h->next){
@@ -690,7 +329,7 @@
 			return c;
 		}
 	}
-	
+
 	/* match just port */
 	hv = iphash(IPnoaddr, 0, IPnoaddr, dp);
 	for(h = ht->tab[hv]; h != nil; h = h->next){
@@ -702,7 +341,7 @@
 			return c;
 		}
 	}
-	
+
 	/* match local address */
 	hv = iphash(IPnoaddr, 0, da, 0);
 	for(h = ht->tab[hv]; h != nil; h = h->next){
@@ -714,7 +353,7 @@
 			return c;
 		}
 	}
-	
+
 	/* look for something that matches anything */
 	hv = iphash(IPnoaddr, 0, IPnoaddr, 0);
 	for(h = ht->tab[hv]; h != nil; h = h->next){
@@ -726,4 +365,13 @@
 	}
 	unlock(ht);
 	return nil;
+}
+
+int
+convipvers(Conv *c)	/* pick V4 or V6 from the conversation's local and remote addresses */
+{
+	if(isv4(c->raddr) && isv4(c->laddr) || ipcmp(c->raddr, IPnoaddr) == 0)	/* && binds tighter than ||; ipcmp is 0 on equality */
+		return V4;	/* both addresses are v4, or raddr equals IPnoaddr */
+	else
+		return V6;
+}
diff -u a/os/ip//ipifc.c b/os/ip//ipifc.c
--- a/os/ip//ipifc.c
+++ b/os/ip//ipifc.c
@@ -11,17 +11,14 @@
 #define DPRINT if(0)print
 
 enum {
-	Maxmedia = 32,
-	Nself = Maxmedia*5,
-	NHASH = (1<<6),
-	NCACHE = 256,
-	QMAX = 64*1024-1,
+	Maxmedia	= 32,
+	Nself		= Maxmedia*5,
+	NHASH		= 1<<6,
+	NCACHE		= 256,
+	QMAX		= 192*1024-1,
 };
 
-Medium *media[Maxmedia] =
-{
-	0
-};
+Medium *media[Maxmedia] = { 0 };
 
 /*
  *  cache of local addresses (addresses we answer to)
@@ -29,12 +26,10 @@
 struct Ipself
 {
 	uchar	a[IPaddrlen];
-	Ipself	*hnext;		/* next address in the hash table */
+	Ipself	*next;		/* next address in the hash table */
 	Iplink	*link;		/* binding twixt Ipself and Ipifc */
 	ulong	expire;
 	uchar	type;		/* type of address */
-	int	ref;
-	Ipself	*next;		/* free list */
 };
 
 struct Ipselftab
@@ -64,11 +59,47 @@
 
 static void	addselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a, int type);
 static void	remselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a);
-static char*	ipifcjoinmulti(Ipifc *ifc, char **argv, int argc);
-static char*	ipifcleavemulti(Ipifc *ifc, char **argv, int argc);
-static void	ipifcregisterproxy(Fs*, Ipifc*, uchar*);
-static char*	ipifcremlifc(Ipifc*, Iplifc*);
+static void	ipifcregisteraddr(Fs*, Ipifc*, Iplifc*, uchar*);
+static void	ipifcregisterproxy(Fs*, Ipifc*, uchar*, int);
+static char*	ipifcremlifc(Ipifc*, Iplifc**);
 
+static char Ebound[] = "interface already bound";
+static char Eunbound[] = "interface not bound";
+
+enum {
+	unknownv6,		/* UGH */
+	unspecifiedv6,
+	linklocalv6,
+	globalv6,
+};
+
+static int
+v6addrtype(uchar *addr)	/* returns unknownv6, linklocalv6 or globalv6; never unspecifiedv6 */
+{
+	if(isv4(addr) || ipcmp(addr, IPnoaddr) == 0)
+		return unknownv6;	/* v4 address, or equal to IPnoaddr */
+	else if(islinklocal(addr) || ipcmp(addr, v6loopback) == 0 ||
+	    isv6mcast(addr) && (addr[1] & 0xF) <= Link_local_scop)	/* multicast whose low nibble of addr[1] is at most Link_local_scop */
+		return linklocalv6;	/* link-local, loopback, or the multicast case above */
+	else
+		return globalv6;	/* everything else */
+}
+
+static int
+comprefixlen(uchar *a, uchar *b, int n)	/* number of leading bits a and b share, examining at most n bytes */
+{
+	int i, c;
+
+	for(i = 0; i < n; i++){
+		if((c = a[i] ^ b[i]) == 0)	/* byte i is identical in both */
+			continue;
+		for(i <<= 3; (c & 0x80) == 0; i++)	/* i turns into a bit count; c != 0 so the scan for the first differing bit ends */
+			c <<= 1;
+		return i;
+	}
+	return i << 3;	/* all n bytes equal: 8*n bits */
+}
+
 /*
  *  link in a new medium
  */
@@ -121,7 +152,7 @@
 	wlock(ifc);
 	if(ifc->m != nil){
 		wunlock(ifc);
-		return "interface already bound";	
+		return Ebound;
 	}
 	if(waserror()){
 		wunlock(ifc);
@@ -142,18 +173,14 @@
 	ifc->m = m;
 	ifc->mintu = ifc->m->mintu;
 	ifc->maxtu = ifc->m->maxtu;
+	ifc->delay = 40;
+	ifc->speed = 0;
 	if(ifc->m->unbindonclose == 0)
 		ifc->conv->inuse++;
-	ifc->rp.mflag = 0;		// default not managed
-	ifc->rp.oflag = 0;
-	ifc->rp.maxraint = 600000;	// millisecs
-	ifc->rp.minraint = 200000;
-	ifc->rp.linkmtu = 0;		// no mtu sent
-	ifc->rp.reachtime = 0;
-	ifc->rp.rxmitra = 0;
-	ifc->rp.ttl = MAXTTL;
-	ifc->rp.routerlt = 3*(ifc->rp.maxraint);
 
+	/* default router parameters */
+	ifc->rp = c->p->f->v6p->rp;
+
 	/* any ancillary structures (like routes) no longer pertain */
 	ifc->ifcid++;
 
@@ -170,29 +197,44 @@
 
 /*
  *  detach a device from an interface, close the interface
- *  called with ifc->conv closed
  */
 static char*
 ipifcunbind(Ipifc *ifc)
 {
-	char *err;
+	Medium *m;
 
-	if(waserror()){
+	wlock(ifc);
+	m = ifc->m;
+	if(m == nil){
 		wunlock(ifc);
-		nexterror();
+		return Eunbound;
 	}
-	wlock(ifc);
 
-	/* dissociate routes */
-	if(ifc->m != nil && ifc->m->unbindonclose == 0)
-		ifc->conv->inuse--;
-	ifc->ifcid++;
+	/* disassociate logical interfaces (before zeroing ifc->arg) */
+	while(ifc->lifc != nil)
+		ipifcremlifc(ifc, &ifc->lifc);
 
 	/* disassociate device */
-	if(ifc->m != nil && ifc->m->unbind)
-		(*ifc->m->unbind)(ifc);
+	if(m->unbind != nil){
+		extern Medium nullmedium;
+
+		/*
+		 * unbind() might unlock the ifc, so change the medium
+		 * to the nullmedium to prevent packets from getting
+		 * sent while the medium is shutting down.
+		 */
+		ifc->m = &nullmedium;
+
+		if(!waserror()){
+			(*m->unbind)(ifc);
+			poperror();
+		}
+	}
+
 	memset(ifc->dev, 0, sizeof(ifc->dev));
 	ifc->arg = nil;
+
+	ifc->reflect = 0;
 	ifc->reassemble = 0;
 
 	/* close queues to stop queuing of packets */
@@ -200,26 +242,22 @@
 	qclose(ifc->conv->wq);
 	qclose(ifc->conv->sq);
 
-	/* disassociate logical interfaces */
-	while(ifc->lifc){
-		err = ipifcremlifc(ifc, ifc->lifc);
-		if(err)
-			error(err);
-	}
-
+	/* dissociate routes */
+	ifc->ifcid++;
+	if(m->unbindonclose == 0)
+		ifc->conv->inuse--;
 	ifc->m = nil;
 	wunlock(ifc);
-	poperror();
+
 	return nil;
 }
 
+char sfixedformat[] = "device %s maxtu %d sendra %d recvra %d mflag %d oflag %d"
+" maxraint %d minraint %d linkmtu %d reachtime %d rxmitra %d ttl %d routerlt %d"
+" pktin %lud pktout %lud errin %lud errout %lud speed %d delay %d\n";
 
-
-char sfixedformat[] = "device %s maxtu %d sendra %d recvra %d mflag %d oflag %d maxraint %d minraint %d linkmtu %d reachtime %d rxmitra %d ttl %d routerlt %d pktin %lud pktout %lud errin %lud errout %lud\n";
-
 char slineformat[] = "	%-40I %-10M %-40I %-12lud %-12lud\n";
 
-
 static int
 ipifcstate(Conv *c, char *state, int n)
 {
@@ -228,19 +266,18 @@
 	int m;
 
 	ifc = (Ipifc*)c->ptcl;
-
 	m = snprint(state, n, sfixedformat,
 		ifc->dev, ifc->maxtu, ifc->sendra6, ifc->recvra6,
 		ifc->rp.mflag, ifc->rp.oflag, ifc->rp.maxraint,
 		ifc->rp.minraint, ifc->rp.linkmtu, ifc->rp.reachtime,
 		ifc->rp.rxmitra, ifc->rp.ttl, ifc->rp.routerlt,
-		ifc->in, ifc->out, ifc->inerr, ifc->outerr);
+		ifc->in, ifc->out, ifc->inerr, ifc->outerr,
+		ifc->speed, ifc->delay);
 
 	rlock(ifc);
-	for(lifc = ifc->lifc; lifc && n > m; lifc = lifc->next)
-		m += snprint(state+m, n - m, slineformat,
-			lifc->local, lifc->mask, lifc->remote,
-			lifc->validlt, lifc->preflt);
+	for(lifc = ifc->lifc; lifc != nil && n > m; lifc = lifc->next)
+		m += snprint(state+m, n - m, slineformat, lifc->local,
+			lifc->mask, lifc->remote, lifc->validlt, lifc->preflt);
 	if(ifc->lifc == nil)
 		m += snprint(state+m, n - m, "\n");
 	runlock(ifc);
@@ -256,13 +293,11 @@
 	int m;
 
 	ifc = (Ipifc*)c->ptcl;
-
-	m = 0;
-
 	rlock(ifc);
-	for(lifc = ifc->lifc; lifc; lifc = lifc->next){
+	m = 0;
+	for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next){
 		m += snprint(state+m, n - m, "%-40.40I ->", lifc->local);
-		for(link = lifc->link; link; link = link->lifclink)
+		for(link = lifc->link; link != nil; link = link->lifclink)
 			m += snprint(state+m, n - m, " %-40.40I", link->self->a);
 		m += snprint(state+m, n - m, "\n");
 	}
@@ -279,6 +314,59 @@
 	return ifc->m != nil;
 }
 
+static void
+ipifcadjustburst(Ipifc *ifc)	/* recompute ifc->burst from delay, speed and maxtu */
+{
+	int burst;
+
+	burst = ((vlong)ifc->delay * ifc->speed) / 8000;	/* ms * bits/s / 8000 = bytes; vlong keeps the product from overflowing int */
+	if(burst < ifc->maxtu)	/* never smaller than maxtu */
+		burst = ifc->maxtu;
+	ifc->burst = burst;
+}
+
+static void
+ipifcsetdelay(Ipifc *ifc, int delay)	/* store delay clamped to 0..1000, then refresh the burst size */
+{
+	if(delay < 0)
+		delay = 0;
+	else if(delay > 1000)
+		delay = 1000;
+	ifc->delay = delay;
+	ipifcadjustburst(ifc);	/* burst depends on delay */
+}
+
+static void
+ipifcsetspeed(Ipifc *ifc, int speed)	/* store speed (negative becomes 0), clear the load, refresh the burst size */
+{
+	if(speed < 0)
+		speed = 0;
+	ifc->speed = speed;	/* ipifcoput skips rate limiting when this is 0 */
+	ifc->load = 0;
+	ipifcadjustburst(ifc);	/* burst depends on speed */
+}
+
+void
+ipifcoput(Ipifc *ifc, Block *bp, int version, uchar *ip)	/* pass bp to the medium's bwrite, or free it when the speed limit is exceeded */
+{
+	if(ifc->speed){	/* speed 0: no limiting */
+		ulong now = MACHP(0)->ticks;
+		int dt = TK2MS(now - ifc->ticks);	/* NOTE(review): presumably ms since the previous call -- confirm TK2MS */
+		ifc->ticks = now;
+		ifc->load -= ((vlong)dt * ifc->speed) / 8000;	/* subtract the bytes that speed allows during dt */
+		if(ifc->load < 0 || dt < 0 || dt > 1000)	/* negative load or implausible dt: start over */
+			ifc->load = 0;
+		else if(ifc->load > ifc->burst){	/* over the burst allowance: drop the packet */
+			freeblist(bp);
+			return;
+		}
+	}
+	bp = concatblock(bp);
+	ifc->load += BLEN(bp);	/* counted even when speed is 0 */
+	ifc->m->bwrite(ifc, bp, version, ip);
+}
+
+
 /*
  *  called when a process writes to an interface's 'data'
  */
@@ -294,18 +382,15 @@
 		return;
 
 	ifc = (Ipifc*)c->ptcl;
-	if(!canrlock(ifc)){
-		freeb(bp);
-		return;
-	}
+	rlock(ifc);
 	if(waserror()){
 		runlock(ifc);
 		nexterror();
 	}
-	if(ifc->m == nil || ifc->m->pktin == nil)
-		freeb(bp);
-	else
+	if(ifc->m != nil && ifc->m->pktin != nil)
 		(*ifc->m->pktin)(c->p->f, ifc, bp);
+	else
+		freeb(bp);
 	runlock(ifc);
 	poperror();
 }
@@ -319,27 +404,26 @@
 	Ipifc *ifc;
 
 	c->rq = qopen(QMAX, 0, 0, 0);
-	c->sq = qopen(2*QMAX, 0, 0, 0);
 	c->wq = qopen(QMAX, Qkick, ipifckick, c);
+	c->sq = qopen(QMAX, 0, 0, 0);
+	if(c->rq == nil || c->wq == nil || c->sq == nil)
+		error(Enomem);
 	ifc = (Ipifc*)c->ptcl;
 	ifc->conv = c;
-	ifc->unbinding = 0;
 	ifc->m = nil;
+	ifc->reflect = 0;
 	ifc->reassemble = 0;
 }
 
 /*
  *  called after last close of ipifc data or ctl
- *  called with c locked, we must unlock
  */
 static void
 ipifcclose(Conv *c)
 {
-	Ipifc *ifc;
-	Medium *m;
+	Ipifc *ifc = (Ipifc*)c->ptcl;
+	Medium *m = ifc->m;
 
-	ifc = (Ipifc*)c->ptcl;
-	m = ifc->m;
 	if(m != nil && m->unbindonclose)
 		ipifcunbind(ifc);
 }
@@ -347,19 +431,17 @@
 /*
  *  change an interface's mtu
  */
-char*
-ipifcsetmtu(Ipifc *ifc, char **argv, int argc)
+static char*
+ipifcsetmtu(Ipifc *ifc, int mtu)
 {
-	int mtu;
+	Medium *m = ifc->m;
 
-	if(argc < 2)
+	if(m == nil)
+		return Eunbound;
+	if(mtu < m->mintu || mtu > m->maxtu)
 		return Ebadarg;
-	if(ifc->m == nil)
-		return Ebadarg;
-	mtu = strtoul(argv[1], 0, 0);
-	if(mtu < ifc->m->mintu || mtu > ifc->m->maxtu)
-		return Ebadarg;
 	ifc->maxtu = mtu;
+	ipifcadjustburst(ifc);
 	return nil;
 }
 
@@ -374,13 +456,8 @@
 	Iplifc *lifc, **l;
 	int i, type, mtu;
 	Fs *f;
-	int sendnbrdisc = 0;
 
-	if(ifc->m == nil)
-		return "ipifc not yet bound to device";
-
-	f = ifc->conv->p->f;
-
+	mtu = 0;
 	type = Rifc;
 	memset(ip, 0, IPaddrlen);
 	memset(mask, 0, IPaddrlen);
@@ -392,23 +469,21 @@
 		/* fall through */
 	case 5:
 		mtu = strtoul(argv[4], 0, 0);
-		if(mtu >= ifc->m->mintu && mtu <= ifc->m->maxtu)
-			ifc->maxtu = mtu;
 		/* fall through */
 	case 4:
-		parseip(ip, argv[1]);
-		parseipmask(mask, argv[2]);
-		parseip(rem, argv[3]);
+		if (parseipandmask(ip, mask, argv[1], argv[2]) == -1 || parseip(rem, argv[3]) == -1)
+			return Ebadip;
 		maskip(rem, mask, net);
 		break;
 	case 3:
-		parseip(ip, argv[1]);
-		parseipmask(mask, argv[2]);
+		if (parseipandmask(ip, mask, argv[1], argv[2]) == -1)
+			return Ebadip;
 		maskip(ip, mask, rem);
 		maskip(rem, mask, net);
 		break;
 	case 2:
-		parseip(ip, argv[1]);
+		if (parseip(ip, argv[1]) == -1)
+			return Ebadip;
 		memmove(mask, defmask(ip), IPaddrlen);
 		maskip(ip, mask, rem);
 		maskip(rem, mask, net);
@@ -415,26 +490,55 @@
 		break;
 	default:
 		return Ebadarg;
-		break;
 	}
-	if(isv4(ip))
+
+	/* check for point-to-point interface */
+	if(ipcmp(ip, v6loopback) != 0) /* skip v6 loopback, it's a special address */
+	if(ipcmp(mask, IPallbits) == 0)
+		type |= Rptpt;
+
+	if(isv4(ip) || ipcmp(ip, IPnoaddr) == 0){
+		type |= Rv4;
 		tentative = 0;
+	}
+
 	wlock(ifc);
+	if(ifc->m == nil){
+		wunlock(ifc);
+		return Eunbound;
+	}
+	f = ifc->conv->p->f;
+	if(waserror()){
+		wunlock(ifc);
+		return up->errstr;
+	}
 
+	if(mtu > 0)
+		ipifcsetmtu(ifc, mtu);
+
 	/* ignore if this is already a local address for this ifc */
-	for(lifc = ifc->lifc; lifc; lifc = lifc->next) {
-		if(ipcmp(lifc->local, ip) == 0) {
-			if(lifc->tentative != tentative)
-				lifc->tentative = tentative;
-			if(lifcp != nil) {
-				lifc->onlink = lifcp->onlink;
-				lifc->autoflag = lifcp->autoflag;
-				lifc->validlt = lifcp->validlt;
-				lifc->preflt = lifcp->preflt;
-				lifc->origint = lifcp->origint;
+	if((lifc = iplocalonifc(ifc, ip)) != nil){
+		if(lifcp != nil) {
+			if(!lifc->onlink && lifcp->onlink){
+				lifc->onlink = 1;
+				addroute(f, lifc->remote, lifc->mask, ip, IPallbits,
+					lifc->remote, lifc->type, ifc, tifc);
+				if(v6addrtype(ip) != linklocalv6)
+					addroute(f, lifc->remote, lifc->mask, ip, IPnoaddr,
+						lifc->remote, lifc->type, ifc, tifc);
 			}
-			goto out;
+			lifc->autoflag = lifcp->autoflag;
+			lifc->validlt = lifcp->validlt;
+			lifc->preflt = lifcp->preflt;
+			lifc->origint = lifcp->origint;
 		}
+		if(lifc->tentative != tentative){
+			lifc->tentative = tentative;
+			goto done;
+		}
+		wunlock(ifc);
+		poperror();
+		return nil;
 	}
 
 	/* add the address to the list of logical ifc's for this ifc */
@@ -443,6 +547,7 @@
 	ipmove(lifc->mask, mask);
 	ipmove(lifc->remote, rem);
 	ipmove(lifc->net, net);
+	lifc->type = type;
 	lifc->tentative = tentative;
 	if(lifcp != nil) {
 		lifc->onlink = lifcp->onlink;
@@ -450,39 +555,34 @@
 		lifc->validlt = lifcp->validlt;
 		lifc->preflt = lifcp->preflt;
 		lifc->origint = lifcp->origint;
+	} else {		/* default values */
+		lifc->onlink = lifc->autoflag = 1;
+		lifc->validlt = lifc->preflt = ~0UL;
+		lifc->origint = NOW / 1000;
 	}
-	else {		// default values
-		lifc->onlink = 1;
-		lifc->autoflag = 1;
-		lifc->validlt = 0xffffffff;
-		lifc->preflt = 0xffffffff;
-		lifc->origint = NOW / 10^3;
-	}
 	lifc->next = nil;
 
-	for(l = &ifc->lifc; *l; l = &(*l)->next)
+	for(l = &ifc->lifc; *l != nil; l = &(*l)->next)
 		;
 	*l = lifc;
 
-	/* check for point-to-point interface */
-	if(ipcmp(ip, v6loopback))  /* skip v6 loopback, it's a special address */
-	if(ipcmp(mask, IPallbits) == 0)
-		type |= Rptpt;
+	/* add route for this logical interface */
+	if(lifc->onlink){
+		addroute(f, rem, mask, ip, IPallbits, rem, type, ifc, tifc);
+		if(v6addrtype(ip) != linklocalv6)
+			addroute(f, rem, mask, ip, IPnoaddr, rem, type, ifc, tifc);
+	}
 
-	/* add local routes */
-	if(isv4(ip))
-		v4addroute(f, tifc, rem+IPv4off, mask+IPv4off, rem+IPv4off, type);
-	else
-		v6addroute(f, tifc, rem, mask, rem, type);
-
 	addselfcache(f, ifc, lifc, ip, Runi);
 
-	if((type & (Rproxy|Rptpt)) == (Rproxy|Rptpt)){
-		ipifcregisterproxy(f, ifc, rem);
-		goto out;
+	/* register proxy */
+	if(type & Rptpt){
+		if(type & Rproxy)
+			ipifcregisterproxy(f, ifc, rem, 1);
+		goto done;
 	}
 
-	if(isv4(ip) || ipcmp(ip, IPnoaddr) == 0) {
+	if(type & Rv4) {
 		/* add subnet directed broadcast address to the self cache */
 		for(i = 0; i < IPaddrlen; i++)
 			bcast[i] = (ip[i] & mask[i]) | ~mask[i];
@@ -504,174 +604,135 @@
 		for(i = 0; i < IPaddrlen; i++)
 			bcast[i] = (ip[i] & mask[i]) & mask[i];
 		addselfcache(f, ifc, lifc, bcast, Rbcast);
-		
+
 		addselfcache(f, ifc, lifc, IPv4bcast, Rbcast);
-	}
-	else {
+	} else {
 		if(ipcmp(ip, v6loopback) == 0) {
 			/* add node-local mcast address */
 			addselfcache(f, ifc, lifc, v6allnodesN, Rmulti);
 
 			/* add route for all node multicast */
-			v6addroute(f, tifc, v6allnodesN, v6allnodesNmask, v6allnodesN, Rmulti);
+			addroute(f, v6allnodesN, v6allnodesNmask,
+				ip, IPallbits,
+				v6allnodesN, Rmulti, ifc, tifc);
 		}
 
 		/* add all nodes multicast address */
 		addselfcache(f, ifc, lifc, v6allnodesL, Rmulti);
-		
+
 		/* add route for all nodes multicast */
-		v6addroute(f, tifc, v6allnodesL, v6allnodesLmask, v6allnodesL, Rmulti);
-		
+		addroute(f, v6allnodesL, v6allnodesLmask,
+			ip, IPallbits,
+			v6allnodesL, Rmulti, ifc, tifc);
+
 		/* add solicited-node multicast address */
 		ipv62smcast(bcast, ip);
 		addselfcache(f, ifc, lifc, bcast, Rmulti);
-
-		sendnbrdisc = 1;
 	}
 
-	/* register the address on this network for address resolution */
-	if(isv4(ip) && ifc->m->areg != nil)
-		(*ifc->m->areg)(ifc, ip);
-
-out:
+done:
 	wunlock(ifc);
-	if(tentative && sendnbrdisc)
-		icmpns(f, 0, SRC_UNSPEC, ip, TARG_MULTI, ifc->mac);
+	poperror();
+
+	rlock(ifc);
+	ipifcregisteraddr(f, ifc, lifc, ip);
+	runlock(ifc);
+
 	return nil;
 }
 
 /*
  *  remove a logical interface from an ifc
- *  always called with ifc wlock'd
+ *	called with ifc wlock'd
  */
 static char*
-ipifcremlifc(Ipifc *ifc, Iplifc *lifc)
+ipifcremlifc(Ipifc *ifc, Iplifc **l)
 {
-	Iplifc **l;
-	Fs *f;
+	Iplifc *lifc = *l;
+	Fs *f = ifc->conv->p->f;
 
-	f = ifc->conv->p->f;
-
-	/*
-	 *  find address on this interface and remove from chain.
-	 *  for pt to pt we actually specify the remote address as the
-	 *  addresss to remove.
-	 */
-	for(l = &ifc->lifc; *l != nil && *l != lifc; l = &(*l)->next)
-		;
-	if(*l == nil)
+	if(lifc == nil)
 		return "address not on this interface";
 	*l = lifc->next;
 
 	/* disassociate any addresses */
-	while(lifc->link)
+	while(lifc->link != nil)
 		remselfcache(f, ifc, lifc, lifc->link->self->a);
 
 	/* remove the route for this logical interface */
-	if(isv4(lifc->local))
-		v4delroute(f, lifc->remote+IPv4off, lifc->mask+IPv4off, 1);
-	else {
-		v6delroute(f, lifc->remote, lifc->mask, 1);
+	if(lifc->onlink){
+		remroute(f, lifc->remote, lifc->mask,
+			lifc->local, IPallbits,
+			lifc->remote, lifc->type, ifc, tifc);
+		if(v6addrtype(lifc->local) != linklocalv6)
+			remroute(f, lifc->remote, lifc->mask,
+				lifc->local, IPnoaddr,
+				lifc->remote, lifc->type, ifc, tifc);
+	}
+
+	/* unregister proxy */
+	if(lifc->type & Rptpt){
+		if(lifc->type & Rproxy)
+			ipifcregisterproxy(f, ifc, lifc->remote, 0);
+		goto done;
+	}
+
+	/* remove route for all nodes multicast */
+	if((lifc->type & Rv4) == 0){
 		if(ipcmp(lifc->local, v6loopback) == 0)
-			/* remove route for all node multicast */
-			v6delroute(f, v6allnodesN, v6allnodesNmask, 1);
-		else if(memcmp(lifc->local, v6linklocal, v6llpreflen) == 0)
-			/* remove route for all link multicast */
-			v6delroute(f, v6allnodesL, v6allnodesLmask, 1);
+			remroute(f, v6allnodesN, v6allnodesNmask,
+				lifc->local, IPallbits,
+				v6allnodesN, Rmulti, ifc, tifc);
+
+		remroute(f, v6allnodesL, v6allnodesLmask,
+			lifc->local, IPallbits,
+			v6allnodesL, Rmulti, ifc, tifc);
 	}
 
+done:
 	free(lifc);
 	return nil;
-
 }
 
 /*
  *  remove an address from an interface.
- *  called with c locked
  */
 char*
 ipifcrem(Ipifc *ifc, char **argv, int argc)
 {
-	uchar ip[IPaddrlen];
-	uchar mask[IPaddrlen];
-	uchar rem[IPaddrlen];
-	Iplifc *lifc;
-	char *rv;
+	uchar ip[IPaddrlen], mask[IPaddrlen], rem[IPaddrlen];
+	Iplifc *lifc, **l;
+	char *err;
 
 	if(argc < 3)
 		return Ebadarg;
-
-	parseip(ip, argv[1]);
-	parseipmask(mask, argv[2]);
+	if(parseipandmask(ip, mask, argv[1], argv[2]) == -1)
+		return Ebadip;
 	if(argc < 4)
 		maskip(ip, mask, rem);
-	else
-		parseip(rem, argv[3]);
+	else if(parseip(rem, argv[3]) == -1)
+		return Ebadip;
 
-	wlock(ifc);
-
 	/*
 	 *  find address on this interface and remove from chain.
 	 *  for pt to pt we actually specify the remote address as the
 	 *  addresss to remove.
 	 */
+	wlock(ifc);
+	l = &ifc->lifc;
 	for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next) {
-		if (memcmp(ip, lifc->local, IPaddrlen) == 0
-		&& memcmp(mask, lifc->mask, IPaddrlen) == 0
-		&& memcmp(rem, lifc->remote, IPaddrlen) == 0)
+		if(ipcmp(ip, lifc->local) == 0
+		&& ipcmp(mask, lifc->mask) == 0
+		&& ipcmp(rem, lifc->remote) == 0)
 			break;
+		l = &lifc->next;
 	}
-
-	rv = ipifcremlifc(ifc, lifc);
+	err = ipifcremlifc(ifc, l);
 	wunlock(ifc);
-	return rv;
+	return err;
 }
 
 /*
- * distribute routes to active interfaces like the
- * TRIP linecards
- */
-void
-ipifcaddroute(Fs *f, int vers, uchar *addr, uchar *mask, uchar *gate, int type)
-{
-	Medium *m;
-	Conv **cp, **e;
-	Ipifc *ifc;
-
-	e = &f->ipifc->conv[f->ipifc->nc];
-	for(cp = f->ipifc->conv; cp < e; cp++){
-		if(*cp != nil) {
-			ifc = (Ipifc*)(*cp)->ptcl;
-			m = ifc->m;
-			if(m == nil)
-				continue;
-			if(m->addroute != nil)
-				m->addroute(ifc, vers, addr, mask, gate, type);
-		}
-	}
-}
-
-void
-ipifcremroute(Fs *f, int vers, uchar *addr, uchar *mask)
-{
-	Medium *m;
-	Conv **cp, **e;
-	Ipifc *ifc;
-
-	e = &f->ipifc->conv[f->ipifc->nc];
-	for(cp = f->ipifc->conv; cp < e; cp++){
-		if(*cp != nil) {
-			ifc = (Ipifc*)(*cp)->ptcl;
-			m = ifc->m;
-			if(m == nil)
-				continue;
-			if(m->remroute != nil)
-				m->remroute(ifc, vers, addr, mask);
-		}
-	}
-}
-
-/*
  *  associate an address with the interface.  This wipes out any previous
  *  addresses.  This is a macro that means, remove all the old interfaces
  *  and add a new one.
@@ -679,170 +740,89 @@
 static char*
 ipifcconnect(Conv* c, char **argv, int argc)
 {
+	Ipifc *ifc = (Ipifc*)c->ptcl;
 	char *err;
-	Ipifc *ifc;
 
-	ifc = (Ipifc*)c->ptcl;
-
-	if(ifc->m == nil)
-		 return "ipifc not yet bound to device";
-
-	if(waserror()){
-		wunlock(ifc);
-		nexterror();
-	}
 	wlock(ifc);
-	while(ifc->lifc){
-		err = ipifcremlifc(ifc, ifc->lifc);
-		if(err)
-			error(err);
-	}
+	while(ifc->lifc != nil)
+		ipifcremlifc(ifc, &ifc->lifc);
 	wunlock(ifc);
-	poperror();
 
 	err = ipifcadd(ifc, argv, argc, 0, nil);
-	if(err)
+	if(err != nil)
 		return err;
 
 	Fsconnected(c, nil);
-
 	return nil;
 }
 
 char*
-ipifcsetpar6(Ipifc *ifc, char **argv, int argc)
+ipifcra6(Ipifc *ifc, char **argv, int argc)
 {
-	int i, argsleft, vmax = ifc->rp.maxraint, vmin = ifc->rp.minraint;
+	int i, argsleft;
+	uchar sendra, recvra;
+	Routerparams rp;
 
-	argsleft = argc - 1;
 	i = 1;
-
-	if(argsleft % 2 != 0)
+	argsleft = argc - 1;
+	if((argsleft % 2) != 0)
 		return Ebadarg;
 
+	sendra = ifc->sendra6;
+	recvra = ifc->recvra6;
+	rp = ifc->rp;
+
 	while (argsleft > 1) {
-		if(strcmp(argv[i],"recvra")==0)
-			ifc->recvra6 = (atoi(argv[i+1]) != 0);
-		else if(strcmp(argv[i],"sendra")==0)
-			ifc->sendra6 = (atoi(argv[i+1]) != 0);
-		else if(strcmp(argv[i],"mflag")==0)
-			ifc->rp.mflag = (atoi(argv[i+1]) != 0);
-		else if(strcmp(argv[i],"oflag")==0)
-			ifc->rp.oflag = (atoi(argv[i+1]) != 0);
-		else if(strcmp(argv[i],"maxraint")==0)
-			ifc->rp.maxraint = atoi(argv[i+1]);
-		else if(strcmp(argv[i],"minraint")==0)
-			ifc->rp.minraint = atoi(argv[i+1]);
-		else if(strcmp(argv[i],"linkmtu")==0)
-			ifc->rp.linkmtu = atoi(argv[i+1]);
-		else if(strcmp(argv[i],"reachtime")==0)
-			ifc->rp.reachtime = atoi(argv[i+1]);
-		else if(strcmp(argv[i],"rxmitra")==0)
-			ifc->rp.rxmitra = atoi(argv[i+1]);
-		else if(strcmp(argv[i],"ttl")==0)
-			ifc->rp.ttl = atoi(argv[i+1]);
-		else if(strcmp(argv[i],"routerlt")==0)
-			ifc->rp.routerlt = atoi(argv[i+1]);
+		if(strcmp(argv[i], "recvra") == 0)
+			recvra = atoi(argv[i+1]) != 0;
+		else if(strcmp(argv[i], "sendra") == 0)
+			sendra = atoi(argv[i+1]) != 0;
+		else if(strcmp(argv[i], "mflag") == 0)
+			rp.mflag = atoi(argv[i+1]) != 0;
+		else if(strcmp(argv[i], "oflag") == 0)
+			rp.oflag = atoi(argv[i+1]) != 0;
+		else if(strcmp(argv[i], "maxraint") == 0)
+			rp.maxraint = atoi(argv[i+1]);
+		else if(strcmp(argv[i], "minraint") == 0)
+			rp.minraint = atoi(argv[i+1]);
+		else if(strcmp(argv[i], "linkmtu") == 0)
+			rp.linkmtu = atoi(argv[i+1]);
+		else if(strcmp(argv[i], "reachtime") == 0)
+			rp.reachtime = atoi(argv[i+1]);
+		else if(strcmp(argv[i], "rxmitra") == 0)
+			rp.rxmitra = atoi(argv[i+1]);
+		else if(strcmp(argv[i], "ttl") == 0)
+			rp.ttl = atoi(argv[i+1]);
+		else if(strcmp(argv[i], "routerlt") == 0)
+			rp.routerlt = atoi(argv[i+1]);
 		else
-			return Ebadarg;	
+			return Ebadarg;
 
 		argsleft -= 2;
 		i += 2;
 	}
 
-	// consistency check
-	if(ifc->rp.maxraint < ifc->rp.minraint) {
-		ifc->rp.maxraint = vmax;
-		ifc->rp.minraint = vmin;
+	/* consistency check */
+	if(rp.maxraint < rp.minraint)
 		return Ebadarg;
-	}
 
-	return nil;
-}
+	ifc->rp = rp;
+	ifc->sendra6 = sendra;
+	ifc->recvra6 = recvra;
 
-char*
-ipifcsendra6(Ipifc *ifc, char **argv, int argc)
-{
-	int i;
-	
-	i = 0;
-	if(argc > 1)
-		i = atoi(argv[1]);
-	ifc->sendra6 = (i!=0);
 	return nil;
 }
 
-char*
-ipifcrecvra6(Ipifc *ifc, char **argv, int argc)
-{
-	int i;
-	
-	i = 0;
-	if(argc > 1)
-		i = atoi(argv[1]);
-	ifc->recvra6 = (i!=0);	
-	return nil;
-}
-
-char*
-ipifcnat(Ipifc *ifc, char **argv, int argc)
-{
-	uchar src[IPaddrlen], mask[IPaddrlen], dst[IPaddrlen];
-	Iplifc *lifc;
-
-	if(argc == 2){
-		if((strcmp(argv[1], "show") == 0)){
-			shownataddr();
-			return nil;
-		}else if((strcmp(argv[1], "flush") == 0)){
-			flushnataddr();
-			return nil;
-		}else
-			return Ebadarg;
-	}
-
-	if(argc != 5)
-		return Ebadarg;
-
-	if (parseip(src, argv[2]) == -1)
-		return Ebadip;
-
-	if (parseipmask(mask, argv[3]) == -1)
-		return Ebadip;
-
-	if (parseip(dst, argv[4]) == -1)
-		return Ebadip;
-
-	if((lifc=iplocalonifc(ifc, dst)) == nil)
-		return Ebadip;
-
-	if(strcmp(argv[1], "add") == 0){
-		if(addnataddr(src, mask, lifc) != 0)
-			return Ebadarg;
-	}else if(strcmp(argv[1], "remove") == 0){
-		if(removenataddr(src, mask, lifc) != 0)
-			return Ebadarg;
-	}else
-		return Ebadarg;
-
-	return nil;
-}
-
 /*
  *  non-standard control messages.
- *  called with c locked.
  */
 static char*
-ipifcctl(Conv* c, char**argv, int argc)
+ipifcctl(Conv* c, char **argv, int argc)
 {
-	Ipifc *ifc;
-	int i;
+	Ipifc *ifc = (Ipifc*)c->ptcl;
 
-	ifc = (Ipifc*)c->ptcl;
 	if(strcmp(argv[0], "add") == 0)
 		return ipifcadd(ifc, argv, argc, 0, nil);
-	else if(strcmp(argv[0], "bootp") == 0)
-		return bootp(ifc);
 	else if(strcmp(argv[0], "try") == 0)
 		return ipifcadd(ifc, argv, argc, 1, nil);
 	else if(strcmp(argv[0], "remove") == 0)
@@ -849,36 +829,38 @@
 		return ipifcrem(ifc, argv, argc);
 	else if(strcmp(argv[0], "unbind") == 0)
 		return ipifcunbind(ifc);
-	else if(strcmp(argv[0], "joinmulti") == 0)
-		return ipifcjoinmulti(ifc, argv, argc);
-	else if(strcmp(argv[0], "leavemulti") == 0)
-		return ipifcleavemulti(ifc, argv, argc);
 	else if(strcmp(argv[0], "mtu") == 0)
-		return ipifcsetmtu(ifc, argv, argc);
-	else if(strcmp(argv[0], "reassemble") == 0){
-		ifc->reassemble = 1;
+		return ipifcsetmtu(ifc, argc>1? strtoul(argv[1], 0, 0): 0);
+	else if(strcmp(argv[0], "speed") == 0){
+		ipifcsetspeed(ifc, argc>1? atoi(argv[1]): 0);
 		return nil;
 	}
+	else if(strcmp(argv[0], "delay") == 0){
+		ipifcsetdelay(ifc, argc>1? atoi(argv[1]): 0);
+		return nil;
+	}
 	else if(strcmp(argv[0], "iprouting") == 0){
-		i = 1;
-		if(argc > 1)
-			i = atoi(argv[1]);
-		iprouting(c->p->f, i);
+		iprouting(c->p->f, argc>1? atoi(argv[1]): 1);
 		return nil;
 	}
-	else if(strcmp(argv[0], "addpref6") == 0)
-		return ipifcaddpref6(ifc, argv, argc);
-	else if(strcmp(argv[0], "setpar6") == 0)
-		return ipifcsetpar6(ifc, argv, argc);
-	else if(strcmp(argv[0], "sendra6") == 0)
-		return ipifcsendra6(ifc, argv, argc);
-	else if(strcmp(argv[0], "recvra6") == 0)
-		return ipifcrecvra6(ifc, argv, argc);
-	else if(strcmp(argv[0], "nat") == 0)
-		return ipifcnat(ifc, argv, argc);
+	else if(strcmp(argv[0], "reflect") == 0){
+		ifc->reflect = argc>1? atoi(argv[1]): 1;
+		return nil;
+	}
+	else if(strcmp(argv[0], "reassemble") == 0){
+		ifc->reassemble = argc>1? atoi(argv[1]): 1;
+		return nil;
+	}
+	else if(strcmp(argv[0], "add6") == 0)
+		return ipifcadd6(ifc, argv, argc);
+	else if(strcmp(argv[0], "remove6") == 0)
+		return ipifcremove6(ifc, argv, argc);
+	else if(strcmp(argv[0], "ra6") == 0)
+		return ipifcra6(ifc, argv, argc);
 	return "unsupported ctl";
 }
 
+int
 ipifcstats(Proto *ipifc, char *buf, int len)
 {
 	return ipstats(ipifc->f, buf, len);
@@ -907,7 +889,7 @@
 	ipifc->nc = Maxmedia;
 	ipifc->ptclsize = sizeof(Ipifc);
 
-	f->ipifc = ipifc;			/* hack for ipifcremroute, findipifc, ... */
+	f->ipifc = ipifc;	/* hack for ipifcremroute, findipifc, ... */
 	f->self = smalloc(sizeof(Ipselftab));	/* hack for ipforme */
 
 	Fsproto(f, ipifc);
@@ -915,21 +897,25 @@
 
 /*
  *  add to self routing cache
- *	called with c locked
  */
 static void
 addselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a, int type)
 {
-	Ipself *p;
 	Iplink *lp;
+	Ipself *p;
 	int h;
 
+	type |= (lifc->type & Rv4);
 	qlock(f->self);
+	if(waserror()){
+		qunlock(f->self);
+		nexterror();
+	}
 
 	/* see if the address already exists */
 	h = hashipa(a);
-	for(p = f->self->hash[h]; p; p = p->next)
-		if(memcmp(a, p->a, IPaddrlen) == 0)
+	for(p = f->self->hash[h]; p != nil; p = p->next)
+		if(ipcmp(a, p->a) == 0)
 			break;
 
 	/* allocate a local address and add to hash chain */
@@ -946,7 +932,7 @@
 	}
 
 	/* look for a link for this lifc */
-	for(lp = p->link; lp; lp = lp->selflink)
+	for(lp = p->link; lp != nil; lp = lp->selflink)
 		if(lp->lifc == lifc)
 			break;
 
@@ -962,18 +948,19 @@
 		lifc->link = lp;
 
 		/* add to routing table */
-		if(isv4(a))
-			v4addroute(f, tifc, a+IPv4off, IPallbits+IPv4off, a+IPv4off, type);
-		else
-			v6addroute(f, tifc, a, IPallbits, a, type);
+		addroute(f, a, IPallbits,
+			lifc->local, 
+			((type & (Rbcast|Rmulti)) != 0 || v6addrtype(a) == linklocalv6) ?
+				IPallbits : IPnoaddr,
+			a, type, ifc, tifc);
 
 		if((type & Rmulti) && ifc->m->addmulti != nil)
 			(*ifc->m->addmulti)(ifc, a, lifc->local);
-	} else {
+	} else
 		lp->ref++;
-	}
 
 	qunlock(f->self);
+	poperror();
 }
 
 /*
@@ -992,8 +979,8 @@
 	ulong now = NOW;
 
 	l = &freeiplink;
-	for(np = *l; np; np = *l){
-		if(np->expire > now){
+	for(np = *l; np != nil; np = *l){
+		if((long)(now - np->expire) >= 0){
 			*l = np->next;
 			free(np);
 			continue;
@@ -1000,10 +987,11 @@
 		}
 		l = &np->next;
 	}
-	p->expire = now + 5000;		/* give other threads 5 secs to get out */
+	p->expire = now + 5000;	/* give other threads 5 secs to get out */
 	p->next = nil;
 	*l = p;
 }
+
 static void
 ipselffree(Ipself *p)
 {
@@ -1011,8 +999,8 @@
 	ulong now = NOW;
 
 	l = &freeipself;
-	for(np = *l; np; np = *l){
-		if(np->expire > now){
+	for(np = *l; np != nil; np = *l){
+		if((long)(now - np->expire) >= 0){
 			*l = np->next;
 			free(np);
 			continue;
@@ -1019,7 +1007,7 @@
 		}
 		l = &np->next;
 	}
-	p->expire = now + 5000;		/* give other threads 5 secs to get out */
+	p->expire = now + 5000;	/* give other threads 5 secs to get out */
 	p->next = nil;
 	*l = p;
 }
@@ -1027,7 +1015,6 @@
 /*
  *  Decrement reference for this address on this link.
  *  Unlink from selftab if this is the last ref.
- *	called with c locked
  */
 static void
 remselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a)
@@ -1039,7 +1026,7 @@
 
 	/* find the unique selftab entry */
 	l = &f->self->hash[hashipa(a)];
-	for(p = *l; p; p = *l){
+	for(p = *l; p != nil; p = *l){
 		if(ipcmp(p->a, a) == 0)
 			break;
 		l = &p->next;
@@ -1053,7 +1040,7 @@
 	 *  that matches the selftab entry
 	 */
 	l_lifc = &lifc->link;
-	for(link = *l_lifc; link; link = *l_lifc){
+	for(link = *l_lifc; link != nil; link = *l_lifc){
 		if(link->self == p)
 			break;
 		l_lifc = &link->lifclink;
@@ -1067,8 +1054,8 @@
 	 *  the one we just found
 	 */
 	l_self = &p->link;
-	for(link = *l_self; link; link = *l_self){
-		if(link == *(l_lifc))
+	for(link = *l_self; link != nil; link = *l_self){
+		if(link == *l_lifc)
 			break;
 		l_self = &link->selflink;
 	}
@@ -1079,9 +1066,20 @@
 	if(--(link->ref) != 0)
 		goto out;
 
-	if((p->type & Rmulti) && ifc->m->remmulti != nil)
-		(*ifc->m->remmulti)(ifc, a, lifc->local);
+	/* remove from routing table */
+	remroute(f, a, IPallbits,
+		lifc->local, 
+		((p->type & (Rbcast|Rmulti)) != 0 || v6addrtype(a) == linklocalv6) ?
+			IPallbits : IPnoaddr,
+		a, p->type, ifc, tifc);
 
+	if((p->type & Rmulti) && ifc->m->remmulti != nil){
+		if(!waserror()){
+			(*ifc->m->remmulti)(ifc, a, lifc->local);
+			poperror();
+		}
+	}
+
 	/* ref == 0, remove from both chains and free the link */
 	*l_lifc = link->lifclink;
 	*l_self = link->selflink;
@@ -1090,30 +1088,18 @@
 	if(p->link != nil)
 		goto out;
 
-	/* remove from routing table */
-	if(isv4(a))
-		v4delroute(f, a+IPv4off, IPallbits+IPv4off, 1);
-	else
-		v6delroute(f, a, IPallbits, 1);
-	
+	/* if null address, forget */
+	if(ipcmp(a, v4prefix) == 0 || ipcmp(a, IPnoaddr) == 0)
+		f->self->acceptall = 0;
+
 	/* no more links, remove from hash and free */
 	*l = p->next;
 	ipselffree(p);
 
-	/* if IPnoaddr, forget */
-	if(ipcmp(a, v4prefix) == 0 || ipcmp(a, IPnoaddr) == 0)
-		f->self->acceptall = 0;
-
 out:
 	qunlock(f->self);
 }
 
-static char *stformat = "%-44.44I %2.2d %4.4s\n";
-enum
-{
-	Nstformat= 41,
-};
-
 long
 ipselftabread(Fs *f, char *cp, ulong offset, int n)
 {
@@ -1124,14 +1110,14 @@
 
 	m = 0;
 	off = offset;
-	qlock(f->self);
 	for(i = 0; i < NHASH && m < n; i++){
 		for(p = f->self->hash[i]; p != nil && m < n; p = p->next){
 			nifc = 0;
-			for(link = p->link; link; link = link->selflink)
+			for(link = p->link; link != nil; link = link->selflink)
 				nifc++;
 			routetype(p->type, state);
-			m += snprint(cp + m, n - m, stformat, p->a, nifc, state);
+			m += snprint(cp + m, n - m, "%-44.44I %2.2d %4.4s\n",
+				p->a, nifc, state);
 			if(off > 0){
 				off -= m;
 				m = 0;
@@ -1138,30 +1124,15 @@
 			}
 		}
 	}
-	qunlock(f->self);
 	return m;
 }
 
-int
-iptentative(Fs *f, uchar *addr)
-{
- 	Ipself *p;
-
-	p = f->self->hash[hashipa(addr)];
-	for(; p; p = p->next){
-		if(ipcmp(addr, p->a) == 0) {
-			return p->link->lifc->tentative;
-		}
-	}
-	return 0;
-}
-
 /*
  *  returns
  *	0		- no match
  *	Runi
  *	Rbcast
- *	Rmcast
+ *	Rmulti
  */
 int
 ipforme(Fs *f, uchar *addr)
@@ -1168,11 +1139,9 @@
 {
 	Ipself *p;
 
-	p = f->self->hash[hashipa(addr)];
-	for(; p; p = p->next){
+	for(p = f->self->hash[hashipa(addr)]; p != nil; p = p->next)
 		if(ipcmp(addr, p->a) == 0)
-			return p->type;
-	}
+			return p->type & (Runi|Rbcast|Rmulti);
 
 	/* hack to say accept anything */
 	if(f->self->acceptall)
@@ -1186,254 +1155,237 @@
  *  return nil.
  */
 Ipifc*
-findipifc(Fs *f, uchar *remote, int type)
+findipifc(Fs *f, uchar *local, uchar *remote, int type)
 {
+	uchar gnet[IPaddrlen];
+	int spec, xspec;
 	Ipifc *ifc, *x;
 	Iplifc *lifc;
-	Conv **cp, **e;
-	uchar gnet[IPaddrlen];
-	uchar xmask[IPaddrlen];
+	Conv **cp;
 
-	x = nil; memset(xmask, 0, IPaddrlen);
-
-	/* find most specific match */
-	e = &f->ipifc->conv[f->ipifc->nc];
-	for(cp = f->ipifc->conv; cp < e; cp++){
-		if(*cp == 0)
-			continue;
-
+	x = nil;
+	xspec = 0;
+	for(cp = f->ipifc->conv; *cp != nil; cp++){
 		ifc = (Ipifc*)(*cp)->ptcl;
-
-		for(lifc = ifc->lifc; lifc; lifc = lifc->next){
+		if(!canrlock(ifc))
+			continue;
+		for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next){
+			if(type & Runi){
+				if(ipcmp(remote, lifc->local) == 0){
+				Found:
+					runlock(ifc);
+					return ifc;
+				}
+			} else if(type & (Rbcast|Rmulti)) {
+				if(ipcmp(local, lifc->local) == 0)
+					goto Found;
+			}
 			maskip(remote, lifc->mask, gnet);
 			if(ipcmp(gnet, lifc->net) == 0){
-				if(x == nil || ipcmp(lifc->mask, xmask) > 0){
+				spec = comprefixlen(remote, lifc->local, IPaddrlen);
+				if(spec > xspec){
 					x = ifc;
-					ipmove(xmask, lifc->mask);
+					xspec = spec;
 				}
 			}
 		}
+		runlock(ifc);
 	}
-	if(x != nil)
-		return x;
+	return x;
+}
 
-	/* for now for broadcast and multicast, just use first interface */
-	if(type & (Rbcast|Rmulti)){
-		for(cp = f->ipifc->conv; cp < e; cp++){
-			if(*cp == 0)
-				continue;
-			ifc = (Ipifc*)(*cp)->ptcl;
-			if(ifc->lifc != nil)
-				return ifc;
-		}
+Ipifc*
+findipifcstr(Fs *f, char *s)
+{
+	uchar ip[IPaddrlen];
+	Conv *c;
+	char *p;
+	long x;
+
+	x = strtol(s, &p, 10);
+	if(p > s && *p == '\0'){
+		if(x < 0)
+			return nil;
+		if(x < f->ipifc->nc && (c = f->ipifc->conv[x]) != nil && ipifcinuse(c))
+			return (Ipifc*)c->ptcl;
 	}
-		
+	if(parseip(ip, s) != -1)
+		return findipifc(f, ip, ip, Runi);
 	return nil;
 }
 
-enum {
-	unknownv6,
-	multicastv6,
-	unspecifiedv6,
-	linklocalv6,
-	sitelocalv6,
-	globalv6,
-};
-
-int
-v6addrtype(uchar *addr)
-{
-	if(isv6global(addr))
-		return globalv6;
-	if(islinklocal(addr))
-		return linklocalv6;
-	if(isv6mcast(addr))
-		return multicastv6;
-	if(issitelocal(addr))
-		return sitelocalv6;
-	return unknownv6;
-}
-
-#define v6addrcurr(lifc) (( (lifc)->origint + (lifc)->preflt >= (NOW/10^3) ) || ( (lifc)->preflt == 0xffffffff ))
-
+/*
+ *  find "best" (global > link local > unspecified)
+ *  local address; address must be current.
+ */
 static void
 findprimaryipv6(Fs *f, uchar *local)
 {
-	Conv **cp, **e;
-	Ipifc *ifc;
-	Iplifc *lifc;
+	ulong now = NOW/1000;
 	int atype, atypel;
+	Iplifc *lifc;
+	Ipifc *ifc;
+	Conv **cp;
 
 	ipmove(local, v6Unspecified);
 	atype = unspecifiedv6;
 
-	/* find "best" (global > sitelocal > link local > unspecified)
-	 * local address; address must be current */
-
-	e = &f->ipifc->conv[f->ipifc->nc];
-	for(cp = f->ipifc->conv; cp < e; cp++){
-		if(*cp == 0)
-			continue;
+	for(cp = f->ipifc->conv; *cp != nil; cp++){
 		ifc = (Ipifc*)(*cp)->ptcl;
-		for(lifc = ifc->lifc; lifc; lifc = lifc->next){
+		rlock(ifc);
+		for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next){
 			atypel = v6addrtype(lifc->local);
 			if(atypel > atype)
-			if(v6addrcurr(lifc)) {
+			if(lifc->preflt == ~0UL || lifc->preflt >= now-lifc->origint) {
 				ipmove(local, lifc->local);
 				atype = atypel;
-				if(atype == globalv6)
+				if(atype == globalv6){
+					runlock(ifc);
 					return;
+				}
 			}
 		}
+		runlock(ifc);
 	}
 }
 
 /*
- *  returns first ip address configured
+ *  returns first v4 address configured
  */
 static void
 findprimaryipv4(Fs *f, uchar *local)
 {
-	Conv **cp, **e;
-	Ipifc *ifc;
 	Iplifc *lifc;
+	Ipifc *ifc;
+	Conv **cp;
 
 	/* find first ifc local address */
-	e = &f->ipifc->conv[f->ipifc->nc];
-	for(cp = f->ipifc->conv; cp < e; cp++){
-		if(*cp == 0)
-			continue;
+	for(cp = f->ipifc->conv; *cp != nil; cp++){
 		ifc = (Ipifc*)(*cp)->ptcl;
-		if((lifc = ifc->lifc) != nil){
-			ipmove(local, lifc->local);
-			return;
+		rlock(ifc);
+		for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next){
+			if((lifc->type & Rv4) != 0){
+				ipmove(local, lifc->local);
+				runlock(ifc);
+				return;
+			}
 		}
+		runlock(ifc);
 	}
+	ipmove(local, IPnoaddr);
 }
 
 /*
- *  find the local address 'closest' to the remote system, copy it to
- *  local and return the ifc for that address
+ * ipv4local, ipv6local:
+ *  return a local address associated with an interface close to remote.
+ *  prefixlen is the number of leading bits in the local address that
+ *  have to match an interface address to be considered. this is used
+ *  by source specific routes to filter on the source address.
+ *  return non-zero on success or zero when no address was found.
+ *
+ *  for ipv4local, all addresses are 4 byte format.
  */
-void
-findlocalip(Fs *f, uchar *local, uchar *remote)
+int
+ipv4local(Ipifc *ifc, uchar *local, int prefixlen, uchar *remote)
 {
-	Ipifc *ifc;
 	Iplifc *lifc;
-	Route *r;
-	uchar gate[IPaddrlen];
-	uchar gnet[IPaddrlen];
-	int version;
-	int atype = unspecifiedv6, atypel = unknownv6;
+	int a, b;
 
-	USED(atype);
-	USED(atypel);
-	qlock(f->ipifc);
-	r = v6lookup(f, remote, nil);
- 	version = (memcmp(remote, v4prefix, IPv4off) == 0) ? V4 : V6;
-	
-	if(r != nil){
-		ifc = r->ifc;
-		if(r->type & Rv4)
-			v4tov6(gate, r->v4.gate);
-		else {
-			ipmove(gate, r->v6.gate);
-			ipmove(local, v6Unspecified);
-		}
+	b = -1;
+	for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next){
+		if((lifc->type & Rv4) == 0 || ipcmp(lifc->local, IPnoaddr) == 0)
+			continue;
 
-		/* find ifc address closest to the gateway to use */
-		switch(version) {
-		case V4:
-			for(lifc = ifc->lifc; lifc; lifc = lifc->next){
-				maskip(gate, lifc->mask, gnet);
-				if(ipcmp(gnet, lifc->net) == 0){
-					ipmove(local, lifc->local);
-					goto out;
-				}
-			}
-			break;
-		case V6:
-			for(lifc = ifc->lifc; lifc; lifc = lifc->next){
-				atypel = v6addrtype(lifc->local);
-				maskip(gate, lifc->mask, gnet);
-				if(ipcmp(gnet, lifc->net) == 0)
-				if(atypel > atype)
-				if(v6addrcurr(lifc)) {
-					ipmove(local, lifc->local);
-					atype = atypel;
-					if(atype == globalv6)
-						break;
-				}
-			}
-			if(atype > unspecifiedv6)
-				goto out;
-			break;
-		default:
-			panic("findlocalip: version %d", version);
+		if(prefixlen && comprefixlen(lifc->local+IPv4off, local, IPv4addrlen) < prefixlen)
+			continue;
+		
+		a = comprefixlen(lifc->local+IPv4off, remote, IPv4addrlen);
+		if(a > b){
+			b = a;
+			memmove(local, lifc->local+IPv4off, IPv4addrlen);
 		}
 	}
-
-	switch(version){
-	case V4:
-		findprimaryipv4(f, local);
-		break;
-	case V6:
-		findprimaryipv6(f, local);
-		break;
-	default:
-		panic("findlocalip2: version %d", version);
-	}
-
-out:
-	qunlock(f->ipifc);
+	return b >= 0;
 }
 
-/*
- *  return first v4 address associated with an interface
- */
 int
-ipv4local(Ipifc *ifc, uchar *addr)
+ipv6local(Ipifc *ifc, uchar *local, int prefixlen, uchar *remote)
 {
+	struct {
+		int	atype;
+		int	deprecated;
+		int	comprefixlen;
+	} a, b;
+	int atype;
+	ulong now;
 	Iplifc *lifc;
 
-	for(lifc = ifc->lifc; lifc; lifc = lifc->next){
-		if(isv4(lifc->local)){
-			memmove(addr, lifc->local+IPv4off, IPv4addrlen);
-			return 1;
-		}
+	if(isv4(remote)){
+		memmove(local, v4prefix, IPv4off);
+		if((prefixlen -= IPv4off*8) < 0)
+			prefixlen = 0;
+		return ipv4local(ifc, local+IPv4off, prefixlen, remote+IPv4off);
 	}
-	return 0;
-}
 
-/*
- *  return first v6 address associated with an interface
- */
-int
-ipv6local(Ipifc *ifc, uchar *addr)
-{
-	Iplifc *lifc;
+	atype = v6addrtype(remote);
+	b.atype = unknownv6;
+	b.deprecated = 1;
+	b.comprefixlen = 0;
 
-	for(lifc = ifc->lifc; lifc; lifc = lifc->next){
-		if(!isv4(lifc->local) && !(lifc->tentative)){
-			ipmove(addr, lifc->local);
-			return 1;
+	now = NOW/1000;
+	for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next){
+		if(lifc->tentative)
+			continue;
+
+		if(prefixlen && comprefixlen(lifc->local, local, IPaddrlen) < prefixlen)
+			continue;
+
+		a.atype = v6addrtype(lifc->local);
+		a.deprecated = lifc->preflt != ~0UL && lifc->preflt < now-lifc->origint;
+		a.comprefixlen = comprefixlen(lifc->local, remote, IPaddrlen);
+
+		/* prefer appropriate scope */
+		if(a.atype != b.atype){
+			if(a.atype > b.atype && b.atype < atype ||
+			   a.atype < b.atype && b.atype > atype)
+				goto Good;
+			continue;
 		}
+		/* prefer non-deprecated addresses */
+		if(a.deprecated != b.deprecated){
+			if(b.deprecated)
+				goto Good;
+			continue;
+		}
+		/* prefer longer common prefix */
+		if(a.comprefixlen != b.comprefixlen){
+			if(a.comprefixlen > b.comprefixlen)
+				goto Good;
+			continue;
+		}
+		continue;
+	Good:
+		b = a;
+		ipmove(local, lifc->local);
 	}
-	return 0;
+
+	return b.atype >= atype;
 }
 
-int
-ipv6anylocal(Ipifc *ifc, uchar *addr)
+/*
+ *  find the local address for a remote destination
+ */
+void
+findlocalip(Fs *f, uchar *local, uchar *remote)
 {
-	Iplifc *lifc;
-
-	for(lifc = ifc->lifc; lifc; lifc = lifc->next){
-		if(!isv4(lifc->local)){
-			ipmove(addr, lifc->local);
-			return SRC_UNI;
-		}
+	if(isv4(remote)) {
+		memmove(local, v4prefix, IPv4off);
+		if(v4source(f, remote+IPv4off, local+IPv4off) == nil)
+			findprimaryipv4(f, local);
+	} else {
+		if(v6source(f, remote, local) == nil)
+			findprimaryipv6(f, local);
 	}
-	return SRC_UNSPEC;
 }
 
 /*
@@ -1444,13 +1396,28 @@
 {
 	Iplifc *lifc;
 
-	for(lifc = ifc->lifc; lifc; lifc = lifc->next)
+	for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next)
 		if(ipcmp(ip, lifc->local) == 0)
 			return lifc;
+
 	return nil;
 }
 
+Iplifc*
+ipremoteonifc(Ipifc *ifc, uchar *ip)
+{
+	uchar net[IPaddrlen];
+	Iplifc *lifc;
 
+	for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next){
+		maskip(ip, lifc->mask, net);
+		if(ipcmp(net, lifc->remote) == 0)
+			return lifc;
+	}
+	return nil;
+}
+
+
 /*
  *  See if we're proxying for this address on this interface
  */
@@ -1458,24 +1425,13 @@
 ipproxyifc(Fs *f, Ipifc *ifc, uchar *ip)
 {
 	Route *r;
-	uchar net[IPaddrlen];
-	Iplifc *lifc;
 
 	/* see if this is a direct connected pt to pt address */
-	r = v6lookup(f, ip, nil);
-	if(r == nil)
+	r = v6lookup(f, ip, ip, nil);
+	if(r == nil || (r->type & (Rifc|Rproxy)) != (Rifc|Rproxy))
 		return 0;
-	if((r->type & (Rifc|Rproxy)) != (Rifc|Rproxy))
-		return 0;
 
-	/* see if this is on the right interface */
-	for(lifc = ifc->lifc; lifc; lifc = lifc->next){
-		maskip(ip, lifc->mask, net);
-		if(ipcmp(net, lifc->remote) == 0)
-			return 1;
-	}
-
-	return 0;
+	return ipremoteonifc(ifc, ip) != nil;
 }
 
 /*
@@ -1487,73 +1443,53 @@
 	if(isv4(ip)){
 		if(ip[IPv4off] >= 0xe0 && ip[IPv4off] < 0xf0)
 			return V4;
-	} else {
-		if(ip[0] == 0xff)
-			return V6;
 	}
+	else if(ip[0] == 0xff)
+		return V6;
 	return 0;
 }
 
-int
-ipisbm(uchar *ip)
-{
-	if(isv4(ip)){
-		if(ip[IPv4off] >= 0xe0 && ip[IPv4off] < 0xf0)
-			return V4;
-		if(ipcmp(ip, IPv4bcast) == 0)
-			return V4;
-	} else {
-		if(ip[0] == 0xff)
-			return V6;
-	}
-	return 0;
-}
-
-
 /*
- *  add a multicast address to an interface, called with c locked
+ *  add a multicast address to an interface.
  */
 void
 ipifcaddmulti(Conv *c, uchar *ma, uchar *ia)
 {
-	Ipifc *ifc;
-	Iplifc *lifc;
-	Conv **p;
 	Ipmulti *multi, **l;
+	Iplifc *lifc;
+	Ipifc *ifc;
 	Fs *f;
 
-	f = c->p->f;
-	
-	for(l = &c->multi; *l; l = &(*l)->next)
-		if(ipcmp(ma, (*l)->ma) == 0)
-		if(ipcmp(ia, (*l)->ia) == 0)
+	if(isv4(ma) != isv4(ia))
+		error("incompatible multicast/interface ip address");
+
+	for(l = &c->multi; *l != nil; l = &(*l)->next)
+		if(ipcmp(ma, (*l)->ma) == 0 && ipcmp(ia, (*l)->ia) == 0)
 			return;		/* it's already there */
 
-	multi = *l = smalloc(sizeof(*multi));
-	ipmove(multi->ma, ma);
-	ipmove(multi->ia, ia);
-	multi->next = nil;
-
-	for(p = f->ipifc->conv; *p; p++){
-		if((*p)->inuse == 0)
-			continue;
-		ifc = (Ipifc*)(*p)->ptcl;
+	f = c->p->f;
+	if((ifc = findipifc(f, ia, ma, Rmulti)) != nil){
+		rlock(ifc);
 		if(waserror()){
-			wunlock(ifc);
+			runlock(ifc);
 			nexterror();
 		}
-		wlock(ifc);
-		for(lifc = ifc->lifc; lifc; lifc = lifc->next)
-			if(ipcmp(ia, lifc->local) == 0)
-				addselfcache(f, ifc, lifc, ma, Rmulti);
-		wunlock(ifc);
+		if((lifc = iplocalonifc(ifc, ia)) != nil)
+			addselfcache(f, ifc, lifc, ma, Rmulti);
+		runlock(ifc);
 		poperror();
 	}
+
+	multi = smalloc(sizeof(*multi));
+	ipmove(multi->ma, ma);
+	ipmove(multi->ia, ia);
+	multi->next = nil;
+	*l = multi;
 }
 
 
 /*
- *  remove a multicast address from an interface, called with c locked
+ *  remove a multicast address from an interface.
  */
 void
 ipifcremmulti(Conv *c, uchar *ma, uchar *ia)
@@ -1560,15 +1496,11 @@
 {
 	Ipmulti *multi, **l;
 	Iplifc *lifc;
-	Conv **p;
 	Ipifc *ifc;
 	Fs *f;
 
-	f = c->p->f;
-	
-	for(l = &c->multi; *l; l = &(*l)->next)
-		if(ipcmp(ma, (*l)->ma) == 0)
-		if(ipcmp(ia, (*l)->ia) == 0)
+	for(l = &c->multi; *l != nil; l = &(*l)->next)
+		if(ipcmp(ma, (*l)->ma) == 0 && ipcmp(ia, (*l)->ia) == 0)
 			break;
 
 	multi = *l;
@@ -1576,161 +1508,101 @@
 		return; 	/* we don't have it open */
 
 	*l = multi->next;
+	multi->next = nil;
 
-	for(p = f->ipifc->conv; *p; p++){
-		if((*p)->inuse == 0)
-			continue;
-
-		ifc = (Ipifc*)(*p)->ptcl;
-		if(waserror()){
-			wunlock(ifc);
-			nexterror();
-		}
-		wlock(ifc);
-		for(lifc = ifc->lifc; lifc; lifc = lifc->next)
-			if(ipcmp(ia, lifc->local) == 0)
+	f = c->p->f;
+	if((ifc = findipifc(f, ia, ma, Rmulti)) != nil){
+		rlock(ifc);
+		if(!waserror()){
+			if((lifc = iplocalonifc(ifc, ia)) != nil)
 				remselfcache(f, ifc, lifc, ma);
-		wunlock(ifc);
-		poperror();
+			poperror();
+		}
+		runlock(ifc);
 	}
-
 	free(multi);
 }
 
-/*
- *  make lifc's join and leave multicast groups
- */
-static char*
-ipifcjoinmulti(Ipifc *ifc, char **argv, int argc)
+/* register the address on this network for address resolution */
+static void
+ipifcregisteraddr(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *ip)
 {
-	USED(ifc, argv, argc);
-	return nil;
+	if(waserror()){
+		print("ipifcregisteraddr %s %I %I: %s\n", ifc->dev, lifc->local, ip, up->errstr);
+		return;
+	}
+	if(ifc->m != nil && ifc->m->areg != nil)
+		(*ifc->m->areg)(f, ifc, lifc, ip);
+	poperror();
 }
 
-static char*
-ipifcleavemulti(Ipifc *ifc, char **argv, int argc)
-{
-	USED(ifc, argv, argc);
-	return nil;
-}
-
 static void
-ipifcregisterproxy(Fs *f, Ipifc *ifc, uchar *ip)
+ipifcregisterproxy(Fs *f, Ipifc *ifc, uchar *ip, int add)
 {
-	Conv **cp, **e;
-	Ipifc *nifc;
+	uchar a[IPaddrlen];
 	Iplifc *lifc;
-	Medium *m;
-	uchar net[IPaddrlen];
+	Ipifc *nifc;
+	Conv **cp;
 
-	/* register the address on any network that will proxy for us */
-	e = &f->ipifc->conv[f->ipifc->nc];
+	/* register the address on any interface that will proxy for the ip */
+	for(cp = f->ipifc->conv; *cp != nil; cp++){
+		nifc = (Ipifc*)(*cp)->ptcl;
+		if(nifc == ifc || !canrlock(nifc))
+			continue;
 
-	if(!isv4(ip)) { // V6
-		for(cp = f->ipifc->conv; cp < e; cp++){
-			if(*cp == nil)
-				continue;
-			nifc = (Ipifc*)(*cp)->ptcl;
-			if(nifc == ifc)
-				continue;
-	
-			rlock(nifc);
-			m = nifc->m;
-			if(m == nil || m->addmulti == nil) {
-				runlock(nifc);
-				continue;
-			}
-			for(lifc = nifc->lifc; lifc; lifc = lifc->next){
-				maskip(ip, lifc->mask, net);
-				if(ipcmp(net, lifc->remote) == 0) { /* add solicited-node multicast address */
-					ipv62smcast(net, ip);
-					addselfcache(f, nifc, lifc, net, Rmulti);
-					arpenter(f, V6, ip, nifc->mac, 6, 0);
-					//(*m->addmulti)(nifc, net, ip);
-					break;
-				}
-			}
+		if(nifc->m == nil
+		|| (lifc = ipremoteonifc(nifc, ip)) == nil
+		|| (lifc->type & Rptpt) != 0
+		|| waserror()){
 			runlock(nifc);
+			continue;
 		}
-		return;
-	}
-	else { // V4
-		for(cp = f->ipifc->conv; cp < e; cp++){
-			if(*cp == nil)
-				continue;
-			nifc = (Ipifc*)(*cp)->ptcl;
-			if(nifc == ifc)
-				continue;
-	
-			rlock(nifc);
-			m = nifc->m;
-			if(m == nil || m->areg == nil){
-				runlock(nifc);
-				continue;
-			}
-			for(lifc = nifc->lifc; lifc; lifc = lifc->next){
-				maskip(ip, lifc->mask, net);
-				if(ipcmp(net, lifc->remote) == 0){
-					(*m->areg)(nifc, ip);
-					break;
-				}
-			}
-			runlock(nifc);
+		if((lifc->type & Rv4) == 0){
+			/* add solicited-node multicast addr */
+			ipv62smcast(a, ip);
+			if(add)
+				addselfcache(f, nifc, lifc, a, Rmulti);
+			else
+				remselfcache(f, nifc, lifc, a);
 		}
+		if(add)
+			ipifcregisteraddr(f, nifc, lifc, ip);
+		runlock(nifc);
+		poperror();
 	}
 }
 
-
-// added for new v6 mesg types
-static void
-adddefroute6(Fs *f, uchar *gate, int force)
-{
-	Route *r;
-
-	r = v6lookup(f, v6Unspecified, nil);
-	if(r!=nil)
-	if(!(force) && (strcmp(r->tag,"ra")!=0))	// route entries generated
-		return;			// by all other means take
-					// precedence over router annc
-
-	v6delroute(f, v6Unspecified, v6Unspecified, 1);
-	v6addroute(f, "ra", v6Unspecified, v6Unspecified, gate, 0);
-}
-
-enum
-{
-	Ngates = 3,
-};
-
 char*
-ipifcaddpref6(Ipifc *ifc, char**argv, int argc)
+ipifcadd6(Ipifc *ifc, char **argv, int argc)
 {
-	uchar	onlink = 1;
-	uchar	autoflag = 1;
-	long 	validlt = 0xffffffff;
-	long 	preflt = 0xffffffff;
-	long	origint = NOW / 10^3;
-	uchar	prefix[IPaddrlen];
-	int	plen = 64;
-	Iplifc	*lifc;
-	char	addr[40], preflen[6];
-	char	*params[3];
+	int plen = 64;
+	char addr[40], preflen[6];
+	char *params[3];
+	uchar prefix[IPaddrlen];
+	Iplifc lifc;
+	Medium *m;
 
+	lifc.onlink = 1;
+	lifc.autoflag = 1;
+	lifc.validlt = lifc.preflt = ~0UL;
+	lifc.origint = NOW / 1000;
+
 	switch(argc) {
 	case 7:
-		preflt = atoi(argv[6]);
+		lifc.preflt = strtoul(argv[6], 0, 10);
 		/* fall through */
 	case 6:
-		validlt = atoi(argv[5]);
+		lifc.validlt = strtoul(argv[5], 0, 10);
 		/* fall through */
 	case 5:
-		autoflag =  atoi(argv[4]);
+		lifc.autoflag = atoi(argv[4]) != 0;
 		/* fall through */
 	case 4:
-		onlink = atoi(argv[3]);
+		lifc.onlink = atoi(argv[3]) != 0;
 		/* fall through */
 	case 3:
 		plen = atoi(argv[2]);
+		/* fall through */
 	case 2:
 		break;
 	default:
@@ -1737,25 +1609,16 @@
 		return Ebadarg;
 	}
 
-	if((parseip(prefix, argv[1])!=6) ||
-	 	(validlt < preflt) ||
-		(plen < 0) || (plen > 64) ||
-		(islinklocal(prefix))
-	)
+	if (parseip(prefix, argv[1]) != 6 || lifc.validlt < lifc.preflt || plen < 0 ||
+	    plen > 64 || islinklocal(prefix))
 		return Ebadarg;
 
-	lifc = smalloc(sizeof(Iplifc));
-	lifc->onlink = (onlink!=0);
-	lifc->autoflag = (autoflag!=0);
-	lifc->validlt = validlt;
-	lifc->preflt = preflt;
-	lifc->origint = origint;
+	/* issue "add" ctl msg for v6 link-local addr and prefix len */
+	m = ifc->m;
+	if(m == nil || m->pref2addr == nil)
+		return Eunbound;
+	(*m->pref2addr)(prefix, ifc->mac);	/* mac → v6 link-local addr */
 
-	if(ifc->m->pref2addr!=nil)
-		ifc->m->pref2addr(prefix, ifc->mac);
-	else
-		return Ebadarg;
-	
 	sprint(addr, "%I", prefix);
 	sprint(preflen, "/%d", plen);
 	params[0] = "add";
@@ -1762,6 +1625,28 @@
 	params[1] = addr;
 	params[2] = preflen;
 
-	return ipifcadd(ifc, params, 3, 0, lifc);
+	return ipifcadd(ifc, params, 3, 0, &lifc);
 }
 
+char*
+ipifcremove6(Ipifc *ifc, char**, int argc)
+{
+	Iplifc *lifc, **l;
+	ulong now;
+
+	if(argc != 1)
+		return Ebadarg;
+
+	wlock(ifc);
+	now = NOW/1000;
+	for(l = &ifc->lifc; (lifc = *l) != nil;) {
+		if((lifc->type & Rv4) == 0)
+		if(lifc->validlt != ~0UL && lifc->validlt < now-lifc->origint)
+			if(ipifcremlifc(ifc, l) == nil)
+				continue;
+		l = &lifc->next;
+	}
+	wunlock(ifc);
+
+	return nil;
+}
diff -u a/os/ip//ipmux.c b/os/ip//ipmux.c
--- a/os/ip//ipmux.c
+++ b/os/ip//ipmux.c
@@ -1,3 +1,6 @@
+/*
+ * IP packet filter
+ */
 #include "u.h"
 #include "../port/lib.h"
 #include "mem.h"
@@ -6,30 +9,14 @@
 #include "../port/error.h"
 
 #include "ip.h"
-#define DPRINT if(0)print
+#include "ipv6.h"
 
 typedef struct Ipmuxrock  Ipmuxrock;
 typedef struct Ipmux      Ipmux;
-typedef struct Ip6hdr     Ip6hdr;
 
 enum
 {
-	IPHDR		= 20,		/* sizeof(Ip4hdr) */
-};
-
-struct Ip6hdr
-{
-	uchar vcf[4];		/* version, class label, and flow label */ 
-	uchar ploadlen[2];	/* payload length */
-	uchar proto;		/* next header, i.e. proto */
-	uchar ttl;		/* hop limit, i.e. ttl */
-	uchar src[16];		/* IP source */
-	uchar dst[16];		/* IP destination */
-};
-
-
-enum
-{
+	Tver,
 	Tproto,
 	Tdata,
 	Tiph,
@@ -36,28 +23,8 @@
 	Tdst,
 	Tsrc,
 	Tifc,
-
-	Cother = 0,
-	Cbyte,		/* single byte */
-	Cmbyte,		/* single byte with mask */
-	Cshort,		/* single short */
-	Cmshort,	/* single short with mask */
-	Clong,		/* single long */
-	Cmlong,		/* single long with mask */
-	Cifc,
-	Cmifc,
 };
 
-char *ftname[] = 
-{
-[Tproto]	"proto",
-[Tdata]		"data",
-[Tiph]	 	"iph",
-[Tdst]		"dst",
-[Tsrc]		"src",
-[Tifc]		"ifc",
-};
-
 /*
  *  a node in the decision tree
  */
@@ -66,16 +33,12 @@
 	Ipmux	*yes;
 	Ipmux	*no;
 	uchar	type;		/* type of field(Txxxx) */
-	uchar	ctype;		/* tupe of comparison(Cxxxx) */
 	uchar	len;		/* length in bytes of item to compare */
 	uchar	n;		/* number of items val points to */
-	short	off;		/* offset of comparison */
-	short	eoff;		/* end offset of comparison */
-	uchar	skiphdr;	/* should offset start after ipheader */
+	int	off;		/* offset of comparison */
 	uchar	*val;
 	uchar	*mask;
 	uchar	*e;		/* val+n*len*/
-
 	int	ref;		/* so we can garbage collect */
 	Conv	*conv;
 };
@@ -90,6 +53,7 @@
 
 static int	ipmuxsprint(Ipmux*, int, char*, int);
 static void	ipmuxkick(void *x);
+static void	ipmuxfree(Ipmux *f);
 
 static char*
 skipwhite(char *p)
@@ -122,27 +86,33 @@
 	Ipmux *f;
 
 	p = skipwhite(p);
-	if(strncmp(p, "dst", 3) == 0){
+	if(strncmp(p, "ver", 3) == 0){
+		type = Tver;
+		off = 0;
+		len = 1;
+		p += 3;
+	}
+	else if(strncmp(p, "dst", 3) == 0){
 		type = Tdst;
-		off = offsetof(Ip4hdr, dst[0]);
-		len = IPv4addrlen;
+		off = offsetof(Ip6hdr, dst[0]);
+		len = IPaddrlen;
 		p += 3;
 	}
 	else if(strncmp(p, "src", 3) == 0){
 		type = Tsrc;
-		off = offsetof(Ip4hdr, src[0]);
-		len = IPv4addrlen;
+		off = offsetof(Ip6hdr, src[0]);
+		len = IPaddrlen;
 		p += 3;
 	}
 	else if(strncmp(p, "ifc", 3) == 0){
 		type = Tifc;
-		off = -IPv4addrlen;
-		len = IPv4addrlen;
+		off = -IPaddrlen;
+		len = IPaddrlen;
 		p += 3;
 	}
 	else if(strncmp(p, "proto", 5) == 0){
 		type = Tproto;
-		off = offsetof(Ip4hdr, proto);
+		off = offsetof(Ip6hdr, proto);
 		len = 1;
 		p += 5;
 	}
@@ -160,7 +130,7 @@
 			return nil;
 		p++;
 		off = strtoul(p, &p, 0);
-		if(off < 0 || off > (64-IPHDR))
+		if(off < 0)
 			return nil;
 		p = skipwhite(p);
 		if(*p != ':')
@@ -189,11 +159,6 @@
 	f->mask = nil;
 	f->n = 1;
 	f->ref = 1;
-	if(type == Tdata)
-		f->skiphdr = 1;
-	else
-		f->skiphdr = 0;
-
 	return f;	
 }
 
@@ -229,7 +194,7 @@
 static Ipmux*
 parsemux(char *p)
 {
-	int n, nomask;
+	int n;
 	Ipmux *f;
 	char *val;
 	char *mask;
@@ -247,7 +212,7 @@
 		goto parseerror;
 
 	/* parse mask */
-	mask = follows(val, '&');
+	mask = follows(p, '&');
 	if(mask != nil){
 		switch(f->type){
 		case Tsrc:
@@ -254,7 +219,7 @@
 		case Tdst:
 		case Tifc:
 			f->mask = smalloc(f->len);
-			v4parseip(f->mask, mask);
+			parseipmask(f->mask, mask, 0);
 			break;
 		case Tdata:
 		case Tiph:
@@ -264,15 +229,13 @@
 		default:
 			goto parseerror;
 		}
-		nomask = 0;
-	} else {
-		nomask = 1;
+	} else if(f->type == Tver){
 		f->mask = smalloc(f->len);
-		memset(f->mask, 0xff, f->len);
+		f->mask[0] = 0xF0;
 	}
 
 	/* parse vals */
-	f->n = getfields(val, vals, sizeof(vals)/sizeof(char*), 1, "|");
+	f->n = getfields(val, vals, nelem(vals), 1, "|");
 	if(f->n == 0)
 		goto parseerror;
 	f->val = smalloc(f->n*f->len);
@@ -279,10 +242,21 @@
 	v = f->val;
 	for(n = 0; n < f->n; n++){
 		switch(f->type){
+		case Tver:
+			if(f->n != 1)
+				goto parseerror;
+			if(strcmp(vals[n], "6") == 0)
+				*v = IP_VER6;
+			else if(strcmp(vals[n], "4") == 0)
+				*v = IP_VER4;
+			else
+				goto parseerror;
+			break;
 		case Tsrc:
 		case Tdst:
 		case Tifc:
-			v4parseip(v, vals[n]);
+			if(parseip(v, vals[n]) == -1)
+				goto parseerror;
 			break;
 		case Tproto:
 		case Tdata:
@@ -292,34 +266,11 @@
 		}
 		v += f->len;
 	}
-
-	f->eoff = f->off + f->len;
 	f->e = f->val + f->n*f->len;
-	f->ctype = Cother;
-	if(f->n == 1){
-		switch(f->len){
-		case 1:
-			f->ctype = nomask ? Cbyte : Cmbyte;
-			break;
-		case 2:
-			f->ctype = nomask ? Cshort : Cmshort;
-			break;
-		case 4:
-			if(f->type == Tifc)
-				f->ctype = nomask ? Cifc : Cmifc;
-			else
-				f->ctype = nomask ? Clong : Cmlong;
-			break;
-		}
-	}
 	return f;
 
 parseerror:
-	if(f->mask)
-		free(f->mask);
-	if(f->val)
-		free(f->val);
-	free(f);
+	ipmuxfree(f);
 	return nil;
 }
 
@@ -342,8 +293,7 @@
 		return n;
 
 	/* compare offsets, call earlier ones more specific */
-	n = (a->off+((int)a->skiphdr)*offsetof(Ip4hdr, data[0])) - 
-		(b->off+((int)b->skiphdr)*offsetof(Ip4hdr, data[0]));
+	n = a->off - b->off;
 	if(n != 0)
 		return n;
 
@@ -413,6 +363,10 @@
 	*nf = *f;
 	nf->no = ipmuxcopy(f->no);
 	nf->yes = ipmuxcopy(f->yes);
+	if(f->mask != nil){
+		nf->mask = smalloc(f->len);
+		memmove(nf->mask, f->mask, f->len);
+	}
 	nf->val = smalloc(f->n*f->len);
 	nf->e = nf->val + f->len*f->n;
 	memmove(nf->val, f->val, f->n*f->len);
@@ -422,8 +376,10 @@
 static void
 ipmuxfree(Ipmux *f)
 {
-	if(f->val != nil)
-		free(f->val);
+	if(f == nil)
+		return;
+	free(f->val);
+	free(f->mask);
 	free(f);
 }
 
@@ -432,10 +388,8 @@
 {
 	if(f == nil)
 		return;
-	if(f->no != nil)
-		ipmuxfree(f->no);
-	if(f->yes != nil)
-		ipmuxfree(f->yes);
+	ipmuxfree(f->no);
+	ipmuxfree(f->yes);
 	ipmuxfree(f);
 }
 
@@ -510,6 +464,8 @@
 		return ipmuxremove(&ft->no, f);
 	}
 
+	ipmuxremove(&ft->no, f->no);
+
 	/* we found a match */
 	if(--(ft->ref) == 0){
 		/*
@@ -531,8 +487,55 @@
 }
 
 /*
+ * convert to ipv4 filter
+ */
+static Ipmux*
+ipmuxconv4(Ipmux *f)
+{
+	int i, n;
+
+	if(f == nil)
+		return nil;
+
+	switch(f->type){
+	case Tproto:
+		f->off = offsetof(Ip4hdr, proto);
+		break;
+	case Tdst:
+		f->off = offsetof(Ip4hdr, dst[0]);
+		if(0){
+	case Tsrc:
+		f->off = offsetof(Ip4hdr, src[0]);
+		}
+		if(f->len != IPaddrlen)
+			break;
+		n = 0;
+		for(i = 0; i < f->n; i++){
+			if(isv4(f->val + i*IPaddrlen)){
+				memmove(f->val + n*IPv4addrlen, f->val + i*IPaddrlen + IPv4off, IPv4addrlen);
+				n++;
+			}
+		}
+		if(n == 0){
+			ipmuxtreefree(f);
+			return nil;
+		}
+		f->n = n;
+		f->len = IPv4addrlen;
+		if(f->mask != nil)
+			memmove(f->mask, f->mask+IPv4off, IPv4addrlen);
+	}
+	f->e = f->val + f->n*f->len;
+
+	f->yes = ipmuxconv4(f->yes);
+	f->no = ipmuxconv4(f->no);
+
+	return f;
+}
+
+/*
  *  connection request is a semi separated list of filters
- *  e.g. proto=17;dat[0:4]=11aa22bb;ifc=135.104.9.2&255.255.255.0
+ *  e.g. ver=4;proto=17;data[0:4]=11aa22bb;ifc=135.104.9.2&255.255.255.0
  *
  *  there's no protection against overlapping specs.
  */
@@ -568,6 +571,18 @@
 		return Ebadarg;
 	mux->conv = c;
 
+	if(chain->type != Tver) {
+		char ver6[] = "ver=6";
+		mux = parsemux(ver6);
+		mux->yes = chain;
+		mux->no = ipmuxcopy(chain);
+		chain = mux;
+	}
+	if(*chain->val == IP_VER4)
+		chain->yes = ipmuxconv4(chain->yes);
+	else
+		chain->no = ipmuxconv4(chain->no);
+
 	/* save a copy of the chain so we can later remove it */
 	mux = ipmuxcopy(chain);
 	r = (Ipmuxrock*)(c->ptcl);
@@ -642,95 +657,84 @@
 	Block *bp;
 
 	bp = qget(c->wq);
-	if(bp == nil)
-		return;
-	else {
+	if(bp != nil) {
 		Ip4hdr *ih4 = (Ip4hdr*)(bp->rp);
-		if((ih4->vihl)&0xF0 != 0x60)
+
+		if((ih4->vihl & 0xF0) != IP_VER6)
 			ipoput4(c->p->f, bp, 0, ih4->ttl, ih4->tos, nil);
-		else {
-			Ip6hdr *ih6 = (Ip6hdr*)(bp->rp);
-			ipoput6(c->p->f, bp, 0, ih6->ttl, 0, nil);
-		}
+		else
+			ipoput6(c->p->f, bp, 0, ((Ip6hdr*)ih4)->ttl, 0, nil);
 	}
 }
 
+static int
+maskmemcmp(uchar *m, uchar *v, uchar *c, int n)
+{
+	int i;
+
+	if(m == nil)
+		return memcmp(v, c, n) != 0;
+
+	for(i = 0; i < n; i++)
+		if((v[i] & m[i]) != c[i])
+			return 1;
+	return 0;
+}
+
 static void
 ipmuxiput(Proto *p, Ipifc *ifc, Block *bp)
 {
-	int len, hl;
 	Fs *f = p->f;
-	uchar *m, *h, *v, *e, *ve, *hp;
 	Conv *c;
+	Iplifc *lifc;
 	Ipmux *mux;
-	Ip4hdr *ip;
+	uchar *v;
+	Ip4hdr *ip4;
 	Ip6hdr *ip6;
+	int off, hl;
 
-	ip = (Ip4hdr*)bp->rp;
-	hl = (ip->vihl&0x0F)<<2;
+	ip4 = (Ip4hdr*)bp->rp;
+	if((ip4->vihl & 0xF0) == IP_VER4) {
+		hl = (ip4->vihl&0x0F)<<2;
+		ip6 = nil;
+	} else {
+		hl = IP6HDR;
+		ip6 = (Ip6hdr*)ip4;
+	}
 
 	if(p->priv == nil)
 		goto nomatch;
 
-	h = bp->rp;
-	len = BLEN(bp);
+	c = nil;
+	lifc = nil;
 
-	/* run the v4 filter */
+	/* run the filter */
 	rlock(f);
-	c = nil;
 	mux = f->ipmux->priv;
 	while(mux != nil){
-		if(mux->eoff > len){
-			mux = mux->no;
-			continue;
-		}
-		hp = h + mux->off + ((int)mux->skiphdr)*hl;
-		switch(mux->ctype){
-		case Cbyte:
-			if(*mux->val == *hp)
-				goto yes;
+		switch(mux->type){
+		case Tifc:
+			if(mux->len != IPaddrlen)
+				goto no;
+			for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next)
+				for(v = mux->val; v < mux->e; v += IPaddrlen)
+					if(maskmemcmp(mux->mask, lifc->local, v, IPaddrlen) == 0)
+						goto yes;
+			goto no;
+		case Tdata:
+			off = hl;
 			break;
-		case Cmbyte:
-			if((*hp & *mux->mask) == *mux->val)
-				goto yes;
-			break;
-		case Cshort:
-			if(*((ushort*)mux->val) == *(ushort*)hp)
-				goto yes;
-			break;
-		case Cmshort:
-			if((*(ushort*)hp & (*((ushort*)mux->mask))) == *((ushort*)mux->val))
-				goto yes;
-			break;
-		case Clong:
-			if(*((ulong*)mux->val) == *(ulong*)hp)
-				goto yes;
-			break;
-		case Cmlong:
-			if((*(ulong*)hp & (*((ulong*)mux->mask))) == *((ulong*)mux->val))
-				goto yes;
-			break;
-		case Cifc:
-			if(*((ulong*)mux->val) == *(ulong*)(ifc->lifc->local + IPv4off))
-				goto yes;
-			break;
-		case Cmifc:
-			if((*(ulong*)(ifc->lifc->local + IPv4off) & (*((ulong*)mux->mask))) == *((ulong*)mux->val))
-				goto yes;
-			break;
 		default:
-			v = mux->val;
-			for(e = mux->e; v < e; v = ve){
-				m = mux->mask;
-				hp = h + mux->off;
-				for(ve = v + mux->len; v < ve; v++){
-					if((*hp++ & *m++) != *v)
-						break;
-				}
-				if(v == ve)
-					goto yes;
-			}
+			off = 0;
+			break;
 		}
+		off += mux->off;
+		if(off < 0 || off + mux->len > BLEN(bp))
+			goto no;
+		for(v = mux->val; v < mux->e; v += mux->len)
+			if(maskmemcmp(mux->mask, bp->rp + off, v, mux->len) == 0)
+				goto yes;
+no:
 		mux = mux->no;
 		continue;
 yes:
@@ -743,28 +747,24 @@
 	if(c != nil){
 		/* tack on interface address */
 		bp = padblock(bp, IPaddrlen);
-		ipmove(bp->rp, ifc->lifc->local);
-		bp = concatblock(bp);
-		if(bp != nil)
-			if(qpass(c->rq, bp) < 0)
-				print("Q");
+		if(lifc == nil)
+			lifc = ifc->lifc;
+		ipmove(bp->rp, lifc != nil ? lifc->local : IPnoaddr);
+		qpass(c->rq, concatblock(bp));
 		return;
 	}
 
 nomatch:
 	/* doesn't match any filter, hand it to the specific protocol handler */
-	ip = (Ip4hdr*)bp->rp;
-	if((ip->vihl&0xF0)==0x40) {
-		p = f->t2p[ip->proto];
-	} else {
-		ip6 = (Ip6hdr*)bp->rp;
+	if(ip6 != nil)
 		p = f->t2p[ip6->proto];
-	}
-	if(p && p->rcv)
-		(*p->rcv)(p, ifc, bp);
 	else
-		freeblist(bp);
-	return;
+		p = f->t2p[ip4->proto];
+	if(p != nil && p->rcv != nil){
+		(*p->rcv)(p, ifc, bp);
+		return;
+	}
+	freeblist(bp);
 }
 
 static int
@@ -780,11 +780,14 @@
 		n += snprint(buf+n, len-n, "\n");
 		return n;
 	}
-	n += snprint(buf+n, len-n, "h[%d:%d]&", 
-               mux->off+((int)mux->skiphdr)*((int)offsetof(Ip4hdr, data[0])), 
-               mux->off+(((int)mux->skiphdr)*((int)offsetof(Ip4hdr, data[0])))+mux->len-1);
-	for(i = 0; i < mux->len; i++)
-		n += snprint(buf+n, len - n, "%2.2ux", mux->mask[i]);
+	n += snprint(buf+n, len-n, "%s[%d:%d]", 
+		mux->type == Tdata ? "data": "iph",
+		mux->off, mux->off+mux->len-1);
+	if(mux->mask != nil){
+		n += snprint(buf+n, len-n, "&");
+		for(i = 0; i < mux->len; i++)
+			n += snprint(buf+n, len - n, "%2.2ux", mux->mask[i]);
+	}
 	n += snprint(buf+n, len-n, "=");
 	v = mux->val;
 	for(j = 0; j < mux->n; j++){
diff -u a/os/ip//iproute.c b/os/ip//iproute.c
--- a/os/ip//iproute.c
+++ b/os/ip//iproute.c
@@ -12,10 +12,10 @@
 static void	calcd(Route*);
 
 /* these are used for all instances of IP */
-Route*	v4freelist;
-Route*	v6freelist;
-RWlock	routelock;
-ulong	v4routegeneration, v6routegeneration;
+static Route*	v4freelist;
+static Route*	v6freelist;
+static RWlock	routelock;
+static ulong	v4routegeneration, v6routegeneration;
 
 static void
 freeroute(Route *r)
@@ -22,6 +22,7 @@
 {
 	Route **l;
 
+	r->ref = 0;
 	r->left = nil;
 	r->right = nil;
 	if(r->type & Rv4)
@@ -35,9 +36,8 @@
 static Route*
 allocroute(int type)
 {
-	Route *r;
+	Route *r, **l;
 	int n;
-	Route **l;
 
 	if(type & Rv4){
 		n = sizeof(RouteTree) + sizeof(V4route);
@@ -72,9 +72,9 @@
 		return;
 
 	l = allocroute(r->type);
+	l->left = r;
 	l->mid = *q;
 	*q = l;
-	l->left = r;
 }
 
 /*
@@ -99,11 +99,11 @@
  */
 enum
 {
-	Rpreceeds,
-	Rfollows,
-	Requals,
-	Rcontains,
-	Rcontained,
+	Rpreceeds,	/* a left of b */
+	Rfollows,	/* a right of b */
+	Requals,	/* a equals b */
+	Rcontains,	/* a contains b */
+	Roverlaps,	/* a overlaps b */
 };
 
 static int
@@ -112,44 +112,88 @@
 	if(a->type & Rv4){
 		if(a->v4.endaddress < b->v4.address)
 			return Rpreceeds;
-
 		if(a->v4.address > b->v4.endaddress)
 			return Rfollows;
-
 		if(a->v4.address <= b->v4.address
 		&& a->v4.endaddress >= b->v4.endaddress){
 			if(a->v4.address == b->v4.address
-			&& a->v4.endaddress == b->v4.endaddress)
-				return Requals;
+			&& a->v4.endaddress == b->v4.endaddress){
+				if(a->v4.source <= b->v4.source
+				&& a->v4.endsource >= b->v4.endsource){
+					if(a->v4.source == b->v4.source
+					&& a->v4.endsource == b->v4.endsource)
+						return Requals;
+					return Rcontains;
+				}
+				return Roverlaps;
+			}
 			return Rcontains;
 		}
-		return Rcontained;
+		return Roverlaps;
 	}
 
 	if(lcmp(a->v6.endaddress, b->v6.address) < 0)
 		return Rpreceeds;
-
 	if(lcmp(a->v6.address, b->v6.endaddress) > 0)
 		return Rfollows;
-
 	if(lcmp(a->v6.address, b->v6.address) <= 0
 	&& lcmp(a->v6.endaddress, b->v6.endaddress) >= 0){
 		if(lcmp(a->v6.address, b->v6.address) == 0
-		&& lcmp(a->v6.endaddress, b->v6.endaddress) == 0)
-				return Requals;
+		&& lcmp(a->v6.endaddress, b->v6.endaddress) == 0){
+			if(lcmp(a->v6.source, b->v6.source) <= 0
+			&& lcmp(a->v6.endsource, b->v6.endsource) >= 0){
+				if(lcmp(a->v6.source, b->v6.source) == 0
+				&& lcmp(a->v6.endsource, b->v6.endsource) == 0)
+					return Requals;
+				return Rcontains;
+			}
+			return Roverlaps;
+		}
 		return Rcontains;
 	}
+	return Roverlaps;
+}
 
-	return Rcontained;
+/* return 1 if a matches b, otherwise 0 */
+static int
+matchroute(Route *a, Route *b)
+{
+	if(a == b)
+		return 1;
+
+	if((a->type^b->type) & (Rifc|Runi|Rmulti|Rbcast))
+		return 0;
+
+	if(a->type & Rv4){
+		if(memcmp(a->v4.gate, IPnoaddr+IPv4off, IPv4addrlen) != 0
+		&& memcmp(a->v4.gate, b->v4.gate, IPv4addrlen) != 0)
+			return 0;
+	} else {
+		if(ipcmp(a->v6.gate, IPnoaddr) != 0
+		&& ipcmp(a->v6.gate, b->v6.gate) != 0)
+			return 0;
+	}
+
+	if(a->ifc != nil && b->ifc != nil && (a->ifc != b->ifc || a->ifcid != b->ifcid))
+		return 0;
+
+	if(*a->tag != 0 && strncmp(a->tag, b->tag, sizeof(a->tag)) != 0)
+		return 0;
+
+	return 1;
 }
 
 static void
 copygate(Route *old, Route *new)
 {
+	old->type = new->type;
+	old->ifc = new->ifc;
+	old->ifcid = new->ifcid;
 	if(new->type & Rv4)
 		memmove(old->v4.gate, new->v4.gate, IPv4addrlen);
 	else
-		memmove(old->v6.gate, new->v6.gate, IPaddrlen);
+		ipmove(old->v6.gate, new->v6.gate);
+	strncpy(old->tag, new->tag, sizeof(new->tag));
 }
 
 /*
@@ -162,12 +206,12 @@
 
 	l = p->left;
 	r = p->right;
-	p->left = 0;
-	p->right = 0;
+	p->left = nil;
+	p->right = nil;
 	addnode(f, root, p);
-	if(l)
+	if(l != nil)
 		walkadd(f, root, l);
-	if(r)
+	if(r != nil)
 		walkadd(f, root, r);
 }
 
@@ -180,16 +224,16 @@
 	Route *q;
 	int d;
 
-	if(p) {
+	if(p != nil) {
 		d = 0;
 		q = p->left;
-		if(q)
+		if(q != nil)
 			d = q->depth;
 		q = p->right;
-		if(q && q->depth > d)
+		if(q != nil && q->depth > d)
 			d = q->depth;
 		q = p->mid;
-		if(q && q->depth > d)
+		if(q != nil && q->depth > d)
 			d = q->depth;
 		p->depth = d+1;
 	}
@@ -210,8 +254,8 @@
 	 * rotate tree node
 	 */
 	p = *cur;
-	dl = 0; if(l = p->left) dl = l->depth;
-	dr = 0; if(r = p->right) dr = r->depth;
+	dl = 0; if((l = p->left) != nil) dl = l->depth;
+	dr = 0; if((r = p->right) != nil) dr = r->depth;
 
 	if(dl > dr+1) {
 		p->left = l->right;
@@ -239,7 +283,7 @@
 	Route *p;
 
 	p = *cur;
-	if(p == 0) {
+	if(p == nil) {
 		*cur = new;
 		new->depth = 1;
 		return;
@@ -269,15 +313,13 @@
 		 *  supercede the old entry if the old one isn't
 		 *  a local interface.
 		 */
-		if((p->type & Rifc) == 0){
-			p->type = new->type;
-			p->ifcid = -1;
+		if((p->type & Rifc) == 0)
 			copygate(p, new);
-		} else if(new->type & Rifc)
+		else if(new->type & Rifc)
 			p->ref++;
 		freeroute(new);
 		break;
-	case Rcontained:
+	case Roverlaps:
 		addnode(f, &p->mid, new);
 		break;
 	}
@@ -285,241 +327,316 @@
 	balancetree(cur);
 }
 
-#define	V4H(a)	((a&0x07ffffff)>>(32-Lroot-5))
-
-void
-v4addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type)
+/*
+ *  find node matching r
+ */
+static Route**
+looknode(Route **cur, Route *r)
 {
 	Route *p;
-	ulong sa;
-	ulong m;
-	ulong ea;
-	int h, eh;
 
-	m = nhgetl(mask);
-	sa = nhgetl(a) & m;
-	ea = sa | ~m;
+	for(;;){
+		p = *cur;
+		if(p == nil)
+			return nil;
+		switch(rangecompare(r, p)){
+		case Rcontains:
+			return nil;
+		case Rpreceeds:
+			cur = &p->left;
+			break;
+		case Rfollows:
+			cur = &p->right;
+			break;
+		case Roverlaps:
+			cur = &p->mid;
+			break;
+		case Requals:
+			if((p->type & Rifc) == 0 && !matchroute(r, p))
+				return nil;
+			return cur;
+		}
+	}
+}
 
-	eh = V4H(ea);
-	for(h=V4H(sa); h<=eh; h++) {
-		p = allocroute(Rv4 | type);
-		p->v4.address = sa;
-		p->v4.endaddress = ea;
-		memmove(p->v4.gate, gate, sizeof(p->v4.gate));
-		memmove(p->tag, tag, sizeof(p->tag));
+static Route*
+looknodetag(Route *r, char *tag)
+{
+	Route *x;
 
-		wlock(&routelock);
-		addnode(f, &f->v4root[h], p);
-		while(p = f->queue) {
-			f->queue = p->mid;
-			walkadd(f, &f->v4root[h], p->left);
-			freeroute(p);
-		}
-		wunlock(&routelock);
+	if(r == nil)
+		return nil;
+
+	if((x = looknodetag(r->mid, tag)) != nil)
+		return x;
+	if((x = looknodetag(r->left, tag)) != nil)
+		return x;
+	if((x = looknodetag(r->right, tag)) != nil)
+		return x;
+
+	if((r->type & Rifc) == 0){
+		if(tag == nil || strncmp(tag, r->tag, sizeof(r->tag)) == 0)
+			return r;
 	}
-	v4routegeneration++;
 
-	ipifcaddroute(f, Rv4, a, mask, gate, type);
+	return nil;
 }
 
-#define	V6H(a)	(((a)[IPllen-1] & 0x07ffffff)>>(32-Lroot-5))
-#define ISDFLT(a, mask, tag) ((ipcmp((a),v6Unspecified)==0) && (ipcmp((mask),v6Unspecified)==0) && (strcmp((tag), "ra")!=0))
+#define	V4H(a)	((a&0x07ffffff)>>(32-Lroot-5))
+#define	V6H(a)	(((a)[IPllen-1]&0x07ffffff)>>(32-Lroot-5))
 
-void
-v6addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type)
+static void
+routeadd(Fs *f, Route *r)
 {
-	Route *p;
-	ulong sa[IPllen], ea[IPllen];
-	ulong x, y;
-	int h, eh;
+	Route **h, **e, *p;
 
-	/*
-	if(ISDFLT(a, mask, tag))
-		f->v6p->cdrouter = -1;
-	*/
+	if(r->type & Rv4){
+		h = &f->v4root[V4H(r->v4.address)];
+		e = &f->v4root[V4H(r->v4.endaddress)];
+	} else {
+		h = &f->v6root[V6H(r->v6.address)];
+		e = &f->v6root[V6H(r->v6.endaddress)];
+	}
 
+	for(; h <= e; h++) {
+		p = allocroute(r->type);
 
-	for(h = 0; h < IPllen; h++){
-		x = nhgetl(a+4*h);
-		y = nhgetl(mask+4*h);
-		sa[h] = x & y;
-		ea[h] = x | ~y;
-	}
+		p->ifc = r->ifc;
+		p->ifcid = r->ifcid;
 
-	eh = V6H(ea);
-	for(h = V6H(sa); h <= eh; h++) {
-		p = allocroute(type);
-		memmove(p->v6.address, sa, IPaddrlen);
-		memmove(p->v6.endaddress, ea, IPaddrlen);
-		memmove(p->v6.gate, gate, IPaddrlen);
-		memmove(p->tag, tag, sizeof(p->tag));
+		if(r->type & Rv4)
+			memmove(&p->v4, &r->v4, sizeof(r->v4));
+		else
+			memmove(&p->v6, &r->v6, sizeof(r->v6));
 
-		wlock(&routelock);
-		addnode(f, &f->v6root[h], p);
-		while(p = f->queue) {
+		memmove(p->tag, r->tag, sizeof(r->tag));
+
+		addnode(f, h, p);
+		while((p = f->queue) != nil) {
 			f->queue = p->mid;
-			walkadd(f, &f->v6root[h], p->left);
+			walkadd(f, h, p->left);
 			freeroute(p);
 		}
-		wunlock(&routelock);
 	}
-	v6routegeneration++;
 
-	ipifcaddroute(f, 0, a, mask, gate, type);
+	if(r->type & Rv4)
+		v4routegeneration++;
+	else
+		v6routegeneration++;
 }
 
-Route**
-looknode(Route **cur, Route *r)
+static void
+routerem(Fs *f, Route *r)
 {
-	Route *p;
+	Route **h, **e, **l, *p;
 
-	for(;;){
-		p = *cur;
-		if(p == 0)
-			return 0;
-	
-		switch(rangecompare(r, p)){
-		case Rcontains:
-			return 0;
-		case Rpreceeds:
-			cur = &p->left;
-			break;
-		case Rfollows:
-			cur = &p->right;
-			break;
-		case Rcontained:
-			cur = &p->mid;
-			break;
-		case Requals:
-			return cur;
+	if(r->type & Rv4){
+		h = &f->v4root[V4H(r->v4.address)];
+		e = &f->v4root[V4H(r->v4.endaddress)];
+	} else {
+		h = &f->v6root[V6H(r->v6.address)];
+		e = &f->v6root[V6H(r->v6.endaddress)];
+	}
+
+	for(; h <= e; h++) {
+		if((l = looknode(h, r)) == nil)
+			continue;
+		p = *l;
+		if(--(p->ref) != 0)
+			continue;
+		*l = nil;
+		addqueue(&f->queue, p->left);
+		addqueue(&f->queue, p->mid);
+		addqueue(&f->queue, p->right);
+		freeroute(p);
+
+		while((p = f->queue) != nil) {
+			f->queue = p->mid;
+			walkadd(f, h, p->left);
+			freeroute(p);
 		}
 	}
+
+	if(r->type & Rv4)
+		v4routegeneration++;
+	else
+		v6routegeneration++;
 }
 
-void
-v4delroute(Fs *f, uchar *a, uchar *mask, int dolock)
+static Route
+mkroute(uchar *a, uchar *mask, uchar *s, uchar *smask, uchar *gate, int type, Ipifc *ifc, char *tag)
 {
-	Route **r, *p;
-	Route rt;
-	int h, eh;
-	ulong m;
+	ulong x, y;
+	Route r;
+	int h;
 
-	m = nhgetl(mask);
-	rt.v4.address = nhgetl(a) & m;
-	rt.v4.endaddress = rt.v4.address | ~m;
-	rt.type = Rv4;
+	memset(&r, 0, sizeof(r));
 
-	eh = V4H(rt.v4.endaddress);
-	for(h=V4H(rt.v4.address); h<=eh; h++) {
-		if(dolock)
-			wlock(&routelock);
-		r = looknode(&f->v4root[h], &rt);
-		if(r) {
-			p = *r;
-			if(--(p->ref) == 0){
-				*r = 0;
-				addqueue(&f->queue, p->left);
-				addqueue(&f->queue, p->mid);
-				addqueue(&f->queue, p->right);
-				freeroute(p);
-				while(p = f->queue) {
-					f->queue = p->mid;
-					walkadd(f, &f->v4root[h], p->left);
-					freeroute(p);
-				}
-			}
+	r.type = type;
+
+	if(type & Rv4){
+		x = nhgetl(a+IPv4off);
+		y = nhgetl(mask+IPv4off);
+		r.v4.address = x & y;
+		r.v4.endaddress = x | ~y;
+
+		x = nhgetl(s+IPv4off);
+		y = nhgetl(smask+IPv4off);
+		if(y != 0)
+			r.type |= Rsrc;
+		r.v4.source = x & y;
+		r.v4.endsource = x | ~y;
+
+		memmove(r.v4.gate, gate+IPv4off, IPv4addrlen);
+	} else {
+		for(h = 0; h < IPllen; h++){
+			x = nhgetl(a+4*h);
+			y = nhgetl(mask+4*h);
+			r.v6.address[h] = x & y;
+			r.v6.endaddress[h] = x | ~y;
+
+			x = nhgetl(s+4*h);
+			y = nhgetl(smask+4*h);
+			if(y != 0)
+				r.type |= Rsrc;
+			r.v6.source[h] = x & y;
+			r.v6.endsource[h] = x | ~y;
 		}
-		if(dolock)
-			wunlock(&routelock);
+
+		memmove(r.v6.gate, gate, IPaddrlen);
 	}
-	v4routegeneration++;
 
-	ipifcremroute(f, Rv4, a, mask);
+	if(ifc != nil){
+		r.ifc = ifc;
+		r.ifcid = ifc->ifcid;
+	}
+
+	if(tag != nil)
+		strncpy(r.tag, tag, sizeof(r.tag));
+
+	return r;
 }
 
 void
-v6delroute(Fs *f, uchar *a, uchar *mask, int dolock)
+addroute(Fs *f, uchar *a, uchar *mask, uchar *s, uchar *smask, uchar *gate, int type, Ipifc *ifc, char *tag)
 {
-	Route **r, *p;
-	Route rt;
-	int h, eh;
-	ulong x, y;
+	Route r = mkroute(a, mask, s, smask, gate, type, ifc, tag);
+	wlock(&routelock);
+	routeadd(f, &r);
+	wunlock(&routelock);
+}
 
-	for(h = 0; h < IPllen; h++){
-		x = nhgetl(a+4*h);
-		y = nhgetl(mask+4*h);
-		rt.v6.address[h] = x & y;
-		rt.v6.endaddress[h] = x | ~y;
+void
+remroute(Fs *f, uchar *a, uchar *mask, uchar *s, uchar *smask, uchar *gate, int type, Ipifc *ifc, char *tag)	/* remove the route described by the arguments from f's route trees */
+{
+	Route r = mkroute(a, mask, s, smask, gate, type, ifc, tag);	/* stack-local lookup key in internal form; routerem only searches with it */
+	wlock(&routelock);	/* routerem does no locking of its own */
+	routerem(f, &r);
+	wunlock(&routelock);
+}
+
+/* get the outgoing interface for route r */
+static Ipifc*
+routefindipifc(Route *r, Fs *f)
+{
+	uchar local[IPaddrlen], gate[IPaddrlen];
+	Ipifc *ifc;
+	int i;
+
+	ifc = r->ifc;
+	if(ifc != nil && ifc->ifcid == r->ifcid)
+		return ifc;
+
+	if(r->type & Rsrc) {
+		if(r->type & Rv4) {
+			hnputl(local+IPv4off, r->v4.source);
+			memmove(local, v4prefix, IPv4off);
+		} else {
+			for(i = 0; i < IPllen; i++)
+				hnputl(local+4*i, r->v6.source[i]);
+		}
+	} else {
+		ipmove(local, IPnoaddr);
 	}
-	rt.type = 0;
 
-	eh = V6H(rt.v6.endaddress);
-	for(h=V6H(rt.v6.address); h<=eh; h++) {
-		if(dolock)
-			wlock(&routelock);
-		r = looknode(&f->v6root[h], &rt);
-		if(r) {
-			p = *r;
-			if(--(p->ref) == 0){
-				*r = 0;
-				addqueue(&f->queue, p->left);
-				addqueue(&f->queue, p->mid);
-				addqueue(&f->queue, p->right);
-				freeroute(p);
-				while(p = f->queue) {
-					f->queue = p->mid;
-					walkadd(f, &f->v6root[h], p->left);
-					freeroute(p);
-				}
-			}
+	if(r->type & Rifc) {
+		if(r->type & Rv4) {
+			hnputl(gate+IPv4off, r->v4.address);
+			memmove(gate, v4prefix, IPv4off);
+		} else {
+			for(i = 0; i < IPllen; i++)
+				hnputl(gate+4*i, r->v6.address[i]);
 		}
-		if(dolock)
-			wunlock(&routelock);
+	} else {
+		if(r->type & Rv4)
+			v4tov6(gate, r->v4.gate);
+		else
+			ipmove(gate, r->v6.gate);
 	}
-	v6routegeneration++;
 
-	ipifcremroute(f, 0, a, mask);
+	if((ifc = findipifc(f, local, gate, r->type)) == nil)
+		return nil;
+
+	r->ifc = ifc;
+	r->ifcid = ifc->ifcid;
+	return ifc;
 }
 
+/*
+ * v4lookup, v6lookup:
+ *  lookup a route to destination address a from source address s
+ *  and return the route. returns nil if no route was found.
+ *  an optional Routehint can be passed in rh to cache the lookup.
+ *
+ *  for v4lookup, addresses are in 4 byte format.
+ */
 Route*
-v4lookup(Fs *f, uchar *a, Conv *c)
+v4lookup(Fs *f, uchar *a, uchar *s, Routehint *rh)
 {
+	ulong la, ls;
 	Route *p, *q;
-	ulong la;
-	uchar gate[IPaddrlen];
 	Ipifc *ifc;
 
-	if(c != nil && c->r != nil && c->r->ifc != nil && c->rgen == v4routegeneration)
-		return c->r;
+	if(rh != nil
+	&& rh->rgen == v4routegeneration
+	&& (q = rh->r) != nil
+	&& (ifc = q->ifc) != nil
+	&& q->ifcid == ifc->ifcid
+	&& q->ref > 0)
+		return q;
 
 	la = nhgetl(a);
+	ls = nhgetl(s);
 	q = nil;
-	for(p=f->v4root[V4H(la)]; p;)
-		if(la >= p->v4.address) {
-			if(la <= p->v4.endaddress) {
-				q = p;
-				p = p->mid;
-			} else
-				p = p->right;
-		} else
+	for(p = f->v4root[V4H(la)]; p != nil;){
+		if(la < p->v4.address){
 			p = p->left;
-
-	if(q && (q->ifc == nil || q->ifcid != q->ifc->ifcid)){
-		if(q->type & Rifc) {
-			hnputl(gate+IPv4off, q->v4.address);
-			memmove(gate, v4prefix, IPv4off);
-		} else
-			v4tov6(gate, q->v4.gate);
-		ifc = findipifc(f, gate, q->type);
-		if(ifc == nil)
-			return nil;
-		q->ifc = ifc;
-		q->ifcid = ifc->ifcid;
+			continue;
+		}
+		if(la > p->v4.endaddress){
+			p = p->right;
+			continue;
+		}
+		if(p->type & Rsrc){
+			if(ls < p->v4.source){
+				p = p->mid;
+				continue;
+			}
+			if(ls > p->v4.endsource){
+				p = p->mid;
+				continue;
+			}
+		}
+		q = p;
+		p = p->mid;
 	}
 
-	if(c != nil){
-		c->r = q;
-		c->rgen = v4routegeneration;
+	if(q == nil || q->ref == 0 || routefindipifc(q, f) == nil)
+		return nil;
+
+	if(rh != nil){
+		rh->r = q;
+		rh->rgen = v4routegeneration;
 	}
 
 	return q;
@@ -526,29 +643,35 @@
 }
 
 Route*
-v6lookup(Fs *f, uchar *a, Conv *c)
+v6lookup(Fs *f, uchar *a, uchar *s, Routehint *rh)
 {
-	Route *p, *q;
-	ulong la[IPllen];
-	int h;
+	ulong la[IPllen], ls[IPllen];
 	ulong x, y;
-	uchar gate[IPaddrlen];
+	Route *p, *q;
 	Ipifc *ifc;
+	int h;
 
-	if(memcmp(a, v4prefix, IPv4off) == 0){
-		q = v4lookup(f, a+IPv4off, c);
-		if(q != nil)
-			return q;
+	if(isv4(s)){
+		if(isv4(a))
+			return v4lookup(f, a+IPv4off, s+IPv4off, rh);
+		return nil;
 	}
 
-	if(c != nil && c->r != nil && c->r->ifc != nil && c->rgen == v6routegeneration)
-		return c->r;
+	if(rh != nil
+	&& rh->rgen == v6routegeneration
+	&& (q = rh->r) != nil
+	&& (ifc = q->ifc) != nil
+	&& q->ifcid == ifc->ifcid
+	&& q->ref > 0)
+		return q;
 
-	for(h = 0; h < IPllen; h++)
+	for(h = 0; h < IPllen; h++){
 		la[h] = nhgetl(a+4*h);
+		ls[h] = nhgetl(s+4*h);
+	}
 
-	q = 0;
-	for(p=f->v6root[V6H(la)]; p;){
+	q = nil;
+	for(p = f->v6root[V6H(la)]; p != nil;){
 		for(h = 0; h < IPllen; h++){
 			x = la[h];
 			y = p->v6.address[h];
@@ -571,42 +694,202 @@
 			}
 			break;
 		}
+		if(p->type & Rsrc){
+			for(h = 0; h < IPllen; h++){
+				x = ls[h];
+				y = p->v6.source[h];
+				if(x == y)
+					continue;
+				if(x < y){
+					p = p->mid;
+					goto next;
+				}
+				break;
+			}
+			for(h = 0; h < IPllen; h++){
+				x = ls[h];
+				y = p->v6.endsource[h];
+				if(x == y)
+					continue;
+				if(x > y){
+					p = p->mid;
+					goto next;
+				}
+				break;
+			}
+		}
 		q = p;
 		p = p->mid;
 next:		;
 	}
 
-	if(q && (q->ifc == nil || q->ifcid != q->ifc->ifcid)){
-		if(q->type & Rifc) {
-			for(h = 0; h < IPllen; h++)
-				hnputl(gate+4*h, q->v6.address[h]);
-			ifc = findipifc(f, gate, q->type);
-		} else
-			ifc = findipifc(f, q->v6.gate, q->type);
-		if(ifc == nil)
-			return nil;
-		q->ifc = ifc;
-		q->ifcid = ifc->ifcid;
+	if(q == nil || q->ref == 0 || routefindipifc(q, f) == nil)
+		return nil;
+
+	if(rh != nil){
+		rh->r = q;
+		rh->rgen = v6routegeneration;
 	}
-	if(c != nil){
-		c->r = q;
-		c->rgen = v6routegeneration;
-	}
 	
 	return q;
 }
 
+/*
+ * v4source, v6source:
+ *  look up a route to destination address a and also find
+ *  a suitable source address on the outgoing interface, stored in s.
+ *  returns the last candidate route accepted while walking the tree,
+ *  or nil when none was accepted; s is written only for accepted candidates.
+ *  the walk is done under rlock(&routelock).
+ *  for v4source, addresses are in 4 byte format.
+ */
+Route*
+v4source(Fs *f, uchar *a, uchar *s)
+{
+	uchar src[IPv4addrlen];
+	int splen;
+	ulong x, la;
+	Route *p, *q;
+	Ipifc *ifc;
+
+	q = nil;
+	la = nhgetl(a);
+	rlock(&routelock);
+	for(p = f->v4root[V4H(la)]; p != nil;){
+		if(la < p->v4.address){	/* a sorts before this node's range */
+			p = p->left;
+			continue;
+		}
+		if(la > p->v4.endaddress){	/* a sorts after this node's range */
+			p = p->right;
+			continue;
+		}
+		splen = 0;
+		if(p->type & Rsrc){
+			/* calculate local prefix length for source specific routes */
+			for(x = ~(p->v4.endsource ^ p->v4.source); x & 0x80000000UL; x <<= 1)	/* x starts out as the source mask */
+				splen++;	/* one per leading 1 bit of the mask */
+			hnputl(src, p->v4.source);
+		}
+		if((ifc = routefindipifc(p, f)) == nil
+		|| !ipv4local(ifc, src, splen, (p->type & (Rifc|Rbcast|Rmulti|Rv4))==Rv4? p->v4.gate: a)){	/* passes the gate only for v4 routes without Rifc/Rbcast/Rmulti, else a */
+			p = p->mid;	/* candidate rejected: keep looking under mid */
+			continue;
+		}
+		memmove(s, src, IPv4addrlen);	/* presumably ipv4local filled in src with the chosen local address - confirm */
+		q = p;
+		p = p->mid;
+	}
+	runlock(&routelock);
+	return q;
+}
+
+Route*
+v6source(Fs *f, uchar *a, uchar *s)	/* find a route to 16-byte destination a and store a matching local source address in s; nil if none */
+{
+	uchar src[IPaddrlen];
+	int splen, h;
+	ulong x, y, la[IPllen];
+	Route *p, *q;
+	Ipifc *ifc;
+
+	q = nil;
+	for(h = 0; h < IPllen; h++)
+		la[h] = nhgetl(a+4*h);	/* split the destination into IPllen words for comparison */
+	rlock(&routelock);
+	for(p = f->v6root[V6H(la)]; p != nil;){
+		for(h = 0; h < IPllen; h++){	/* word-wise compare of a against the start of the node's range */
+			x = la[h];
+			y = p->v6.address[h];
+			if(x == y)
+				continue;
+			if(x < y){
+				p = p->left;
+				goto next;
+			}
+			break;
+		}
+		for(h = 0; h < IPllen; h++){	/* word-wise compare of a against the end of the node's range */
+			x = la[h];
+			y = p->v6.endaddress[h];
+			if(x == y)
+				continue;
+			if(x > y){
+				p = p->right;
+				goto next;
+			}
+			break;
+		}
+		splen = 0;
+		if(p->type & Rsrc){
+			/* calculate local prefix length for source specific routes */
+			for(h = 0; h < IPllen; h++){
+				hnputl(src+4*h, p->v6.source[h]);
+				if((x = ~(p->v6.endsource[h] ^ p->v6.source[h])) != ~0UL){	/* first mask word that is not all ones */
+					for(; x & 0x80000000UL; x <<= 1)
+						splen++;
+					break;	/* NOTE(review): later words of src stay unwritten; presumably ipv6local only examines splen bits - confirm */
+				}
+				splen += 32;
+			}
+		}
+		if((ifc = routefindipifc(p, f)) == nil
+		|| !ipv6local(ifc, src, splen, a)){
+			p = p->mid;	/* candidate rejected: keep looking under mid */
+			continue;
+		}
+		ipmove(s, src);	/* s is only written for an accepted candidate */
+		q = p;
+		p = p->mid;
+next:		;
+	}
+	runlock(&routelock);
+	return q;
+}
+
+static int
+parseroutetype(char *p)	/* parse a type string: '4' or '6', then flag letters from "iubmp"; returns the Route type bits, or -1 if malformed */
+{
+	int type = 0;
+	switch(*p++){
+	default:	return -1;	
+	case '4':	type |= Rv4;	/* fall through */
+	case '6':	break;
+	}
+	for(;;) switch(*p++){
+	default: 
+		return -1;
+	case 'i':
+		if(((type ^= Rifc) & Rifc) != Rifc) return -1;	/* the xor clears an already-set bit, so a repeated letter is rejected */
+		break;
+	case 'u':
+		if(((type ^= Runi) & (Runi|Rbcast|Rmulti)) != Runi) return -1;	/* also rejects 'u' combined with 'b' or 'm' */
+		break;
+	case 'b':
+		if(((type ^= Rbcast) & (Runi|Rbcast|Rmulti)) != Rbcast) return -1;	/* also rejects 'b' combined with 'u' or 'm' */
+		break;
+	case 'm':
+		if(((type ^= Rmulti) & (Runi|Rbcast|Rmulti)) != Rmulti) return -1;	/* also rejects 'm' combined with 'u' or 'b' */
+		break;
+	case 'p':
+		if(((type ^= Rptpt) & Rptpt) != Rptpt) return -1;	/* repeated 'p' is rejected the same way */
+		break;
+	case '\0':
+		return type;	/* end of string: every letter was accepted */
+	}
+}
+
 void
-routetype(int type, char *p)
+routetype(int type, char p[8])
 {
-	memset(p, ' ', 4);
-	p[4] = 0;
 	if(type & Rv4)
 		*p++ = '4';
 	else
 		*p++ = '6';
+
 	if(type & Rifc)
 		*p++ = 'i';
+
 	if(type & Runi)
 		*p++ = 'u';
 	else if(type & Rbcast)
@@ -613,14 +896,14 @@
 		*p++ = 'b';
 	else if(type & Rmulti)
 		*p++ = 'm';
+
 	if(type & Rptpt)
-		*p = 'p';
+		*p++ = 'p';
+	*p = 0;
 }
 
-char *rformat = "%-15I %-4M %-15I %4.4s %4.4s %3s\n";
-
-void
-convroute(Route *r, uchar *addr, uchar *mask, uchar *gate, char *t, int *nifc)
+static void
+convroute(Route *r, uchar *addr, uchar *mask, uchar *src, uchar *smask, uchar *gate)
 {
 	int i;
 
@@ -627,8 +910,16 @@
 	if(r->type & Rv4){
 		memmove(addr, v4prefix, IPv4off);
 		hnputl(addr+IPv4off, r->v4.address);
+
 		memset(mask, 0xff, IPv4off);
 		hnputl(mask+IPv4off, ~(r->v4.endaddress ^ r->v4.address));
+
+		memmove(src, v4prefix, IPv4off);
+		hnputl(src+IPv4off, r->v4.source);
+
+		memset(smask, 0xff, IPv4off);
+		hnputl(smask+IPv4off, ~(r->v4.endsource ^ r->v4.source));
+
 		memmove(gate, v4prefix, IPv4off);
 		memmove(gate+IPv4off, r->v4.gate, IPv4addrlen);
 	} else {
@@ -635,162 +926,186 @@
 		for(i = 0; i < IPllen; i++){
 			hnputl(addr + 4*i, r->v6.address[i]);
 			hnputl(mask + 4*i, ~(r->v6.endaddress[i] ^ r->v6.address[i]));
+			hnputl(src + 4*i, r->v6.source[i]);
+			hnputl(smask + 4*i, ~(r->v6.endsource[i] ^ r->v6.source[i]));
 		}
 		memmove(gate, r->v6.gate, IPaddrlen);
 	}
+}
 
-	routetype(r->type, t);
+static char*
+seprintroute(char *p, char *e, Route *r)
+{
+	uchar addr[IPaddrlen], mask[IPaddrlen], src[IPaddrlen], smask[IPaddrlen], gate[IPaddrlen];
+	char type[8], ifbuf[4], *iname;
 
-	if(r->ifc)
-		*nifc = r->ifc->conv->x;
+	convroute(r, addr, mask, src, smask, gate);
+	routetype(r->type, type);
+	if(r->ifc != nil && r->ifcid == r->ifc->ifcid)
+		snprint(iname = ifbuf, sizeof ifbuf, "%d", r->ifc->conv->x);
 	else
-		*nifc = -1;
+		iname = "-";
+	return seprint(p, e, "%-15I %-4M %-15I %-4s %4.4s %3s %-15I %-4M\n",
+		addr, mask, gate, type, r->tag, iname, src, smask);
 }
 
-/*
- *  this code is not in rr to reduce stack size
- */
-static void
-sprintroute(Route *r, Routewalk *rw)
+typedef struct Routewalk Routewalk;
+struct Routewalk
 {
-	int nifc, n;
-	char t[5], *iname, ifbuf[5];
-	uchar addr[IPaddrlen], mask[IPaddrlen], gate[IPaddrlen];
-	char *p;
+	int	o;
+	int	h;
+	char*	p;
+	char*	e;
+};
 
-	convroute(r, addr, mask, gate, t, &nifc);
-	iname = "-";
-	if(nifc != -1) {
-		iname = ifbuf;
-		snprint(ifbuf, sizeof ifbuf, "%d", nifc);
-	}
-	p = seprint(rw->p, rw->e, rformat, addr, mask, gate, t, r->tag, iname);
+static int
+rr1(Routewalk *rw, Route *r)
+{
+	int n = seprintroute(rw->p, rw->e, r) - rw->p;
 	if(rw->o < 0){
-		n = p - rw->p;
 		if(n > -rw->o){
-			memmove(rw->p, rw->p-rw->o, n+rw->o);
-			rw->p = p + rw->o;
+			memmove(rw->p, rw->p - rw->o, n + rw->o);
+			rw->p += n + rw->o;
 		}
 		rw->o += n;
 	} else
-		rw->p = p;
+		rw->p += n;
+	return rw->p < rw->e;
 }
 
-/*
- *  recurse descending tree, applying the function in Routewalk
- */
 static int
 rr(Route *r, Routewalk *rw)
 {
 	int h;
 
-	if(rw->e <= rw->p)
-		return 0;
 	if(r == nil)
 		return 1;
-
 	if(rr(r->left, rw) == 0)
 		return 0;
-
 	if(r->type & Rv4)
 		h = V4H(r->v4.address);
 	else
 		h = V6H(r->v6.address);
-
-	if(h == rw->h)
-		rw->walk(r, rw);
-
+	if(h == rw->h){
+		if(rr1(rw, r) == 0)
+			return 0;
+	}
 	if(rr(r->mid, rw) == 0)
 		return 0;
-
 	return rr(r->right, rw);
 }
 
-void
-ipwalkroutes(Fs *f, Routewalk *rw)
+long
+routeread(Fs *f, char *p, ulong offset, int n)
 {
+	Routewalk rw[1];
+
+	rw->p = p;
+	rw->e = p+n;
+	rw->o = -offset;
+	if(rw->o > 0)
+		return 0;
+
 	rlock(&routelock);
-	if(rw->e > rw->p) {
+	if(rw->p < rw->e) {
 		for(rw->h = 0; rw->h < nelem(f->v4root); rw->h++)
 			if(rr(f->v4root[rw->h], rw) == 0)
 				break;
 	}
-	if(rw->e > rw->p) {
+	if(rw->p < rw->e) {
 		for(rw->h = 0; rw->h < nelem(f->v6root); rw->h++)
 			if(rr(f->v6root[rw->h], rw) == 0)
 				break;
 	}
 	runlock(&routelock);
-}
 
-long
-routeread(Fs *f, char *p, ulong offset, int n)
-{
-	Routewalk rw;
-
-	rw.p = p;
-	rw.e = p+n;
-	rw.o = -offset;
-	rw.walk = sprintroute;
-
-	ipwalkroutes(f, &rw);
-
-	return rw.p - p;
+	return rw->p - p;
 }
 
 /*
- *  this code is not in routeflush to reduce stack size
+ *	4	add	addr	mask	gate
+ *	5	add	addr	mask	gate			ifc
+ *	6	add	addr	mask	gate				src	smask
+ *	7	add	addr	mask	gate			ifc	src	smask
+ *	8	add	addr	mask	gate		tag	ifc	src	smask
+ *	9	add	addr	mask	gate	type	tag	ifc	src	smask
+ *	3	remove	addr	mask
+ *	4	remove	addr	mask	gate
+ *	5	remove	addr	mask					src	smask
+ *	6	remove	addr	mask	gate				src	smask
+ *	7	remove	addr	mask	gate			ifc	src	smask
+ *	8	remove	addr	mask	gate		tag	ifc	src	smask
+ *	9	remove	addr	mask	gate	type	tag	ifc	src	smask
  */
-void
-delroute(Fs *f, Route *r, int dolock)
+static Route
+parseroute(Fs *f, char **argv, int argc)
 {
-	uchar addr[IPaddrlen];
-	uchar mask[IPaddrlen];
+	uchar addr[IPaddrlen], mask[IPaddrlen];
+	uchar src[IPaddrlen], smask[IPaddrlen];
 	uchar gate[IPaddrlen];
-	char t[5];
-	int nifc;
+	Ipifc *ifc;
+	char *tag;
+	int type;
 
-	convroute(r, addr, mask, gate, t, &nifc);
-	if(r->type & Rv4)
-		v4delroute(f, addr+IPv4off, mask+IPv4off, dolock);
-	else
-		v6delroute(f, addr, mask, dolock);
-}
+	type = 0;
+	tag = nil;
+	ifc = nil;
+	ipmove(gate, IPnoaddr);
+	ipmove(src, IPnoaddr);
+	ipmove(smask, IPnoaddr);
 
-/*
- *  recurse until one route is deleted
- *    returns 0 if nothing is deleted, 1 otherwise
- */
-int
-routeflush(Fs *f, Route *r, char *tag)
-{
-	if(r == nil)
-		return 0;
-	if(routeflush(f, r->mid, tag))
-		return 1;
-	if(routeflush(f, r->left, tag))
-		return 1;
-	if(routeflush(f, r->right, tag))
-		return 1;
-	if((r->type & Rifc) == 0){
-		if(tag == nil || strncmp(tag, r->tag, sizeof(r->tag)) == 0){
-			delroute(f, r, 0);
-			return 1;
-		}
+	if(argc < 3)
+		error(Ebadctl);
+	if(parseipandmask(addr, mask, argv[1], argv[2]) == -1)
+		error(Ebadip);
+
+	if(strcmp(argv[0], "add") == 0 || (argc > 3 && argc != 5)){
+		if(argc < 4)
+			error(Ebadctl);
+		if(parseip(gate, argv[3]) == -1)
+			error(Ebadip);
 	}
-	return 0;
+	if(argc > 4 && (strcmp(argv[0], "add") != 0 || argc != 5)){
+		if(parseipandmask(src, smask, argv[argc-2], argv[argc-1]) == -1)
+			error(Ebadip);
+	}
+	if(argc == 5 && strcmp(argv[0], "add") == 0)
+		ifc = findipifcstr(f, argv[4]);
+	if(argc > 6)
+		ifc = findipifcstr(f, argv[argc-3]);
+	if(argc > 7)
+		tag = argv[argc-4];
+	if(argc > 8){
+		if((type = parseroutetype(argv[argc-5])) < 0)
+			error(Ebadctl);
+	} else {
+		if(isv4(addr))
+			type |= Rv4;
+	}
+	if(argc > 9)
+		error(Ebadctl);
+
+	if(type & Rv4){
+		if(!isv4(addr))
+			error(Ebadip);
+		if(ipcmp(smask, IPnoaddr) != 0 && !isv4(src))
+			error(Ebadip);
+		if(ipcmp(gate, IPnoaddr) != 0 && !isv4(gate))
+			error(Ebadip);
+	} else {
+		if(isv4(addr))
+			error(Ebadip);
+	}
+
+	return mkroute(addr, mask, src, smask, gate, type, ifc, tag);	
 }
 
 long
 routewrite(Fs *f, Chan *c, char *p, int n)
 {
-	int h, changed;
-	char *tag;
 	Cmdbuf *cb;
-	uchar addr[IPaddrlen];
-	uchar mask[IPaddrlen];
-	uchar gate[IPaddrlen];
-	IPaux *a, *na;
+	IPaux *a;
+	Route *x, r;
 
 	cb = parsecmd(p, n);
 	if(waserror()){
@@ -797,54 +1112,44 @@
 		free(cb);
 		nexterror();
 	}
-
+	if(cb->nf < 1)
+		error("short control request");
 	if(strcmp(cb->f[0], "flush") == 0){
-		tag = cb->f[1];
+		char *tag = cb->nf < 2 ? nil : cb->f[1];
+		int h;
+
+		wlock(&routelock);
 		for(h = 0; h < nelem(f->v4root); h++)
-			for(changed = 1; changed;){
-				wlock(&routelock);
-				changed = routeflush(f, f->v4root[h], tag);
-				wunlock(&routelock);
+			while((x = looknodetag(f->v4root[h], tag)) != nil){
+				memmove(&r, x, sizeof(RouteTree) + sizeof(V4route));
+				routerem(f, &r);
 			}
 		for(h = 0; h < nelem(f->v6root); h++)
-			for(changed = 1; changed;){
-				wlock(&routelock);
-				changed = routeflush(f, f->v6root[h], tag);
-				wunlock(&routelock);
+			while((x = looknodetag(f->v6root[h], tag)) != nil){
+				memmove(&r, x, sizeof(RouteTree) + sizeof(V6route));
+				routerem(f, &r);
 			}
-	} else if(strcmp(cb->f[0], "remove") == 0){
-		if(cb->nf < 3)
-			error(Ebadarg);
-		parseip(addr, cb->f[1]);
-		parseipmask(mask, cb->f[2]);
-		if(memcmp(addr, v4prefix, IPv4off) == 0)
-			v4delroute(f, addr+IPv4off, mask+IPv4off, 1);
-		else
-			v6delroute(f, addr, mask, 1);
-	} else if(strcmp(cb->f[0], "add") == 0){
-		if(cb->nf < 4)
-			error(Ebadarg);
-		parseip(addr, cb->f[1]);
-		parseipmask(mask, cb->f[2]);
-		parseip(gate, cb->f[3]);
-		tag = "none";
-		if(c != nil){
+		wunlock(&routelock);
+	} else if(strcmp(cb->f[0], "add") == 0 || strcmp(cb->f[0], "remove") == 0){
+		r = parseroute(f, cb->f, cb->nf);
+		if(*r.tag == 0){
 			a = c->aux;
-			tag = a->tag;
+			strncpy(r.tag, a->tag, sizeof(r.tag));
 		}
-		if(memcmp(addr, v4prefix, IPv4off) == 0)
-			v4addroute(f, tag, addr+IPv4off, mask+IPv4off, gate+IPv4off, 0);
+		wlock(&routelock);
+		if(strcmp(cb->f[0], "add") == 0)
+			routeadd(f, &r);
 		else
-			v6addroute(f, tag, addr, mask, gate, 0);
+			routerem(f, &r);
+		wunlock(&routelock);
 	} else if(strcmp(cb->f[0], "tag") == 0) {
 		if(cb->nf < 2)
 			error(Ebadarg);
-
 		a = c->aux;
-		na = newipaux(a->owner, cb->f[1]);
-		c->aux = na;
+		c->aux = newipaux(a->owner, cb->f[1]);
 		free(a);
-	}
+	} else
+		error(Ebadctl);
 
 	poperror();
 	free(cb);
diff -u a/os/ip//ipv6.c b/os/ip//ipv6.c
--- a/os/ip//ipv6.c
+++ b/os/ip//ipv6.c
@@ -8,250 +8,127 @@
 #include	"ip.h"
 #include	"ipv6.h"
 
-enum
-{
-	IP4HDR		= 20,		/* sizeof(Ip4hdr) */
-	IP6HDR		= 40,		/* sizeof(Ip6hdr) */
-	IP_HLEN4	= 0x05,		/* Header length in words */
-	IP_DF		= 0x4000,	/* Don't fragment */
-	IP_MF		= 0x2000,	/* More fragments */
-	IP6FHDR		= 8, 		/* sizeof(Fraghdr6) */
-	IP_MAX		= (32*1024),	/* Maximum Internet packet size */
-};
-
-#define IPV6CLASS(hdr) ((hdr->vcf[0]&0x0F)<<2 | (hdr->vcf[1]&0xF0)>>2)
-#define BLKIPVER(xp)	(((Ip6hdr*)((xp)->rp))->vcf[0]&0xF0)
-/*
- * This sleazy macro is stolen shamelessly from ip.c, see comment there.
- */
-#define BKFG(xp)	((Ipfrag*)((xp)->base))
-
-typedef struct	IP	IP;
-typedef struct	Fragment4	Fragment4;
-typedef struct	Fragment6	Fragment6;
-typedef struct	Ipfrag	Ipfrag;
-
-Block*		ip6reassemble(IP*, int, Block*, Ip6hdr*);
-void		ipfragfree6(IP*, Fragment6*);
-Fragment6*	ipfragallo6(IP*);
+static Block*		ip6reassemble(IP*, int, Block*);
+static Fragment6*	ipfragallo6(IP*);
+static void		ipfragfree6(IP*, Fragment6*);
+static Block*		procopts(Block *bp);
 static Block*		procxtns(IP *ip, Block *bp, int doreasm);
-int		unfraglen(Block *bp, uchar *nexthdr, int setfh);
-Block*		procopts(Block *bp);
+static int		unfraglen(Block *bp, uchar *nexthdr, int setfh, int popfh);
 
-/* MIB II counters */
-enum
+void
+ip_init_6(Fs *f)
 {
-	Forwarding,
-	DefaultTTL,
-	InReceives,
-	InHdrErrors,
-	InAddrErrors,
-	ForwDatagrams,
-	InUnknownProtos,
-	InDiscards,
-	InDelivers,
-	OutRequests,
-	OutDiscards,
-	OutNoRoutes,
-	ReasmTimeout,
-	ReasmReqds,
-	ReasmOKs,
-	ReasmFails,
-	FragOKs,
-	FragFails,
-	FragCreates,
+	v6params *v6p;
 
-	Nstats,
-};
+	v6p = smalloc(sizeof(v6params));
 
-static char *statnames[] =
-{
-[Forwarding]	"Forwarding",
-[DefaultTTL]	"DefaultTTL",
-[InReceives]	"InReceives",
-[InHdrErrors]	"InHdrErrors",
-[InAddrErrors]	"InAddrErrors",
-[ForwDatagrams]	"ForwDatagrams",
-[InUnknownProtos]	"InUnknownProtos",
-[InDiscards]	"InDiscards",
-[InDelivers]	"InDelivers",
-[OutRequests]	"OutRequests",
-[OutDiscards]	"OutDiscards",
-[OutNoRoutes]	"OutNoRoutes",
-[ReasmTimeout]	"ReasmTimeout",
-[ReasmReqds]	"ReasmReqds",
-[ReasmOKs]	"ReasmOKs",
-[ReasmFails]	"ReasmFails",
-[FragOKs]	"FragOKs",
-[FragFails]	"FragFails",
-[FragCreates]	"FragCreates",
-};
+	v6p->rp.mflag		= 0;		/* default not managed */
+	v6p->rp.oflag		= 0;
+	v6p->rp.maxraint	= 600000;	/* millisecs */
+	v6p->rp.minraint	= 200000;
+	v6p->rp.linkmtu		= 0;		/* no mtu sent */
+	v6p->rp.reachtime	= 0;
+	v6p->rp.rxmitra		= 0;
+	v6p->rp.ttl		= MAXTTL;
+	v6p->rp.routerlt	= (3 * v6p->rp.maxraint) / 1000;
 
-struct Fragment4
-{
-	Block*	blist;
-	Fragment4*	next;
-	ulong 	src;
-	ulong 	dst;
-	ushort	id;
-	ulong 	age;
-};
+	v6p->hp.rxmithost	= 1000;		/* v6 RETRANS_TIMER */
 
-struct Fragment6
-{
-	Block*	blist;
-	Fragment6*	next;
-	uchar 	src[IPaddrlen];
-	uchar 	dst[IPaddrlen];
-	uint	id;
-	ulong 	age;
-};
+	f->v6p			= v6p;
+}
 
-struct Ipfrag
-{
-	ushort	foff;
-	ushort	flen;
-};
-
-/* an instance of IP */
-struct IP
-{
-	ulong		stats[Nstats];
-
-	QLock		fraglock4;
-	Fragment4*	flisthead4;
-	Fragment4*	fragfree4;
-	Ref		id4;
-
-	QLock		fraglock6;
-	Fragment6*	flisthead6;
-	Fragment6*	fragfree6;
-	Ref		id6;
-
-	int		iprouting;	/* true if we route like a gateway */
-};
-
 int
-ipoput6(Fs *f, Block *bp, int gating, int ttl, int tos, Conv *c)
+ipoput6(Fs *f, Block *bp, int gating, int ttl, int tos, Routehint *rh)
 {
-	int tentative;
-	Ipifc *ifc;
+	int medialen, len, chunk, uflen, flen, seglen, lid, offset, fragoff;
+	int morefrags, blklen, rv = 0;
 	uchar *gate, nexthdr;
-	Ip6hdr *eh;
-	int medialen, len, chunk, uflen, flen, seglen, lid, offset, fragoff, morefrags, blklen;
-	Route *r, *sr;
-	Fraghdr6 fraghdr;
 	Block *xp, *nb;
+	Fraghdr6 fraghdr;
 	IP *ip;
-	int rv = 0;
+	Ip6hdr *eh;
+	Ipifc *ifc;
+	Route *r;
 
 	ip = f->ip;
-
-	/* Fill out the ip header */
-	eh = (Ip6hdr*)(bp->rp);
-
 	ip->stats[OutRequests]++;
 
-	/* Number of uchars in data and ip header to write */
+	/* Fill out the ip header */
+	eh = (Ip6hdr*)bp->rp;
+	assert(BLEN(bp) >= IP6HDR);
 	len = blocklen(bp);
-	
-	tentative = iptentative(f, eh->src);
-	if(tentative){
-		netlog(f, Logip, "reject tx of packet with tentative src address\n");
-		goto free;
-	}
-
-	if(gating){
-		chunk = nhgets(eh->ploadlen);
-		if(chunk > len){
-			ip->stats[OutDiscards]++;
-			netlog(f, Logip, "short gated packet\n");
-			goto free;
-		}
-		if(chunk + IPV6HDR_LEN < len)
-			len = chunk + IPV6HDR_LEN;
-	}
-
 	if(len >= IP_MAX){
-//		print("len > IP_MAX, free\n");
 		ip->stats[OutDiscards]++;
-		netlog(f, Logip, "exceeded ip max size %I\n", eh->dst);
+		netlog(f, Logip, "%I -> %I: exceeded ip max size: %d\n", eh->src, eh->dst, len);
 		goto free;
 	}
 
-	r = v6lookup(f, eh->dst, c);
-	if(r == nil){
-//		print("no route for %I, src %I free\n", eh->dst, eh->src);
+	r = v6lookup(f, eh->dst, eh->src, rh);
+	if(r == nil || (r->type & Rv4) != 0 || (ifc = r->ifc) == nil){
 		ip->stats[OutNoRoutes]++;
-		netlog(f, Logip, "no interface %I\n", eh->dst);
+		netlog(f, Logip, "%I -> %I: no interface\n", eh->src, eh->dst);
 		rv = -1;
 		goto free;
 	}
 
-	ifc = r->ifc;
-	if(r->type & (Rifc|Runi))
+	if(r->type & (Rifc|Runi|Rbcast|Rmulti))
 		gate = eh->dst;
 	else
-	if(r->type & (Rbcast|Rmulti)) {
-		gate = eh->dst;
-		sr = v6lookup(f, eh->src, nil);
-		if(sr != nil && (sr->type & Runi))
-			ifc = sr->ifc;
-	}
-	else
 		gate = r->v6.gate;
 
-	if(!gating)
-		eh->vcf[0] = IP_VER6;
-	eh->ttl = ttl;
-	if(!gating) {
-		eh->vcf[0] |= (tos >> 4);
-		eh->vcf[1] = (tos << 4);
-	}
-
-	if(!canrlock(ifc)) {
+	if(!canrlock(ifc)){
+		ip->stats[OutDiscards]++;
 		goto free;
 	}
-
 	if(waserror()){
 		runlock(ifc);
 		nexterror();
 	}
 
-	if(ifc->m == nil) {
+	if(ifc->m == nil)
 		goto raise;
+
+	if(!gating){
+		eh->vcf[0] = IP_VER6;
+		eh->vcf[0] |= tos >> 4;
+		eh->vcf[1]  = tos << 4;
 	}
+	eh->ttl = ttl;
 
 	/* If we dont need to fragment just send it */
 	medialen = ifc->maxtu - ifc->m->hsize;
 	if(len <= medialen) {
-		hnputs(eh->ploadlen, len-IPV6HDR_LEN);
-		ifc->m->bwrite(ifc, bp, V6, gate);
+		hnputs(eh->ploadlen, len - IP6HDR);
+		ipifcoput(ifc, bp, V6, gate);
 		runlock(ifc);
 		poperror();
 		return 0;
 	}
 
-	if(gating) 
-	if(ifc->reassemble <= 0) {
-
-		/* v6 intermediate nodes are not supposed to fragment pkts;
-		   we fragment if ifc->reassemble is turned on; an exception
-		   needed for nat.
+	if(gating && !ifc->reassemble) {
+		/*
+		 * v6 intermediate nodes are not supposed to fragment pkts;
+		 * we fragment if ifc->reassemble is turned on; an exception
+		 * needed for nat.
 		 */
-
 		ip->stats[OutDiscards]++;
 		icmppkttoobig6(f, ifc, bp);
-		netlog(f, Logip, "%I: gated pkts not fragmented\n", eh->dst);
+		netlog(f, Logip, "%I -> %I: gated pkts not fragmented\n", eh->src, eh->dst);
 		goto raise;
 	}
-		
+
 	/* start v6 fragmentation */
-	uflen = unfraglen(bp, &nexthdr, 1);
+	uflen = unfraglen(bp, &nexthdr, 1, 0);
+	if(uflen < IP6HDR || nexthdr == FH) {
+		ip->stats[FragFails]++;
+		ip->stats[OutDiscards]++;
+		netlog(f, Logip, "%I -> %I: fragment header botch\n", eh->src, eh->dst);
+		goto raise;
+	}
 	if(uflen > medialen) {
 		ip->stats[FragFails]++;
 		ip->stats[OutDiscards]++;
-		netlog(f, Logip, "%I: unfragmentable part too big\n", eh->dst);
+		netlog(f, Logip, "%I -> %I: unfragmentable part too big: %d\n", eh->src, eh->dst, uflen);
 		goto raise;
 	}
 
@@ -260,7 +137,7 @@
 	if(seglen < 8) {
 		ip->stats[FragFails]++;
 		ip->stats[OutDiscards]++;
-		netlog(f, Logip, "%I: seglen < 8\n", eh->dst);
+		netlog(f, Logip, "%I -> %I: seglen < 8\n", eh->src, eh->dst);
 		goto raise;
 	}
 
@@ -271,13 +148,13 @@
 
 	xp = bp;
 	offset = uflen;
-	while (xp != nil && offset && offset >= BLEN(xp)) {
+	while (offset && offset >= BLEN(xp)) {
 		offset -= BLEN(xp);
 		xp = xp->next;
 	}
 	xp->rp += offset;
 
-	fragoff = 0; 
+	fragoff = 0;
 	morefrags = 1;
 
 	for(; fragoff < flen; fragoff += seglen) {
@@ -292,7 +169,7 @@
 		memmove(nb->wp, eh, uflen);
 		nb->wp += uflen;
 
-		hnputs(fraghdr.offsetRM, fragoff); // last 3 bits must be 0
+		hnputs(fraghdr.offsetRM, fragoff); /* last 3 bits must be 0 */
 		fraghdr.offsetRM[1] |= morefrags;
 		memmove(nb->wp, &fraghdr, IP6FHDR);
 		nb->wp += IP6FHDR;
@@ -300,11 +177,11 @@
 		/* Copy data */
 		chunk = seglen;
 		while (chunk) {
-			if(!xp) {
+			if(xp == nil) {
 				ip->stats[OutDiscards]++;
 				ip->stats[FragFails]++;
 				freeblist(nb);
-				netlog(f, Logip, "!xp: chunk in v6%d\n", chunk);
+				netlog(f, Logip, "xp == nil: chunk in v6%d\n", chunk);
 				goto raise;
 			}
 			blklen = chunk;
@@ -316,10 +193,9 @@
 			xp->rp += blklen;
 			chunk -= blklen;
 			if(xp->rp == xp->wp)
-				xp = xp->next; 
+				xp = xp->next;
 		}
-
-		ifc->m->bwrite(ifc, nb, V6, gate);
+		ipifcoput(ifc, nb, V6, gate);
 		ip->stats[FragCreates]++;
 	}
 	ip->stats[FragOKs]++;
@@ -328,7 +204,7 @@
 	runlock(ifc);
 	poperror();
 free:
-	freeblist(bp);	
+	freeblist(bp);
 	return rv;
 }
 
@@ -335,16 +211,10 @@
 void
 ipiput6(Fs *f, Ipifc *ifc, Block *bp)
 {
-	int hl;
-	int hop, tos;
-	uchar proto;
+	int hl, len, hop, tos;
+	IP *ip;
 	Ip6hdr *h;
 	Proto *p;
-	int notforme;
-	int tentative;
-	uchar v6dst[IPaddrlen];
-	IP *ip;
-	Route *r, *sr;
 
 	ip = f->ip;
 	ip->stats[InReceives]++;
@@ -365,40 +235,44 @@
 			return;
 	}
 
-	h = (Ip6hdr *)(bp->rp);
-
-	memmove(&v6dst[0], &(h->dst)[0], IPaddrlen);
-	notforme = ipforme(f, v6dst) == 0;
-	tentative = iptentative(f, v6dst);
-  
-	if(tentative && (h->proto != ICMPv6)) {
-		print("tentative addr, drop\n");
-		freeblist(bp);
-		return;
-	}
-
 	/* Check header version */
-	if(BLKIPVER(bp) != IP_VER6) {
+	h = (Ip6hdr*)bp->rp;
+	if((h->vcf[0] & 0xF0) != IP_VER6) {
 		ip->stats[InHdrErrors]++;
 		netlog(f, Logip, "ip: bad version %ux\n", (h->vcf[0]&0xF0)>>2);
-		freeblist(bp);
+		goto drop;
+	}
+	len = IP6HDR + nhgets(h->ploadlen);
+	if((bp = trimblock(bp, 0, len)) == nil){
+		ip->stats[InHdrErrors]++;
+		netlog(f, Logip, "%I -> %I: bogus packet length: %d\n", h->src, h->dst, len);
 		return;
 	}
+	h = (Ip6hdr*)bp->rp;
 
 	/* route */
-	if(notforme) {
-		if(!ip->iprouting){
-			freeb(bp);
-			return;
+	if(!ipforme(f, h->dst)) {
+		Route *r;
+		Routehint rh;
+		Ipifc *nifc;
+
+		if(!ip->iprouting)
+			goto drop;
+
+		/* don't forward to link-local destinations */
+		if(islinklocal(h->dst) ||
+		   (isv6mcast(h->dst) && (h->dst[1]&0xF) <= Link_local_scop)){
+			ip->stats[OutDiscards]++;
+			goto drop;
 		}
+			
 		/* don't forward to source's network */
-		sr = v6lookup(f, h->src, nil);
-		r = v6lookup(f, h->dst, nil);
-
-		if(r == nil || sr == r){
+		rh.r = nil;
+		r  = v6lookup(f, h->dst, h->src, &rh);
+		if(r == nil || (nifc = r->ifc) == nil || (r->type & Rv4) != 0
+		|| (nifc == ifc && !ifc->reflect)){
 			ip->stats[OutDiscards]++;
-			freeblist(bp);
-			return;
+			goto drop;
 		}
 
 		/* don't forward if packet has timed out */
@@ -406,33 +280,29 @@
 		if(hop < 1) {
 			ip->stats[InHdrErrors]++;
 			icmpttlexceeded6(f, ifc, bp);
-			freeblist(bp);
-			return;
+			goto drop;
 		}
 
 		/* process headers & reassemble if the interface expects it */
-		bp = procxtns(ip, bp, r->ifc->reassemble);
-
+		bp = procxtns(ip, bp, nifc->reassemble);
 		if(bp == nil)
 			return;
 
 		ip->stats[ForwDatagrams]++;
-		h = (Ip6hdr *) (bp->rp);
-		tos = IPV6CLASS(h);
+		h = (Ip6hdr*)bp->rp;
+		tos = (h->vcf[0]&0x0F)<<2 | (h->vcf[1]&0xF0)>>2;
 		hop = h->ttl;
-		ipoput6(f, bp, 1, hop-1, tos, nil);
+		ipoput6(f, bp, 1, hop-1, tos, &rh);
 		return;
 	}
 
 	/* reassemble & process headers if needed */
 	bp = procxtns(ip, bp, 1);
-
 	if(bp == nil)
 		return;
 
-	h = (Ip6hdr *) (bp->rp);
-	proto = h->proto;
-	p = Fsrcvpcol(f, proto);
+	h = (Ip6hdr*)bp->rp;
+	p = Fsrcvpcol(f, h->proto);
 	if(p != nil && p->rcv != nil) {
 		ip->stats[InDelivers]++;
 		(*p->rcv)(p, ifc, bp);
@@ -441,6 +311,7 @@
 
 	ip->stats[InDiscards]++;
 	ip->stats[InUnknownProtos]++;
+drop:
 	freeblist(bp);
 }
 
@@ -447,20 +318,20 @@
 /*
  * ipfragfree6 - copied from ipfragfree4 - assume hold fraglock6
  */
-void
+static void
 ipfragfree6(IP *ip, Fragment6 *frag)
 {
 	Fragment6 *fl, **l;
 
-	if(frag->blist)
+	if(frag->blist != nil)
 		freeblist(frag->blist);
-
-	memset(frag->src, 0, IPaddrlen);
-	frag->id = 0;
 	frag->blist = nil;
+	frag->id = 0;
+	memset(frag->src, 0, IPaddrlen);
+	memset(frag->dst, 0, IPaddrlen);
 
 	l = &ip->flisthead6;
-	for(fl = *l; fl; fl = fl->next) {
+	for(fl = *l; fl != nil; fl = fl->next) {
 		if(fl == frag) {
 			*l = frag->next;
 			break;
@@ -470,13 +341,12 @@
 
 	frag->next = ip->fragfree6;
 	ip->fragfree6 = frag;
-
 }
 
 /*
  * ipfragallo6 - copied from ipfragalloc4
  */
-Fragment6*
+static Fragment6*
 ipfragallo6(IP *ip)
 {
 	Fragment6 *f;
@@ -483,7 +353,7 @@
 
 	while(ip->fragfree6 == nil) {
 		/* free last entry on fraglist */
-		for(f = ip->flisthead6; f->next; f = f->next)
+		for(f = ip->flisthead6; f->next != nil; f = f->next)
 			;
 		ipfragfree6(ip, f);
 	}
@@ -497,108 +367,109 @@
 }
 
 static Block*
-procxtns(IP *ip, Block *bp, int doreasm) {
-
-	int offset;
+procxtns(IP *ip, Block *bp, int doreasm)
+{
 	uchar proto;
-	Ip6hdr *h;
+	int offset;
 
-	h = (Ip6hdr *) (bp->rp);
-	offset = unfraglen(bp, &proto, 0);
-
-	if((proto == FH) && (doreasm != 0)) {
-		bp = ip6reassemble(ip, offset, bp, h);
-		if(bp == nil) 
-			return nil; 
-		offset = unfraglen(bp, &proto, 0);
+	offset = unfraglen(bp, &proto, 0, doreasm);
+	if(offset >= IP6HDR && proto == FH && doreasm) {
+		bp = ip6reassemble(ip, offset, bp);
+		if(bp == nil)
+			return nil;
+		offset = unfraglen(bp, &proto, 0, 0);
+		if(proto == FH)
+			offset = -1;
 	}
-
-	if(proto == DOH || offset > IP6HDR) 
+	if(offset < IP6HDR){
+		ip->stats[InHdrErrors]++;
+		ip->stats[InDiscards]++;
+		freeblist(bp);
+		return nil;
+	}
+	if(proto == DOH || offset > IP6HDR)
 		bp = procopts(bp);
-
 	return bp;
 }
 
-
-/*	returns length of "Unfragmentable part", i.e., sum of lengths of ipv6 hdr,
- *	hop-by-hop & routing headers if present; *nexthdr is set to nexthdr value
- *	of the last header in the "Unfragmentable part"; if setfh != 0, nexthdr
- *	field of the last header in the "Unfragmentable part" is set to FH.
+/*
+ * returns length of "Unfragmentable part", i.e., sum of lengths of ipv6 hdr,
+ * hop-by-hop & routing headers if present; *nexthdr is set to nexthdr value
+ * of the last header in the "Unfragmentable part"; if setfh != 0, nexthdr
+ * field of the last header in the "Unfragmentable part" is set to FH.
+ * When the last header is a fragment header and popfh != 0 then set
+ * the nexthdr value of the previous header to the nexthdr value of the
+ * fragment header. returns -1 on error.
  */
-int
-unfraglen(Block *bp, uchar *nexthdr, int setfh)
+static int
+unfraglen(Block *bp, uchar *nexthdr, int setfh, int popfh)
 {
-	uchar *p, *q;
-	int ufl, hs;
+	uchar *e, *p, *q;
 
+	e = bp->wp;
 	p = bp->rp;
-	q = p+6;	/* proto, = p+sizeof(Ip6hdr.vcf)+sizeof(Ip6hdr.ploadlen) */
+	q = p+6;   /* proto, = p+sizeof(Ip6hdr.vcf)+sizeof(Ip6hdr.ploadlen) */
 	*nexthdr = *q;
-	ufl = IP6HDR;
-	p += ufl;
-
-	for(;;) {
-		if(*nexthdr == HBH || *nexthdr == RH) {
-			*nexthdr = *p;
-			hs = ((int)*(p+1) + 1) * 8;
-			ufl += hs;
-			q = p;
-			p += hs;
-		}
-		else
-			break;
+	p += IP6HDR;
+	while(*nexthdr == HBH || *nexthdr == RH){
+		if(p+2 > e)
+			return -1;
+		q = p;
+		*nexthdr = *q;
+		p += ((int)p[1] + 1) * 8;
 	}
-
-	if(*nexthdr == FH)
-		*q = *p;
-
-	if(setfh)
+	if(p > e)
+		return -1;
+	if(*nexthdr == FH){
+		if(p+IP6FHDR > e || *p == FH)
+			return -1;
+		if(popfh)
+			*q = *p;
+	} else if(setfh)
 		*q = FH;
-
-	return ufl;
+	return p - bp->rp;
 }
 
-Block*
+static Block*
 procopts(Block *bp)
 {
 	return bp;
 }
 
-Block*
-ip6reassemble(IP* ip, int uflen, Block* bp, Ip6hdr* ih)
+static Block*
+ip6reassemble(IP* ip, int uflen, Block* bp)
 {
-
-	int fend, offset;
+	int offset, ovlap, fragsize, len;
+	uchar src[IPaddrlen], dst[IPaddrlen];
 	uint id;
-	Fragment6 *f, *fnext;
+	Block *bl, **l, *prev;
 	Fraghdr6 *fraghdr;
-	uchar src[IPaddrlen], dst[IPaddrlen];
-	Block *bl, **l, *last, *prev;
-	int ovlap, len, fragsize, pktposn;
+	Fragment6 *f, *fnext;
+	Ipfrag *fp, *fq;
+	Ip6hdr* ih;
 
-	fraghdr = (Fraghdr6 *) (bp->rp + uflen);
-	memmove(src, ih->src, IPaddrlen);
-	memmove(dst, ih->dst, IPaddrlen);
-	id = nhgetl(fraghdr->id);
-	offset = nhgets(fraghdr->offsetRM) & ~7;
-
 	/*
-	 *  block lists are too hard, pullupblock into a single block
+	 *  block lists are too hard, concatblock into a single block
 	 */
-	if(bp->next){
-		bp = pullupblock(bp, blocklen(bp));
-		ih = (Ip6hdr *)(bp->rp);
-	}
+	bp = concatblock(bp);
 
+	ih = (Ip6hdr*)bp->rp;
+	fraghdr = (Fraghdr6*)(bp->rp + uflen);
+	id = nhgetl(fraghdr->id);
+	offset = nhgets(fraghdr->offsetRM);
+	fragsize = BLEN(bp) - uflen - IP6FHDR;
 
+	memmove(src, ih->src, IPaddrlen);
+	memmove(dst, ih->dst, IPaddrlen);
+
 	qlock(&ip->fraglock6);
 
 	/*
 	 *  find a reassembly queue for this fragment
 	 */
-	for(f = ip->flisthead6; f; f = fnext){
+	for(f = ip->flisthead6; f != nil; f = fnext){
 		fnext = f->next;
-		if(ipcmp(f->src, src)==0 && ipcmp(f->dst, dst)==0 && f->id == id)
+		if(f->id == id && ipcmp(f->src, src) == 0 && ipcmp(f->dst, dst) == 0)
 			break;
 		if(f->age < NOW){
 			ip->stats[ReasmTimeout]++;
@@ -606,28 +477,35 @@
 		}
 	}
 
-
 	/*
 	 *  if this isn't a fragmented packet, accept it
 	 *  and get rid of any fragments that might go
 	 *  with it.
 	 */
-	if(nhgets(fraghdr->offsetRM)==0) {	// first frag is also the last
+	if((offset & ~6) == 0) {	/* 1st frag is also last */
 		if(f != nil) {
-			ipfragfree6(ip, f);
 			ip->stats[ReasmFails]++;
+			ipfragfree6(ip, f);
 		}
 		qunlock(&ip->fraglock6);
+
+		/* get rid of frag header */
+		memmove(bp->rp + IP6FHDR, bp->rp, uflen);
+		bp->rp += IP6FHDR;
+		ih = (Ip6hdr*)bp->rp;
+		hnputs(ih->ploadlen, BLEN(bp)-IP6HDR);
+
 		return bp;
 	}
 
-	if(bp->base+sizeof(Ipfrag) >= bp->rp){
-		bp = padblock(bp, sizeof(Ipfrag));
-		bp->rp += sizeof(Ipfrag);
+	if(bp->base+IPFRAGSZ > bp->rp){
+		bp = padblock(bp, IPFRAGSZ);
+		bp->rp += IPFRAGSZ;
 	}
 
-	BKFG(bp)->foff = offset;
-	BKFG(bp)->flen = nhgets(ih->ploadlen) + IP6HDR - uflen - IP6FHDR;
+	fp = (Ipfrag*)bp->base;
+	fp->foff = offset & ~7;
+	fp->flen = fragsize;
 
 	/* First fragment allocates a reassembly queue */
 	if(f == nil) {
@@ -638,8 +516,9 @@
 
 		f->blist = bp;
 
-		qunlock(&ip->fraglock6);
 		ip->stats[ReasmReqds]++;
+		qunlock(&ip->fraglock6);
+
 		return nil;
 	}
 
@@ -649,7 +528,7 @@
 	prev = nil;
 	l = &f->blist;
 	bl = f->blist;
-	while(bl != nil && BKFG(bp)->foff > BKFG(bl)->foff) {
+	while(bl != nil && fp->foff > ((Ipfrag*)bl->base)->foff) {
 		prev = bl;
 		l = &bl->next;
 		bl = bl->next;
@@ -656,15 +535,16 @@
 	}
 
 	/* Check overlap of a previous fragment - trim away as necessary */
-	if(prev) {
-		ovlap = BKFG(prev)->foff + BKFG(prev)->flen - BKFG(bp)->foff;
+	if(prev != nil) {
+		fq = (Ipfrag*)prev->base;
+		ovlap = fq->foff + fq->flen - fp->foff;
 		if(ovlap > 0) {
-			if(ovlap >= BKFG(bp)->flen) {
-				freeblist(bp);
+			if(ovlap >= fp->flen) {
 				qunlock(&ip->fraglock6);
+				freeb(bp);
 				return nil;
 			}
-			BKFG(prev)->flen -= ovlap;
+			fq->flen -= ovlap;
 		}
 	}
 
@@ -673,29 +553,27 @@
 	*l = bp;
 
 	/* Check to see if succeeding segments overlap */
-	if(bp->next) {
+	if(bp->next != nil) {
 		l = &bp->next;
-		fend = BKFG(bp)->foff + BKFG(bp)->flen;
+		offset = fp->foff + fp->flen;
 
 		/* Take completely covered segments out */
-
-		while(*l) {
-			ovlap = fend - BKFG(*l)->foff;
-
-			if(ovlap <= 0) 
-				break; 
-			if(ovlap < BKFG(*l)->flen) {
-				BKFG(*l)->flen -= ovlap;
-				BKFG(*l)->foff += ovlap;
-				/* move up ih hdrs */
-				memmove((*l)->rp + ovlap, (*l)->rp, uflen);
-				(*l)->rp += ovlap;
+		while((bl = *l) != nil) {
+			fq = (Ipfrag*)bl->base;
+			ovlap = offset - fq->foff;
+			if(ovlap <= 0)
 				break;
+			if(ovlap < fq->flen) {
+				/* move up ip and frag header */
+				memmove(bl->rp + ovlap, bl->rp, BLEN(bl) - fq->flen);
+				bl->rp += ovlap;
+				fq->flen -= ovlap;
+				fq->foff += ovlap;
+				break;
 			}
-			last = (*l)->next;
-			(*l)->next = nil;
-			freeblist(*l);
-			*l = last;
+			*l = bl->next;
+			bl->next = nil;
+			freeb(bl);
 		}
 	}
 
@@ -703,45 +581,55 @@
 	 *  look for a complete packet.  if we get to a fragment
 	 *  with the trailing bit of fraghdr->offsetRM[1] set, we're done.
 	 */
-	pktposn = 0;
-	for(bl = f->blist; bl; bl = bl->next) {
-		if(BKFG(bl)->foff != pktposn)
+	offset = 0;
+	for(bl = f->blist; bl != nil; bl = bl->next, offset += fp->flen) {
+		fp = (Ipfrag*)bl->base;
+		if(fp->foff != offset)
 			break;
-	
-		fraghdr = (Fraghdr6 *) (bl->rp + uflen);
-		if((fraghdr->offsetRM[1] & 1) == 0) {
 
-			bl = f->blist;
+		fraghdr = (Fraghdr6*)(bl->wp - fp->flen - IP6FHDR);
+		if(fraghdr->offsetRM[1] & 1)
+			continue;
 
-			/* get rid of frag header in first fragment */
+		bl = f->blist;
+		fq = (Ipfrag*)bl->base;
 
-			memmove(bl->rp + IP6FHDR, bl->rp, uflen);
-			bl->rp += IP6FHDR;
-			len = nhgets(((Ip6hdr*)(bl->rp))->ploadlen) - IP6FHDR;
-			bl->wp = bl->rp + len + IP6HDR;
+		/* get rid of frag header in first fragment */
+		memmove(bl->rp + IP6FHDR, bl->rp, BLEN(bl) - fq->flen - IP6FHDR);
+		bl->rp += IP6FHDR;
+		len = BLEN(bl);
 
-			/* Pullup all the fragment headers and
-			 * return a complete packet
-			 */
-			for(bl = bl->next; bl; bl = bl->next) {
-				fragsize = BKFG(bl)->flen;
-				len += fragsize;
-				bl->rp += uflen + IP6FHDR;
-				bl->wp = bl->rp + fragsize;
-			}
+		/*
+		 * Pullup all the fragment headers and
+		 * return a complete packet
+		 */
+		for(bl = bl->next; bl != nil && len < IP_MAX; bl = bl->next) {
+			fq = (Ipfrag*)bl->base;
+			fragsize = fq->flen;
+			bl->rp = bl->wp - fragsize;
+			len += fragsize;
+		}
 
-			bl = f->blist;
-			f->blist = nil;
+		if(len >= IP_MAX){
 			ipfragfree6(ip, f);
-			ih = (Ip6hdr*)(bl->rp);
-			hnputs(ih->ploadlen, len);
+			ip->stats[ReasmFails]++;
 			qunlock(&ip->fraglock6);
-			ip->stats[ReasmOKs]++;
-			return bl;		
+
+			return nil;
 		}
-		pktposn += BKFG(bl)->flen;
+
+		bl = f->blist;
+		f->blist = nil;
+		ipfragfree6(ip, f);
+
+		ih = (Ip6hdr*)bl->rp;
+		hnputs(ih->ploadlen, len-IP6HDR);
+
+		ip->stats[ReasmOKs]++;
+		qunlock(&ip->fraglock6);
+
+		return bl;
 	}
 	qunlock(&ip->fraglock6);
 	return nil;
 }
-
diff -u a/os/ip//ipv6.h b/os/ip//ipv6.h
--- a/os/ip//ipv6.h
+++ b/os/ip//ipv6.h
@@ -1,53 +1,31 @@
-#undef MIN
-#define MIN(a, b) ((a) <= (b) ? (a) : (b))
-
-/* rfc 3513 defines the address prefices */
+/*
+ * Internet Protocol Version 6
+ *
+ * rfc2460 defines the protocol, rfc2461 neighbour discovery, and
+ * rfc2462 address autoconfiguration.  rfc4443 defines ICMP; was rfc2463.
+ * rfc4291 defines the address architecture (including prefices), was rfc3513.
+ * rfc4007 defines the scoped address architecture.
+ *
+ * global unicast is anything but unspecified (::), loopback (::1),
+ * multicast (ff00::/8), and link-local unicast (fe80::/10).
+ *
+ * site-local (fec0::/10) is now deprecated, originally by rfc3879.
+ *
+ * Unique Local IPv6 Unicast Addresses are defined by rfc4193.
+ * prefix is fc00::/7, scope is global, routing is limited to roughly a site.
+ */
 #define isv6mcast(addr)	  ((addr)[0] == 0xff)
 #define islinklocal(addr) ((addr)[0] == 0xfe && ((addr)[1] & 0xc0) == 0x80)
-#define issitelocal(addr) ((addr)[0] == 0xfe && ((addr)[1] & 0xc0) == 0xc0)
-#define isv6global(addr) (((addr)[0] & 0xe0) == 0x20)
 
-#define optexsts(np) (nhgets((np)->ploadlen) > 24)
-#define issmcast(addr) (memcmp((addr), v6solicitednode, 13) == 0)
+#define optexsts(np)	(nhgets((np)->ploadlen) > 24)
+#define issmcast(addr)	(memcmp((addr), v6solicitednode, 13) == 0)
 
-/* from RFC 2460 */
+#ifndef MIN
+#define MIN(a, b) ((a) <= (b)? (a): (b))
+#endif
 
-typedef struct Ip6hdr     Ip6hdr;
-typedef struct Opthdr     Opthdr;
-typedef struct Routinghdr Routinghdr;
-typedef struct Fraghdr6    Fraghdr6;
-
-struct Ip6hdr {
-	uchar vcf[4];       	// version:4, traffic class:8, flow label:20
-	uchar ploadlen[2];  	// payload length: packet length - 40
-	uchar proto;		// next header type
-	uchar ttl;          	// hop limit
-	uchar src[IPaddrlen];
-	uchar dst[IPaddrlen];
-};
-
-struct Opthdr {
-	uchar nexthdr;
-	uchar len;
-};
-
-struct Routinghdr {
-	uchar nexthdr;
-	uchar len;
-	uchar rtetype;
-	uchar segrem;
-};
-
-struct Fraghdr6 {
-	uchar nexthdr;
-	uchar res;
-	uchar offsetRM[2];	// Offset, Res, M flag
-	uchar id[4];
-};
-
-
-enum {			/* Header Types */
-	HBH		= 0,	//?
+enum {				/* Header Types */
+	HBH		= 0,	/* hop-by-hop multicast routing protocol */
 	ICMP		= 1,
 	IGMP		= 2,
 	GGP		= 3,
@@ -72,89 +50,113 @@
 	Maxhdrtype	= 256,
 };
 
-
 enum {
-	//	multicast flgs and scop
+	/* multicast flags and scopes */
 
-	well_known_flg				= 0,
-	transient_flg				= 1,
+//	Well_known_flg	= 0,
+//	Transient_flg	= 1,
 
-	node_local_scop 			= 1,
-	link_local_scop 			= 2,
-	site_local_scop 			= 5,
-	org_local_scop				= 8,
-	global_scop				= 14,
+//	Interface_local_scop = 1,
+	Link_local_scop	= 2,
+//	Site_local_scop	= 5,
+//	Org_local_scop	= 8,
+	Global_scop	= 14,
 
-	//	various prefix lengths
+	/* various prefix lengths */
+	SOLN_PREF_LEN	= 13,
 
-	SOLN_PREF_LEN				= 13,
+	/* icmpv6 unreachability codes */
+	Icmp6_no_route		= 0,
+	Icmp6_ad_prohib		= 1,
+	Icmp6_out_src_scope	= 2,
+	Icmp6_adr_unreach	= 3,
+	Icmp6_port_unreach	= 4,
+	Icmp6_gress_src_fail	= 5,
+	Icmp6_rej_route		= 6,
+	Icmp6_unknown		= 7,  /* our own invention for internal use */
 
-	//	icmpv6 unreach codes
-	icmp6_no_route				= 0,
-	icmp6_ad_prohib				= 1,
-	icmp6_unassigned			= 2,
-	icmp6_adr_unreach			= 3,
-	icmp6_port_unreach			= 4,
-	icmp6_unkn_code				= 5,
+	/* various flags & constants */
+	v6MINTU		= 1280,
+	HOP_LIMIT	= 255,
+	IP6HDR		= 40,		/* sizeof(Ip6hdr) = 8 + 2*16 */
+	IP6FHDR		= 8, 		/* sizeof(Fraghdr6) */
 
-	// 	various flags & constants
+	/* option types */
 
-	v6MINTU      				= 1280,
-	HOP_LIMIT    				= 255,
-	ETHERHDR_LEN 				= 14,
-	IPV6HDR_LEN  				= 40,
-	IPV4HDR_LEN  				= 20,
+	/* neighbour discovery */
+	SRC_LLADDR	= 1,
+	TARGET_LLADDR	= 2,
+	PREFIX_INFO	= 3,
+	REDIR_HEADER	= 4,
+	MTU_OPTION	= 5,
+	/* new since rfc2461; see iana.org/assignments/icmpv6-parameters */
+	V6nd_home	= 8,
+	V6nd_srcaddrs	= 9,		/* rfc3122 */
+	V6nd_ip		= 17,
+	/* /lib/rfc/drafts/draft-jeong-dnsop-ipv6-dns-discovery-12.txt */
+	V6nd_rdns	= 25,
+	/* plan 9 extensions */
+	V6nd_9fs	= 250,
+	V6nd_9auth	= 251,
 
-	// 	option types
+	SRC_UNSPEC	= 0,
+	SRC_UNI		= 1,
+	TARG_UNI	= 2,
+	TARG_MULTI	= 3,
 
-	SRC_LLADDRESS    			= 1,
-	TARGET_LLADDRESS 			= 2,
-	PREFIX_INFO      			= 3,
-	REDIR_HEADER     			= 4,
-	MTU_OPTION       			= 5,
+	Tunitent	= 1,
+	Tuniproxy	= 2,
+	Tunirany	= 3,
 
-	SRC_UNSPEC  				= 0,
-	SRC_UNI     				= 1,
-	TARG_UNI    				= 2,
-	TARG_MULTI  				= 3,
+	/* Node constants */
+	MAX_MULTICAST_SOLICIT	= 3,
+	RETRANS_TIMER		= 1000,
+};
 
-	t_unitent   				= 1,
-	t_uniproxy  				= 2,
-	t_unirany   				= 3,
+typedef struct Ip6hdr	Ip6hdr;
+typedef struct Opthdr	Opthdr;
+typedef struct Routinghdr Routinghdr;
+typedef struct Fraghdr6	Fraghdr6;
 
-	//	Router constants (all times in milliseconds)
+/* we do this in case there's padding at the end of Ip6hdr */
+#define IPV6HDR \
+	uchar	vcf[4];		/* version:4, traffic class:8, flow label:20 */\
+	uchar	ploadlen[2];	/* payload length: packet length - 40 */ \
+	uchar	proto;		/* next header type */ \
+	uchar	ttl;		/* hop limit */ \
+	uchar	src[IPaddrlen]; \
+	uchar	dst[IPaddrlen]
 
-	MAX_INITIAL_RTR_ADVERT_INTERVAL 	= 16000,
-	MAX_INITIAL_RTR_ADVERTISEMENTS  	= 3,
-	MAX_FINAL_RTR_ADVERTISEMENTS    	= 3,
-	MIN_DELAY_BETWEEN_RAS 			= 3000,
-	MAX_RA_DELAY_TIME     			= 500,
+struct	Ip6hdr {
+	IPV6HDR;
+	uchar	payload[];
+};
 
-	//	Host constants
+struct	Opthdr {		/* unused */
+	uchar	nexthdr;
+	uchar	len;
+};
 
-	MAX_RTR_SOLICITATION_DELAY 		= 1000,
-	RTR_SOLICITATION_INTERVAL  		= 4000,
-	MAX_RTR_SOLICITATIONS      		= 3,
+/*
+ * Beware routing header type 0 (loose source routing); see
+ * http://www.secdev.org/conf/IPv6_RH_security-csw07.pdf.
+ * Type 1 is unused.  Type 2 is for MIPv6 (mobile IPv6) filtering
+ * against type 0 header.
+ */
+struct	Routinghdr {		/* unused */
+	uchar	nexthdr;
+	uchar	len;
+	uchar	rtetype;
+	uchar	segrem;
+};
 
-	//	Node constants
-
-	MAX_MULTICAST_SOLICIT   		= 3,
-	MAX_UNICAST_SOLICIT     		= 3,
-	MAX_ANYCAST_DELAY_TIME  		= 1000,
-	MAX_NEIGHBOR_ADVERTISEMENT 		= 3,
-	REACHABLE_TIME 				= 30000,
-	RETRANS_TIMER  				= 1000,
-	DELAY_FIRST_PROBE_TIME 			= 5000,
-
+struct	Fraghdr6 {
+	uchar	nexthdr;
+	uchar	res;
+	uchar	offsetRM[2];	/* Offset, Res, M flag */
+	uchar	id[4];
 };
 
-extern void ipv62smcast(uchar *, uchar *);
-extern void icmpns(Fs *f, uchar* src, int suni, uchar* targ, int tuni, uchar* mac);
-extern void icmpna(Fs *f, uchar* src, uchar* dst, uchar* targ, uchar* mac, uchar flags);
-extern void icmpttlexceeded6(Fs *f, Ipifc *ifc, Block *bp);
-extern void icmppkttoobig6(Fs *f, Ipifc *ifc, Block *bp);
-extern void icmphostunr(Fs *f, Ipifc *ifc, Block *bp, int code, int free);
-
 extern uchar v6allnodesN[IPaddrlen];
 extern uchar v6allnodesL[IPaddrlen];
 extern uchar v6allroutersN[IPaddrlen];
@@ -161,23 +163,16 @@
 extern uchar v6allroutersL[IPaddrlen];
 extern uchar v6allnodesNmask[IPaddrlen];
 extern uchar v6allnodesLmask[IPaddrlen];
-extern uchar v6allroutersS[IPaddrlen];
 extern uchar v6solicitednode[IPaddrlen];
 extern uchar v6solicitednodemask[IPaddrlen];
 extern uchar v6Unspecified[IPaddrlen];
 extern uchar v6loopback[IPaddrlen];
-extern uchar v6loopbackmask[IPaddrlen];
 extern uchar v6linklocal[IPaddrlen];
 extern uchar v6linklocalmask[IPaddrlen];
-extern uchar v6sitelocal[IPaddrlen];
-extern uchar v6sitelocalmask[IPaddrlen];
-extern uchar v6glunicast[IPaddrlen];
 extern uchar v6multicast[IPaddrlen];
 extern uchar v6multicastmask[IPaddrlen];
 
 extern int v6llpreflen;
-extern int v6slpreflen;
-extern int v6lbpreflen;
 extern int v6mcpreflen;
 extern int v6snpreflen;
 extern int v6aNpreflen;
@@ -184,3 +179,10 @@
 extern int v6aLpreflen;
 
 extern int ReTransTimer;
+
+void ipv62smcast(uchar *, uchar *);
+void icmpns(Fs *f, uchar* src, int suni, uchar* targ, int tuni, uchar* mac);
+void icmpna(Fs *f, uchar* src, uchar* dst, uchar* targ, uchar* mac, uchar flags);
+void icmpttlexceeded6(Fs *f, Ipifc *ifc, Block *bp);
+void icmppkttoobig6(Fs *f, Ipifc *ifc, Block *bp);
+void icmphostunr6(Fs *f, Ipifc *ifc, Block *bp, int code, int tome);
diff -u a/os/ip//loopbackmedium.c b/os/ip//loopbackmedium.c
--- a/os/ip//loopbackmedium.c
+++ b/os/ip//loopbackmedium.c
@@ -28,13 +28,12 @@
 	LB *lb;
 
 	lb = smalloc(sizeof(*lb));
+	lb->readp = (void*)-1;
 	lb->f = ifc->conv->p->f;
-	/* TO DO: make queue size a function of kernel memory */
-	lb->q = qopen(128*1024, Qmsg, nil, nil);
+	lb->q = qopen(1024*1024, Qmsg, nil, nil);
 	ifc->arg = lb;
-	ifc->mbps = 1000;
 
-	kproc("loopbackread", loopbackread, ifc, 0);
+	kproc("loopbackread", loopbackread, ifc);
 
 }
 
@@ -43,13 +42,29 @@
 {
 	LB *lb = ifc->arg;
 
-	if(lb->readp)
+	while(waserror())
+		;
+
+	/* wait for reader to start */
+	while(lb->readp == (void*)-1)
+		tsleep(&up->sleep, return0, 0, 300);
+		
+	if(lb->readp != nil)
 		postnote(lb->readp, 1, "unbind", 0);
 
+	poperror();
+
+	wunlock(ifc);
+	while(waserror())
+		;
+
 	/* wait for reader to die */
-	while(lb->readp != 0)
+	while(lb->readp != nil)
 		tsleep(&up->sleep, return0, 0, 300);
 
+	poperror();
+	wlock(ifc);
+
 	/* clean up */
 	qfree(lb->q);
 	free(lb);
@@ -76,23 +91,14 @@
 	ifc = a;
 	lb = ifc->arg;
 	lb->readp = up;	/* hide identity under a rock for unbind */
-	if(waserror()){
-		lb->readp = 0;
-		pexit("hangup", 1);
-	}
-	for(;;){
-		bp = qbread(lb->q, Maxtu);
-		if(bp == nil)
-			continue;
-		ifc->in++;
-		if(!canrlock(ifc)){
-			freeb(bp);
-			continue;
-		}
+	if(!waserror())
+	while((bp = qbread(lb->q, Maxtu)) != nil){
+		rlock(ifc);
 		if(waserror()){
 			runlock(ifc);
 			nexterror();
 		}
+		ifc->in++;
 		if(ifc->lifc == nil)
 			freeb(bp);
 		else
@@ -100,6 +106,8 @@
 		runlock(ifc);
 		poperror();
 	}
+	lb->readp = nil;
+	pexit("hangup", 1);
 }
 
 Medium loopbackmedium =
diff -u a/os/ip//netdevmedium.c b/os/ip//netdevmedium.c
--- a/os/ip//netdevmedium.c
+++ b/os/ip//netdevmedium.c
@@ -49,12 +49,13 @@
 	mchan = namec(argv[2], Aopen, ORDWR, 0);
 
 	er = smalloc(sizeof(*er));
+	er->readp = (void*)-1;
 	er->mchan = mchan;
 	er->f = ifc->conv->p->f;
 
 	ifc->arg = er;
 
-	kproc("netdevread", netdevread, ifc, 0);
+	kproc("netdevread", netdevread, ifc);
 }
 
 /*
@@ -65,13 +66,29 @@
 {
 	Netdevrock *er = ifc->arg;
 
+	while(waserror())
+		;
+
+	/* wait for reader to start */
+	while(er->readp == (void*)-1)
+		tsleep(&up->sleep, return0, 0, 300);
+
 	if(er->readp != nil)
 		postnote(er->readp, 1, "unbind", 0);
 
-	/* wait for readers to die */
+	poperror();
+
+	wunlock(ifc);
+	while(waserror())
+		;
+
+	/* wait for reader to die */
 	while(er->readp != nil)
 		tsleep(&up->sleep, return0, 0, 300);
 
+	poperror();
+	wlock(ifc);
+
 	if(er->mchan != nil)
 		cclose(er->mchan);
 
@@ -86,8 +103,6 @@
 {
 	Netdevrock *er = ifc->arg;
 
-	if(bp->next)
-		bp = concatblock(bp);
 	if(BLEN(bp) < ifc->mintu)
 		bp = adjustblock(bp, ifc->mintu);
 
@@ -104,34 +119,22 @@
 	Ipifc *ifc;
 	Block *bp;
 	Netdevrock *er;
-	char *argv[1];
 
 	ifc = a;
 	er = ifc->arg;
 	er->readp = up;	/* hide identity under a rock for unbind */
-	if(waserror()){
-		er->readp = nil;
-		pexit("hangup", 1);
-	}
+	if(!waserror())
 	for(;;){
 		bp = devtab[er->mchan->type]->bread(er->mchan, ifc->maxtu, 0);
 		if(bp == nil){
-			/*
-			 * get here if mchan is a pipe and other side hangs up
-			 * clean up this interface & get out
-ZZZ is this a good idea?
-			 */
 			poperror();
-			er->readp = nil;
-			argv[0] = "unbind";
-			if(!waserror())
+			if(!waserror()){
+				static char *argv[]  = { "unbind" };
 				ifc->conv->p->ctl(ifc->conv, argv, 1);
-			pexit("hangup", 1);
+			}
+			break;
 		}
-		if(!canrlock(ifc)){
-			freeb(bp);
-			continue;
-		}
+		rlock(ifc);
 		if(waserror()){
 			runlock(ifc);
 			nexterror();
@@ -144,6 +147,8 @@
 		runlock(ifc);
 		poperror();
 	}
+	er->readp = nil;
+	pexit("hangup", 1);
 }
 
 void
diff -u a/os/ip//netlog.c b/os/ip//netlog.c
--- a/os/ip//netlog.c
+++ b/os/ip//netlog.c
@@ -7,7 +7,7 @@
 #include	"../ip/ip.h"
 
 enum {
-	Nlog		= 4*1024,
+	Nlog		= 16*1024,
 };
 
 /*
@@ -39,12 +39,12 @@
 	{ "ppp",	Logppp, },
 	{ "ip",		Logip, },
 	{ "fs",		Logfs, },
-	{ "tcp",	Logtcp, },
 	{ "il",		Logil, },
+	{ "tcp",	Logtcp, },
 	{ "icmp",	Logicmp, },
 	{ "udp",	Logudp, },
 	{ "compress",	Logcompress, },
-	{ "ilmsg",	Logil|Logilmsg, },
+	{ "logilmsg",	Logilmsg, },
 	{ "gre",	Loggre, },
 	{ "tcpwin",	Logtcp|Logtcpwin, },
 	{ "tcprxmt",	Logtcp|Logtcprxmt, },
@@ -85,8 +85,11 @@
 		nexterror();
 	}
 	if(f->alog->opens == 0){
-		if(f->alog->buf == nil)
+		if(f->alog->buf == nil){
 			f->alog->buf = malloc(Nlog);
+			if(f->alog->buf == nil)
+				error(Enomem);
+		}
 		f->alog->rptr = f->alog->buf;
 		f->alog->end = f->alog->buf + Nlog;
 	}
@@ -202,6 +205,7 @@
 		else
 			f->alog->iponlyset = 1;
 		free(cb);
+		poperror();
 		return;
 
 	default:
@@ -227,7 +231,7 @@
 void
 netlog(Fs *f, int mask, char *fmt, ...)
 {
-	char buf[128], *t, *fp;
+	char buf[256], *t, *fp;
 	int i, n;
 	va_list arg;
 
diff -u a/os/ip//nullmedium.c b/os/ip//nullmedium.c
--- a/os/ip//nullmedium.c
+++ b/os/ip//nullmedium.c
@@ -19,8 +19,9 @@
 }
 
 static void
-nullbwrite(Ipifc*, Block*, int, uchar*)
+nullbwrite(Ipifc*, Block *bp, int, uchar*)
 {
+	freeb(bp);
 	error("nullbwrite");
 }
 
diff -u a/os/ip//pktmedium.c b/os/ip//pktmedium.c
--- a/os/ip//pktmedium.c
+++ b/os/ip//pktmedium.c
@@ -16,10 +16,10 @@
 Medium pktmedium =
 {
 .name=		"pkt",
-.hsize=		14,
-.mintu=		40,
+.hsize=		0,
+.mintu=		0,
 .maxtu=		4*1024,
-.maclen=	6,
+.maclen=	0,
 .bind=		pktbind,
 .unbind=	pktunbind,
 .bwrite=	pktbwrite,
@@ -28,12 +28,13 @@
 };
 
 /*
- *  called to bind an IP ifc to an ethernet device
+ *  called to bind an IP ifc to a packet device
  *  called with ifc wlock'd
  */
 static void
-pktbind(Ipifc*, int, char**)
+pktbind(Ipifc*, int argc, char **argv)
 {
+	USED(argc, argv);
 }
 
 /*
@@ -51,7 +52,6 @@
 pktbwrite(Ipifc *ifc, Block *bp, int, uchar*)
 {
 	/* enqueue onto the conversation's rq */
-	bp = concatblock(bp);
 	if(ifc->conv->snoopers.ref > 0)
 		qpass(ifc->conv->sq, copyblock(bp, BLEN(bp)));
 	qpass(ifc->conv->rq, bp);
diff -u a/os/ip//rudp.c b/os/ip//rudp.c
--- a/os/ip//rudp.c
+++ b/os/ip//rudp.c
@@ -1,4 +1,5 @@
 /*
+ *  Reliable User Datagram Protocol, currently only for IPv4.
  *  This protocol is compatible with UDP's packet format.
  *  It could be done over UDP if need be.
  */
@@ -25,20 +26,17 @@
 
 enum
 {
-	UDP_HDRSIZE	= 20,	/* pseudo header + udp header */
 	UDP_PHDRSIZE	= 12,	/* pseudo header */
+//	UDP_HDRSIZE	= 20,	/* pseudo header + udp header */
 	UDP_RHDRSIZE	= 36,	/* pseudo header + udp header + rudp header */
 	UDP_IPHDR	= 8,	/* ip header */
 	IP_UDPPROTO	= 254,
-	UDP_USEAD7	= 52,
-	UDP_USEAD6	= 36,
-	UDP_USEAD4	= 12,
+	UDP_USEAD7	= 52,	/* size of new ipv6 headers struct */
 
 	Rudprxms	= 200,
 	Rudptickms	= 50,
 	Rudpmaxxmit	= 10,
 	Maxunacked	= 100,
-
 };
 
 #define Hangupgen	0xffffffff	/* used only in hangup messages */
@@ -205,7 +203,7 @@
 		qlock(&rpriv->apl);
 		if(rpriv->ackprocstarted == 0){
 			sprint(kpname, "#I%drudpack", rudp->f->dev);
-			kproc(kpname, relackproc, rudp, 0);
+			kproc(kpname, relackproc, rudp);
 			rpriv->ackprocstarted = 1;
 		}
 		qunlock(&rpriv->apl);
@@ -240,6 +238,7 @@
 	qlock(ucb);
 	for(r = ucb->r; r; r = r->next)
 		m += snprint(state+m, n-m, " %I/%ld", r->addr, UNACKED(r));
+	m += snprint(state+m, n-m, "\n");
 	qunlock(ucb);
 	return m;
 }
@@ -281,7 +280,7 @@
 	/* force out any delayed acks */
 	ucb = (Rudpcb*)c->ptcl;
 	qlock(ucb);
-	for(r = ucb->r; r; r = r->next){
+	for(r = ucb->r; r != nil; r = r->next){
 		if(r->acksent != r->rcvseq)
 			relsendack(c, r, 0);
 	}
@@ -374,27 +373,10 @@
 		rport = nhgets(bp->rp);
 		bp->rp += 2+2;			/* Ignore local port */
 		break;
-	case 6:
-		/* get user specified addresses */
-		bp = pullupblock(bp, UDP_USEAD6);
-		if(bp == nil)
-			return;
-		ipmove(raddr, bp->rp);
-		bp->rp += IPaddrlen;
-		ipmove(laddr, bp->rp);
-		bp->rp += IPaddrlen;
-		/* pick interface closest to dest */
-		if(ipforme(f, laddr) != Runi)
-			findlocalip(f, laddr, raddr);
-		rport = nhgets(bp->rp);
-
-		bp->rp += 4;			/* Igonore local port */
-		break;
 	default:
 		ipmove(raddr, c->raddr);
 		ipmove(laddr, c->laddr);
 		rport = c->rport;
-
 		break;
 	}
 
@@ -402,9 +384,6 @@
 
 	/* Make space to fit rudp & ip header */
 	bp = padblock(bp, UDP_IPHDR+UDP_RHDRSIZE);
-	if(bp == nil)
-		return;
-
 	uh = (Udphdr *)(bp->rp);
 	uh->vihl = IP_VER4;
 
@@ -417,7 +396,6 @@
 	uh->frag[1] = 0;
 	hnputs(uh->udpplen, ptcllen);
 	switch(ucb->headers){
-	case 6:
 	case 7:
 		v6tov4(uh->udpdst, raddr);
 		hnputs(uh->udpdport, rport);
@@ -528,7 +506,7 @@
 
 	c = iphtlook(&upriv->ht, raddr, rport, laddr, lport);
 	if(c == nil){
-		/* no converstation found */
+		/* no conversation found */
 		upriv->ustats.rudpNoPorts++;
 		qunlock(rudp);
 		netlog(f, Logudp, "udp: no conv %I!%d -> %I!%d\n", raddr, rport,
@@ -574,45 +552,32 @@
 		p = bp->rp;
 		ipmove(p, raddr); p += IPaddrlen;
 		ipmove(p, laddr); p += IPaddrlen;
-		ipmove(p, ifc->lifc->local); p += IPaddrlen;
+		if(!ipv6local(ifc, p, 0, raddr))
+			ipmove(p, ifc->lifc != nil ? ifc->lifc->local : IPnoaddr);
+		p += IPaddrlen;
 		hnputs(p, rport); p += 2;
 		hnputs(p, lport);
 		break;
-	case 6:
-		/* pass the src address */
-		bp = padblock(bp, UDP_USEAD6);
-		p = bp->rp;
-		ipmove(p, raddr); p += IPaddrlen;
-		ipmove(p, ipforme(f, laddr)==Runi ? laddr : ifc->lifc->local); p += IPaddrlen;
-		hnputs(p, rport); p += 2;
-		hnputs(p, lport);
-		break;
 	default:
 		/* connection oriented rudp */
 		if(ipcmp(c->raddr, IPnoaddr) == 0){
-			/* save the src address in the conversation */
+			/* reply with the same ip address (if not broadcast) */
+			if(ipforme(f, laddr) != Runi)
+				ipv6local(ifc, laddr, 0, raddr);
+			ipmove(c->laddr, laddr);
 		 	ipmove(c->raddr, raddr);
 			c->rport = rport;
-
-			/* reply with the same ip address (if not broadcast) */
-			if(ipforme(f, laddr) == Runi)
-				ipmove(c->laddr, laddr);
-			else
-				v4tov6(c->laddr, ifc->lifc->local);
 		}
 		break;
 	}
-	if(bp->next)
-		bp = concatblock(bp);
 
 	if(qfull(c->rq)) {
-		netlog(f, Logrudp, "rudp: qfull %I.%d -> %I.%d\n", raddr, rport,
-			laddr, lport);
+		netlog(f, Logrudp, "rudp: qfull %I.%d -> %I.%d\n",
+			raddr, rport, laddr, lport);
 		freeblist(bp);
+	} else {
+		qpass(c->rq, concatblock(bp));
 	}
-	else
-		qpass(c->rq, bp);
-	
 	qunlock(ucb);
 }
 
@@ -629,16 +594,14 @@
 	if(n < 1)
 		return rudpunknown;
 
-	if(strcmp(f[0], "headers++4") == 0){
-		ucb->headers = 7;
+	if(strcmp(f[0], "headers") == 0){
+		ucb->headers = 7;		/* new headers format */
 		return nil;
-	} else if(strcmp(f[0], "headers") == 0){
-		ucb->headers = 6;
-		return nil;
 	} else if(strcmp(f[0], "hangup") == 0){
 		if(n < 3)
 			return "bad syntax";
-		parseip(ip, f[1]);
+		if (parseip(ip, f[1]) == -1)
+			return Ebadip;
 		x = atoi(f[2]);
 		qlock(ucb);
 		relforget(c, ip, x, 1);
@@ -645,7 +608,7 @@
 		qunlock(ucb);
 		return nil;
 	} else if(strcmp(f[0], "randdrop") == 0){
-		x = 10;		/* default is 10% */
+		x = 10;			/* default is 10% */
 		if(n > 1)
 			x = atoi(f[1]);
 		if(x > 100 || x < 0)
@@ -672,12 +635,13 @@
 	pdest = nhgets(h->udpdport);
 
 	/* Look for a connection */
-	for(p = rudp->conv; *p; p++) {
-		s = *p;
+	for(p = rudp->conv; (s = *p) != nil; p++) {
 		if(s->rport == pdest)
 		if(s->lport == psource)
 		if(ipcmp(s->raddr, dest) == 0)
 		if(ipcmp(s->laddr, source) == 0){
+			if(s->ignoreadvice)
+				break;
 			qhangup(s->rq, msg);
 			qhangup(s->wq, msg);
 			break;
@@ -701,12 +665,6 @@
 		upriv->orders);
 }
 
-int
-rudpgc(Proto *rudp)
-{
-	return natgc(rudp->ipproto);
-}
-
 void
 rudpinit(Fs *fs)
 {
@@ -725,9 +683,8 @@
 	rudp->rcv = rudpiput;
 	rudp->advise = rudpadvise;
 	rudp->stats = rudpstats;
-	rudp->gc = rudpgc;
 	rudp->ipproto = IP_UDPPROTO;
-	rudp->nc = 16;
+	rudp->nc = 32;
 	rudp->ptclsize = sizeof(Rudpcb);
 
 	Fsproto(fs, rudp);
@@ -770,6 +727,8 @@
 
 	rudp = (Proto *)a;
 
+	while(waserror())
+		;
 loop:
 	tsleep(&up->sleep, return0, 0, Rudptickms);
 
@@ -989,8 +948,6 @@
 	Fs *f;
 
 	bp = allocb(UDP_IPHDR + UDP_RHDRSIZE);
-	if(bp == nil)
-		return;
 	bp->wp += UDP_IPHDR + UDP_RHDRSIZE;
 	f = c->p->f;
 	uh = (Udphdr *)(bp->rp);
diff -u a/os/ip//tcp.c b/os/ip//tcp.c
--- a/os/ip//tcp.c
+++ b/os/ip//tcp.c
@@ -41,13 +41,13 @@
 	EOLOPT		= 0,
 	NOOPOPT		= 1,
 	MSSOPT		= 2,
-	MSS_LENGTH	= 4,		/* Mean segment size */
+	MSS_LENGTH	= 4,		/* Maximum segment size */
 	WSOPT		= 3,
 	WS_LENGTH	= 3,		/* Bits to scale window size by */
 	MSL2		= 10,
 	MSPTICK		= 50,		/* Milliseconds per timer tick */
-	DEF_MSS		= 1460,		/* Default mean segment */
-	DEF_MSS6	= 1280,		/* Default mean segment (min) for v6 */
+	DEF_MSS		= 1460,		/* Default maximum segment */
+	DEF_MSS6	= 1220,		/* Default maximum segment (min) for v6 */
 	DEF_RTT		= 500,		/* Default round trip */
 	DEF_KAT		= 120000,	/* Default time (ms) between keep alives */
 	TCP_LISTEN	= 0,		/* Listen connection */
@@ -81,7 +81,13 @@
 	NLHT		= 256,		/* hash table size, must be a power of 2 */
 	LHTMASK		= NLHT-1,
 
-	HaveWS		= 1<<8,
+	/*
+	 * window is 64kb · 2ⁿ
+	 * these factors determine the ultimate bandwidth-delay product.
+	 * 64kb · 2⁵ = 2mb, or 2x overkill for 100mbps · 70ms.
+	 */
+	Maxqscale	= 4,		/* maximum queuing scale */
+	Defadvscale	= 4,		/* default advertisement */
 };
 
 /* Must correspond to the enumeration above */
@@ -169,8 +175,9 @@
 	ulong	seq;
 	ulong	ack;
 	uchar	flags;
-	ushort	ws;	/* window scale option (if not zero) */
-	ulong	wnd;
+	uchar	update;
+	ushort	ws;	/* window scale option */
+	ulong	wnd;	/* prescaled window */
 	ushort	urg;
 	ushort	mss;	/* max segment size option (if not zero) */
 	ushort	len;	/* size of data */
@@ -205,44 +212,53 @@
 		ulong	wnd;		/* Tcp send window */
 		ulong	urg;		/* Urgent data pointer */
 		ulong	wl2;
-		int	scale;		/* how much to right shift window in xmitted packets */
+		uint	scale;		/* how much to right shift window in xmitted packets */
 		/* to implement tahoe and reno TCP */
 		ulong	dupacks;	/* number of duplicate acks rcvd */
+		ulong	partialack;
 		int	recovery;	/* loss recovery flag */
-		ulong	rxt;		/* right window marker for recovery */
+		int	retransmit;	/* retransmit 1 packet @ una flag */
+		int	rto;
+		ulong	rxt;		/* right window marker for recovery "recover" rfc3782 */
 	} snd;
 	struct {
 		ulong	nxt;		/* Receive pointer to next uchar slot */
 		ulong	wnd;		/* Receive window incoming */
+		ulong	wsnt;		/* Last wptr sent.  important to track for large bdp */
+		ulong	wptr;
 		ulong	urg;		/* Urgent pointer */
+		ulong	ackptr;		/* last acked sequence */
 		int	blocked;
-		int	una;		/* unacked data segs */
-		int	scale;		/* how much to left shift window in rcved packets */
+		uint	scale;		/* how much to left shift window in rcv'd packets */
 	} rcv;
 	ulong	iss;			/* Initial sequence number */
-	int	sawwsopt;		/* true if we saw a wsopt on the incoming SYN */
 	ulong	cwind;			/* Congestion window */
-	int	scale;			/* desired snd.scale */
-	ushort	ssthresh;		/* Slow start threshold */
+	ulong	abcbytes;		/* appropriate byte counting rfc 3465 */
+	uint	scale;			/* desired snd.scale */
+	ulong	ssthresh;		/* Slow start threshold */
 	int	resent;			/* Bytes just resent */
 	int	irs;			/* Initial received squence */
-	ushort	mss;			/* Mean segment size */
+	ushort	mss;			/* Maximum segment size */
 	int	rerecv;			/* Overlap of data rerecevived */
-	ulong	window;			/* Recevive window */
+	ulong	window;			/* Our receive window (queue) */
+	uint	qscale;			/* Log2 of our receive window (queue) */
 	uchar	backoff;		/* Exponential backoff counter */
 	int	backedoff;		/* ms we've backed off for rexmits */
 	uchar	flags;			/* State flags */
 	Reseq	*reseq;			/* Resequencing queue */
+	int	nreseq;
+	int	reseqlen;
 	Tcptimer	timer;			/* Activity timer */
 	Tcptimer	acktimer;		/* Acknowledge timer */
 	Tcptimer	rtt_timer;		/* Round trip timer */
 	Tcptimer	katimer;		/* keep alive timer */
 	ulong	rttseq;			/* Round trip sequence */
-	int	srtt;			/* Shortened round trip */
+	int	srtt;			/* Smoothed round trip */
 	int	mdev;			/* Mean deviation of round trip */
 	int	kacounter;		/* count down for keep alive */
 	uint	sndsyntime;		/* time syn sent */
 	ulong	time;			/* time Finwait2 or Syn_received was sent */
+	ulong	timeuna;			/* snd.una when time was set */
 	int	nochecksum;		/* non-zero means don't send checksums */
 	int	flgcnt;			/* number of flags in the sequence (FIN,SEQ) */
 
@@ -285,11 +301,11 @@
 };
 
 int	tcp_irtt = DEF_RTT;	/* Initial guess at round trip time */
-ushort	tcp_mss = DEF_MSS;	/* Maximum segment size to be sent */
 
 enum {
 	/* MIB stats */
 	MaxConn,
+	Mss,
 	ActiveOpens,
 	PassiveOpens,
 	EstabResets,
@@ -297,6 +313,7 @@
 	InSegs,
 	OutSegs,
 	RetransSegs,
+	RetransSegsSent,
 	RetransTimeouts,
 	InErrs,
 	OutRsts,
@@ -305,14 +322,27 @@
 	CsumErrs,
 	HlenErrs,
 	LenErrs,
+	Resequenced,
 	OutOfOrder,
+	ReseqBytelim,
+	ReseqPktlim,
+	Delayack,
+	Wopenack,
 
+	Recovery,
+	RecoveryDone,
+	RecoveryRTO,
+	RecoveryNoSeq,
+	RecoveryCwind,
+	RecoveryPA,
+
 	Nstats
 };
 
-static char *statnames[] =
+static char *statnames[Nstats] =
 {
 [MaxConn]	"MaxConn",
+[Mss]		"MaxSegment",
 [ActiveOpens]	"ActiveOpens",
 [PassiveOpens]	"PassiveOpens",
 [EstabResets]	"EstabResets",
@@ -320,6 +350,7 @@
 [InSegs]	"InSegs",
 [OutSegs]	"OutSegs",
 [RetransSegs]	"RetransSegs",
+[RetransSegsSent]	"RetransSegsSent",
 [RetransTimeouts]	"RetransTimeouts",
 [InErrs]	"InErrs",
 [OutRsts]	"OutRsts",
@@ -327,6 +358,19 @@
 [HlenErrs]	"HlenErrs",
 [LenErrs]	"LenErrs",
 [OutOfOrder]	"OutOfOrder",
+[Resequenced]	"Resequenced",
+[ReseqBytelim]	"ReseqBytelim",
+[ReseqPktlim]	"ReseqPktlim",
+[Delayack]	"Delayack",
+[Wopenack]	"Wopenack",
+
+[Recovery]	"Recovery",
+[RecoveryDone]	"RecoveryDone",
+[RecoveryRTO]	"RecoveryRTO",
+
+[RecoveryNoSeq]	"RecoveryNoSeq",
+[RecoveryCwind]	"RecoveryCwind",
+[RecoveryPA]	"RecoveryPA",
 };
 
 typedef struct Tcppriv Tcppriv;
@@ -347,7 +391,7 @@
 	QLock	apl;
 	int	ackprocstarted;
 
-	ulong	stats[Nstats];
+	uvlong	stats[Nstats];
 };
 
 /*
@@ -356,34 +400,34 @@
  *  of DoS attack.
  *
  *  To avoid stateless Conv hogs, we pick a sequence number at random.  If
- *  it that number gets acked by the other end, we shut down the connection.
- *  Look for tcpporthogedefense in the code.
+ *  that number gets acked by the other end, we shut down the connection.
+ *  Look for tcpporthogdefense in the code.
  */
 int tcpporthogdefense = 0;
 
-int	addreseq(Tcpctl*, Tcppriv*, Tcp*, Block*, ushort);
-void	getreseq(Tcpctl*, Tcp*, Block**, ushort*);
-void	localclose(Conv*, char*);
-void	procsyn(Conv*, Tcp*);
-void	tcpiput(Proto*, Ipifc*, Block*);
-void	tcpoutput(Conv*);
-int	tcptrim(Tcpctl*, Tcp*, Block**, ushort*);
-void	tcpstart(Conv*, int);
-void	tcptimeout(void*);
-void	tcpsndsyn(Conv*, Tcpctl*);
-void	tcprcvwin(Conv*);
-void	tcpacktimer(void*);
-void	tcpkeepalive(void*);
-void	tcpsetkacounter(Tcpctl*);
-void	tcprxmit(Conv*);
-void	tcpsettimer(Tcpctl*);
-void	tcpsynackrtt(Conv*);
-void	tcpsetscale(Conv*, Tcpctl*, ushort, ushort);
+static	int	addreseq(Fs*, Tcpctl*, Tcppriv*, Tcp*, Block*, ushort);
+static	int	dumpreseq(Tcpctl*);
+static	void	getreseq(Tcpctl*, Tcp*, Block**, ushort*);
+static	void	limbo(Conv*, uchar*, uchar*, Tcp*, int);
+static	void	limborexmit(Proto*);
+static	void	localclose(Conv*, char*);
+static	void	procsyn(Conv*, Tcp*);
+static	void	tcpacktimer(void*);
+static	void	tcpiput(Proto*, Ipifc*, Block*);
+static	void	tcpkeepalive(void*);
+static	void	tcpoutput(Conv*);
+static	void	tcprcvwin(Conv*);
+static	void	tcprxmit(Conv*);
+static	void	tcpsetkacounter(Tcpctl*);
+static	void	tcpsetscale(Conv*, Tcpctl*, ushort, ushort);
+static	void	tcpsettimer(Tcpctl*);
+static	void	tcpsndsyn(Conv*, Tcpctl*);
+static	void	tcpstart(Conv*, int);
+static	void	tcpsynackrtt(Conv*);
+static	void	tcptimeout(void*);
+static	int	tcptrim(Tcpctl*, Tcp*, Block**, ushort*);
 
-static void limborexmit(Proto*);
-static void limbo(Conv*, uchar*, uchar*, Tcp*, int);
-
-void
+static void
 tcpsetstate(Conv *s, uchar newstate)
 {
 	Tcpctl *tcb;
@@ -403,11 +447,6 @@
 	if(newstate == Established)
 		tpriv->stats[CurrEstab]++;
 
-	/**
-	print( "%d/%d %s->%s CurrEstab=%d\n", s->lport, s->rport,
-		tcpstates[oldstate], tcpstates[newstate], tpriv->tstats.tcpCurrEstab );
-	**/
-
 	switch(newstate) {
 	case Closed:
 		qclose(s->rq);
@@ -430,7 +469,12 @@
 tcpconnect(Conv *c, char **argv, int argc)
 {
 	char *e;
+	Tcpctl *tcb;
 
+	tcb = (Tcpctl*)(c->ptcl);
+	if(tcb->state != Closed)
+		return Econinuse;
+
 	e = Fsstdconnect(c, argv, argc);
 	if(e != nil)
 		return e;
@@ -447,12 +491,14 @@
 	s = (Tcpctl*)(c->ptcl);
 
 	return snprint(state, n,
-		"%s qin %d qout %d srtt %d mdev %d cwin %lud swin %lud>>%d rwin %lud>>%d timer.start %d timer.count %d rerecv %d katimer.start %d katimer.count %d\n",
+		"%s qin %d qout %d rq %d.%d srtt %d mdev %d sst %lud cwin %lud swin %lud>>%d rwin %lud>>%d qscale %d timer.start %d timer.count %d rerecv %d katimer.start %d katimer.count %d\n",
 		tcpstates[s->state],
 		c->rq ? qlen(c->rq) : 0,
 		c->wq ? qlen(c->wq) : 0,
-		s->srtt, s->mdev,
-		s->cwind, s->snd.wnd, s->rcv.scale, s->rcv.wnd, s->snd.scale,
+		s->nreseq, s->reseqlen,
+		s->srtt, s->mdev, s->ssthresh,
+		s->cwind, s->snd.wnd, s->snd.scale, s->rcv.wnd, s->rcv.scale,
+		s->qscale,
 		s->timer.start, s->timer.count, s->rerecv,
 		s->katimer.start, s->katimer.count);
 }
@@ -470,7 +516,12 @@
 tcpannounce(Conv *c, char **argv, int argc)
 {
 	char *e;
+	Tcpctl *tcb;
 
+	tcb = (Tcpctl*)(c->ptcl);
+	if(tcb->state != Closed)
+		return Econinuse;
+
 	e = Fsstdannounce(c, argv, argc);
 	if(e != nil)
 		return e;
@@ -524,7 +575,7 @@
 	}
 }
 
-void
+static void
 tcpkick(void *x)
 {
 	Conv *s = x;
@@ -546,7 +597,6 @@
 		/*
 		 * Push data
 		 */
-		tcprcvwin(s);
 		tcpoutput(s);
 		break;
 	default:
@@ -558,7 +608,9 @@
 	poperror();
 }
 
-void
+static int seq_lt(ulong, ulong);
+
+static void
 tcprcvwin(Conv *s)				/* Call with tcb locked */
 {
 	int w;
@@ -568,12 +620,20 @@
 	w = tcb->window - qlen(s->rq);
 	if(w < 0)
 		w = 0;
-	tcb->rcv.wnd = w;
-	if(w == 0)
+	/* RFC 1122 § 4.2.2.16 do not move right edge of window left */
+	if(seq_lt(tcb->rcv.nxt + w, tcb->rcv.wptr))
+		w = tcb->rcv.wptr - tcb->rcv.nxt;
+	if(w != tcb->rcv.wnd)
+	if(w>>tcb->rcv.scale == 0 || tcb->window > 4*tcb->mss && w < tcb->mss/4){
 		tcb->rcv.blocked = 1;
+		netlog(s->p->f, Logtcp, "tcprcvwin: window %lud qlen %d ws %ud lport %d\n",
+			tcb->window, qlen(s->rq), tcb->rcv.scale, s->lport);
+	}
+	tcb->rcv.wnd = w;
+	tcb->rcv.wptr = tcb->rcv.nxt + w;
 }
 
-void
+static void
 tcpacktimer(void *v)
 {
 	Tcpctl *tcb;
@@ -589,7 +649,6 @@
 	qlock(s);
 	if(tcb->state != Closed){
 		tcb->flags |= FORCE;
-		tcprcvwin(s);
 		tcpoutput(s);
 	}
 	qunlock(s);
@@ -597,10 +656,52 @@
 }
 
 static void
+tcpcongestion(Tcpctl *tcb)
+{
+	ulong inflight;
+
+	inflight = tcb->snd.nxt - tcb->snd.una;
+	if(inflight > tcb->cwind)
+		inflight = tcb->cwind;
+	tcb->ssthresh = inflight / 2;
+	if(tcb->ssthresh < 2*tcb->mss)
+		tcb->ssthresh = 2*tcb->mss;
+}
+
+enum {
+	L		= 2,		/* aggressive slow start; legal values ∈ [1, 2] per rfc 3465 */
+};
+
+static void
+tcpabcincr(Tcpctl *tcb, uint acked)
+{
+	uint limit;
+
+	tcb->abcbytes += acked;
+	if(tcb->cwind < tcb->ssthresh){
+		/* slow start */
+		if(tcb->snd.rto)
+			limit = 1*tcb->mss;
+		else
+			limit = L*tcb->mss;
+		tcb->cwind += MIN(tcb->abcbytes, limit);
+		tcb->abcbytes = 0;
+	}
+	else{
+		tcb->snd.rto = 0;
+		/* avoidance */
+		if(tcb->abcbytes >= tcb->cwind){
+			tcb->abcbytes -= tcb->cwind;
+			tcb->cwind += tcb->mss;
+		}
+	}
+}
+
+static void
 tcpcreate(Conv *c)
 {
 	c->rq = qopen(QMAX, Qcoalesce, tcpacktimer, c);
-	c->wq = qopen((3*QMAX)/2, Qkick, tcpkick, c);
+	c->wq = qopen(QMAX, Qkick, tcpkick, c);
 }
 
 static void
@@ -608,7 +709,7 @@
 {
 	if(newstate != TcptimerON){
 		if(t->state == TcptimerON){
-			// unchain
+			/* unchain */
 			if(priv->timers == t){
 				priv->timers = t->next;
 				if(t->prev != nil)
@@ -622,7 +723,7 @@
 		}
 	} else {
 		if(t->state != TcptimerON){
-			// chain
+			/* chain */
 			if(t->prev != nil || t->next != nil)
 				panic("timerstate2");
 			t->prev = nil;
@@ -635,7 +736,7 @@
 	t->state = newstate;
 }
 
-void
+static void
 tcpackproc(void *a)
 {
 	Tcptimer *t, *tp, *timeo;
@@ -646,6 +747,9 @@
 	tcp = a;
 	priv = tcp->priv;
 
+	while(waserror())
+		;
+
 	for(;;) {
 		tsleep(&up->sleep, return0, 0, MSPTICK);
 
@@ -681,7 +785,7 @@
 	}
 }
 
-void
+static void
 tcpgo(Tcppriv *priv, Tcptimer *t)
 {
 	if(t == nil || t->start == 0)
@@ -693,7 +797,7 @@
 	qunlock(&priv->tl);
 }
 
-void
+static void
 tcphalt(Tcppriv *priv, Tcptimer *t)
 {
 	if(t == nil)
@@ -704,17 +808,16 @@
 	qunlock(&priv->tl);
 }
 
-int
+static int
 backoff(int n)
 {
 	return 1 << n;
 }
 
-void
+static void
 localclose(Conv *s, char *reason)	/* called with tcb locked */
 {
 	Tcpctl *tcb;
-	Reseq *rp,*rp1;
 	Tcppriv *tpriv;
 
 	tpriv = s->p->priv;
@@ -728,12 +831,7 @@
 	tcphalt(tpriv, &tcb->katimer);
 
 	/* Flush reassembly queue; nothing more can arrive */
-	for(rp = tcb->reseq; rp != nil; rp = rp1) {
-		rp1 = rp->next;
-		freeblist(rp->bp);
-		free(rp);
-	}
-	tcb->reseq = nil;
+	dumpreseq(tcb);
 
 	if(tcb->state == Syn_sent)
 		Fsconnected(s, reason);
@@ -747,45 +845,46 @@
 }
 
 /* mtu (- TCP + IP hdr len) of 1st hop */
-int
-tcpmtu(Proto *tcp, uchar *addr, int version, int *scale)
+static int
+tcpmtu(Route *r, int version, uint *scale)
 {
 	Ipifc *ifc;
 	int mtu;
 
-	ifc = findipifc(tcp->f, addr, 0);
-	switch(version){
-	default:
-	case V4:
-		mtu = DEF_MSS;
-		if(ifc != nil)
-			mtu = ifc->maxtu - ifc->m->hsize - (TCP4_PKT + TCP4_HDRSIZE);
-		break;
-	case V6:
-		mtu = DEF_MSS6;
-		if(ifc != nil)
-			mtu = ifc->maxtu - ifc->m->hsize - (TCP6_PKT + TCP6_HDRSIZE);
-		break;
-	}
-	if(ifc != nil){
-		if(ifc->mbps > 100)
-			*scale = HaveWS | 3;
-		else if(ifc->mbps > 10)
-			*scale = HaveWS | 1;
-		else
-			*scale = HaveWS | 0;
-	} else
-		*scale = HaveWS | 0;
+	/*
+	 * set the ws.  it doesn't commit us to anything.
+	 * ws is the ultimate limit to the bandwidth-delay product.
+	 */
+	*scale = Defadvscale;
 
-	return mtu;
+	/*
+	 * currently we do not implement path MTU discovery
+	 * so use interface MTU *only* if directly reachable
+	 * or when we use V4 which allows routers to fragment.
+	 * otherwise, we use the default MSS which assumes a
+	 * safe minimum MTU of 1280 bytes for V6.
+	 */  
+	if(r != nil && (ifc = r->ifc) != nil){
+		mtu = ifc->maxtu - ifc->m->hsize;
+		if(version == V4)
+			return mtu - (TCP4_PKT + TCP4_HDRSIZE);
+		mtu -= TCP6_PKT + TCP6_HDRSIZE;
+		if((r->type & (Rifc|Runi)) != 0 || mtu <= DEF_MSS6)
+			return mtu;
+	}
+	if(version == V6)
+		return DEF_MSS6;
+	else
+		return DEF_MSS;
 }
 
-void
+static void
 inittcpctl(Conv *s, int mode)
 {
 	Tcpctl *tcb;
 	Tcp4hdr* h4;
 	Tcp6hdr* h6;
+	Tcppriv *tpriv;
 	int mss;
 
 	tcb = (Tcpctl*)s->ptcl;
@@ -792,7 +891,7 @@
 
 	memset(tcb, 0, sizeof(Tcpctl));
 
-	tcb->ssthresh = 65535;
+	tcb->ssthresh = QMAX;			/* reset by tcpsetscale() */
 	tcb->srtt = tcp_irtt<<LOGAGAIN;
 	tcb->mdev = 0;
 
@@ -841,19 +940,18 @@
 	}
 
 	tcb->mss = tcb->cwind = mss;
+	tcb->abcbytes = 0;
+	tpriv = s->p->priv;
+	tpriv->stats[Mss] = tcb->mss;
 
 	/* default is no window scaling */
-	tcb->window = QMAX;
-	tcb->rcv.wnd = QMAX;
-	tcb->rcv.scale = 0;
-	tcb->snd.scale = 0;
-	qsetlimit(s->rq, QMAX);
+	tcpsetscale(s, tcb, 0, 0);
 }
 
 /*
  *  called with s qlocked
  */
-void
+static void
 tcpstart(Conv *s, int mode)
 {
 	Tcpctl *tcb;
@@ -865,8 +963,8 @@
 	if(tpriv->ackprocstarted == 0){
 		qlock(&tpriv->apl);
 		if(tpriv->ackprocstarted == 0){
-			sprint(kpname, "#I%dtcpack", s->p->f->dev);
-			kproc(kpname, tcpackproc, s->p, 0);
+			snprint(kpname, sizeof(kpname), "#I%dtcpack", s->p->f->dev);
+			kproc(kpname, tcpackproc, s->p);
 			tpriv->ackprocstarted = 1;
 		}
 		qunlock(&tpriv->apl);
@@ -895,28 +993,28 @@
 }
 
 static char*
-tcpflag(ushort flag)
+tcpflag(char *buf, char *e, ushort flag)
 {
-	static char buf[128];
+	char *p;
 
-	sprint(buf, "%d", flag>>10);	/* Head len */
+	p = seprint(buf, e, "%d", flag>>10);	/* Head len */
 	if(flag & URG)
-		strcat(buf, " URG");
+		p = seprint(p, e, " URG");
 	if(flag & ACK)
-		strcat(buf, " ACK");
+		p = seprint(p, e, " ACK");
 	if(flag & PSH)
-		strcat(buf, " PSH");
+		p = seprint(p, e, " PSH");
 	if(flag & RST)
-		strcat(buf, " RST");
+		p = seprint(p, e, " RST");
 	if(flag & SYN)
-		strcat(buf, " SYN");
+		p = seprint(p, e, " SYN");
 	if(flag & FIN)
-		strcat(buf, " FIN");
-
+		p = seprint(p, e, " FIN");
+	USED(p);
 	return buf;
 }
 
-Block *
+static Block*
 htontcp6(Tcp *tcph, Block *data, Tcp6hdr *ph, Tcpctl *tcb)
 {
 	int dlen;
@@ -940,14 +1038,10 @@
 	if(data) {
 		dlen = blocklen(data);
 		data = padblock(data, hdrlen + TCP6_PKT);
-		if(data == nil)
-			return nil;
 	}
 	else {
 		dlen = 0;
 		data = allocb(hdrlen + TCP6_PKT + 64);	/* the 64 pad is to meet mintu's */
-		if(data == nil)
-			return nil;
 		data->wp += hdrlen + TCP6_PKT;
 	}
 
@@ -1000,7 +1094,7 @@
 	return data;
 }
 
-Block *
+static Block*
 htontcp4(Tcp *tcph, Block *data, Tcp4hdr *ph, Tcpctl *tcb)
 {
 	int dlen;
@@ -1013,7 +1107,7 @@
 	if(tcph->flags & SYN){
 		if(tcph->mss)
 			hdrlen += MSS_LENGTH;
-		if(tcph->ws)
+		if(1)
 			hdrlen += WS_LENGTH;
 		optpad = hdrlen & 3;
 		if(optpad)
@@ -1024,14 +1118,10 @@
 	if(data) {
 		dlen = blocklen(data);
 		data = padblock(data, hdrlen + TCP4_PKT);
-		if(data == nil)
-			return nil;
 	}
 	else {
 		dlen = 0;
 		data = allocb(hdrlen + TCP4_PKT + 64);	/* the 64 pad is to meet mintu's */
-		if(data == nil)
-			return nil;
 		data->wp += hdrlen + TCP4_PKT;
 	}
 
@@ -1055,7 +1145,8 @@
 			hnputs(opt, tcph->mss);
 			opt += 2;
 		}
-		if(tcph->ws != 0){
+		/* always offer.  rfc1323 §2.2 */
+		if(1){
 			*opt++ = WSOPT;
 			*opt++ = WS_LENGTH;
 			*opt++ = tcph->ws;
@@ -1074,7 +1165,7 @@
 	return data;
 }
 
-int
+static int
 ntohtcp6(Tcp *tcph, Block **bpp)
 {
 	Tcp6hdr *h;
@@ -1103,6 +1194,7 @@
 	tcph->urg = nhgets(h->tcpurg);
 	tcph->mss = 0;
 	tcph->ws = 0;
+	tcph->update = 0;
 	tcph->len = nhgets(h->ploadlen) - hdrlen;
 
 	*bpp = pullupblock(*bpp, hdrlen+TCP6_PKT);
@@ -1127,7 +1219,7 @@
 			break;
 		case WSOPT:
 			if(optlen == WS_LENGTH && *(optr+2) <= 14)
-				tcph->ws = HaveWS | *(optr+2);
+				tcph->ws = *(optr+2);
 			break;
 		}
 		n -= optlen;
@@ -1136,7 +1228,7 @@
 	return hdrlen;
 }
 
-int
+static int
 ntohtcp4(Tcp *tcph, Block **bpp)
 {
 	Tcp4hdr *h;
@@ -1166,6 +1258,7 @@
 	tcph->urg = nhgets(h->tcpurg);
 	tcph->mss = 0;
 	tcph->ws = 0;
+	tcph->update = 0;
 	tcph->len = nhgets(h->length) - (hdrlen + TCP4_PKT);
 
 	*bpp = pullupblock(*bpp, hdrlen+TCP4_PKT);
@@ -1190,7 +1283,7 @@
 			break;
 		case WSOPT:
 			if(optlen == WS_LENGTH && *(optr+2) <= 14)
-				tcph->ws = HaveWS | *(optr+2);
+				tcph->ws = *(optr+2);
 			break;
 		}
 		n -= optlen;
@@ -1200,16 +1293,19 @@
 }
 
 /*
- *  For outgiing calls, generate an initial sequence
+ *  For outgoing calls, generate an initial sequence
  *  number and put a SYN on the send queue
  */
-void
+static void
 tcpsndsyn(Conv *s, Tcpctl *tcb)
 {
+	Tcppriv *tpriv;
+
 	tcb->iss = (nrand(1<<16)<<16)|nrand(1<<16);
 	tcb->rttseq = tcb->iss;
 	tcb->snd.wl2 = tcb->iss;
 	tcb->snd.una = tcb->iss;
+	tcb->snd.rxt = tcb->iss;
 	tcb->snd.ptr = tcb->rttseq;
 	tcb->snd.nxt = tcb->rttseq;
 	tcb->flgcnt++;
@@ -1217,7 +1313,9 @@
 	tcb->sndsyntime = NOW;
 
 	/* set desired mss and scale */
-	tcb->mss = tcpmtu(s->p, s->laddr, s->ipversion, &tcb->scale);
+	tcb->mss = tcpmtu(v6lookup(s->p->f, s->raddr, s->laddr, s), s->ipversion, &tcb->scale);
+	tpriv = s->p->priv;
+	tpriv->stats[Mss] = tcb->mss;
 }
 
 void
@@ -1229,7 +1327,7 @@
 	Tcp4hdr ph4;
 	Tcp6hdr ph6;
 
-	netlog(tcp->f, Logtcp, "sndrst: %s", reason);
+	netlog(tcp->f, Logtcp, "sndrst: %s\n", reason);
 
 	tpriv = tcp->priv;
 
@@ -1307,7 +1405,7 @@
  *  send a reset to the remote side and close the conversation
  *  called with s qlocked
  */
-char*
+static char*
 tcphangup(Conv *s)
 {
 	Tcp seg;
@@ -1322,7 +1420,7 @@
 			memset(&seg, 0, sizeof seg);
 			seg.flags = RST | ACK;
 			seg.ack = tcb->rcv.nxt;
-			tcb->rcv.una = 0;
+			tcb->rcv.ackptr = seg.ack;
 			seg.seq = tcb->snd.ptr;
 			seg.wnd = 0;
 			seg.urg = 0;
@@ -1353,7 +1451,7 @@
 /*
  *  (re)send a SYN ACK
  */
-int
+static int
 sndsynack(Proto *tcp, Limbo *lp)
 {
 	Block *hbp;
@@ -1360,7 +1458,7 @@
 	Tcp4hdr ph4;
 	Tcp6hdr ph6;
 	Tcp seg;
-	int scale;
+	uint scale;
 
 	/* make pseudo header */
 	switch(lp->version) {
@@ -1388,11 +1486,12 @@
 		panic("sndrst: version %d", lp->version);
 	}
 
+	memset(&seg, 0, sizeof seg);
 	seg.seq = lp->iss;
 	seg.ack = lp->irs+1;
 	seg.flags = SYN|ACK;
 	seg.urg = 0;
-	seg.mss = tcpmtu(tcp, lp->laddr, lp->version, &scale);
+	seg.mss = tcpmtu(v6lookup(tcp->f, lp->raddr, lp->laddr, nil), lp->version, &scale);
 	seg.wnd = QMAX;
 
 	/* if the other side set scale, we should too */
@@ -1570,6 +1669,18 @@
 	}
 }
 
+static void
+initialwindow(Tcpctl *tcb)
+{
+	/* RFC 3390 initial window */
+	if(tcb->mss < 1095)
+		tcb->cwind = 4*tcb->mss;
+	else if(tcb->mss < 2190)
+		tcb->cwind = 4380;
+	else
+		tcb->cwind = 2*tcb->mss;
+}
+
 /*
  *  come here when we finally get an ACK to our SYN-ACK.
  *  lookup call in limbo.  if found, create a new conversation
@@ -1596,7 +1707,7 @@
 	/* find a call in limbo */
 	h = hashipa(src, segp->source);
 	for(l = &tpriv->lht[h]; (lp = *l) != nil; l = &lp->next){
-		netlog(s->p->f, Logtcp, "tcpincoming s %I,%ux/%I,%ux d %I,%ux/%I,%ux v %d/%d",
+		netlog(s->p->f, Logtcp, "tcpincoming s %I!%ud/%I!%ud d %I!%ud/%I!%ud v %d/%d\n",
 			src, segp->source, lp->raddr, lp->rport,
 			dst, segp->dest, lp->laddr, lp->lport,
 			version, lp->version
@@ -1611,7 +1722,7 @@
 
 		/* we're assuming no data with the initial SYN */
 		if(segp->seq != lp->irs+1 || segp->ack != lp->iss+1){
-			netlog(s->p->f, Logtcp, "tcpincoming s %lux/%lux a %lux %lux",
+			netlog(s->p->f, Logtcp, "tcpincoming s %lux/%lux a %lux %lux\n",
 				segp->seq, lp->irs+1, segp->ack, lp->iss+1);
 			lp = nil;
 		} else {
@@ -1641,6 +1752,8 @@
 
 	tcb->irs = lp->irs;
 	tcb->rcv.nxt = tcb->irs+1;
+	tcb->rcv.wptr = tcb->rcv.nxt;
+	tcb->rcv.wsnt = 0;
 	tcb->rcv.urg = tcb->rcv.nxt;
 
 	tcb->iss = lp->iss;
@@ -1649,19 +1762,24 @@
 	tcb->snd.una = tcb->iss+1;
 	tcb->snd.ptr = tcb->iss+1;
 	tcb->snd.nxt = tcb->iss+1;
+	tcb->snd.rxt = tcb->iss+1;
 	tcb->flgcnt = 0;
 	tcb->flags |= SYNACK;
 
+	/* set desired mss and scale */
+	tcb->mss = tcpmtu(v6lookup(s->p->f, src, dst, s), version, &tcb->scale);
+
 	/* our sending max segment size cannot be bigger than what he asked for */
 	if(lp->mss != 0 && lp->mss < tcb->mss)
 		tcb->mss = lp->mss;
+	tpriv->stats[Mss] = tcb->mss;
 
 	/* window scaling */
 	tcpsetscale(new, tcb, lp->rcvscale, lp->sndscale);
 
-	/* the congestion window always starts out as a single segment */
+	/* congestion window */
 	tcb->snd.wnd = segp->wnd;
-	tcb->cwind = tcb->mss;
+	initialwindow(tcb);
 
 	/* set initial round trip time */
 	tcb->sndsyntime = lp->lastsend+lp->rexmits*SYNACK_RXTIMER;
@@ -1700,7 +1818,7 @@
 	return new;
 }
 
-int
+static int
 seq_within(ulong x, ulong low, ulong high)
 {
 	if(low <= high){
@@ -1714,25 +1832,25 @@
 	return 0;
 }
 
-int
+static int
 seq_lt(ulong x, ulong y)
 {
 	return (int)(x-y) < 0;
 }
 
-int
+static int
 seq_le(ulong x, ulong y)
 {
 	return (int)(x-y) <= 0;
 }
 
-int
+static int
 seq_gt(ulong x, ulong y)
 {
 	return (int)(x-y) > 0;
 }
 
-int
+static int
 seq_ge(ulong x, ulong y)
 {
 	return (int)(x-y) >= 0;
@@ -1742,7 +1860,7 @@
  *  use the time between the first SYN and it's ack as the
  *  initial round trip time
  */
-void
+static void
 tcpsynackrtt(Conv *s)
 {
 	Tcpctl *tcb;
@@ -1760,46 +1878,59 @@
 	tcphalt(tpriv, &tcb->rtt_timer);
 }
 
-void
+static void
 update(Conv *s, Tcp *seg)
 {
 	int rtt, delta;
 	Tcpctl *tcb;
 	ulong acked;
-	ulong expand;
 	Tcppriv *tpriv;
 
+	if(seg->update)
+		return;
+	seg->update = 1;
+
 	tpriv = s->p->priv;
 	tcb = (Tcpctl*)s->ptcl;
 
-	/* if everything has been acked, force output(?) */
-	if(seq_gt(seg->ack, tcb->snd.nxt)) {
-		tcb->flags |= FORCE;
-		return;
+	/* catch zero-window updates, update window & recover */
+	if(tcb->snd.wnd == 0 && seg->wnd > 0)
+	if(seq_lt(seg->ack,  tcb->snd.ptr)){
+		netlog(s->p->f, Logtcp, "tcp: zwu ack %lud una %lud ptr %lud win %lud\n",
+			seg->ack,  tcb->snd.una, tcb->snd.ptr, seg->wnd);
+		tcb->snd.wnd = seg->wnd;
+		goto recovery;
 	}
 
-	/* added by Dong Lin for fast retransmission */
-	if(seg->ack == tcb->snd.una
-	&& tcb->snd.una != tcb->snd.nxt
-	&& seg->len == 0
-	&& seg->wnd == tcb->snd.wnd) {
-
-		/* this is a pure ack w/o window update */
-		netlog(s->p->f, Logtcprxmt, "dupack %lud ack %lud sndwnd %d advwin %d\n",
-			tcb->snd.dupacks, seg->ack, tcb->snd.wnd, seg->wnd);
-
-		if(++tcb->snd.dupacks == TCPREXMTTHRESH) {
-			/*
-			 *  tahoe tcp rxt the packet, half sshthresh,
- 			 *  and set cwnd to one packet
-			 */
+	/* newreno fast retransmit */
+	if(seg->ack == tcb->snd.una)
+	if(tcb->snd.una != tcb->snd.nxt)
+	if(++tcb->snd.dupacks == 3){
+recovery:
+		if(tcb->snd.recovery){
+			tpriv->stats[RecoveryCwind]++;
+			tcb->cwind += tcb->mss;
+		}else if(seq_le(tcb->snd.rxt, seg->ack)){
+			tpriv->stats[Recovery]++;
+			tcb->abcbytes = 0;
 			tcb->snd.recovery = 1;
+			tcb->snd.partialack = 0;
 			tcb->snd.rxt = tcb->snd.nxt;
-			netlog(s->p->f, Logtcprxmt, "fast rxt %lud, nxt %lud\n", tcb->snd.una, tcb->snd.nxt);
+			tcpcongestion(tcb);
+			tcb->cwind = tcb->ssthresh + 3*tcb->mss;
+			netlog(s->p->f, Logtcpwin, "recovery inflate %ld ss %ld @%lud\n",
+				tcb->cwind, tcb->ssthresh, tcb->snd.rxt);
 			tcprxmit(s);
-		} else {
-			/* do reno tcp here. */
+		}else{
+			tpriv->stats[RecoveryNoSeq]++;
+			netlog(s->p->f, Logtcpwin, "!recov %lud not ≤ %lud %ld\n",
+				tcb->snd.rxt, seg->ack, tcb->snd.rxt - seg->ack);
+			/* do not enter fast retransmit */
+			/* do not change ssthresh */
 		}
+	}else if(tcb->snd.recovery){
+		tpriv->stats[RecoveryCwind]++;
+		tcb->cwind += tcb->mss;
 	}
 
 	/*
@@ -1807,6 +1938,9 @@
 	 */
 	if(seq_gt(seg->ack, tcb->snd.wl2)
 	|| (tcb->snd.wl2 == seg->ack && seg->wnd > tcb->snd.wnd)){
+		/* clear dupack if we advance wl2 */
+		if(tcb->snd.wl2 != seg->ack)
+			tcb->snd.dupacks = 0;
 		tcb->snd.wnd = seg->wnd;
 		tcb->snd.wl2 = seg->ack;
 	}
@@ -1816,22 +1950,11 @@
 		 *  don't let us hangup if sending into a closed window and
 		 *  we're still getting acks
 		 */
-		if((tcb->flags&RETRAN) && tcb->snd.wnd == 0){
+		if((tcb->flags&RETRAN) && tcb->snd.wnd == 0)
 			tcb->backedoff = MAXBACKMS/4;
-		}
 		return;
 	}
 
-	/*
-	 *  any positive ack turns off fast rxt,
-	 *  (should we do new-reno on partial acks?)
-	 */
-	if(!tcb->snd.recovery || seq_ge(seg->ack, tcb->snd.rxt)) {
-		tcb->snd.dupacks = 0;
-		tcb->snd.recovery = 0;
-	} else
-		netlog(s->p->f, Logtcp, "rxt next %lud, cwin %ud\n", seg->ack, tcb->cwind);
-
 	/* Compute the new send window size */
 	acked = seg->ack - tcb->snd.una;
 
@@ -1843,24 +1966,41 @@
 		goto done;
 	}
 
-	/* slow start as long as we're not recovering from lost packets */
-	if(tcb->cwind < tcb->snd.wnd && !tcb->snd.recovery) {
-		if(tcb->cwind < tcb->ssthresh) {
-			expand = tcb->mss;
-			if(acked < expand)
-				expand = acked;
+	/*
+	 *  congestion control
+	 */
+	if(tcb->snd.recovery){
+		if(seq_ge(seg->ack, tcb->snd.rxt)){
+			/* recovery finished; deflate window */
+			tpriv->stats[RecoveryDone]++;
+			tcb->snd.dupacks = 0;
+			tcb->snd.recovery = 0;
+			tcb->cwind = (tcb->snd.nxt - tcb->snd.una) + tcb->mss;
+			if(tcb->ssthresh < tcb->cwind)
+				tcb->cwind = tcb->ssthresh;
+			netlog(s->p->f, Logtcpwin, "recovery deflate %ld %ld\n",
+				tcb->cwind, tcb->ssthresh);
+		} else {
+			/* partial ack; we lost more than one segment */
+			tpriv->stats[RecoveryPA]++;
+			if(tcb->cwind > acked)
+				tcb->cwind -= acked;
+			else{
+				netlog(s->p->f, Logtcpwin, "partial ack neg\n");
+				tcb->cwind = tcb->mss;
+			}
+			netlog(s->p->f, Logtcpwin, "partial ack %ld left %ld cwind %ld\n",
+				acked, tcb->snd.rxt - seg->ack, tcb->cwind);
+
+			if(acked >= tcb->mss)
+				tcb->cwind += tcb->mss;
+			tcb->snd.partialack++;
 		}
-		else
-			expand = ((int)tcb->mss * tcb->mss) / tcb->cwind;
+	} else
+		tcpabcincr(tcb, acked);
 
-		if(tcb->cwind + expand < tcb->cwind)
-			expand = tcb->snd.wnd - tcb->cwind;
-		if(tcb->cwind + expand > tcb->snd.wnd)
-			expand = tcb->snd.wnd - tcb->cwind;
-		tcb->cwind += expand;
-	}
-
 	/* Adjust the timers according to the round trip time */
+	/* todo: fix sloppy treatment of overflow cases here. */
 	if(tcb->rtt_timer.state == TcptimerON && seq_ge(seg->ack, tcb->rttseq)) {
 		tcphalt(tpriv, &tcb->rtt_timer);
 		if((tcb->flags&RETRAN) == 0) {
@@ -1891,13 +2031,23 @@
 done:
 	if(qdiscard(s->wq, acked) < acked)
 		tcb->flgcnt--;
-
 	tcb->snd.una = seg->ack;
+
+	/* newreno fast recovery */
+	if(tcb->snd.recovery)
+		tcprxmit(s);
+
 	if(seq_gt(seg->ack, tcb->snd.urg))
 		tcb->snd.urg = seg->ack;
 
-	if(tcb->snd.una != tcb->snd.nxt)
-		tcpgo(tpriv, &tcb->timer);
+	if(tcb->snd.una != tcb->snd.nxt){
+		/* “impatient” variant */
+		if(!tcb->snd.recovery || tcb->snd.partialack == 1){
+			tcb->time = NOW;
+			tcb->timeuna = tcb->snd.una;
+			tcpgo(tpriv, &tcb->timer);
+		}
+	}
 	else
 		tcphalt(tpriv, &tcb->timer);
 
@@ -1904,12 +2054,13 @@
 	if(seq_lt(tcb->snd.ptr, tcb->snd.una))
 		tcb->snd.ptr = tcb->snd.una;
 
-	tcb->flags &= ~RETRAN;
+	if(!tcb->snd.recovery)
+		tcb->flags &= ~RETRAN;
 	tcb->backoff = 0;
 	tcb->backedoff = 0;
 }
 
-void
+static void
 tcpiput(Proto *tcp, Ipifc*, Block *bp)
 {
 	Tcp seg;
@@ -1917,7 +2068,7 @@
 	Tcp6hdr *h6;
 	int hdrlen;
 	Tcpctl *tcb;
-	ushort length;
+	ushort length, csum;
 	uchar source[IPaddrlen], dest[IPaddrlen];
 	Conv *s;
 	Fs *f;
@@ -1980,10 +2131,12 @@
 		h6->ttl = proto;
 		hnputl(h6->vcf, length);
 		if((h6->tcpcksum[0] || h6->tcpcksum[1]) &&
-			ptclcsum(bp, TCP6_IPLEN, length+TCP6_PHDRSIZE)) {
+		    (csum = ptclcsum(bp, TCP6_IPLEN, length+TCP6_PHDRSIZE)) != 0) {
 			tpriv->stats[CsumErrs]++;
 			tpriv->stats[InErrs]++;
-			netlog(f, Logtcp, "bad tcp proto cksum\n");
+			netlog(f, Logtcp,
+			    "bad tcpv6 proto cksum: got %#ux, computed %#ux\n",
+				h6->tcpcksum[0]<<8 | h6->tcpcksum[1], csum);
 			freeblist(bp);
 			return;
 		}
@@ -1995,7 +2148,7 @@
 		if(hdrlen < 0){
 			tpriv->stats[HlenErrs]++;
 			tpriv->stats[InErrs]++;
-			netlog(f, Logtcp, "bad tcp hdr len\n");
+			netlog(f, Logtcp, "bad tcpv6 hdr len\n");
 			return;
 		}
 
@@ -2005,7 +2158,7 @@
 		if(bp == nil){
 			tpriv->stats[LenErrs]++;
 			tpriv->stats[InErrs]++;
-			netlog(f, Logtcp, "tcp len < 0 after trim\n");
+			netlog(f, Logtcp, "tcpv6 len < 0 after trim\n");
 			return;
 		}
 	}
@@ -2016,7 +2169,8 @@
 	/* Look for a matching conversation */
 	s = iphtlook(&tpriv->ht, source, seg.source, dest, seg.dest);
 	if(s == nil){
-		netlog(f, Logtcp, "iphtlook failed");
+		netlog(f, Logtcp, "iphtlook(src %I!%d, dst %I!%d) failed\n",
+			source, seg.source, dest, seg.dest);
 reset:
 		qunlock(tcp);
 		sndrst(tcp, source, dest, length, &seg, version, "no conversation");
@@ -2136,8 +2290,12 @@
 	}
 
 	/* Cut the data to fit the receive window */
+	tcprcvwin(s);
 	if(tcptrim(tcb, &seg, &bp, &length) == -1) {
-		netlog(f, Logtcp, "tcp len < 0, %lud %d\n", seg.seq, length);
+		if(seg.seq+1 != tcb->rcv.nxt || length != 1)
+		netlog(f, Logtcp, "tcp: trim: !inwind: seq %lud-%lud win %lud-%lud l %d from %I\n", 
+			seg.seq, seg.seq + length - 1, 
+			tcb->rcv.nxt, tcb->rcv.nxt + tcb->rcv.wnd-1, length, s->raddr);
 		update(s, &seg);
 		if(qlen(s->wq)+tcb->flgcnt == 0 && tcb->state == Closing) {
 			tcphalt(tpriv, &tcb->rtt_timer);
@@ -2168,12 +2326,15 @@
 	if(seg.seq != tcb->rcv.nxt)
 	if(length != 0 || (seg.flags & (SYN|FIN))) {
 		update(s, &seg);
-		if(addreseq(tcb, tpriv, &seg, bp, length) < 0)
+		if(addreseq(f, tcb, tpriv, &seg, bp, length) < 0)
 			print("reseq %I.%d -> %I.%d\n", s->raddr, s->rport, s->laddr, s->lport);
-		tcb->flags |= FORCE;
+		tcb->flags |= FORCE;		/* force duplicate ack; RFC 5681 §3.2 */
 		goto output;
 	}
 
+	if(tcb->nreseq > 0)
+		tcb->flags |= FORCE;		/* filled hole in sequence space; RFC 5681 §3.2 */
+
 	/*
 	 *  keep looping till we've processed this packet plus any
 	 *  adjacent packets in the resequence queue
@@ -2238,7 +2399,8 @@
 				goto raise;
 			}
 		case Time_wait:
-			tcb->flags |= FORCE;
+			if(seg.flags & FIN)
+				tcb->flags |= FORCE;
 			if(tcb->timer.state != TcptimerON)
 				tcpgo(tpriv, &tcb->timer);
 		}
@@ -2272,34 +2434,12 @@
 				 * receive queue
 				 */
 				if(bp) {
-					bp = packblock(bp);
-					if(bp == nil)
-						panic("tcp packblock");
-					qpassnolim(s->rq, bp);
+					qpassnolim(s->rq, packblock(bp));
 					bp = nil;
-
-					/*
-					 *  Force an ack every 2 data messages.  This is
-					 *  a hack for rob to make his home system run
-					 *  faster.
-					 *
-					 *  this also keeps the standard TCP congestion
-					 *  control working since it needs an ack every
-					 *  2 max segs worth.  This is not quite that,
-					 *  but under a real stream is equivalent since
-					 *  every packet has a max seg in it.
-					 */
-					if(++(tcb->rcv.una) >= 2)
-						tcb->flags |= FORCE;
 				}
 				tcb->rcv.nxt += length;
 
 				/*
-				 *  update our rcv window
-				 */
-				tcprcvwin(s);
-
-				/*
 				 *  turn on the acktimer if there's something
 				 *  to ack
 				 */
@@ -2373,8 +2513,11 @@
 
 			getreseq(tcb, &seg, &bp, &length);
 
-			if(tcptrim(tcb, &seg, &bp, &length) == 0)
+			tcprcvwin(s);
+			if(tcptrim(tcb, &seg, &bp, &length) == 0){
+				tcb->flags |= FORCE;
 				break;
+			}
 		}
 	}
 output:
@@ -2394,15 +2537,15 @@
  *  the lock to ipoput the packet so some care has to be
  *  taken by callers.
  */
-void
+static void
 tcpoutput(Conv *s)
 {
 	Tcp seg;
-	int msgs;
+	uint msgs;
 	Tcpctl *tcb;
 	Block *hbp, *bp;
-	int sndcnt, n;
-	ulong ssize, dsize, usable, sent;
+	int sndcnt;
+	ulong ssize, dsize, sent;
 	Fs *f;
 	Tcppriv *tpriv;
 	uchar version;
@@ -2411,9 +2554,26 @@
 	tpriv = s->p->priv;
 	version = s->ipversion;
 
-	for(msgs = 0; msgs < 100; msgs++) {
-		tcb = (Tcpctl*)s->ptcl;
+	tcb = (Tcpctl*)s->ptcl;
 
+	/* force ack every 2*mss */
+	if((tcb->flags & FORCE) == 0)
+	if(tcb->rcv.nxt - tcb->rcv.ackptr >= 2*tcb->mss){
+		tpriv->stats[Delayack]++;
+		tcb->flags |= FORCE;
+	}
+
+	/* force ack if window opening */
+	if(0)
+	if((tcb->flags & FORCE) == 0){
+		tcprcvwin(s);
+		if((int)(tcb->rcv.wptr - tcb->rcv.wsnt) >= 2*tcb->mss){
+			tpriv->stats[Wopenack]++;
+			tcb->flags |= FORCE;
+		}
+	}
+
+	for(msgs = 0; msgs < 100; msgs++) {
 		switch(tcb->state) {
 		case Listen:
 		case Closed:
@@ -2421,7 +2581,12 @@
 			return;
 		}
 
+		/* Don't send anything else until our SYN has been acked */
+		if(tcb->snd.ptr != tcb->iss && (tcb->flags & SYNACK) == 0)
+			break;
+
 		/* force an ack when a window has opened up */
+		tcprcvwin(s);
 		if(tcb->rcv.blocked && tcb->rcv.wnd > 0){
 			tcb->rcv.blocked = 0;
 			tcb->flags |= FORCE;
@@ -2429,54 +2594,57 @@
 
 		sndcnt = qlen(s->wq)+tcb->flgcnt;
 		sent = tcb->snd.ptr - tcb->snd.una;
-
-		/* Don't send anything else until our SYN has been acked */
-		if(tcb->snd.ptr != tcb->iss && (tcb->flags & SYNACK) == 0)
-			break;
-
-		/* Compute usable segment based on offered window and limit
-		 * window probes to one
-		 */
+		ssize = sndcnt;
 		if(tcb->snd.wnd == 0){
-			if(sent != 0) {
-				if((tcb->flags&FORCE) == 0)
-					break;
-//				tcb->snd.ptr = tcb->snd.una;
+			/* zero window probe */
+			if(sent > 0)
+			if(!(tcb->flags & FORCE))
+				break;	/* already probing, rto re-probes */
+			if(ssize < sent)
+				ssize = 0;
+			else{
+				ssize -= sent;
+				if(ssize > 0)
+					ssize = 1;
 			}
-			usable = 1;
+		} else {
+			/* calculate usable segment size */
+			if(ssize > tcb->cwind)
+				ssize = tcb->cwind;
+			if(ssize > tcb->snd.wnd)
+				ssize = tcb->snd.wnd;
+
+			if(ssize < sent)
+				ssize = 0;
+			else {
+				ssize -= sent;
+				if(ssize > tcb->mss)
+					ssize = tcb->mss;
+			}
 		}
-		else {
-			usable = tcb->cwind;
-			if(tcb->snd.wnd < usable)
-				usable = tcb->snd.wnd;
-			usable -= sent;
-		}
-		ssize = sndcnt-sent;
-		if(ssize && usable < 2)
-			netlog(s->p->f, Logtcp, "throttled snd.wnd %lud cwind %lud\n",
-				tcb->snd.wnd, tcb->cwind);
-		if(usable < ssize)
-			ssize = usable;
-		if(tcb->mss < ssize)
-			ssize = tcb->mss;
+
 		dsize = ssize;
 		seg.urg = 0;
 
-		if(ssize == 0)
-		if((tcb->flags&FORCE) == 0)
-			break;
+		if(!(tcb->flags & FORCE)){
+			if(ssize == 0)
+				break;
+			if(ssize < tcb->mss)
+			if(tcb->snd.nxt == tcb->snd.ptr)
+			if(sent > TCPREXMTTHRESH*tcb->mss)
+				break;
+		}
 
 		tcb->flags &= ~FORCE;
-		tcprcvwin(s);
 
 		/* By default we will generate an ack */
 		tcphalt(tpriv, &tcb->acktimer);
-		tcb->rcv.una = 0;
 		seg.source = s->lport;
 		seg.dest = s->rport;
 		seg.flags = ACK;
 		seg.mss = 0;
 		seg.ws = 0;
+		seg.update = 0;
 		switch(tcb->state){
 		case Syn_sent:
 			seg.flags = 0;
@@ -2516,20 +2684,9 @@
 			}
 		}
 
-		if(sent+dsize == sndcnt)
+		if(sent+dsize == sndcnt && dsize)
 			seg.flags |= PSH;
 
-		/* keep track of balance of resent data */
-		if(seq_lt(tcb->snd.ptr, tcb->snd.nxt)) {
-			n = tcb->snd.nxt - tcb->snd.ptr;
-			if(ssize < n)
-				n = ssize;
-			tcb->resent += n;
-			netlog(f, Logtcp, "rexmit: %I.%d -> %I.%d ptr %lux nxt %lux\n",
-				s->raddr, s->rport, s->laddr, s->lport, tcb->snd.ptr, tcb->snd.nxt);
-			tpriv->stats[RetransSegs]++;
-		}
-
 		tcb->snd.ptr += ssize;
 
 		/* Pull up the send pointer so we can accept acks
@@ -2565,13 +2722,17 @@
 		 * expect acknowledges
 		 */
 		if(ssize != 0){
-			if(tcb->timer.state != TcptimerON)
+			if(tcb->timer.state != TcptimerON){
+				tcb->time = NOW;
+				tcb->timeuna = tcb->snd.una;
 				tcpgo(tpriv, &tcb->timer);
+			}
 
 			/*  If round trip timer isn't running, start it.
 			 *  measure the longest packet only in case the
 			 *  transmission time dominates RTT
 			 */
+			if(tcb->snd.retransmit == 0)
 			if(tcb->rtt_timer.state != TcptimerON)
 			if(ssize == tcb->mss) {
 				tcpgo(tpriv, &tcb->rtt_timer);
@@ -2580,6 +2741,10 @@
 		}
 
 		tpriv->stats[OutSegs]++;
+		if(tcb->snd.retransmit)
+			tpriv->stats[RetransSegsSent]++;
+		tcb->rcv.ackptr = seg.ack;
+		tcb->rcv.wsnt = tcb->rcv.wptr;
 
 		/* put off the next keep alive */
 		tcpgo(tpriv, &tcb->katimer);
@@ -2600,9 +2765,8 @@
 		default:
 			panic("tcpoutput2: version %d", version);
 		}
-		if((msgs%4) == 1){
+		if((msgs%4) == 3){
 			qunlock(s);
-			sched();
 			qlock(s);
 		}
 	}
@@ -2611,7 +2775,7 @@
 /*
  *  the BSD convention (hack?) for keep alives.  resend last uchar acked.
  */
-void
+static void
 tcpsendka(Conv *s)
 {
 	Tcp seg;
@@ -2621,6 +2785,7 @@
 	tcb = (Tcpctl*)s->ptcl;
 
 	dbp = nil;
+	memset(&seg, 0, sizeof seg);
 	seg.urg = 0;
 	seg.source = s->lport;
 	seg.dest = s->rport;
@@ -2632,7 +2797,8 @@
 	else
 		seg.seq = tcb->snd.una-1;
 	seg.ack = tcb->rcv.nxt;
-	tcb->rcv.una = 0;
+	tcb->rcv.ackptr = seg.ack;
+	tcprcvwin(s);
 	seg.wnd = tcb->rcv.wnd;
 	if(tcb->state == Finwait2){
 		seg.flags |= FIN;
@@ -2666,7 +2832,7 @@
 /*
  *  set connection to time out after 12 minutes
  */
-void
+static void
 tcpsetkacounter(Tcpctl *tcb)
 {
 	tcb->kacounter = (12 * 60 * 1000) / (tcb->katimer.start*MSPTICK);
@@ -2678,7 +2844,7 @@
  *  if we've timed out, close the connection
  *  otherwise, send a keepalive and restart the timer
  */
-void
+static void
 tcpkeepalive(void *v)
 {
 	Tcpctl *tcb;
@@ -2706,7 +2872,7 @@
 /*
  *  start keepalive timer
  */
-char*
+static char*
 tcpstartka(Conv *s, char **f, int n)
 {
 	Tcpctl *tcb;
@@ -2729,7 +2895,7 @@
 /*
  *  turn checksums on/off
  */
-char*
+static char*
 tcpsetchecksum(Conv *s, char **f, int)
 {
 	Tcpctl *tcb;
@@ -2740,30 +2906,38 @@
 	return nil;
 }
 
-void
+/*
+ *  retransmit (at most) one segment at snd.una.
+ *  preserve cwind & snd.ptr
+ */
+static void
 tcprxmit(Conv *s)
 {
 	Tcpctl *tcb;
+	Tcppriv *tpriv;
+	ulong tcwind, tptr;
 
 	tcb = (Tcpctl*)s->ptcl;
-
 	tcb->flags |= RETRAN|FORCE;
-	tcb->snd.ptr = tcb->snd.una;
 
-	/*
-	 *  We should be halving the slow start threshhold (down to one
-	 *  mss) but leaving it at mss seems to work well enough
-	 */
- 	tcb->ssthresh = tcb->mss;
-
-	/*
-	 *  pull window down to a single packet
-	 */
+	tptr = tcb->snd.ptr;
+	tcwind = tcb->cwind;
+	tcb->snd.ptr = tcb->snd.una;
 	tcb->cwind = tcb->mss;
+	tcb->snd.retransmit = 1;
 	tcpoutput(s);
+	tcb->snd.retransmit = 0;
+	tcb->cwind = tcwind;
+	tcb->snd.ptr = tptr;
+
+	tpriv = s->p->priv;
+	tpriv->stats[RetransSegs]++;
 }
 
-void
+/*
+ *  todo: RFC 4138 F-RTO
+ */
+static void
 tcptimeout(void *arg)
 {
 	Conv *s;
@@ -2792,11 +2966,29 @@
 			localclose(s, Etimedout);
 			break;
 		}
-		netlog(s->p->f, Logtcprxmt, "timeout rexmit 0x%lux %d/%d\n", tcb->snd.una, tcb->timer.start, NOW);
+		netlog(s->p->f, Logtcprxmt, "rxm %d/%d %ldms %lud rto %d %lud %s\n",
+			tcb->srtt, tcb->mdev, NOW-tcb->time,
+			tcb->snd.una-tcb->timeuna, tcb->snd.rto, tcb->snd.ptr,
+			tcpstates[s->state]);
 		tcpsettimer(tcb);
+		if(tcb->snd.rto == 0)
+			tcpcongestion(tcb);
 		tcprxmit(s);
+		tcb->snd.ptr = tcb->snd.una;
+		tcb->cwind = tcb->mss;
+		tcb->snd.rto = 1;
 		tpriv->stats[RetransTimeouts]++;
-		tcb->snd.dupacks = 0;
+
+		if(tcb->snd.recovery){
+			tcb->snd.dupacks = 0;			/* reno rto */
+			tcb->snd.recovery = 0;
+			tpriv->stats[RecoveryRTO]++;
+			tcb->snd.rxt = tcb->snd.nxt;
+			netlog(s->p->f, Logtcpwin,
+				"rto recovery rxt @%lud\n", tcb->snd.nxt);
+		}
+
+		tcb->abcbytes = 0;
 		break;
 	case Time_wait:
 		localclose(s, nil);
@@ -2808,7 +3000,7 @@
 	poperror();
 }
 
-int
+static int
 inwindow(Tcpctl *tcb, int seq)
 {
 	return seq_within(seq, tcb->rcv.nxt, tcb->rcv.nxt+tcb->rcv.wnd-1);
@@ -2817,36 +3009,83 @@
 /*
  *  set up state for a received SYN (or SYN ACK) packet
  */
-void
+static void
 procsyn(Conv *s, Tcp *seg)
 {
 	Tcpctl *tcb;
+	Tcppriv *tpriv;
 
 	tcb = (Tcpctl*)s->ptcl;
 	tcb->flags |= FORCE;
 
 	tcb->rcv.nxt = seg->seq + 1;
+	tcb->rcv.wptr = tcb->rcv.nxt;
+	tcb->rcv.wsnt = 0;
 	tcb->rcv.urg = tcb->rcv.nxt;
 	tcb->irs = seg->seq;
 
 	/* our sending max segment size cannot be bigger than what he asked for */
-	if(seg->mss != 0 && seg->mss < tcb->mss)
+	if(seg->mss != 0 && seg->mss < tcb->mss) {
 		tcb->mss = seg->mss;
+		tpriv = s->p->priv;
+		tpriv->stats[Mss] = tcb->mss;
+	}
 
-	/* the congestion window always starts out as a single segment */
+	/* if the server does not support ws option, disable window scaling */
+	if(seg->ws == 0){
+		tcb->scale = 0;
+		tcb->snd.scale = 0;
+	}
+
 	tcb->snd.wnd = seg->wnd;
-	tcb->cwind = tcb->mss;
+	initialwindow(tcb);
 }
 
-int
-addreseq(Tcpctl *tcb, Tcppriv *tpriv, Tcp *seg, Block *bp, ushort length)
+static int
+dumpreseq(Tcpctl *tcb)
 {
-	Reseq *rp, *rp1;
-	int i, rqlen, qmax;
+	Reseq *r, *next;
 
+	for(r = tcb->reseq; r != nil; r = next){
+		next = r->next;
+		freeblist(r->bp);
+		free(r);
+	}
+	tcb->reseq = nil;
+	tcb->nreseq = 0;
+	tcb->reseqlen = 0;
+	return -1;
+}
+
+static void
+logreseq(Fs *f, Reseq *r, ulong n)
+{
+	char *s;
+
+	for(; r != nil; r = r->next){
+		s = nil;
+		if(r->next == nil && r->seg.seq != n)
+			s = "hole/end";
+		else if(r->next == nil)
+			s = "end";
+		else if(r->seg.seq != n)
+			s = "hole";
+		if(s != nil)
+			netlog(f, Logtcp, "%s %lud-%lud (%ld) %#ux\n", s,
+				n, r->seg.seq, r->seg.seq-n, r->seg.flags);
+		n = r->seg.seq + r->seg.len;
+	}
+}
+
+static int
+addreseq(Fs *f, Tcpctl *tcb, Tcppriv *tpriv, Tcp *seg, Block *bp, ushort length)
+{
+	Reseq *rp, **rr;
+	int qmax;
+
 	rp = malloc(sizeof(Reseq));
 	if(rp == nil){
-		freeblist(bp);	/* bp always consumed by add_reseq */
+		freeblist(bp);	/* bp always consumed by addreseq */
 		return 0;
 	}
 
@@ -2854,56 +3093,39 @@
 	rp->bp = bp;
 	rp->length = length;
 
-	/* Place on reassembly list sorting by starting seq number */
-	rp1 = tcb->reseq;
-	if(rp1 == nil || seq_lt(seg->seq, rp1->seg.seq)) {
-		rp->next = rp1;
-		tcb->reseq = rp;
-		if(rp->next != nil)
-			tpriv->stats[OutOfOrder]++;
-		return 0;
-	}
+	tcb->reseqlen += length;
+	tcb->nreseq++;
 
-	rqlen = 0;
-	for(i = 0;; i++) {
-		rqlen += rp1->length;
-		if(rp1->next == nil || seq_lt(seg->seq, rp1->next->seg.seq)) {
-			rp->next = rp1->next;
-			rp1->next = rp;
+	/* Place on reassembly list sorting by starting seq number */
+	for(rr = &tcb->reseq;; rr = &(*rr)->next)
+		if(*rr == nil || seq_lt(seg->seq, (*rr)->seg.seq)){
+			rp->next = *rr;
+			*rr = rp;
+			tpriv->stats[Resequenced]++;
 			if(rp->next != nil)
 				tpriv->stats[OutOfOrder]++;
 			break;
 		}
-		rp1 = rp1->next;
-	}
-	qmax = QMAX<<tcb->rcv.scale;
-	if(rqlen > qmax){
-		print("resequence queue > window: %d > %d\n", rqlen, qmax);
-		i = 0;
-	  	for(rp1 = tcb->reseq; rp1 != nil; rp1 = rp1->next){
-	  		print("%#lux %#lux %#ux\n", rp1->seg.seq,
-	  			rp1->seg.ack, rp1->seg.flags);
-			if(i++ > 10){
-				print("...\n");
-				break;
-			}
-		}
 
-		// delete entire reassembly queue; wait for retransmit.
-		// - should we be smarter and only delete the tail?
-		for(rp = tcb->reseq; rp != nil; rp = rp1){
-			rp1 = rp->next;
-			freeblist(rp->bp);
-			free(rp);
-		}
-		tcb->reseq = nil;
-
-	  	return -1;
+	qmax = tcb->window;
+	if(tcb->reseqlen > qmax){
+		netlog(f, Logtcp, "tcp: reseq: queue > window: %d > %d; %d packets\n", tcb->reseqlen, qmax, tcb->nreseq);
+		logreseq(f, tcb->reseq, tcb->rcv.nxt);
+		tpriv->stats[ReseqBytelim]++;
+		return dumpreseq(tcb);
 	}
+	qmax = tcb->window / tcb->mss;		/* ~190 for qscale==2, 390 for qscale=3 */
+	if(tcb->nreseq > qmax){
+		netlog(f, Logtcp, "resequence queue > packets: %d %d; %d bytes\n", tcb->nreseq, qmax, tcb->reseqlen);
+		logreseq(f, tcb->reseq, tcb->rcv.nxt);
+		tpriv->stats[ReseqPktlim]++;
+		return dumpreseq(tcb);
+	}
+
 	return 0;
 }
 
-void
+static void
 getreseq(Tcpctl *tcb, Tcp *seg, Block **bp, ushort *length)
 {
 	Reseq *rp;
@@ -2918,10 +3140,13 @@
 	*bp = rp->bp;
 	*length = rp->length;
 
+	tcb->nreseq--;
+	tcb->reseqlen -= rp->length;
+
 	free(rp);
 }
 
-int
+static int
 tcptrim(Tcpctl *tcb, Tcp *seg, Block **bp, ushort *length)
 {
 	ushort len;
@@ -2992,7 +3217,7 @@
 	return 0;
 }
 
-void
+static void
 tcpadvise(Proto *tcp, Block *bp, char *msg)
 {
 	Tcp4hdr *h4;
@@ -3011,8 +3236,7 @@
 		v4tov6(source, h4->tcpsrc);
 		psource = nhgets(h4->tcpsport);
 		pdest = nhgets(h4->tcpdport);
-	}
-	else {
+	} else {
 		ipmove(dest, h6->tcpdst);
 		ipmove(source, h6->tcpsrc);
 		psource = nhgets(h6->tcpsport);
@@ -3021,8 +3245,7 @@
 
 	/* Look for a connection */
 	qlock(tcp);
-	for(p = tcp->conv; *p; p++) {
-		s = *p;
+	for(p = tcp->conv; (s = *p) != nil; p++) {
 		tcb = (Tcpctl*)s->ptcl;
 		if(s->rport == pdest)
 		if(s->lport == psource)
@@ -3029,6 +3252,8 @@
 		if(tcb->state != Closed)
 		if(ipcmp(s->raddr, dest) == 0)
 		if(ipcmp(s->laddr, source) == 0){
+			if(s->ignoreadvice)
+				break;
 			qlock(s);
 			qunlock(tcp);
 			switch(tcb->state){
@@ -3058,9 +3283,11 @@
 }
 
 /* called with c qlocked */
-char*
+static char*
 tcpctl(Conv* c, char** f, int n)
 {
+	if(n == 1 && strcmp(f[0], "close") == 0)
+		return tcpclose(c), nil;
 	if(n == 1 && strcmp(f[0], "hangup") == 0)
 		return tcphangup(c);
 	if(n >= 1 && strcmp(f[0], "keepalive") == 0)
@@ -3072,7 +3299,7 @@
 	return "unknown control request";
 }
 
-int
+static int
 tcpstats(Proto *tcp, char *buf, int len)
 {
 	Tcppriv *priv;
@@ -3083,7 +3310,7 @@
 	p = buf;
 	e = p+len;
 	for(i = 0; i < Nstats; i++)
-		p = seprint(p, e, "%s: %lud\n", statnames[i], priv->stats[i]);
+		p = seprint(p, e, "%s: %llud\n", statnames[i], priv->stats[i]);
 	return p - buf;
 }
 
@@ -3096,7 +3323,7 @@
  *  of questionable validity so we try to use them only when we're
  *  up against the wall.
  */
-int
+static int
 tcpgc(Proto *tcp)
 {
 	Conv *c, **pp, **ep;
@@ -3104,7 +3331,7 @@
 	Tcpctl *tcb;
 
 
-	n = natgc(tcp->ipproto);
+	n = 0;
 	ep = &tcp->conv[tcp->nc];
 	for(pp = tcp->conv; pp < ep; pp++) {
 		c = *pp;
@@ -3116,13 +3343,13 @@
 		switch(tcb->state){
 		case Syn_received:
 			if(NOW - tcb->time > 5000){
-				localclose(c, "timed out");
+				localclose(c, Etimedout);
 				n++;
 			}
 			break;
 		case Finwait2:
 			if(NOW - tcb->time > 5*60*1000){
-				localclose(c, "timed out");
+				localclose(c, Etimedout);
 				n++;
 			}
 			break;
@@ -3132,7 +3359,7 @@
 	return n;
 }
 
-void
+static void
 tcpsettimer(Tcpctl *tcb)
 {
 	int x;
@@ -3141,9 +3368,9 @@
 	x = backoff(tcb->backoff) *
 		(tcb->mdev + (tcb->srtt>>LOGAGAIN) + MSPTICK) / MSPTICK;
 
-	/* bounded twixt 1/2 and 64 seconds */
-	if(x < 500/MSPTICK)
-		x = 500/MSPTICK;
+	/* bounded twixt 0.3 and 64 seconds */
+	if(x < 300/MSPTICK)
+		x = 300/MSPTICK;
 	else if(x > (64000/MSPTICK))
 		x = 64000/MSPTICK;
 	tcb->timer.start = x;
@@ -3177,18 +3404,37 @@
 	Fsproto(fs, tcp);
 }
 
-void
+static void
 tcpsetscale(Conv *s, Tcpctl *tcb, ushort rcvscale, ushort sndscale)
 {
-	if(rcvscale){
-		tcb->rcv.scale = rcvscale & 0xff;
-		tcb->snd.scale = sndscale & 0xff;
-		tcb->window = QMAX<<tcb->snd.scale;
-		qsetlimit(s->rq, tcb->window);
-	} else {
-		tcb->rcv.scale = 0;
-		tcb->snd.scale = 0;
-		tcb->window = QMAX;
-		qsetlimit(s->rq, tcb->window);
-	}
+	/*
+	 * guess at reasonable queue sizes.  there's no current way 
+	 * to know how many nic receive buffers we can safely tie up in the
+	 * tcp stack, and we don't adjust our queues to maximize throughput
+	 * and minimize bufferbloat.  n.b. the offer (rcvscale) needs to be
+	 * respected, but we still control our own buffer commitment by
+	 * keeping a separate qscale.
+	 */
+	tcb->rcv.scale = rcvscale & 0xff;
+	tcb->snd.scale = sndscale & 0xff;
+	tcb->qscale = rcvscale & 0xff;
+	if(rcvscale > Maxqscale)
+		tcb->qscale = Maxqscale;
+
+	if(rcvscale != tcb->rcv.scale)
+		netlog(s->p->f, Logtcp, "tcpsetscale: window %lud qlen %d >> window %ud lport %d\n",
+			tcb->window, qlen(s->rq), QMAX<<tcb->qscale, s->lport);
+	tcb->window = QMAX<<tcb->qscale;
+	tcb->ssthresh = tcb->window;
+
+	/*
+	 * it's important to set wq large enough to cover the full
+	 * bandwidth-delay product.  it's possible to be in loss
+	 * recovery with a big window, and we need to keep sending
+	 * into the inflated window.  the difference can be huge
+	 * for even modest (70ms) ping times.
+	 */
+	qsetlimit(s->rq, QMAX<<tcb->qscale);
+	qsetlimit(s->wq, QMAX<<tcb->qscale);
+	tcprcvwin(s);
 }
diff -u a/os/ip//udp.c b/os/ip//udp.c
--- a/os/ip//udp.c
+++ b/os/ip//udp.c
@@ -24,7 +24,6 @@
 
 	IP_UDPPROTO	= 17,
 	UDP_USEAD7	= 52,
-	UDP_USEAD6	= 36,
 
 	Udprxms		= 200,
 	Udptickms	= 100,
@@ -40,7 +39,7 @@
 	uchar	length[2];	/* packet length */
 	uchar	id[2];		/* Identification */
 	uchar	frag[2];	/* Fragment information */
-	uchar	Unused;	
+	uchar	Unused;
 	uchar	udpproto;	/* Protocol */
 	uchar	udpplen[2];	/* Header plus data length */
 	uchar	udpsrc[IPv4addrlen];	/* Ip source */
@@ -73,10 +72,10 @@
 typedef struct Udpstats Udpstats;
 struct Udpstats
 {
-	ulong	udpInDatagrams;
+	uvlong	udpInDatagrams;
 	ulong	udpNoPorts;
 	ulong	udpInErrors;
-	ulong	udpOutDatagrams;
+	uvlong	udpOutDatagrams;
 };
 
 typedef struct Udppriv Udppriv;
@@ -101,7 +100,6 @@
 typedef struct Udpcb Udpcb;
 struct Udpcb
 {
-	QLock;
 	uchar	headers;
 };
 
@@ -125,7 +123,7 @@
 static int
 udpstate(Conv *c, char *state, int n)
 {
-	return snprint(state, n, "%s qin %d qout %d",
+	return snprint(state, n, "%s qin %d qout %d\n",
 		c->inuse ? "Open" : "Closed",
 		c->rq ? qlen(c->rq) : 0,
 		c->wq ? qlen(c->wq) : 0
@@ -151,7 +149,7 @@
 static void
 udpcreate(Conv *c)
 {
-	c->rq = qopen(64*1024, Qmsg, 0, 0);
+	c->rq = qopen(512*1024, Qmsg, 0, 0);
 	c->wq = qbypass(udpkick, c);
 }
 
@@ -175,8 +173,6 @@
 
 	ucb = (Udpcb*)c->ptcl;
 	ucb->headers = 0;
-
-	qunlock(c);
 }
 
 void
@@ -192,12 +188,13 @@
 	Udppriv *upriv;
 	Fs *f;
 	int version;
-	Conv *rc;
+	Routehint *rh;
+	ushort csum;
 
 	upriv = c->p->priv;
 	f = c->p->f;
 
-	netlog(c->p->f, Logudp, "udp: kick\n");
+//	netlog(c->p->f, Logudp, "udp: kick\n");	/* frequent and uninteresting */
 	if(bp == nil)
 		return;
 
@@ -219,21 +216,6 @@
 		rport = nhgets(bp->rp);
 		bp->rp += 2+2;			/* Ignore local port */
 		break;
-	case 6:
-		/* get user specified addresses */
-		bp = pullupblock(bp, UDP_USEAD6);
-		if(bp == nil)
-			return;
-		ipmove(raddr, bp->rp);
-		bp->rp += IPaddrlen;
-		ipmove(laddr, bp->rp);
-		bp->rp += IPaddrlen;
-		/* pick interface closest to dest */
-		if(ipforme(f, laddr) != Runi)
-			findlocalip(f, laddr, raddr);
-		rport = nhgets(bp->rp);
-		bp->rp += 2+2;			/* Ignore local port */
-		break;
 	default:
 		rport = 0;
 		break;
@@ -240,18 +222,12 @@
 	}
 
 	if(ucb->headers) {
-		if(memcmp(laddr, v4prefix, IPv4off) == 0 ||
-		    ipcmp(laddr, IPnoaddr) == 0)
+		if(isv4(laddr) || ipcmp(laddr, IPnoaddr) == 0)
 			version = V4;
 		else
 			version = V6;
 	} else {
-		if( (memcmp(c->raddr, v4prefix, IPv4off) == 0 &&
-			memcmp(c->laddr, v4prefix, IPv4off) == 0)
-			|| ipcmp(c->raddr, IPnoaddr) == 0)
-			version = V4;
-		else
-			version = V6;
+		version = convipvers(c);
 	}
 
 	dlen = blocklen(bp);
@@ -260,9 +236,6 @@
 	switch(version){
 	case V4:
 		bp = padblock(bp, UDP4_IPHDR_SZ+UDP_UDPHDR_SZ);
-		if(bp == nil)
-			return;
-
 		uh4 = (Udp4hdr *)(bp->rp);
 		ptcllen = dlen + UDP_UDPHDR_SZ;
 		uh4->Unused = 0;
@@ -274,7 +247,7 @@
 			v6tov4(uh4->udpdst, raddr);
 			hnputs(uh4->udpdport, rport);
 			v6tov4(uh4->udpsrc, laddr);
-			rc = nil;
+			rh = nil;
 		} else {
 			v6tov4(uh4->udpdst, c->raddr);
 			hnputs(uh4->udpdport, c->rport);
@@ -281,25 +254,26 @@
 			if(ipcmp(c->laddr, IPnoaddr) == 0)
 				findlocalip(f, c->laddr, c->raddr);
 			v6tov4(uh4->udpsrc, c->laddr);
-			rc = c;
+			rh = c;
 		}
 		hnputs(uh4->udpsport, c->lport);
 		hnputs(uh4->udplen, ptcllen);
 		uh4->udpcksum[0] = 0;
 		uh4->udpcksum[1] = 0;
-		hnputs(uh4->udpcksum, 
-		       ptclcsum(bp, UDP4_PHDR_OFF, dlen+UDP_UDPHDR_SZ+UDP4_PHDR_SZ));
+		csum = ptclcsum(bp, UDP4_PHDR_OFF, dlen+UDP_UDPHDR_SZ+UDP4_PHDR_SZ);
+		if(csum == 0)
+			csum = 0xffff;	/* -0 */
+		hnputs(uh4->udpcksum, csum);
 		uh4->vihl = IP_VER4;
-		ipoput4(f, bp, 0, c->ttl, c->tos, rc);
+		ipoput4(f, bp, 0, c->ttl, c->tos, rh);
 		break;
 
 	case V6:
+		/*
+		 * using the v6 ip header to create pseudo header
+		 * first then reset it to the normal ip header
+		 */
 		bp = padblock(bp, UDP6_IPHDR_SZ+UDP_UDPHDR_SZ);
-		if(bp == nil)
-			return;
-
-		// using the v6 ip header to create pseudo header 
-		// first then reset it to the normal ip header
 		uh6 = (Udp6hdr *)(bp->rp);
 		memset(uh6, 0, 8);
 		ptcllen = dlen + UDP_UDPHDR_SZ;
@@ -309,7 +283,7 @@
 			ipmove(uh6->udpdst, raddr);
 			hnputs(uh6->udpdport, rport);
 			ipmove(uh6->udpsrc, laddr);
-			rc = nil;
+			rh = nil;
 		} else {
 			ipmove(uh6->udpdst, c->raddr);
 			hnputs(uh6->udpdport, c->rport);
@@ -316,19 +290,21 @@
 			if(ipcmp(c->laddr, IPnoaddr) == 0)
 				findlocalip(f, c->laddr, c->raddr);
 			ipmove(uh6->udpsrc, c->laddr);
-			rc = c;
+			rh = c;
 		}
 		hnputs(uh6->udpsport, c->lport);
 		hnputs(uh6->udplen, ptcllen);
 		uh6->udpcksum[0] = 0;
 		uh6->udpcksum[1] = 0;
-		hnputs(uh6->udpcksum, 
-		       ptclcsum(bp, UDP6_PHDR_OFF, dlen+UDP_UDPHDR_SZ+UDP6_PHDR_SZ));
+		csum = ptclcsum(bp, UDP6_PHDR_OFF, dlen+UDP_UDPHDR_SZ+UDP6_PHDR_SZ);
+		if(csum == 0)
+			csum = 0xffff;	/* -0 */
+		hnputs(uh6->udpcksum, csum);
 		memset(uh6, 0, 8);
 		uh6->viclfl[0] = IP_VER6;
 		hnputs(uh6->len, ptcllen);
 		uh6->nextheader = IP_UDPPROTO;
-		ipoput6(f, bp, 0, c->ttl, c->tos, rc);
+		ipoput6(f, bp, 0, c->ttl, c->tos, rh);
 		break;
 
 	default:
@@ -360,10 +336,8 @@
 	uh4 = (Udp4hdr*)(bp->rp);
 	version = ((uh4->vihl&0xF0)==IP_VER6) ? V6 : V4;
 
-	/*
-	 * Put back pseudo header for checksum 
-	 * (remember old values for icmpnoconv())
-	 */
+	/* Put back pseudo header for checksum
+	 * (remember old values for icmpnoconv()) */
 	switch(version) {
 	case V4:
 		ottl = uh4->Unused;
@@ -423,7 +397,7 @@
 
 	c = iphtlook(&upriv->ht, raddr, rport, laddr, lport);
 	if(c == nil){
-		/* no converstation found */
+		/* no conversation found */
 		upriv->ustats.udpNoPorts++;
 		qunlock(udp);
 		netlog(f, Logudp, "udp: no conv %I!%d -> %I!%d\n", raddr, rport,
@@ -434,7 +408,7 @@
 			icmpnoconv(f, bp);
 			break;
 		case V6:
-			icmphostunr(f, ifc, bp, icmp6_port_unreach, 0);
+			icmphostunr6(f, ifc, bp, Icmp6_port_unreach, 0);
 			break;
 		default:
 			panic("udpiput2: version %d", version);
@@ -448,18 +422,8 @@
 	if(c->state == Announced){
 		if(ucb->headers == 0){
 			/* create a new conversation */
-			if(ipforme(f, laddr) != Runi) {
-				switch(version){
-				case V4:
-					v4tov6(laddr, ifc->lifc->local);
-					break;
-				case V6:
-					ipmove(laddr, ifc->lifc->local);
-					break;
-				default:
-					panic("udpiput3: version %d", version);
-				}
-			}
+			if(ipforme(f, laddr) != Runi)
+				ipv6local(ifc, laddr, 0, raddr);
 			c = Fsnewcall(c, raddr, rport, laddr, lport, version);
 			if(c == nil){
 				qunlock(udp);
@@ -507,33 +471,21 @@
 		p = bp->rp;
 		ipmove(p, raddr); p += IPaddrlen;
 		ipmove(p, laddr); p += IPaddrlen;
-		ipmove(p, ifc->lifc->local); p += IPaddrlen;
+		if(!ipv6local(ifc, p, 0, raddr))
+			ipmove(p, ifc->lifc != nil ? ifc->lifc->local : IPnoaddr);
+		p += IPaddrlen;
 		hnputs(p, rport); p += 2;
 		hnputs(p, lport);
 		break;
-	case 6:
-		/* pass the src address */
-		bp = padblock(bp, UDP_USEAD6);
-		p = bp->rp;
-		ipmove(p, raddr); p += IPaddrlen;
-		ipmove(p, ipforme(f, laddr)==Runi ? laddr : ifc->lifc->local); p += IPaddrlen;
-		hnputs(p, rport); p += 2;
-		hnputs(p, lport);
-		break;
 	}
 
-	if(bp->next)
-		bp = concatblock(bp);
-
 	if(qfull(c->rq)){
-		qunlock(c);
-		netlog(f, Logudp, "udp: qfull %I.%d -> %I.%d\n", raddr, rport,
-		       laddr, lport);
+		netlog(f, Logudp, "udp: qfull %I.%d -> %I.%d\n",
+			raddr, rport, laddr, lport);
 		freeblist(bp);
-		return;
+	} else {
+		qpass(c->rq, concatblock(bp));
 	}
-
-	qpass(c->rq, bp);
 	qunlock(c);
 
 }
@@ -545,11 +497,13 @@
 
 	ucb = (Udpcb*)c->ptcl;
 	if(n == 1){
-		if(strcmp(f[0], "oldheaders") == 0){
-			ucb->headers = 6;
+		if(strcmp(f[0], "hangup") == 0){
+			qhangup(c->rq, nil);
+			qhangup(c->wq, nil);
 			return nil;
-		} else if(strcmp(f[0], "headers") == 0){
-			ucb->headers = 7;
+		}
+		if(strcmp(f[0], "headers") == 0){
+			ucb->headers = 7;	/* new headers format */
 			return nil;
 		}
 	}
@@ -564,34 +518,25 @@
 	uchar source[IPaddrlen], dest[IPaddrlen];
 	ushort psource, pdest;
 	Conv *s, **p;
-	int version;
 
 	h4 = (Udp4hdr*)(bp->rp);
-	version = ((h4->vihl&0xF0)==IP_VER6) ? V6 : V4;
+	h6 = (Udp6hdr*)(bp->rp);
 
-	switch(version) {
-	case V4:
+	if((h4->vihl&0xF0)==IP_VER4) {
 		v4tov6(dest, h4->udpdst);
 		v4tov6(source, h4->udpsrc);
 		psource = nhgets(h4->udpsport);
 		pdest = nhgets(h4->udpdport);
-		break;
-	case V6:
-		h6 = (Udp6hdr*)(bp->rp);
+	} else {
 		ipmove(dest, h6->udpdst);
 		ipmove(source, h6->udpsrc);
 		psource = nhgets(h6->udpsport);
 		pdest = nhgets(h6->udpdport);
-		break;
-	default:
-		panic("udpadvise: version %d", version);
-		return;  /* to avoid a warning */
 	}
 
 	/* Look for a connection */
 	qlock(udp);
-	for(p = udp->conv; *p; p++) {
-		s = *p;
+	for(p = udp->conv; (s = *p) != nil; p++) {
 		if(s->rport == pdest)
 		if(s->lport == psource)
 		if(ipcmp(s->raddr, dest) == 0)
@@ -617,7 +562,8 @@
 	Udppriv *upriv;
 
 	upriv = udp->priv;
-	return snprint(buf, len, "InDatagrams: %lud\nNoPorts: %lud\nInErrors: %lud\nOutDatagrams: %lud\n",
+	return snprint(buf, len, "InDatagrams: %llud\nNoPorts: %lud\n"
+		"InErrors: %lud\nOutDatagrams: %llud\n",
 		upriv->ustats.udpInDatagrams,
 		upriv->ustats.udpNoPorts,
 		upriv->ustats.udpInErrors,
@@ -624,12 +570,6 @@
 		upriv->ustats.udpOutDatagrams);
 }
 
-int
-udpgc(Proto *udp)
-{
-	return natgc(udp->ipproto);
-}
-
 void
 udpinit(Fs *fs)
 {
@@ -647,7 +587,6 @@
 	udp->rcv = udpiput;
 	udp->advise = udpadvise;
 	udp->stats = udpstats;
-	udp->gc = udpgc;
 	udp->ipproto = IP_UDPPROTO;
 	udp->nc = Nchans;
 	udp->ptclsize = sizeof(Udpcb);
--- /dev/null
+++ b/os/ip.original/arp.c
@@ -1,0 +1,681 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+
+#include "ip.h"
+#include "ipv6.h"
+
+/*
+ *  address resolution tables
+ */
+
+enum
+{
+	NHASH		= (1<<6),
+	NCACHE		= 256,
+
+	AOK		= 1,
+	AWAIT		= 2,
+};
+
+char *arpstate[] =
+{
+	"UNUSED",
+	"OK",
+	"WAIT",
+};
+
+/*
+ *  one per Fs
+ */
+struct Arp
+{
+	QLock;
+	Fs	*f;
+	Arpent	*hash[NHASH];
+	Arpent	cache[NCACHE];
+	Arpent	*rxmt;
+	Proc	*rxmitp;	/* neib sol re-transmit proc */
+	Rendez	rxmtq;
+	Block 	*dropf, *dropl;
+};
+
+char *Ebadarp = "bad arp";
+
+#define haship(s) ((s)[IPaddrlen-1]%NHASH)
+
+extern int 	ReTransTimer = RETRANS_TIMER;
+static void 	rxmitproc(void *v);
+
+void
+arpinit(Fs *f)
+{
+	f->arp = smalloc(sizeof(Arp));	/* one Arp table per Fs */
+	f->arp->f = f;
+	f->arp->rxmt = nil;
+	f->arp->dropf = f->arp->dropl = nil;	/* drop list starts empty */
+	kproc("rxmitproc", rxmitproc, f->arp, 0);	/* process that runs rxmitsols() on this table */
+}
+
+/*
+ *  make an entry for ip on ifc by recycling the cache slot with the smallest utime; if addrxt is set (and ip is not multicast) also queue it for re-transmission.
+ */
+static Arpent*
+newarp6(Arp *arp, uchar *ip, Ipifc *ifc, int addrxt)
+{
+	uint t;
+	Block *next, *xp;
+	Arpent *a, *e, *f, **l;
+	Medium *m = ifc->m;
+	int empty;
+
+	/* find oldest entry */
+	e = &arp->cache[NCACHE];
+	a = arp->cache;
+	t = a->utime;
+	for(f = a; f < e; f++){
+		if(f->utime < t){
+			t = f->utime;
+			a = f;
+		}
+	}
+
+	/* dump the packets that were waiting on the slot being recycled */
+	xp = a->hold;
+	a->hold = nil;
+
+	if(isv4(a->ip)){	/* old entry was v4: just free them */
+		while(xp){
+			next = xp->list;
+			freeblist(xp);
+			xp = next;
+		}
+	}
+	else {	/* queue icmp unreachable for rxmitproc later on, w/o arp lock */
+		if(xp){
+			if(arp->dropl == nil) 
+				arp->dropf = xp;
+			else
+				arp->dropl->list = xp;
+			/* advance to the last block so that dropl names the new tail */
+			for(next = xp->list; next; next = next->list)
+				xp = next;
+			arp->dropl = xp;
+			wakeup(&arp->rxmtq);
+		}
+	}
+
+	/* take out of current chain */
+	l = &arp->hash[haship(a->ip)];
+	for(f = *l; f; f = f->hash){
+		if(f == a){
+			*l = a->hash;
+			break;
+		}
+		l = &f->hash;
+	}
+
+	/* insert into new chain */
+	l = &arp->hash[haship(ip)];
+	a->hash = *l;
+	*l = a;
+
+	memmove(a->ip, ip, sizeof(a->ip));
+	a->utime = NOW;
+	a->ctime = 0;
+	a->type = m;
+
+	a->rtime = NOW + ReTransTimer;
+	a->rxtsrem = MAX_MULTICAST_SOLICIT;
+	a->ifc = ifc;
+	a->ifcid = ifc->ifcid;
+
+	/* put to the end of re-transmit chain; addrxt is 0 when isv4(a->ip) */
+	if(!ipismulticast(a->ip) && addrxt){
+		l = &arp->rxmt;
+		empty = (*l==nil);
+		/* unlink a if it is already queued, then walk on to the tail */
+		for(f = *l; f; f = f->nextrxt){
+			if(f == a){
+				*l = a->nextrxt;
+				break;
+			}
+			l = &f->nextrxt;
+		}
+		for(f = *l; f; f = f->nextrxt){
+			l = &f->nextrxt;
+		}
+		*l = a;
+		if(empty) 
+			wakeup(&arp->rxmtq);
+	}
+	/* NOTE(review): when the branch above is skipped a is not unlinked from arp->rxmt, yet nextrxt is cleared below - confirm a recycled slot can never still be on that chain */
+	a->nextrxt = nil;
+
+	return a;
+}
+
+/* mark a unused and unlink it from its hash chain and from the re-transmit chain;
+ * called with arp qlocked */
+void
+cleanarpent(Arp *arp, Arpent *a)
+{
+	Arpent *f, **l;
+
+	a->utime = 0;	/* smallest utime is what newarp6 recycles first */
+	a->ctime = 0;
+	a->type = 0;
+	a->state = 0;	/* 0 indexes "UNUSED" in arpstate[] */
+	
+	/* take out of current chain */
+	l = &arp->hash[haship(a->ip)];
+	for(f = *l; f; f = f->hash){
+		if(f == a){
+			*l = a->hash;
+			break;
+		}
+		l = &f->hash;
+	}
+
+	/* take out of re-transmit chain */
+	l = &arp->rxmt;
+	for(f = *l; f; f = f->nextrxt){
+		if(f == a){
+			*l = a->nextrxt;
+			break;
+		}
+		l = &f->nextrxt;
+	}
+	a->nextrxt = nil;
+	a->hash = nil;
+	a->hold = nil;	/* blocks are not freed here; rxmitsols() detaches the list before calling */
+	a->last = nil;
+	a->ifc = nil;
+}
+
+/*
+ *  Look up ip on ifc's medium (a v4 address is first widened with v4tov6).
+ *  If the entry is resolved, copy its media address to mac, unlock arp
+ *  and return nil.  Otherwise return the Arpent, created in state AWAIT if
+ *  need be, with bp (if any) appended to its hold list and arp still qlocked.
+ */
+Arpent*
+arpget(Arp *arp, Block *bp, int version, Ipifc *ifc, uchar *ip, uchar *mac)
+{
+	int hash;
+	Arpent *a;
+	Medium *type = ifc->m;
+	uchar v6ip[IPaddrlen];
+
+	if(version == V4){
+		v4tov6(v6ip, ip);
+		ip = v6ip;
+	}
+
+	qlock(arp);
+	hash = haship(ip);
+	for(a = arp->hash[hash]; a; a = a->hash){
+		if(memcmp(ip, a->ip, sizeof(a->ip)) == 0)
+		if(type == a->type)
+			break;
+	}
+
+	if(a == nil){
+		a = newarp6(arp, ip, ifc, (version != V4));	/* only non-v4 entries ask for re-transmission */
+		a->state = AWAIT;
+	}
+	a->utime = NOW;
+	if(a->state == AWAIT){
+		if(bp != nil){
+			if(a->hold)
+				a->last->list = bp;
+			else
+				a->hold = bp;
+			a->last = bp;
+			bp->list = nil; 
+		}
+		return a;		/* return with arp qlocked */
+	}
+
+	memmove(mac, a->mac, a->type->maclen);
+
+	/* mac has been handed out this once; an entry whose ctime is more than 15*60*1000 NOW-units old is then discarded */
+	if(NOW - a->ctime > 15*60*1000)
+		cleanarpent(arp, a);
+
+	qunlock(arp);
+	return nil;
+}
+
+/*
+ * release the qlock on arp without touching the entry; called with arp locked
+ */
+void
+arprelease(Arp *arp, Arpent*)
+{
+	qunlock(arp);
+}
+
+/*
+ * Record mac (type->maclen bytes) as the media address of a, mark
+ * the entry AOK and return the list of blocks that were held for it.
+ *
+ * called with arp locked; unlocks it before returning
+ */
+Block*
+arpresolve(Arp *arp, Arpent *a, Medium *type, uchar *mac)
+{
+	Block *bp;
+	Arpent *f, **l;
+
+	if(!isv4(a->ip)){	/* resolved: no further re-transmission needed */
+		l = &arp->rxmt;
+		for(f = *l; f; f = f->nextrxt){
+			if(f == a){
+				*l = a->nextrxt;
+				break;
+			}
+			l = &f->nextrxt;
+		}
+	}
+
+	memmove(a->mac, mac, type->maclen);
+	a->type = type;
+	a->state = AOK;
+	a->utime = NOW;
+	bp = a->hold;	/* hand the waiting packets to the caller */
+	a->hold = nil;
+	qunlock(arp);
+
+	return bp;
+}
+
+void
+arpenter(Fs *fs, int version, uchar *ip, uchar *mac, int n, int refresh)
+{
+	Arp *arp;
+	Route *r;
+	Arpent *a, *f, **l;
+	Ipifc *ifc;
+	Medium *type;
+	Block *bp, *next;
+	uchar v6ip[IPaddrlen];
+	/* record ip -> mac (n must be 6); on a hit the packets held for ip are written out */
+	arp = fs->arp;
+
+	if(n != 6){
+//		print("arp: len = %d\n", n);
+		return;
+	}
+
+	switch(version){
+	case V4:
+		r = v4lookup(fs, ip, nil);
+		v4tov6(v6ip, ip);
+		ip = v6ip;
+		break;
+	case V6:
+		r = v6lookup(fs, ip, nil);
+		break;
+	default:
+		panic("arpenter: version %d", version);
+		return;	/* to suppress warnings */
+	}
+
+	if(r == nil){
+//		print("arp: no route for entry\n");
+		return;
+	}
+
+	ifc = r->ifc;	/* the interface and medium come from the route to ip */
+	type = ifc->m;
+
+	qlock(arp);
+	for(a = arp->hash[haship(ip)]; a; a = a->hash){
+		if(a->type != type || (a->state != AWAIT && a->state != AOK))
+			continue;
+
+		if(ipcmp(a->ip, ip) == 0){
+			a->state = AOK;
+			memmove(a->mac, mac, type->maclen);
+
+			if(version == V6){
+				/* take out of re-transmit chain */
+				l = &arp->rxmt;
+				for(f = *l; f; f = f->nextrxt){
+					if(f == a){
+						*l = a->nextrxt;
+						break;
+					}
+					l = &f->nextrxt;
+				}
+			}
+
+			a->ifc = ifc;
+			a->ifcid = ifc->ifcid;
+			bp = a->hold;
+			a->hold = nil;
+			if(version == V4)
+				ip += IPv4off;	/* bwrite below gets the 4-byte form again */
+			a->utime = NOW;
+			a->ctime = a->utime;
+			qunlock(arp);
+
+			while(bp){	/* send what was waiting, outside the arp lock */
+				next = bp->list;
+				if(ifc != nil){
+					if(waserror()){
+						runlock(ifc);
+						nexterror();
+					}
+					rlock(ifc);
+					if(ifc->m != nil)
+						ifc->m->bwrite(ifc, bp, version, ip);
+					else
+						freeb(bp);
+					runlock(ifc);
+					poperror();
+				} else
+					freeb(bp);
+				bp = next;
+			}
+			return;
+		}
+	}
+
+	if(refresh == 0){	/* no matching entry: create one unless this was only a refresh */
+		a = newarp6(arp, ip, ifc, 0);
+		a->state = AOK;
+		a->type = type;
+		a->ctime = NOW;
+		memmove(a->mac, mac, type->maclen);
+	}
+
+	qunlock(arp);
+}
+
+int
+arpwrite(Fs *fs, char *s, int len)
+{
+	int n;
+	Route *r;
+	Arp *arp;
+	Block *bp;
+	Arpent *a, *fl, **l;
+	Medium *m;
+	char *f[4], buf[256];
+	uchar ip[IPaddrlen], mac[MAClen];
+
+	arp = fs->arp;
+	if(len == 0)
+		error(Ebadarp);
+	if(len >= sizeof(buf))
+		len = sizeof(buf)-1;
+	strncpy(buf, s, len);
+	buf[len] = 0;
+	if(len > 0 && buf[len-1] == '\n')
+		buf[len-1] = 0;
+	n = getfields(buf, f, 4, 1, " ");
+	if(n < 1)	/* blank request: getfields left f[0] unset, so do not look at it */
+		error(Ebadarp);
+	if(strcmp(f[0], "flush") == 0){	/* "flush": empty the whole table */
+		qlock(arp);
+		for(a = arp->cache; a < &arp->cache[NCACHE]; a++){
+			memset(a->ip, 0, sizeof(a->ip));
+			memset(a->mac, 0, sizeof(a->mac));
+			a->hash = nil;
+			a->state = 0;
+			a->utime = 0;
+			while(a->hold != nil){
+				bp = a->hold->list;
+				freeblist(a->hold);
+				a->hold = bp;
+			}
+		}
+		memset(arp->hash, 0, sizeof(arp->hash));
+		/* NOTE(review): only the list heads are reset here; blocks still queued on dropf are not freed */
+		arp->rxmt = nil;
+		arp->dropf = nil;
+		arp->dropl = nil;
+		qunlock(arp);
+	} else if(strcmp(f[0], "add") == 0){	/* "add ip mac" or "add medium ip mac" */
+		switch(n){
+		default:
+			error(Ebadarg);
+		case 3:	/* medium is taken from the route to ip */
+			parseip(ip, f[1]);
+			if(isv4(ip))
+				r = v4lookup(fs, ip+IPv4off, nil);
+			else
+				r = v6lookup(fs, ip, nil);
+			if(r == nil)
+				error("Destination unreachable");
+			m = r->ifc->m;
+			n = parsemac(mac, f[2], m->maclen);
+			break;
+		case 4:
+			m = ipfindmedium(f[1]);
+			if(m == nil)
+				error(Ebadarp);
+			parseip(ip, f[2]);
+			n = parsemac(mac, f[3], m->maclen);
+			break;
+		}
+
+		if(m->ares == nil)
+			error(Ebadarp);
+
+		m->ares(fs, V6, ip, mac, n, 0);	/* n is now the length parsemac returned */
+	} else if(strcmp(f[0], "del") == 0){	/* "del ip" */
+		if(n != 2)
+			error(Ebadarg);
+
+		parseip(ip, f[1]);
+		qlock(arp);
+
+		l = &arp->hash[haship(ip)];
+		for(a = *l; a; a = a->hash){
+			if(memcmp(ip, a->ip, sizeof(a->ip)) == 0){
+				*l = a->hash;
+				break;
+			}
+			l = &a->hash;
+		}
+	
+		if(a){
+			/* take out of re-transmit chain */
+			l = &arp->rxmt;
+			for(fl = *l; fl; fl = fl->nextrxt){
+				if(fl == a){
+					*l = a->nextrxt;
+					break;
+				}
+				l = &fl->nextrxt;
+			}
+
+			a->nextrxt = nil;
+			a->hash = nil;
+			a->hold = nil;	/* NOTE(review): unlike "flush", held blocks are dropped here without freeblist() - confirm */
+			a->last = nil;
+			a->ifc = nil;
+			memset(a->ip, 0, sizeof(a->ip));
+			memset(a->mac, 0, sizeof(a->mac));
+		}
+		qunlock(arp);
+	} else
+		error(Ebadarp);
+
+	return len;
+}
+
+enum
+{
+	Alinelen=	90,
+};
+
+char *aformat = "%-6.6s %-8.8s %-40.40I %-32.32s\n";
+
+static void
+convmac(char *p, uchar *mac, int n)
+{
+	for(; n > 0; n--)	/* two hex digits per address byte, appended at p */
+		p += sprint(p, "%2.2ux", *mac++);
+}
+
+int
+arpread(Arp *arp, char *p, ulong offset, int len)
+{
+	Arpent *a;
+	int n;
+	char mac[2*MAClen+1];
+
+	/* each entry formats to exactly Alinelen bytes; only line-aligned reads are served */
+	if(offset % Alinelen)
+		return 0;
+	offset = offset/Alinelen;	/* in-use entries to skip */
+	len = len/Alinelen;		/* whole lines that fit in p */
+
+	/* hold the lock over the whole scan so state and type cannot change between test and use */
+	n = 0;
+	qlock(arp);
+	for(a = arp->cache; len > 0 && a < &arp->cache[NCACHE]; a++){
+		if(a->state == 0)
+			continue;
+		if(offset > 0){
+			offset--;
+			continue;
+		}
+		len--;
+		convmac(mac, a->mac, a->type->maclen);
+		n += sprint(p+n, aformat, a->type->name, arpstate[a->state], a->ip, mac);
+	}
+	qunlock(arp);
+	return n;
+}
+
+extern int
+rxmitsols(Arp *arp)
+{
+	uint sflag;
+	Block *next, *xp;
+	Arpent *a, *b, **l;
+	Fs *f;
+	uchar ipsrc[IPaddrlen];
+	Ipifc *ifc = nil;
+	long nrxt;
+	/* one pass of the re-transmit process; returns nrxt: 0 if the chain was empty, else a delay computed from the head entry's rtime */
+	qlock(arp);
+	f = arp->f;
+
+	a = arp->rxmt;
+	if(a==nil){
+		nrxt = 0;
+		goto dodrops;	/* nothing queued for re-transmission */
+	}
+	nrxt = a->rtime - NOW;
+	if(nrxt > 3*ReTransTimer/4) 
+		goto dodrops;	/* head entry is not due yet */
+
+	for(; a; a = a->nextrxt){
+		ifc = a->ifc;
+		assert(ifc != nil);
+		if((a->rxtsrem <= 0) || !(canrlock(ifc)) || (a->ifcid != ifc->ifcid)){
+			xp = a->hold;
+			a->hold = nil;
+			/* NOTE(review): if canrlock() succeeded and only the ifcid test failed, no runlock() is visible on this path - confirm */
+			if(xp){
+				if(arp->dropl == nil) 
+					arp->dropf = xp;
+				else
+					arp->dropl->list = xp;
+			}
+			/* NOTE(review): cleanarpent() clears a->nextrxt, so the loop's a = a->nextrxt then yields nil - confirm */
+			cleanarpent(arp, a);
+		}
+		else
+			break;
+	}
+	if(a == nil)
+		goto dodrops;
+
+	/* a is the entry to solicit for; its ifc was read-locked by the canrlock() above */
+	qunlock(arp);	/* for icmpns */
+	if((sflag = ipv6anylocal(ifc, ipsrc)) != SRC_UNSPEC) 
+		icmpns(f, ipsrc, sflag, a->ip, TARG_MULTI, ifc->mac); 
+
+	runlock(ifc);
+	qlock(arp);	
+
+	/* put to the end of re-transmit chain */
+	l = &arp->rxmt;
+	for(b = *l; b; b = b->nextrxt){
+		if(b == a){
+			*l = a->nextrxt;
+			break;
+		}
+		l = &b->nextrxt;
+	}
+	for(b = *l; b; b = b->nextrxt){
+		l = &b->nextrxt;
+	}
+	*l = a;
+	a->rxtsrem--;	/* one fewer solicitation left for this entry */
+	a->nextrxt = nil;
+	a->rtime = NOW + ReTransTimer;
+
+	a = arp->rxmt;
+	if(a==nil)
+		nrxt = 0;
+	else 
+		nrxt = a->rtime - NOW;
+
+dodrops:
+	xp = arp->dropf;	/* take the whole drop list so it can be processed unlocked */
+	arp->dropf = nil;
+	arp->dropl = nil;
+	qunlock(arp);
+	/* NOTE(review): ifc is nil here if the chain was empty on entry, and is not read-locked otherwise - confirm icmphostunr copes */
+	for(; xp; xp = next){
+		next = xp->list;
+		icmphostunr(f, ifc, xp, icmp6_adr_unreach, 1);
+	}
+
+	return nrxt;
+
+}
+
+static int
+rxready(void *v)
+{
+	Arp *arp = v;
+
+	/* sleep condition: true once the re-transmit chain or the drop list is non-empty */
+	if(arp->rxmt != nil)
+		return 1;
+	return arp->dropf != nil;
+}
+
+static void
+rxmitproc(void *v)
+{
+	Arp *arp = v;
+	long wakeupat;
+
+	arp->rxmitp = up;
+	/* an error raised in the loop below ends the process after deregistering it */
+	if(waserror()){
+		arp->rxmitp = 0;
+		pexit("hangup", 1);
+	}
+	for(;;){
+		wakeupat = rxmitsols(arp);
+		if(wakeupat == 0)	/* chain empty: wait until rxready() sees work */
+			sleep(&arp->rxmtq, rxready, v); 
+		else if(wakeupat > ReTransTimer/4)	/* shorter (or negative) delays are not slept on: loop again at once */
+			tsleep(&arp->rxmtq, return0, 0, wakeupat); 
+	}
+}
+
--- /dev/null
+++ b/os/ip.original/bootp.c
@@ -1,0 +1,231 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+#include "kernel.h"
+#include "ip.h"
+
+static	ulong	fsip;
+static	ulong	auip;
+static	ulong	gwip;
+static	ulong	ipmask;
+static	ulong	ipaddr;
+
+enum
+{
+	Bootrequest = 1,
+	Bootreply   = 2,
+};
+
+typedef struct Bootp
+{
+	/* udp.c oldheader */
+	uchar	raddr[IPaddrlen];
+	uchar	laddr[IPaddrlen];
+	uchar	rport[2];
+	uchar	lport[2];
+	/* bootp itself */
+	uchar	op;		/* opcode */
+	uchar	htype;		/* hardware type */
+	uchar	hlen;		/* hardware address len */
+	uchar	hops;		/* hops */
+	uchar	xid[4];		/* a random number */
+	uchar	secs[2];	/* elapsed since client started booting */
+	uchar	pad[2];
+	uchar	ciaddr[4];	/* client IP address (client tells server) */
+	uchar	yiaddr[4];	/* client IP address (server tells client) */
+	uchar	siaddr[4];	/* server IP address */
+	uchar	giaddr[4];	/* gateway IP address */
+	uchar	chaddr[16];	/* client hardware address */
+	uchar	sname[64];	/* server host name (optional) */
+	uchar	file[128];	/* boot file name */
+	uchar	vend[128];	/* vendor-specific goo */
+} Bootp;
+
+/*
+ * bootp returns:
+ *
+ * "fsip d.d.d.d
+ * auip d.d.d.d
+ * gwip d.d.d.d
+ * ipmask d.d.d.d
+ * ipaddr d.d.d.d"
+ *
+ * where d.d.d.d is the IP address in dotted decimal notation, and each
+ * address is followed by a newline.
+ */
+
+static	Bootp	req;
+static	Proc*	rcvprocp;
+static	int	recv;
+static	int	done;
+static	Rendez	bootpr;
+static	char	rcvbuf[512+2*IPaddrlen+2*2];
+
+static void
+rcvbootp(void *a)
+{
+	int n, fd;
+	Bootp *rp;
+	char *field[4];
+	uchar ip[IPaddrlen];
+
+	if(waserror())
+		pexit("", 0);
+	rcvprocp = up;	/* store for postnote below */
+	fd = (int)a;	/* at compilation: warning: ../ip/bootp.c:78 conversion of pointer to shorter integer */
+	while(done == 0) {
+		n = kread(fd, rcvbuf, sizeof(rcvbuf));
+		if(n <= 0)
+			break;
+		rp = (Bootp*)rcvbuf;	/* currently ignore udp's header */
+		rp->vend[sizeof(rp->vend)-1] = 0;	/* vend comes off the wire: keep getfields/parseip inside it */
+		if(memcmp(req.chaddr, rp->chaddr, 6) == 0
+		&& rp->htype == 1 && rp->hlen == 6
+		&& strncmp((char*)rp->vend, "p9  ", 4) == 0	/* check the magic before tokenizing */
+		&& getfields((char*)rp->vend+4, field, 4, 1, " ") == 4){
+			if(ipaddr == 0)
+				ipaddr = nhgetl(rp->yiaddr);
+			if(ipmask == 0)
+				ipmask = parseip(ip, field[0]);
+			if(fsip == 0)
+				fsip = parseip(ip, field[1]);
+			if(auip == 0)
+				auip = parseip(ip, field[2]);
+			if(gwip == 0)
+				gwip = parseip(ip, field[3]);
+			break;
+		}
+	}
+	poperror();
+	rcvprocp = nil;
+
+	recv = 1;	/* tells rbootp's send loop to stop */
+	wakeup(&bootpr);
+	pexit("", 0);
+}
+
+static char*
+rbootp(Ipifc *ifc)
+{
+	int cfd, dfd, tries, n;
+	char ia[5+3*24], im[16], *av[3];
+	uchar nipaddr[4], ngwip[4], nipmask[4];
+	char dir[Maxpath];
+	/* configure ifc by bootp; returns an error string if the udp conversation cannot be set up, else nil (also after a timeout) */
+	av[1] = "0.0.0.0";	/* temporary address while the request is outstanding */
+	av[2] = "0.0.0.0";
+	ipifcadd(ifc, av, 3, 0, nil);
+
+	cfd = kannounce("udp!*!68", dir);
+	if(cfd < 0)
+		return "bootp announce failed";
+	strcat(dir, "/data");
+	if(kwrite(cfd, "headers", 7) < 0){
+		kclose(cfd);
+		return "bootp ctl headers failed";
+	}
+	kwrite(cfd, "oldheaders", 10);	/* result is not checked */
+	dfd = kopen(dir, ORDWR);
+	if(dfd < 0){
+		kclose(cfd);
+		return "bootp open data failed";
+	}
+	kclose(cfd);
+	
+
+	/* create request */
+	memset(&req, 0, sizeof(req));
+	ipmove(req.raddr, IPv4bcast);
+	hnputs(req.rport, 67);
+	req.op = Bootrequest;
+	req.htype = 1;			/* ethernet (all we know) */
+	req.hlen = 6;			/* ethernet (all we know) */
+
+	/* Hardware MAC address */
+	memmove(req.chaddr, ifc->mac, 6);
+	/* Fill in the local IP address if we know it */
+	ipv4local(ifc, req.ciaddr);
+	memset(req.file, 0, sizeof(req.file));
+	strcpy((char*)req.vend, "p9  ");	/* the tag rcvbootp looks for in replies */
+
+	done = 0;
+	recv = 0;
+
+	kproc("rcvbootp", rcvbootp, (void*)dfd, KPDUPFDG);
+
+	/*
+	 * broadcast bootp's till we get a reply,
+	 * or fixed number of tries
+	 */
+	tries = 0;
+	while(recv == 0) {
+		if(kwrite(dfd, &req, sizeof(req)) < 0)
+			print("bootp: write: %s\n", commonerror());
+
+		tsleep(&bootpr, return0, 0, 1000);
+		if(++tries > 10) {
+			print("bootp: timed out\n");
+			break;
+		}
+	}
+	kclose(dfd);
+	done = 1;	/* ends rcvbootp's read loop */
+	if(rcvprocp != nil){
+		postnote(rcvprocp, 1, "timeout", 0);
+		rcvprocp = nil;
+	}
+
+	av[1] = "0.0.0.0";	/* drop the temporary address again */
+	av[2] = "0.0.0.0";
+	ipifcrem(ifc, av, 3);
+
+	hnputl(nipaddr, ipaddr);
+	sprint(ia, "%V", nipaddr);
+	hnputl(nipmask, ipmask);
+	sprint(im, "%V", nipmask);
+	av[1] = ia;	/* add whatever address and mask are now recorded */
+	av[2] = im;
+	ipifcadd(ifc, av, 3, 0, nil);
+
+	if(gwip != 0) {	/* a gateway was supplied: add a default route through it */
+		hnputl(ngwip, gwip);
+		n = snprint(ia, sizeof(ia), "add 0.0.0.0 0.0.0.0 %V", ngwip);
+		routewrite(ifc->conv->p->f, nil, ia, n);
+	}
+	return nil;
+}
+
+static int
+rbootpread(char *bp, ulong offset, int len)
+{
+	int n;
+	char *buf;
+	uchar a[4];
+
+	buf = smalloc(READSTR);
+	if(waserror()){	/* free the scratch buffer if anything below raises an error */
+		free(buf);
+		nexterror();
+	}
+	hnputl(a, fsip);	/* each address is put in network byte order for %V */
+	n = snprint(buf, READSTR, "fsip %15V\n", a);
+	hnputl(a, auip);
+	n += snprint(buf + n, READSTR-n, "auip %15V\n", a);
+	hnputl(a, gwip);
+	n += snprint(buf + n, READSTR-n, "gwip %15V\n", a);
+	hnputl(a, ipmask);
+	n += snprint(buf + n, READSTR-n, "ipmask %15V\n", a);
+	hnputl(a, ipaddr);
+	snprint(buf + n, READSTR-n, "ipaddr %15V\n", a);
+
+	len = readstr(offset, bp, len, buf);	/* copy out the part of buf selected by offset and len */
+	poperror();
+	free(buf);
+	return len;
+}
+
+char*	(*bootp)(Ipifc*) = rbootp;
+int	(*bootpread)(char*, ulong, int) = rbootpread;
--- /dev/null
+++ b/os/ip.original/compress.c
@@ -1,0 +1,520 @@
+#include	"u.h"
+#include	"../port/lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"../port/error.h"
+
+#include	"ip.h"
+#include	"ppp.h"
+
+typedef struct Iphdr	Iphdr;
+typedef struct Tcphdr	Tcphdr;
+typedef struct Ilhdr	Ilhdr;
+typedef struct Hdr	Hdr;
+typedef struct Tcpc	Tcpc;
+
+struct Iphdr
+{
+	uchar	vihl;		/* Version and header length */
+	uchar	tos;		/* Type of service */
+	uchar	length[2];	/* packet length */
+	uchar	id[2];		/* Identification */
+	uchar	frag[2];	/* Fragment information */
+	uchar	ttl;		/* Time to live */
+	uchar	proto;		/* Protocol */
+	uchar	cksum[2];	/* Header checksum */
+	ulong	src;		/* Ip source (byte ordering unimportant) */
+	ulong	dst;		/* Ip destination (byte ordering unimportant) */
+};
+
+struct Tcphdr
+{
+	ulong	ports;		/* defined as a ulong to make comparisons easier */
+	uchar	seq[4];
+	uchar	ack[4];
+	uchar	flag[2];
+	uchar	win[2];
+	uchar	cksum[2];
+	uchar	urg[2];
+};
+
+struct Ilhdr
+{
+	uchar	sum[2];	/* Checksum including header */
+	uchar	len[2];	/* Packet length */
+	uchar	type;		/* Packet type */
+	uchar	spec;		/* Special */
+	uchar	src[2];	/* Src port */
+	uchar	dst[2];	/* Dst port */
+	uchar	id[4];	/* Sequence id */
+	uchar	ack[4];	/* Acked sequence */
+};
+
+enum
+{
+	URG		= 0x20,		/* Data marked urgent */
+	ACK		= 0x10,		/* Acknowledge is valid */
+	PSH		= 0x08,		/* Whole data pipe is pushed */
+	RST		= 0x04,		/* Reset connection */
+	SYN		= 0x02,		/* Pkt. is synchronise */
+	FIN		= 0x01,		/* Start close down */
+
+	IP_DF		= 0x4000,	/* Don't fragment */
+
+	IP_TCPPROTO	= 6,
+	IP_ILPROTO	= 40,
+	IL_IPHDR	= 20,
+};
+
+struct Hdr
+{
+	uchar	buf[128];
+	Iphdr	*ip;
+	Tcphdr	*tcp;
+	int	len;
+};
+
+struct Tcpc
+{
+	uchar	lastrecv;
+	uchar	lastxmit;
+	uchar	basexmit;
+	uchar	err;
+	uchar	compressid;
+	Hdr	t[MAX_STATES];
+	Hdr	r[MAX_STATES];
+};
+
+enum
+{	/* flag bits for what changed in a packet */
+	NEW_U=(1<<0),	/* tcp only */
+	NEW_W=(1<<1),	/* tcp only */
+	NEW_A=(1<<2),	/* il tcp */
+	NEW_S=(1<<3),	/* tcp only */
+	NEW_P=(1<<4),	/* tcp only */
+	NEW_I=(1<<5),	/* il tcp */
+	NEW_C=(1<<6),	/* il tcp */
+	NEW_T=(1<<7),	/* il only */
+	TCP_PUSH_BIT	= 0x10,
+};
+
+/* reserved, special-case values of above for tcp */
+#define SPECIAL_I (NEW_S|NEW_W|NEW_U)		/* echoed interactive traffic */
+#define SPECIAL_D (NEW_S|NEW_A|NEW_W|NEW_U)	/* unidirectional data */
+#define SPECIALS_MASK (NEW_S|NEW_A|NEW_W|NEW_U)
+
+int
+encode(void *p, ulong n)
+{
+	uchar *out = p;
+
+	/* 1..255 go out as one byte; 0 is the escape, so 0 itself takes the long form too */
+	if(n != 0 && n < 256){
+		out[0] = n;
+		return 1;
+	}
+	out[0] = 0;	/* escape byte, then the low 16 bits, high byte first */
+	out[1] = n >> 8;
+	out[2] = n;
+	return 3;
+}
+
+#define DECODEL(f) { \
+	if (*cp == 0) {\
+		hnputl(f, nhgetl(f) + ((cp[1] << 8) | cp[2])); \
+		cp += 3; \
+	} else { \
+		hnputl(f, nhgetl(f) + (ulong)*cp++); \
+	} \
+}
+#define DECODES(f) { \
+	if (*cp == 0) {\
+		hnputs(f, nhgets(f) + ((cp[1] << 8) | cp[2])); \
+		cp += 3; \
+	} else { \
+		hnputs(f, nhgets(f) + (ulong)*cp++); \
+	} \
+}
+
+ushort
+tcpcompress(Tcpc *comp, Block *b, Fs *)
+{
+	Iphdr	*ip;		/* current packet */
+	Tcphdr	*tcp;		/* current pkt */
+	ulong 	iplen, tcplen, hlen;	/* header length in bytes */
+	ulong 	deltaS, deltaA;	/* general purpose temporaries */
+	ulong 	changes;	/* change mask */
+	uchar	new_seq[16];	/* changes from last to current */
+	uchar	*cp;
+	Hdr	*h;		/* last packet */
+	int 	i, j;
+
+	/*
+	 * Compress the TCP/IP header at b->rp in place against the saved transmit state; returns the protocol to send b as (Pip, Pvjutcp or Pvjctcp).
+	 */
+	ip = (Iphdr*)b->rp;
+	iplen = (ip->vihl & 0xf) << 2;
+	tcp = (Tcphdr*)(b->rp + iplen);
+	tcplen = (tcp->flag[0] & 0xf0) >> 2;
+	hlen = iplen + tcplen;
+	if((tcp->flag[1] & (SYN|FIN|RST|ACK)) != ACK)
+		return Pip;	/* connection control */
+
+	/*
+	 * Packet is compressible, look for a connection
+	 */
+	changes = 0;
+	cp = new_seq;
+	j = comp->lastxmit;
+	h = &comp->t[j];
+	if(ip->src != h->ip->src || ip->dst != h->ip->dst
+	|| tcp->ports != h->tcp->ports) {
+		for(i = 0; i < MAX_STATES; ++i) {
+			j = (comp->basexmit + i) % MAX_STATES;
+			h = &comp->t[j];
+			if(ip->src == h->ip->src && ip->dst == h->ip->dst
+			&& tcp->ports == h->tcp->ports)
+				goto found;
+		}
+
+		/* no connection, reuse the oldest */
+		if(i == MAX_STATES) {
+			j = comp->basexmit;
+			j = (j + MAX_STATES - 1) % MAX_STATES;
+			comp->basexmit = j;
+			h = &comp->t[j];
+			goto raise;
+		}
+	}
+found:
+
+	/*
+	 * Make sure that only what we expect to change changed. 
+	 */
+	if(ip->vihl  != h->ip->vihl || ip->tos   != h->ip->tos ||
+	   ip->ttl   != h->ip->ttl  || ip->proto != h->ip->proto)
+		goto raise;	/* headers changed */
+	if(iplen != sizeof(Iphdr) && memcmp(ip+1, h->ip+1, iplen - sizeof(Iphdr)))
+		goto raise;	/* ip options changed */
+	if(tcplen != sizeof(Tcphdr) && memcmp(tcp+1, h->tcp+1, tcplen - sizeof(Tcphdr)))
+		goto raise;	/* tcp options changed */
+
+	if(tcp->flag[1] & URG) {
+		cp += encode(cp, nhgets(tcp->urg));
+		changes |= NEW_U;
+	} else if(memcmp(tcp->urg, h->tcp->urg, sizeof(tcp->urg)) != 0)
+		goto raise;
+	if(deltaS = nhgets(tcp->win) - nhgets(h->tcp->win)) {
+		cp += encode(cp, deltaS);
+		changes |= NEW_W;
+	}
+	if(deltaA = nhgetl(tcp->ack) - nhgetl(h->tcp->ack)) {
+		if(deltaA > 0xffff)
+			goto raise;
+		cp += encode(cp, deltaA);
+		changes |= NEW_A;
+	}
+	if(deltaS = nhgetl(tcp->seq) - nhgetl(h->tcp->seq)) {
+		if (deltaS > 0xffff)
+			goto raise;
+		cp += encode(cp, deltaS);
+		changes |= NEW_S;
+	}
+
+	/*
+	 * Look for the special-case encodings.
+	 */
+	switch(changes) {
+	case 0:
+		/*
+		 * Nothing changed. If this packet contains data and the last
+		 * one didn't, this is probably a data packet following an
+		 * ack (normal on an interactive connection) and we send it
+		 * compressed. Otherwise it's probably a retransmit,
+		 * retransmitted ack or window probe.  Send it uncompressed
+		 * in case the other side missed the compressed version.
+		 */
+		if(nhgets(ip->length) == nhgets(h->ip->length) ||
+		   nhgets(h->ip->length) != hlen)
+			goto raise;
+		break;
+	case SPECIAL_I:
+	case SPECIAL_D:
+		/*
+		 * Actual changes match one of our special case encodings --
+		 * send packet uncompressed.
+		 */
+		goto raise;
+	case NEW_S | NEW_A:
+		if (deltaS == deltaA &&
+			deltaS == nhgets(h->ip->length) - hlen) {
+			/* special case for echoed terminal traffic */
+			changes = SPECIAL_I;
+			cp = new_seq;
+		}
+		break;
+	case NEW_S:
+		if (deltaS == nhgets(h->ip->length) - hlen) {
+			/* special case for data xfer */
+			changes = SPECIAL_D;
+			cp = new_seq;
+		}
+		break;
+	}
+	deltaS = nhgets(ip->id) - nhgets(h->ip->id);
+	if(deltaS != 1) {
+		cp += encode(cp, deltaS);
+		changes |= NEW_I;
+	}
+	if (tcp->flag[1] & PSH)
+		changes |= TCP_PUSH_BIT;
+	/*
+	 * Grab the cksum before we overwrite it below. Then update our
+	 * state with this packet's header.
+	 */
+	deltaA = nhgets(tcp->cksum);
+	memmove(h->buf, b->rp, hlen);
+	h->len = hlen;
+	h->tcp = (Tcphdr*)(h->buf + iplen);
+
+	/*
+	 * We want to use the original packet as our compressed packet. (cp -
+	 * new_seq) is the number of bytes we need for compressed sequence
+	 * numbers. In addition we need one byte for the change mask, one
+	 * for the connection id and two for the tcp checksum. So, (cp -
+	 * new_seq) + 4 bytes of header are needed. hlen is how many bytes
+	 * of the original packet to toss so subtract the two to get the new
+	 * packet size. The temporaries are gross -egs.
+	 */
+	deltaS = cp - new_seq;
+	cp = b->rp;
+	if(comp->lastxmit != j || comp->compressid == 0) {
+		comp->lastxmit = j;
+		hlen -= deltaS + 4;
+		cp += hlen;
+		*cp++ = (changes | NEW_C);
+		*cp++ = j;
+	} else {
+		hlen -= deltaS + 3;
+		cp += hlen;
+		*cp++ = changes;
+	}
+	b->rp += hlen;
+	hnputs(cp, deltaA);
+	cp += 2;
+	memmove(cp, new_seq, deltaS);
+	return Pvjctcp;
+
+raise:
+	/*
+	 * Update connection state & send uncompressed packet
+	 */
+	memmove(h->buf, b->rp, hlen);
+	h->tcp = (Tcphdr*)(h->buf + iplen);
+	h->len = hlen;
+	ip->proto = j;	/* the slot number travels in the packet's proto field (tcpuncompress reads it there); the saved copy keeps the real protocol */
+	comp->lastxmit = j;
+	return Pvjutcp;
+}
+
+Block*
+tcpuncompress(Tcpc *comp, Block *b, ushort type, Fs *f)
+{
+	uchar	*cp, changes;
+	int	i;
+	int	iplen, len;
+	Iphdr	*ip;
+	Tcphdr	*tcp;
+	Hdr	*h;
+
+	if(type == Pvjutcp) {
+		/*
+		 *  Locate the saved state for this connection. If the state
+		 *  index is legal, clear the 'discard' flag.
+		 */
+		ip = (Iphdr*)b->rp;
+		if(ip->proto >= MAX_STATES)	/* the proto field carries the state index here */
+			goto raise;
+		iplen = (ip->vihl & 0xf) << 2;
+		tcp = (Tcphdr*)(b->rp + iplen);
+		comp->lastrecv = ip->proto;
+		len = iplen + ((tcp->flag[0] & 0xf0) >> 2);
+		comp->err = 0;
+netlog(f, Logcompress, "uncompressed %d\n", comp->lastrecv);
+		/*
+		 * Restore the IP protocol field then save a copy of this
+		 * packet header. The checksum is zeroed in the copy so we
+		 * don't have to zero it each time we process a compressed
+		 * packet.
+		 */
+		ip->proto = IP_TCPPROTO;
+		h = &comp->r[comp->lastrecv];
+		memmove(h->buf, b->rp, len);
+		h->tcp = (Tcphdr*)(h->buf + iplen);
+		h->len = len;
+		h->ip->cksum[0] = h->ip->cksum[1] = 0;
+		return b;
+	}
+
+	cp = b->rp;
+	changes = *cp++;	/* first byte of a compressed packet is the change mask */
+	if(changes & NEW_C) {
+		/*
+		 * Make sure the state index is in range, then grab the
+		 * state. If we have a good state index, clear the 'discard'
+		 * flag.
+		 */
+		if(*cp >= MAX_STATES)
+			goto raise;
+		comp->err = 0;
+		comp->lastrecv = *cp++;
+netlog(f, Logcompress, "newc %d\n", comp->lastrecv);
+	} else {
+		/*
+		 * This packet has no state index. If we've had a
+		 * line error since the last time we got an explicit state
+		 * index, we have to toss the packet.
+		 */
+		if(comp->err != 0){
+			freeblist(b);
+			return nil;
+		}
+netlog(f, Logcompress, "oldc %d\n", comp->lastrecv);
+	}
+
+	/*
+	 * Find the state then fill in the TCP checksum and PUSH bit.
+	 */
+	h = &comp->r[comp->lastrecv];
+	ip = h->ip;
+	tcp = h->tcp;
+	len = h->len;
+	memmove(tcp->cksum, cp, sizeof tcp->cksum);
+	cp += 2;
+	if(changes & TCP_PUSH_BIT)
+		tcp->flag[1] |= PSH;
+	else
+		tcp->flag[1] &= ~PSH;
+	/*
+	 * Fix up the state's ack, seq, urg and win fields based on the
+	 * changemask.
+	 */
+	switch (changes & SPECIALS_MASK) {
+	case SPECIAL_I:
+		i = nhgets(ip->length) - len;	/* data bytes in the previously saved packet */
+		hnputl(tcp->ack, nhgetl(tcp->ack) + i);
+		hnputl(tcp->seq, nhgetl(tcp->seq) + i);
+		break;
+
+	case SPECIAL_D:
+		hnputl(tcp->seq, nhgetl(tcp->seq) + nhgets(ip->length) - len);
+		break;
+
+	default:
+		if(changes & NEW_U) {
+			tcp->flag[1] |= URG;
+			if(*cp == 0){
+				hnputs(tcp->urg, nhgets(cp+1));
+				cp += 3;
+			}else
+				hnputs(tcp->urg, *cp++);
+		} else
+			tcp->flag[1] &= ~URG;
+		if(changes & NEW_W)
+			DECODES(tcp->win)
+		if(changes & NEW_A)
+			DECODEL(tcp->ack)
+		if(changes & NEW_S)
+			DECODEL(tcp->seq)
+		break;
+	}
+
+	/* Update the IP ID */
+	if(changes & NEW_I)
+		DECODES(ip->id)
+	else
+		hnputs(ip->id, nhgets(ip->id) + 1);
+
+	/*
+	 *  At this point, cp points to the first byte of data in the packet.
+	 *  Back up cp by the TCP/IP header length to make room for the
+	 *  reconstructed header.
+	 *  We assume the packet we were handed has enough space to prepend
+	 *  up to 128 bytes of header.
+	 */
+	b->rp = cp;
+	if(b->rp - b->base < len){
+		b = padblock(b, len);
+		b = pullupblock(b, blocklen(b));
+	} else
+		b->rp -= len;
+	hnputs(ip->length, BLEN(b));	/* written to the saved header, which is then copied in front of the data */
+	memmove(b->rp, ip, len);
+	
+	/* recompute the ip header checksum */
+	ip = (Iphdr*)b->rp;
+	hnputs(ip->cksum, ipcsum(b->rp));
+	return b;
+
+raise:
+	netlog(f, Logcompress, "Bad Packet!\n");
+	comp->err = 1;	/* compressed packets without a state index are tossed until err is cleared */
+	freeblist(b);
+	return nil;
+}
+
+Tcpc*
+compress_init(Tcpc *c)
+{
+	int i;
+	Hdr *h;
+
+	if(c == nil){	/* no state supplied: allocate it, returning nil if that fails */
+		c = malloc(sizeof(Tcpc));
+		if(c == nil)
+			return nil;
+	}
+	memset(c, 0, sizeof(*c));
+	for(i = 0; i < MAX_STATES; i++){
+		h = &c->t[i];	/* transmit-side slot */
+		h->ip = (Iphdr*)h->buf;
+		h->tcp = (Tcphdr*)(h->buf + 10);	/* placeholder; re-pointed at buf+iplen whenever a header is saved */
+		h->len = 20;
+		h = &c->r[i];	/* receive-side slot */
+		h->ip = (Iphdr*)h->buf;
+		h->tcp = (Tcphdr*)(h->buf + 10);
+		h->len = 20;
+	}
+
+	return c;
+}
+
+ushort
+compress(Tcpc *tcp, Block *b, Fs *f)
+{
+	Iphdr *hdr = (Iphdr*)b->rp;
+
+	/*
+	 * Only TCP packets whose frag field has none of its low 14
+	 * bits set are handed to the compressor; a packet with any of
+	 * those bits set, or of any other protocol, goes out as plain
+	 * IP.
+	 */
+	if((nhgets(hdr->frag) & 0x3fff) != 0)
+		return Pip;
+	if(hdr->proto != IP_TCPPROTO)
+		return Pip;
+
+	/* tcpcompress picks the protocol to send the packet under */
+	return tcpcompress(tcp, b, f);
+}
+
+int
+compress_negotiate(Tcpc *tcp, uchar *data)
+{
+	int ok = data[0] == MAX_STATES - 1;	/* first byte must equal our highest slot number */
+	if(ok)
+		tcp->compressid = data[1];	/* while this is 0 tcpcompress always sends the slot id */
+	return ok ? 0 : -1;
+}
--- /dev/null
+++ b/os/ip.original/devip.c
@@ -1,0 +1,1417 @@
+#include	"u.h"
+#include	"../port/lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"../port/error.h"
+#include	"../ip/ip.h"
+
+enum
+{
+	Qtopdir=	1,		/* top level directory */
+	Qtopbase,
+	Qarp=		Qtopbase,
+	Qbootp,
+	Qndb,
+	Qiproute,
+	Qiprouter,
+	Qipselftab,
+	Qlog,
+
+	Qprotodir,			/* directory for a protocol */
+	Qprotobase,
+	Qclone=		Qprotobase,
+	Qstats,
+
+	Qconvdir,			/* directory for a conversation */
+	Qconvbase,
+	Qctl=		Qconvbase,
+	Qdata,
+	Qerr,
+	Qlisten,
+	Qlocal,
+	Qremote,
+	Qstatus,
+	Qsnoop,
+
+	Logtype=	5,
+	Masktype=	(1<<Logtype)-1,
+	Logconv=	12,
+	Maskconv=	(1<<Logconv)-1,
+	Shiftconv=	Logtype,
+	Logproto=	8,
+	Maskproto=	(1<<Logproto)-1,
+	Shiftproto=	Logtype + Logconv,
+
+	Nfs=		32,
+};
+#define TYPE(x) 	( ((u32)(x).path) & Masktype )
+#define CONV(x) 	( (((u32)(x).path) >> Shiftconv) & Maskconv )
+#define PROTO(x) 	( (((u32)(x).path) >> Shiftproto) & Maskproto )
+#define QID(p, c, y) 	( ((p)<<(Shiftproto)) | ((c)<<Shiftconv) | (y) )
+
+static char network[] = "network";
+
+QLock	fslock;
+Fs	*ipfs[Nfs];	/* attached fs's */
+Queue	*qlog;
+
+extern	void nullmediumlink(void);
+extern	void pktmediumlink(void);
+static	long ndbwrite(Fs*, char*, ulong, int);
+extern void    closeconv(Conv*);
+
+static int
+ip3gen(Chan *c, int i, Dir *dp)
+{
+	Qid q;
+	Conv *cv;
+	char *p;
+
+	cv = ipfs[c->dev]->p[PROTO(c->qid)]->conv[CONV(c->qid)];
+	if(cv->owner == nil)
+		kstrdup(&cv->owner, eve);
+	mkqid(&q, QID(PROTO(c->qid), CONV(c->qid), i), 0, QTFILE);
+
+	switch(i) {
+	default:
+		return -1;
+	case Qctl:
+		devdir(c, q, "ctl", 0, cv->owner, cv->perm, dp);
+		return 1;
+	case Qdata:
+		devdir(c, q, "data", qlen(cv->rq), cv->owner, cv->perm, dp);
+		return 1;
+	case Qerr:
+		devdir(c, q, "err", qlen(cv->eq), cv->owner, cv->perm, dp);
+		return 1;
+	case Qlisten:
+		devdir(c, q, "listen", 0, cv->owner, cv->perm, dp);
+		return 1;
+	case Qlocal:
+		p = "local";
+		break;
+	case Qremote:
+		p = "remote";
+		break;
+	case Qsnoop:
+		if(strcmp(cv->p->name, "ipifc") != 0)
+			return -1;
+		devdir(c, q, "snoop", qlen(cv->sq), cv->owner, 0400, dp);
+		return 1;
+	case Qstatus:
+		p = "status";
+		break;
+	}
+	devdir(c, q, p, 0, cv->owner, 0444, dp);
+	return 1;
+}
+
+static int
+ip2gen(Chan *c, int i, Dir *dp)
+{
+	Qid q;
+
+	switch(i) {
+	case Qclone:
+		mkqid(&q, QID(PROTO(c->qid), 0, Qclone), 0, QTFILE);
+		devdir(c, q, "clone", 0, network, 0666, dp);
+		return 1;
+	case Qstats:
+		mkqid(&q, QID(PROTO(c->qid), 0, Qstats), 0, QTFILE);
+		devdir(c, q, "stats", 0, network, 0444, dp);
+		return 1;
+	}	
+	return -1;
+}
+
+static int
+ip1gen(Chan *c, int i, Dir *dp)
+{
+	Qid q;
+	char *p;
+	int prot;
+	int len = 0;
+	Fs *f;
+	extern ulong	kerndate;
+
+	f = ipfs[c->dev];
+
+	prot = 0666;
+	mkqid(&q, QID(0, 0, i), 0, QTFILE);
+	switch(i) {
+	default:
+		return -1;
+	case Qarp:
+		p = "arp";
+		break;
+	case Qbootp:
+		p = "bootp";
+		if(bootp == nil)
+			return 0;
+		break;
+	case Qndb:
+		p = "ndb";
+		len = strlen(f->ndb);
+		q.vers = f->ndbvers;
+		break;
+	case Qiproute:
+		p = "iproute";
+		break;
+	case Qipselftab:
+		p = "ipselftab";
+		prot = 0444;
+		break;
+	case Qiprouter:
+		p = "iprouter";
+		break;
+	case Qlog:
+		p = "log";
+		break;
+	}
+	devdir(c, q, p, len, network, prot, dp);
+	if(i == Qndb && f->ndbmtime > kerndate)
+		dp->mtime = f->ndbmtime;
+	return 1;
+}
+
+static int
+ipgen(Chan *c, char*, Dirtab*, int, int s, Dir *dp)
+{
+	Qid q;
+	Conv *cv;
+	Fs *f;
+
+	f = ipfs[c->dev];
+
+	switch(TYPE(c->qid)) {
+	case Qtopdir:
+		if(s == DEVDOTDOT){
+			mkqid(&q, QID(0, 0, Qtopdir), 0, QTDIR);
+			sprint(up->genbuf, "#I%ud", c->dev);
+			devdir(c, q, up->genbuf, 0, network, 0555, dp);
+			return 1;
+		}
+		if(s < f->np) {
+			if(f->p[s]->connect == nil)
+				return 0;	/* protocol with no user interface */
+			mkqid(&q, QID(s, 0, Qprotodir), 0, QTDIR);
+			devdir(c, q, f->p[s]->name, 0, network, 0555, dp);
+			return 1;
+		}
+		s -= f->np;
+		return ip1gen(c, s+Qtopbase, dp);
+	case Qarp:
+	case Qbootp:
+	case Qndb:
+	case Qlog:
+	case Qiproute:
+	case Qiprouter:
+	case Qipselftab:
+		return ip1gen(c, TYPE(c->qid), dp);
+	case Qprotodir:
+		if(s == DEVDOTDOT){
+			mkqid(&q, QID(0, 0, Qtopdir), 0, QTDIR);
+			sprint(up->genbuf, "#I%ud", c->dev);
+			devdir(c, q, up->genbuf, 0, network, 0555, dp);
+			return 1;
+		}
+		if(s < f->p[PROTO(c->qid)]->ac) {
+			cv = f->p[PROTO(c->qid)]->conv[s];
+			sprint(up->genbuf, "%d", s);
+			mkqid(&q, QID(PROTO(c->qid), s, Qconvdir), 0, QTDIR);
+			devdir(c, q, up->genbuf, 0, cv->owner, 0555, dp);
+			return 1;
+		}
+		s -= f->p[PROTO(c->qid)]->ac;
+		return ip2gen(c, s+Qprotobase, dp);
+	case Qclone:
+	case Qstats:
+		return ip2gen(c, TYPE(c->qid), dp);
+	case Qconvdir:
+		if(s == DEVDOTDOT){
+			s = PROTO(c->qid);
+			mkqid(&q, QID(s, 0, Qprotodir), 0, QTDIR);
+			devdir(c, q, f->p[s]->name, 0, network, 0555, dp);
+			return 1;
+		}
+		return ip3gen(c, s+Qconvbase, dp);
+	case Qctl:
+	case Qdata:
+	case Qerr:
+	case Qlisten:
+	case Qlocal:
+	case Qremote:
+	case Qstatus:
+	case Qsnoop:
+		return ip3gen(c, TYPE(c->qid), dp);
+	}
+	return -1;
+}
+
+static void
+ipreset(void)
+{
+	nullmediumlink();
+	pktmediumlink();
+
+	fmtinstall('i', eipfmt);
+	fmtinstall('I', eipfmt);
+	fmtinstall('E', eipfmt);
+	fmtinstall('V', eipfmt);
+	fmtinstall('M', eipfmt);
+}
+
+static Fs*	/* return the Fs for instance dev, creating and initialising it on first use; nil if dev is out of range */
+ipgetfs(int dev)
+{
+	extern void (*ipprotoinit[])(Fs*);
+	Fs *f;
+	int i;
+
+	if(dev < 0 || dev >= Nfs)	/* dev indexes ipfs[]; a negative value would read before the array */
+		return nil;
+
+	qlock(&fslock);	/* serialise the check-and-create of ipfs[dev] */
+	if(ipfs[dev] == nil){
+		f = smalloc(sizeof(Fs));
+		ip_init(f);
+		arpinit(f);
+		netloginit(f);
+		for(i = 0; ipprotoinit[i]; i++)	/* ipprotoinit[] is a nil-terminated table of protocol initialisers */
+			ipprotoinit[i](f);
+		f->dev = dev;
+		ipfs[dev] = f;	/* stored only after every protocol initialiser has run */
+	}
+	qunlock(&fslock);
+
+	return ipfs[dev];
+}
+
+IPaux*
+newipaux(char *owner, char *tag)
+{
+	IPaux *a;
+	int n;
+
+	a = smalloc(sizeof(*a));
+	kstrdup(&a->owner, owner);
+	memset(a->tag, ' ', sizeof(a->tag));
+	n = strlen(tag);
+	if(n > sizeof(a->tag))
+		n = sizeof(a->tag);
+	memmove(a->tag, tag, n);
+	return a;
+}
+
+#define ATTACHER(c) (((IPaux*)((c)->aux))->owner)
+
+static Chan*	/* attach to IP stack instance #I<spec>; raises "bad specification" when spec is not in 0..Nfs-1 */
+ipattach(char* spec)
+{
+	Chan *c;
+	int dev;
+
+	dev = atoi(spec);
+	if(dev < 0 || dev >= Nfs)	/* atoi can return a negative number; dev later indexes ipfs[] */
+		error("bad specification");
+
+	ipgetfs(dev);	/* make sure the instance exists before handing out a channel */
+	c = devattach('I', spec);
+	mkqid(&c->qid, QID(0, 0, Qtopdir), 0, QTDIR);
+	c->dev = dev;
+
+	c->aux = newipaux(commonuser(), "none");	/* per-channel owner/tag record, released in ipclose */
+
+	return c;
+}
+
+static Walkqid*
+ipwalk(Chan* c, Chan *nc, char **name, s32 nname)
+{
+	IPaux *a = c->aux;
+	Walkqid* w;
+
+	w = devwalk(c, nc, name, nname, nil, 0, ipgen);
+	if(w != nil && w->clone != nil)
+		w->clone->aux = newipaux(a->owner, a->tag);
+	return w;
+}
+
+static s32
+ipstat(Chan* c, uchar* db, s32 n)
+{
+	return devstat(c, db, n, nil, 0, ipgen);
+}
+
+static int
+incoming(void* arg)
+{
+	Conv *conv;
+
+	conv = arg;
+	return conv->incall != nil;
+}
+
+static int m2p[] = {	/* maps open mode (omode&3) to the rwx permission bits ipopen checks against cv->perm */
+	[OREAD]		4,
+	[OWRITE]	2,
+	[ORDWR]		6,	[OEXEC]		1,	/* omode&3 can be 3 (OEXEC); without this slot ipopen reads past the table */
+};
+
+static Chan*
+ipopen(Chan* c, u32 omode)
+{
+	Conv *cv, *nc;
+	Proto *p;
+	int perm;
+	Fs *f;
+
+	perm = m2p[omode&3];
+
+	f = ipfs[c->dev];
+
+	switch(TYPE(c->qid)) {
+	default:
+		break;
+	case Qndb:
+		if(omode & (OWRITE|OTRUNC) && !iseve())
+			error(Eperm);
+		if((omode & (OWRITE|OTRUNC)) == (OWRITE|OTRUNC))
+			f->ndb[0] = 0;
+		break;
+	case Qlog:
+		netlogopen(f);
+		break;
+	case Qiprouter:
+		iprouteropen(f);
+		break;
+	case Qiproute:
+		break;
+	case Qtopdir:
+	case Qprotodir:
+	case Qconvdir:
+	case Qstatus:
+	case Qremote:
+	case Qlocal:
+	case Qstats:
+	case Qbootp:
+	case Qipselftab:
+		if(omode != OREAD)
+			error(Eperm);
+		break;
+	case Qsnoop:
+		if(omode != OREAD)
+			error(Eperm);
+		p = f->p[PROTO(c->qid)];
+		cv = p->conv[CONV(c->qid)];
+		if(strcmp(ATTACHER(c), cv->owner) != 0 && !iseve())
+			error(Eperm);
+		incref(&cv->snoopers);
+		break;
+	case Qclone:
+		p = f->p[PROTO(c->qid)];
+		qlock(p);
+		if(waserror()){
+			qunlock(p);
+			nexterror();
+		}
+		cv = Fsprotoclone(p, ATTACHER(c));
+		qunlock(p);
+		poperror();
+		if(cv == nil) {
+			error(Enodev);
+			break;
+		}
+		mkqid(&c->qid, QID(p->x, cv->x, Qctl), 0, QTFILE);
+		break;
+	case Qdata:
+	case Qctl:
+	case Qerr:
+		p = f->p[PROTO(c->qid)];
+		qlock(p);
+		cv = p->conv[CONV(c->qid)];
+		qlock(cv);
+		if(waserror()) {
+			qunlock(cv);
+			qunlock(p);
+			nexterror();
+		}
+		if((perm & (cv->perm>>6)) != perm) {
+			if(strcmp(ATTACHER(c), cv->owner) != 0)
+				error(Eperm);
+		 	if((perm & cv->perm) != perm)
+				error(Eperm); 
+
+		}
+		cv->inuse++;
+		if(cv->inuse == 1){
+			kstrdup(&cv->owner, ATTACHER(c));
+			cv->perm = 0660;
+		}
+		qunlock(cv);
+		qunlock(p);
+		poperror();
+		break;
+	case Qlisten:
+		cv = f->p[PROTO(c->qid)]->conv[CONV(c->qid)];
+		if((perm & (cv->perm>>6)) != perm) {
+			if(strcmp(ATTACHER(c), cv->owner) != 0)
+				error(Eperm);
+		 	if((perm & cv->perm) != perm)
+				error(Eperm); 
+
+		}
+
+		if(cv->state != Announced)
+			error("not announced");
+
+		if(waserror()){
+			closeconv(cv);
+			nexterror();
+		}
+		qlock(cv);
+		cv->inuse++;
+		qunlock(cv);
+
+		nc = nil;
+		while(nc == nil) {
+			/* give up if we got a hangup */
+			if(qisclosed(cv->rq))
+				error("listen hungup");
+
+			qlock(&cv->listenq);
+			if(waserror()) {
+				qunlock(&cv->listenq);
+				nexterror();
+			}
+
+			/* wait for a connect */
+			sleep(&cv->listenr, incoming, cv);
+
+			qlock(cv);
+			nc = cv->incall;
+			if(nc != nil){
+				cv->incall = nc->next;
+				mkqid(&c->qid, QID(PROTO(c->qid), nc->x, Qctl), 0, QTFILE);
+				kstrdup(&cv->owner, ATTACHER(c));
+			}
+			qunlock(cv);
+
+			qunlock(&cv->listenq);
+			poperror();
+		}
+		closeconv(cv);
+		poperror();
+		break;
+	}
+	c->mode = openmode(omode);
+	c->flag |= COPEN;
+	c->offset = 0;
+	return c;
+}
+
+static s32
+ipwstat(Chan *c, uchar *dp, s32 n)
+{
+	Dir *d;
+	Conv *cv;
+	Fs *f;
+	Proto *p;
+
+	f = ipfs[c->dev];
+	switch(TYPE(c->qid)) {
+	default:
+		error(Eperm);
+		break;
+	case Qctl:
+	case Qdata:
+		break;
+	}
+
+	d = smalloc(sizeof(*d)+n);
+	if(waserror()){
+		free(d);
+		nexterror();
+	}
+	n = convM2D(dp, n, d, (char*)&d[1]);
+	if(n == 0)
+		error(Eshortstat);
+	p = f->p[PROTO(c->qid)];
+	cv = p->conv[CONV(c->qid)];
+	if(!iseve() && strcmp(ATTACHER(c), cv->owner) != 0)
+		error(Eperm);
+	if(!emptystr(d->uid))
+		kstrdup(&cv->owner, d->uid);
+	if(d->mode != ~0UL)
+		cv->perm = d->mode & 0777;
+	poperror();
+	free(d);
+	return n;
+}
+
+extern void
+closeconv(Conv *cv)
+{
+	Conv *nc;
+	Ipmulti *mp;
+
+	qlock(cv);
+
+	if(--cv->inuse > 0) {
+		qunlock(cv);
+		return;
+	}
+
+	/* close all incoming calls since no listen will ever happen */
+	for(nc = cv->incall; nc; nc = cv->incall){
+		cv->incall = nc->next;
+		closeconv(nc);
+	}
+	cv->incall = nil;
+
+	kstrdup(&cv->owner, network);
+	cv->perm = 0660;
+
+	while((mp = cv->multi) != nil)
+		ipifcremmulti(cv, mp->ma, mp->ia);
+
+	cv->r = nil;
+	cv->rgen = 0;
+	cv->p->close(cv);
+	cv->state = Idle;
+	qunlock(cv);
+}
+
+static void
+ipclose(Chan* c)
+{
+	Fs *f;
+
+	f = ipfs[c->dev];
+	switch(TYPE(c->qid)) {
+	default:
+		break;
+	case Qlog:
+		if(c->flag & COPEN)
+			netlogclose(f);
+		break;
+	case Qiprouter:
+		if(c->flag & COPEN)
+			iprouterclose(f);
+		break;
+	case Qdata:
+	case Qctl:
+	case Qerr:
+		if(c->flag & COPEN)
+			closeconv(f->p[PROTO(c->qid)]->conv[CONV(c->qid)]);
+		break;
+	case Qsnoop:
+		if(c->flag & COPEN)
+			decref(&f->p[PROTO(c->qid)]->conv[CONV(c->qid)]->snoopers);
+		break;
+	}
+	free(((IPaux*)c->aux)->owner);
+	free(c->aux);
+}
+
+enum
+{
+	Statelen=	32*1024,
+};
+
+static s32
+ipread(Chan *ch, void *a, s32 n, s64 off)
+{
+	Conv *c;
+	Proto *x;
+	char *buf, *p;
+	s32 rv;
+	Fs *f;
+	ulong offset = off;
+
+	f = ipfs[ch->dev];
+
+	p = a;
+	switch(TYPE(ch->qid)) {
+	default:
+		error(Eperm);
+	case Qtopdir:
+	case Qprotodir:
+	case Qconvdir:
+		return devdirread(ch, a, n, 0, 0, ipgen);
+	case Qarp:
+		return arpread(f->arp, a, offset, n);
+ 	case Qbootp:
+ 		return bootpread(a, offset, n);
+ 	case Qndb:
+		return readstr(offset, a, n, f->ndb);
+	case Qiproute:
+		return routeread(f, a, offset, n);
+	case Qiprouter:
+		return iprouterread(f, a, n);
+	case Qipselftab:
+		return ipselftabread(f, a, offset, n);
+	case Qlog:
+		return netlogread(f, a, offset, n);
+	case Qctl:
+		sprint(up->genbuf, "%ud", CONV(ch->qid));
+		return readstr(offset, p, n, up->genbuf);
+	case Qremote:
+		buf = smalloc(Statelen);
+		x = f->p[PROTO(ch->qid)];
+		c = x->conv[CONV(ch->qid)];
+		if(x->remote == nil) {
+			sprint(buf, "%I!%d\n", c->raddr, c->rport);
+		} else {
+			(*x->remote)(c, buf, Statelen-2);
+		}
+		rv = readstr(offset, p, n, buf);
+		free(buf);
+		return rv;
+	case Qlocal:
+		buf = smalloc(Statelen);
+		x = f->p[PROTO(ch->qid)];
+		c = x->conv[CONV(ch->qid)];
+		if(x->local == nil) {
+			sprint(buf, "%I!%d\n", c->laddr, c->lport);
+		} else {
+			(*x->local)(c, buf, Statelen-2);
+		}
+		rv = readstr(offset, p, n, buf);
+		free(buf);
+		return rv;
+	case Qstatus:
+		buf = smalloc(Statelen);
+		x = f->p[PROTO(ch->qid)];
+		c = x->conv[CONV(ch->qid)];
+		(*x->state)(c, buf, Statelen-2);
+		rv = readstr(offset, p, n, buf);
+		free(buf);
+		return rv;
+	case Qdata:
+		c = f->p[PROTO(ch->qid)]->conv[CONV(ch->qid)];
+		return qread(c->rq, a, n);
+	case Qerr:
+		c = f->p[PROTO(ch->qid)]->conv[CONV(ch->qid)];
+		return qread(c->eq, a, n);
+	case Qsnoop:
+		c = f->p[PROTO(ch->qid)]->conv[CONV(ch->qid)];
+		return qread(c->sq, a, n);
+	case Qstats:
+		x = f->p[PROTO(ch->qid)];
+		if(x->stats == nil)
+			error("stats not implemented");
+		buf = smalloc(Statelen);
+		(*x->stats)(x, buf, Statelen);
+		rv = readstr(offset, p, n, buf);
+		free(buf);
+		return rv;
+	}
+}
+
+static Block*
+ipbread(Chan* ch, s32 n, u32 offset)
+{
+	Conv *c;
+	Proto *x;
+	Fs *f;
+
+	switch(TYPE(ch->qid)){
+	case Qdata:
+		f = ipfs[ch->dev];
+		x = f->p[PROTO(ch->qid)];
+		c = x->conv[CONV(ch->qid)];
+		return qbread(c->rq, n);
+	default:
+		return devbread(ch, n, offset);
+	}
+}
+
+/*
+ *  set local address to be that of the ifc closest to remote address
+ */
+static void
+setladdr(Conv* c)
+{
+	findlocalip(c->p->f, c->laddr, c->raddr);
+}
+
+/*
+ *  set a local port making sure the quad of raddr,rport,laddr,lport is unique
+ */
+static char*
+setluniqueport(Conv* c, int lport)
+{
+	Proto *p;
+	Conv *xp;
+	int x;
+
+	p = c->p;
+
+	qlock(p);
+	for(x = 0; x < p->nc; x++){
+		xp = p->conv[x];
+		if(xp == nil)
+			break;
+		if(xp == c)
+			continue;
+		if((xp->state == Connected || xp->state == Announced)
+		&& xp->lport == lport
+		&& xp->rport == c->rport
+		&& ipcmp(xp->raddr, c->raddr) == 0
+		&& ipcmp(xp->laddr, c->laddr) == 0){
+			qunlock(p);
+			return "address in use";
+		}
+	}
+	c->lport = lport;
+	qunlock(p);
+	return nil;
+}
+
+/*
+ *  pick a local port and set it
+ */
+extern void
+setlport(Conv* c)
+{
+	Proto *p;
+	ushort *pp;
+	int x, found;
+
+	p = c->p;
+	if(c->restricted)
+		pp = &p->nextrport;
+	else
+		pp = &p->nextport;
+	qlock(p);
+	for(;;(*pp)++){
+		/*
+		 * Fsproto initialises p->nextport to 0 and the restricted
+		 * ports (p->nextrport) to 600.
+		 * Restricted ports must lie between 600 and 1024.
+		 * For the initial condition or if the unrestricted port number
+		 * has wrapped round, select a random port between 5000 and 1<<15
+		 * to start at.
+		 */
+		if(c->restricted){
+			if(*pp >= 1024)
+				*pp = 600;
+		}
+		else while(*pp < 5000)
+			*pp = nrand(1<<15);
+
+		found = 0;
+		for(x = 0; x < p->nc; x++){
+			if(p->conv[x] == nil)
+				break;
+			if(p->conv[x]->lport == *pp){
+				found = 1;
+				break;
+			}
+		}
+		if(found == 0)
+			break;
+	}
+	c->lport = (*pp)++;
+	qunlock(p);
+}
+
+/*
+ *  set a local address and port from a string of the form
+ *	[address!]port[!r]
+ */
+static char*
+setladdrport(Conv* c, char* str, int announcing)
+{
+	char *p;
+	char *rv;
+	ushort lport;
+	uchar addr[IPaddrlen];
+
+	rv = nil;
+
+	/*
+	 *  ignore restricted part if it exists.  it's
+	 *  meaningless on local ports.
+	 */
+	p = strchr(str, '!');
+	if(p != nil){
+		*p++ = 0;
+		if(strcmp(p, "r") == 0)
+			p = nil;
+	}
+
+	c->lport = 0;
+	if(p == nil){
+		if(announcing)
+			ipmove(c->laddr, IPnoaddr);
+		else
+			setladdr(c);
+		p = str;
+	} else {
+		if(strcmp(str, "*") == 0)
+			ipmove(c->laddr, IPnoaddr);
+		else {
+			parseip(addr, str);
+			if(ipforme(c->p->f, addr))
+				ipmove(c->laddr, addr);
+			else
+				return "not a local IP address";
+		}
+	}
+
+	/* one process can get all connections */
+	if(announcing && strcmp(p, "*") == 0){
+		if(!iseve())
+			error(Eperm);
+		return setluniqueport(c, 0);
+	}
+
+	lport = atoi(p);
+	if(lport <= 0)
+		setlport(c);
+	else
+		rv = setluniqueport(c, lport);
+	return rv;
+}
+
+static char*
+setraddrport(Conv* c, char* str)
+{
+	char *p;
+
+	p = strchr(str, '!');
+	if(p == nil)
+		return "malformed address";
+	*p++ = 0;
+	parseip(c->raddr, str);
+	c->rport = atoi(p);
+	p = strchr(p, '!');
+	if(p){
+		if(strstr(p, "!r") != nil)
+			c->restricted = 1;
+	}
+	return nil;
+}
+
+/*
+ *  called by protocol connect routine to set addresses
+ */
+char*
+Fsstdconnect(Conv *c, char *argv[], int argc)
+{
+	char *p;
+
+	switch(argc) {
+	default:
+		return "bad args to connect";
+	case 2:
+		p = setraddrport(c, argv[1]);
+		if(p != nil)
+			return p;
+		setladdr(c);
+		setlport(c);
+		break;
+	case 3:
+		p = setraddrport(c, argv[1]);
+		if(p != nil)
+			return p;
+		p = setladdrport(c, argv[2], 0);
+		if(p != nil)
+			return p;
+	}
+
+	if((memcmp(c->raddr, v4prefix, IPv4off) == 0 &&
+		memcmp(c->laddr, v4prefix, IPv4off) == 0)
+		|| ipcmp(c->raddr, IPnoaddr) == 0)
+		c->ipversion = V4;
+	else
+		c->ipversion = V6;
+
+	return nil;
+}
+/*
+ *  initiate connection and sleep till it's set up
+ */
+static int
+connected(void* a)
+{
+	return ((Conv*)a)->state == Connected;
+}
+static void
+connectctlmsg(Proto *x, Conv *c, Cmdbuf *cb)
+{
+	char *p;
+
+	if(c->state != 0)
+		error(Econinuse);
+	c->state = Connecting;
+	c->cerr[0] = '\0';
+	if(x->connect == nil)
+		error("connect not supported");
+	p = x->connect(c, cb->f, cb->nf);
+	if(p != nil)
+		error(p);
+
+	qunlock(c);
+	if(waserror()){
+		qlock(c);
+		nexterror();
+	}
+	sleep(&c->cr, connected, c);
+	qlock(c);
+	poperror();
+
+	if(c->cerr[0] != '\0')
+		error(c->cerr);
+}
+
+/*
+ *  called by protocol announce routine to set addresses
+ */
+char*
+Fsstdannounce(Conv* c, char* argv[], int argc)
+{
+	memset(c->raddr, 0, sizeof(c->raddr));
+	c->rport = 0;
+	switch(argc){
+	default:
+		return "bad args to announce";
+	case 2:
+		return setladdrport(c, argv[1], 1);
+	}
+}
+
+/*
+ *  initiate announcement and sleep till it's set up
+ */
+static int
+announced(void* a)
+{
+	return ((Conv*)a)->state == Announced;
+}
+static void
+announcectlmsg(Proto *x, Conv *c, Cmdbuf *cb)
+{
+	char *p;
+
+	if(c->state != 0)
+		error(Econinuse);
+	c->state = Announcing;
+	c->cerr[0] = '\0';
+	if(x->announce == nil)
+		error("announce not supported");
+	p = x->announce(c, cb->f, cb->nf);
+	if(p != nil)
+		error(p);
+
+	qunlock(c);
+	if(waserror()){
+		qlock(c);
+		nexterror();
+	}
+	sleep(&c->cr, announced, c);
+	qlock(c);
+	poperror();
+
+	if(c->cerr[0] != '\0')
+		error(c->cerr);
+}
+
+/*
+ *  called by protocol bind routine to set addresses
+ */
+char*
+Fsstdbind(Conv* c, char* argv[], int argc)
+{
+	switch(argc){
+	default:
+		return "bad args to bind";
+	case 2:
+		return setladdrport(c, argv[1], 0);
+	}
+}
+
+static void
+bindctlmsg(Proto *x, Conv *c, Cmdbuf *cb)
+{
+	char *p;
+
+	if(x->bind == nil)
+		p = Fsstdbind(c, cb->f, cb->nf);
+	else
+		p = x->bind(c, cb->f, cb->nf);
+	if(p != nil)
+		error(p);
+}
+
+static void
+tosctlmsg(Conv *c, Cmdbuf *cb)
+{
+	if(cb->nf < 2)
+		c->tos = 0;
+	else
+		c->tos = atoi(cb->f[1]);
+}
+
+static void
+ttlctlmsg(Conv *c, Cmdbuf *cb)
+{
+	if(cb->nf < 2)
+		c->ttl = MAXTTL;
+	else
+		c->ttl = atoi(cb->f[1]);
+}
+
+static s32
+ipwrite(Chan* ch, void *v, s32 n, s64 off)
+{
+	Conv *c;
+	Proto *x;
+	char *p;
+	Cmdbuf *cb;
+	uchar ia[IPaddrlen], ma[IPaddrlen];
+	Fs *f;
+	char *a;
+
+	a = v;
+	f = ipfs[ch->dev];
+
+	switch(TYPE(ch->qid)){
+	default:
+		error(Eperm);
+	case Qdata:
+		x = f->p[PROTO(ch->qid)];
+		c = x->conv[CONV(ch->qid)];
+
+		if(c->wq == nil)
+			error(Eperm);
+
+		qwrite(c->wq, a, n);
+		break;
+	case Qarp:
+		return arpwrite(f, a, n);
+	case Qiproute:
+		return routewrite(f, ch, a, n);
+	case Qlog:
+		netlogctl(f, a, n);
+		return n;
+	case Qndb:
+		return ndbwrite(f, a, off, n);
+	case Qctl:
+		x = f->p[PROTO(ch->qid)];
+		c = x->conv[CONV(ch->qid)];
+		cb = parsecmd(a, n);
+
+		qlock(c);
+		if(waserror()) {
+			qunlock(c);
+			free(cb);
+			nexterror();
+		}
+		if(cb->nf < 1)
+			error("short control request");
+		if(strcmp(cb->f[0], "connect") == 0)
+			connectctlmsg(x, c, cb);
+		else if(strcmp(cb->f[0], "announce") == 0)
+			announcectlmsg(x, c, cb);
+		else if(strcmp(cb->f[0], "bind") == 0)
+			bindctlmsg(x, c, cb);
+		else if(strcmp(cb->f[0], "ttl") == 0)
+			ttlctlmsg(c, cb);
+		else if(strcmp(cb->f[0], "tos") == 0)
+			tosctlmsg(c, cb);
+		else if(strcmp(cb->f[0], "ignoreadvice") == 0)
+			c->ignoreadvice = 1;
+		else if(strcmp(cb->f[0], "addmulti") == 0){
+			if(cb->nf < 2)
+				error("addmulti needs interface address");
+			if(cb->nf == 2){
+				if(!ipismulticast(c->raddr))
+					error("addmulti for a non multicast address");
+				parseip(ia, cb->f[1]);
+				ipifcaddmulti(c, c->raddr, ia);
+			} else {
+				parseip(ma, cb->f[2]);
+				if(!ipismulticast(ma))
+					error("addmulti for a non multicast address");
+				parseip(ia, cb->f[1]);
+				ipifcaddmulti(c, ma, ia);
+			}
+		} else if(strcmp(cb->f[0], "remmulti") == 0){
+			if(cb->nf < 2)
+				error("remmulti needs interface address");
+			if(!ipismulticast(c->raddr))
+				error("remmulti for a non multicast address");
+			parseip(ia, cb->f[1]);
+			ipifcremmulti(c, c->raddr, ia);
+		} else if(x->ctl != nil) {
+			p = x->ctl(c, cb->f, cb->nf);
+			if(p != nil)
+				error(p);
+		} else
+			error("unknown control request");
+		qunlock(c);
+		free(cb);
+		poperror();
+	}
+	return n;
+}
+
+static s32
+ipbwrite(Chan* ch, Block* bp, u32 offset)
+{
+	Conv *c;
+	Proto *x;
+	Fs *f;
+	int n;
+
+	switch(TYPE(ch->qid)){
+	case Qdata:
+		f = ipfs[ch->dev];
+		x = f->p[PROTO(ch->qid)];
+		c = x->conv[CONV(ch->qid)];
+
+		if(c->wq == nil)
+			error(Eperm);
+
+		if(bp->next)
+			bp = concatblock(bp);
+		n = BLEN(bp);
+		qbwrite(c->wq, bp);
+		return n;
+	default:
+		return devbwrite(ch, bp, offset);
+	}
+}
+
+Dev ipdevtab = {
+	'I',
+	"ip",
+
+	ipreset,
+	devinit,
+	devshutdown,
+	ipattach,
+	ipwalk,
+	ipstat,
+	ipopen,
+	devcreate,
+	ipclose,
+	ipread,
+	ipbread,
+	ipwrite,
+	ipbwrite,
+	devremove,
+	ipwstat,
+};
+
+int
+Fsproto(Fs *f, Proto *p)
+{
+	if(f->np >= Maxproto)
+		return -1;
+
+	p->f = f;
+
+	if(p->ipproto > 0){
+		if(f->t2p[p->ipproto] != nil)
+			return -1;
+		f->t2p[p->ipproto] = p;
+	}
+
+	p->qid.type = QTDIR;
+	p->qid.path = QID(f->np, 0, Qprotodir);
+	p->conv = malloc(sizeof(Conv*)*(p->nc+1));
+	if(p->conv == nil)
+		panic("Fsproto");
+
+	p->x = f->np;
+	p->nextport = 0;
+	p->nextrport = 600;
+	f->p[f->np++] = p;
+
+	return 0;
+}
+
+/*
+ *  return true if this protocol is
+ *  built in
+ */
+int
+Fsbuiltinproto(Fs* f, uchar proto)
+{
+	return f->t2p[proto] != nil;
+}
+
+/*
+ *  called with protocol locked
+ */
+Conv*
+Fsprotoclone(Proto *p, char *user)
+{
+	Conv *c, **pp, **ep;
+
+retry:
+	c = nil;
+	ep = &p->conv[p->nc];
+	for(pp = p->conv; pp < ep; pp++) {
+		c = *pp;
+		if(c == nil){
+			c = malloc(sizeof(Conv));
+			if(c == nil)
+				error(Enomem);
+			qlock(c);
+			c->p = p;
+			c->x = pp - p->conv;
+			if(p->ptclsize != 0){
+				c->ptcl = malloc(p->ptclsize);
+				if(c->ptcl == nil) {
+					free(c);
+					error(Enomem);
+				}
+			}
+			*pp = c;
+			p->ac++;
+			c->eq = qopen(1024, Qmsg, 0, 0);
+			(*p->create)(c);
+			break;
+		}
+		if(canqlock(c)){
+			/*
+			 *  make sure both processes and protocol
+			 *  are done with this Conv
+			 */
+			if(c->inuse == 0 && (p->inuse == nil || (*p->inuse)(c) == 0))
+				break;
+
+			qunlock(c);
+		}
+	}
+	if(pp >= ep) {
+		if(p->gc != nil && (*p->gc)(p))
+			goto retry;
+		return nil;
+	}
+
+	c->inuse = 1;
+	kstrdup(&c->owner, user);
+	c->perm = 0660;
+	c->state = Idle;
+	ipmove(c->laddr, IPnoaddr);
+	ipmove(c->raddr, IPnoaddr);
+	c->r = nil;
+	c->rgen = 0;
+	c->lport = 0;
+	c->rport = 0;
+	c->restricted = 0;
+	c->ttl = MAXTTL;
+	c->tos = DFLTTOS;
+	qreopen(c->rq);
+	qreopen(c->wq);
+	qreopen(c->eq);
+
+	qunlock(c);
+	return c;
+}
+
+int
+Fsconnected(Conv* c, char* msg)
+{
+	if(msg != nil && *msg != '\0')
+		kstrcpy(c->cerr, msg, sizeof(c->cerr));
+
+	switch(c->state){
+
+	case Announcing:
+		c->state = Announced;
+		break;
+
+	case Connecting:
+		c->state = Connected;
+		break;
+	}
+
+	wakeup(&c->cr);
+	return 0;
+}
+
+Proto*
+Fsrcvpcol(Fs* f, uchar proto)
+{
+	if(f->ipmux)
+		return f->ipmux;
+	else
+		return f->t2p[proto];
+}
+
+Proto*
+Fsrcvpcolx(Fs *f, uchar proto)
+{
+	return f->t2p[proto];
+}
+
+/*
+ *  called with protocol locked
+ */
+Conv*
+Fsnewcall(Conv *c, uchar *raddr, ushort rport, uchar *laddr, ushort lport, uchar version)
+{
+	Conv *nc;
+	Conv **l;
+	int i;
+
+	qlock(c);
+	i = 0;
+	for(l = &c->incall; *l; l = &(*l)->next)
+		i++;
+	if(i >= Maxincall) {
+		qunlock(c);
+		return nil;
+	}
+
+	/* find a free conversation */
+	nc = Fsprotoclone(c->p, network);
+	if(nc == nil) {
+		qunlock(c);
+		return nil;
+	}
+	ipmove(nc->raddr, raddr);
+	nc->rport = rport;
+	ipmove(nc->laddr, laddr);
+	nc->lport = lport;
+	nc->next = nil;
+	*l = nc;
+	nc->state = Connected;
+	nc->ipversion = version;
+
+	qunlock(c);
+
+	wakeup(&c->listenr);
+
+	return nc;
+}
+
+static long
+ndbwrite(Fs *f, char *a, ulong off, int n)
+{
+	if(off > strlen(f->ndb))
+		error(Eio);
+	if(off+n >= sizeof(f->ndb)-1)
+		error(Eio);
+	memmove(f->ndb+off, a, n);
+	f->ndb[off+n] = 0;
+	f->ndbvers++;
+	f->ndbmtime = seconds();
+	return n;
+}
+
+ulong
+scalednconv(void)
+{
+	if(conf.npage*BY2PG >= 128*MB)
+		return Nchans*4;
+	return Nchans;
+}
--- /dev/null
+++ b/os/ip.original/dhcp.c
@@ -1,0 +1,447 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+#include "kernel.h"
+#include "ip.h"
+#include "ppp.h"
+
+Ipaddr pppdns[2];
+
+static	ulong	fsip;
+static	ulong	auip;
+static	ulong	gwip;
+static	ulong	ipmask;
+static	ulong	ipaddr;
+static	ulong	dns1ip;
+static	ulong	dns2ip;
+
+int		dhcpmsgtype;
+int		debug=0;
+enum
+{
+	Bootrequest = 1,
+	Bootreply   = 2,
+};
+
+typedef struct Bootp	/* datagram layout on the udp data file: address/port header followed by the bootp message */
+{
+	/* udp.c oldheader */
+	uchar	raddr[IPaddrlen];
+	uchar	laddr[IPaddrlen];
+	uchar	rport[2];
+	uchar	lport[2];
+	/* bootp itself */
+	uchar	op;			/* opcode */
+	uchar	htype;		/* hardware type */
+	uchar	hlen;			/* hardware address len */
+	uchar	hops;		/* hops */
+	uchar	xid[4];		/* a random number */
+	uchar	secs[2];		/* seconds elapsed since client started booting */
+	uchar	flags[2];		/* flags */
+	uchar	ciaddr[4];		/* client IP address (client tells server) */
+	uchar	yiaddr[4];		/* client IP address (server tells client) */
+	uchar	siaddr[4];		/* server IP address */
+	uchar	giaddr[4];		/* gateway IP address */
+	uchar	chaddr[16];	/* client hardware address */
+	uchar	sname[64];	/* server host name (optional) */
+	uchar	file[128];		/* boot file name */
+	uchar	vend[128];	/* vendor-specific goo 340 */
+} Bootp;
+
+static	Bootp	req;
+static	Proc*	rcvprocp;
+static	int	recv;
+static	int	done;
+static	Rendez	bootpr;
+static	char	rcvbuf[512+2*IPaddrlen+2*2];	  /* 576 */
+static	uchar sid[4];
+static	ulong iplease;
+
+/*
+ * bootp returns:
+ *
+ * "fsip d.d.d.d
+ * auip d.d.d.d
+ * gwip d.d.d.d
+ * ipmask d.d.d.d
+ * ipaddr d.d.d.d
+ * dns1ip	d.d.d.d
+ * dns2ip	d.d.d.d
+ *
+ * where d.d.d.d is the IP address in dotted decimal notation, and each
+ * address is followed by a newline.
+	Last change:  SUN  13 Sep 2001    4:36 pm
+ */
+
+/*
+ * Parse the vendor specific fields according to RFC 1084.
+ * We are overloading the "cookie server" to be the Inferno 
+ * authentication server and the "resource location server"
+ * to be the Inferno file server.
+ *
+ * If the vendor specific field is formatted properly, it
+ * will begin with the four bytes 99.130.83.99 and end with
+ * an 0xFF byte.
+ */
+static int	/* returns the DHCP message type found (0 if none, i.e. plain bootp) or -1 on a malformed field; fills fsip, auip, gwip, ipmask, dns1ip, dns2ip, iplease, sid */
+parsevend(uchar* pvend)
+{
+	uchar *vend=pvend, *evend=(uchar*)rcvbuf+sizeof(rcvbuf);	/* pvend must point into rcvbuf; the walk never goes past evend */
+	int dhcpmsg=0;
+	/* The field must start with 99.130.83.99 to be compliant */
+	if ((vend[0] != 99) || (vend[1] != 130) || (vend[2] != 83) || (vend[3] != 99)){
+		print("bad bootp vendor field: %.2x%.2x%.2x%.2x", vend[0], vend[1], vend[2], vend[3]);
+		return -1;
+	}
+
+	/* Skip over the magic cookie */
+	vend += 4;
+
+	while (evend-vend >= 2 && vend[0] != 0 && vend[0] != 0xFF && vend[1]+2 <= evend-vend) {	/* stop at end marker, pad, or an option that would overrun the buffer */
+		int i;
+//	
+		if(debug){
+			print(">>>Opt[%d] [%d]", vend[0], vend[1]);
+			for(i=0; i<vend[1]; i++)
+				print(" %2.2x", vend[i+2]);
+			print("\n");
+		}
+//
+		switch (vend[0]) {
+		case 1:	/* Subnet mask field */
+			/* There must be only one subnet mask */
+			if (vend[1] == 4)
+				ipmask = (vend[2]<<24)|(vend[3]<<16)| (vend[4]<<8)| vend[5];
+			else{
+				return -1;
+			}
+			break;
+
+		case 3:	/* Gateway/router field */
+			/* We are only concerned with first address */
+			if (vend[1] >0 && vend[1]%4==0)
+				gwip = (vend[2]<<24)|(vend[3]<<16)|(vend[4]<<8)|vend[5];
+			else
+				return -1;
+			break;
+		case 6:	/* domain name server */
+			if(vend[1]>0 && vend[1] %4==0){
+				dns1ip=(vend[2]<<24)|(vend[3]<<16)|(vend[4]<<8)|vend[5];
+				if(vend[1]>4)
+					dns2ip=(vend[6]<<24)|(vend[7]<<16)|(vend[8]<<8)|vend[9];
+			}else
+				return -1;
+			break;
+
+		case 8:	/* "Cookie server" (auth server) field */
+			/* We are only concerned with first address */
+			if (vend[1] > 0 && vend[1]%4==0)
+				auip = (vend[2]<<24)|(vend[3]<<16)|(vend[4]<<8)|vend[5];
+			else
+				return -1;
+			break;
+
+		case 11:	/* "Resource loc server" (file server) field */
+			/* We are only concerned with first address */
+			if (vend[1] > 0 && vend[1]%4==0)
+				fsip = (vend[2]<<24)| (vend[3]<<16)| (vend[4]<<8)| vend[5];
+			else
+				return -1;
+			break;
+		case 51:	/* ip lease time */
+			if(vend[1]==4){
+				iplease=(vend[2]<<24)|(vend[3]<<16)|(vend[4]<<8)|vend[5];
+			}else
+				return -1;
+			break;
+		case 53:	/* DHCP message type */
+			if(vend[1]==1)
+				dhcpmsg=vend[2];
+			else
+				return -1;
+			break;
+		case 54:	/* server identifier */
+			if(vend[1]==4){
+				memmove(sid, vend+2, 4);
+			}else
+				return -1;
+			break;
+
+		default:	/* Unknown options are ignored and skipped below */
+			break;
+		}
+
+		/* Skip over the field */
+		vend += vend[1] + 2;
+	}
+	if(debug && evend-vend >= 2)	/* only print the terminating option if both bytes are inside the buffer */
+		print(">>>Opt[%d] [%d]\n", vend[0], vend[1]);
+	return dhcpmsg;
+}
+
+static void
+dispvend(uchar* pvend)
+{	
+	uchar *vend=pvend;
+
+	//print("<<<Magic : %2.2x%2.2x%2.2x%2.2x\n", vend[0], vend[1], vend[2], vend[3]);
+	
+	vend += 4;		/* Skip over the magic cookie */
+	while ((vend[0] != 0) && (vend[0] != 0xFF)) {
+	//	int i;
+	  //	print("<<<Opt[%d] [%d]", vend[0], vend[1]);
+		//for(i=0; i<vend[1]; i++)
+		//	print(" %2.2x", vend[i+2]);
+		//print("\n");
+	
+		vend += vend[1] + 2;
+	}
+	//print("<<<Opt[ %2.2x] [%2.2x]\n", vend[0], vend[1]);
+}
+
+static void
+rcvbootp(void *a)
+{
+	int n, fd, dhcp;
+	Bootp *rp;
+
+	if(waserror())
+		pexit("", 0);
+	rcvprocp = up;	/* store for postnote below */
+	fd = (int)a;
+	while(done == 0) {
+		if(debug)
+			print("rcvbootp:looping\n");
+
+		n = kread(fd, rcvbuf, sizeof(rcvbuf));
+		if(n <= 0)
+			break;
+		rp = (Bootp*)rcvbuf;
+		if (memcmp(req.chaddr, rp->chaddr, 6) == 0 && rp->htype == 1 && rp->hlen == 6) {
+			ipaddr = (rp->yiaddr[0]<<24)| (rp->yiaddr[1]<<16)| (rp->yiaddr[2]<<8)| rp->yiaddr[3];
+			if(debug)
+				print("ipaddr = %2.2x %2.2x %2.2x %2.2x \n", rp->yiaddr[0], rp->yiaddr[1], rp->yiaddr[2], rp->yiaddr[3]);
+			//memmove(req.siaddr, rp->siaddr, 4);	/* siaddr */
+			dhcp = parsevend(rp->vend);
+	
+			if(dhcpmsgtype < dhcp){
+				dhcpmsgtype=dhcp;
+				recv = 1;
+				wakeup(&bootpr);
+				if(dhcp==0 || dhcp ==5 || dhcp == 6 )
+					break;
+			}
+		}
+	}
+	poperror();
+	rcvprocp = nil;
+
+	if(debug)
+		print("rcvbootp exit\n");
+	pexit("", 0);
+}
+
+static char*
+rbootp(Ipifc *ifc)
+{
+	int cfd, dfd, tries, n;
+	char ia[5+3*16], im[16], *av[3];
+	uchar nipaddr[4], ngwip[4], nipmask[4];
+	char dir[Maxpath];
+	static uchar vend_rfc1048[] = { 99, 130, 83, 99 };
+	uchar *vend;
+
+	/*
+	 * configure ifc by dhcp or plain bootp: broadcast till we get a reply or run out
+	 * of tries, then install what was learned; returns nil or a setup error string
+	 */
+	if(debug)
+	    print("dhcp: bootp() called\n");
+	tries = 0;
+	av[1] = "0.0.0.0";	/* placeholder address so that we can send at all */
+	av[2] = "0.0.0.0";
+	ipifcadd(ifc, av, 3, 0, nil);
+
+	cfd = kannounce("udp!*!68", dir);
+	if(cfd < 0)
+		return "dhcp announce failed";
+	strcat(dir, "/data");
+	if(kwrite(cfd, "headers", 7) < 0){
+		kclose(cfd);
+		return "dhcp ctl headers failed";
+	}
+	kwrite(cfd, "oldheaders", 10);
+	dfd = kopen(dir, ORDWR);
+	if(dfd < 0){
+		kclose(cfd);
+		return "dhcp open data failed";
+	}
+	kclose(cfd);
+	
+	while(tries<1){
+		tries++;
+		memset(sid, 0, 4);
+		iplease=0;
+		dhcpmsgtype=-2;
+/* DHCPDISCOVER*/
+		done = 0;
+		recv = 0;
+		kproc("rcvbootp", rcvbootp, (void*)dfd, KPDUPFDG);
+		/* Prepare DHCPDISCOVER */	
+		memset(&req, 0, sizeof(req));
+		ipmove(req.raddr, IPv4bcast);
+		hnputs(req.rport, 67);
+		req.op = Bootrequest;
+		req.htype = 1;			/* ethernet (all we know) */
+		req.hlen = 6;			/* ethernet (all we know) */
+		
+		memmove(req.chaddr, ifc->mac, 6);	/* Hardware MAC address */
+		//ipv4local(ifc, req.ciaddr);				/* Fill in the local IP address if we know it */
+		memset(req.file, 0, sizeof(req.file));
+		vend=req.vend;
+		memmove(vend, vend_rfc1048, 4); vend+=4;
+		*vend++=53; *vend++=1;*vend++=1;		/* dhcp msg type==1, dhcpdiscover */
+		
+		*vend++=61;*vend++=7;*vend++=1;		/* client identifier */
+		memmove(vend, ifc->mac, 6);vend+=6;
+		*vend=0xff;
+
+		if(debug)
+			dispvend(req.vend); 
+		for(n=0;n<4;n++){
+			if(kwrite(dfd, &req, sizeof(req))<0)	/* SEND DHCPDISCOVER */
+				print("DHCPDISCOVER: %r");
+		
+			tsleep(&bootpr, return0, 0, 1000);	/* wait DHCPOFFER */
+			if(debug)
+				print("[DHCP] DISCOVER: msgtype = %d\n", dhcpmsgtype);
+
+			if(dhcpmsgtype==2)		/* DHCPOFFER */
+				break;
+			else if(dhcpmsgtype==0)	/* bootp */
+				break;	/* was "return nil": that leaked dfd and skipped the cleanup and address setup below */
+			else if(dhcpmsgtype== -2)	/* time out */
+				continue;
+			else
+				break;
+			
+		}
+		if(dhcpmsgtype!=2)
+			continue;
+
+/* DHCPREQUEST */	
+		memset(req.vend, 0, sizeof(req.vend));
+		vend=req.vend;
+		memmove(vend, vend_rfc1048, 4);vend+=4;	
+
+		*vend++=53; *vend++=1;*vend++=3;		/* dhcp msg type==3, dhcprequest */
+
+		*vend++=50;	*vend++=4;				/* requested ip address */
+		*vend++=(ipaddr >> 24)&0xff;
+		*vend++=(ipaddr >> 16)&0xff;
+		*vend++=(ipaddr >> 8) & 0xff;
+		*vend++=ipaddr & 0xff;
+
+		*vend++=51;*vend++=4;					/* lease time */
+		*vend++=(iplease>>24)&0xff; *vend++=(iplease>>16)&0xff; *vend++=(iplease>>8)&0xff; *vend++=iplease&0xff;
+
+		*vend++=54; *vend++=4;					/* server identifier */
+		memmove(vend, sid, 4);	vend+=4;
+	
+		*vend++=61;*vend++=07;*vend++=01;		/* client identifier */
+		memmove(vend, ifc->mac, 6);vend+=6;
+		*vend=0xff;
+		if(debug) 
+			dispvend(req.vend); 
+		if(kwrite(dfd, &req, sizeof(req))<0){
+			print("DHCPREQUEST: %r");
+			continue;
+		}
+		tsleep(&bootpr, return0, 0, 2000);
+		if(dhcpmsgtype==5)		/* wait for DHCPACK */
+			break;
+		else
+			continue;
+		/* CHECK ARP */
+		/* DHCPDECLINE */
+	}
+	kclose(dfd);
+	done = 1;
+	if(rcvprocp != nil){
+		postnote(rcvprocp, 1, "timeout", 0);
+		rcvprocp = nil;
+	}
+	/* NOTE(review): also reached when nothing answered; ipaddr/ipmask are installed whatever the outcome */
+	av[1] = "0.0.0.0";
+	av[2] = "0.0.0.0";
+	ipifcrem(ifc, av, 3);
+
+	hnputl(nipaddr, ipaddr);
+	sprint(ia, "%V", nipaddr);
+	hnputl(nipmask, ipmask);
+	sprint(im, "%V", nipmask);
+	av[1] = ia;
+	av[2] = im;
+	ipifcadd(ifc, av, 3, 0, nil);
+
+	if(gwip != 0) {
+		hnputl(ngwip, gwip);
+		n = sprint(ia, "add 0.0.0.0 0.0.0.0 %V", ngwip);	/* default route via the gateway */
+		routewrite(ifc->conv->p->f, nil, ia, n);
+	}
+	return nil;
+}
+
+static int
+rbootpread(char *bp, ulong offset, int len)
+{
+	int n, i;
+	char *buf;
+	uchar a[4];
+	/* format the bootp/dhcp results as text and copy up to len bytes, starting at offset, into bp; returns readstr's count */
+	if(debug)
+		print("dhcp: bootpread() \n");
+	buf = smalloc(READSTR);
+	if(waserror()){
+		free(buf);
+		nexterror();
+	}
+
+	hnputl(a, fsip);
+	n = snprint(buf, READSTR, "fsip %15V\n", a);
+	hnputl(a, auip);
+	n += snprint(buf + n, READSTR-n, "auip %15V\n", a);
+	hnputl(a, gwip);
+	n += snprint(buf + n, READSTR-n, "gwip %15V\n", a);
+	hnputl(a, ipmask);
+	n += snprint(buf + n, READSTR-n, "ipmask %15V\n", a);
+	hnputl(a, ipaddr);
+	n += snprint(buf + n, READSTR-n, "ipaddr %15V\n", a);
+	n += snprint(buf+n, READSTR-n, "expired %lud\n", iplease);
+	/* one "dns" line: dns2ip and dns1ip when non-zero, then any pppdns entries that are set */
+	n += snprint(buf + n, READSTR-n, "dns");
+	if(dns2ip){
+		hnputl(a, dns2ip);
+		n+=snprint(buf + n, READSTR-n, " %15V", a);
+	}
+	if(dns1ip){
+		hnputl(a, dns1ip);
+		n += snprint(buf + n, READSTR-n, " %15V", a);
+	}
+
+	for(i=0; i<2; i++)
+		if(ipcmp(pppdns[i], IPnoaddr) != 0 && ipcmp(pppdns[i], v4prefix) != 0)
+			n += snprint(buf + n, READSTR-n, " %15I", pppdns[i]);
+
+	snprint(buf + n, READSTR-n, "\n");
+	len = readstr(offset, bp, len, buf);
+	poperror();
+	free(buf);
+	return len;
+}
+
+char*	(*bootp)(Ipifc*) = rbootp;
+int	(*bootpread)(char*, ulong, int) = rbootpread;
--- /dev/null
+++ b/os/ip.original/eipconvtest.c
@@ -1,0 +1,152 @@
+#include <u.h>
+#include <libc.h>
+
+enum
+{
+	Isprefix= 16,
+};
+
+uchar prefixvals[256] =
+{
+[0x00] 0 | Isprefix,
+[0x80] 1 | Isprefix,
+[0xC0] 2 | Isprefix,
+[0xE0] 3 | Isprefix,
+[0xF0] 4 | Isprefix,
+[0xF8] 5 | Isprefix,
+[0xFC] 6 | Isprefix,
+[0xFE] 7 | Isprefix,
+[0xFF] 8 | Isprefix,
+};
+
+uchar v4prefix[16] = {
+	0, 0, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0xff, 0xff,
+	0, 0, 0, 0
+};
+
+void
+hnputl(void *p, ulong v)
+{
+	uchar *out = p;
+
+	/* store the low 32 bits of v at p, most significant byte first */
+	*out++ = v>>24;
+	*out++ = v>>16;
+	*out++ = v>>8;
+	*out = v;
+}
+
+int
+eipconv(va_list *arg, Fconv *f)
+{
+	char buf[8*5];
+	static char *efmt = "%.2lux%.2lux%.2lux%.2lux%.2lux%.2lux";
+	static char *ifmt = "%d.%d.%d.%d";
+	uchar *p, ip[16];
+	ulong *lp;
+	ushort s;
+	int i, j, n, eln, eli;
+	/* print-verb converter for E, I, i, V and M: formats the address into buf and emits it through strconv */
+	switch(f->chr) {
+	case 'E':		/* Ethernet address */
+		p = va_arg(*arg, uchar*);
+		sprint(buf, efmt, p[0], p[1], p[2], p[3], p[4], p[5]);
+		break;
+	case 'I':		/* Ip address */
+		p = va_arg(*arg, uchar*);
+common:
+		if(memcmp(p, v4prefix, 12) == 0)	/* v4-mapped address: dotted quad of the last 4 bytes */
+			sprint(buf, ifmt, p[12], p[13], p[14], p[15]);
+		else {
+			/* find longest elision */
+			eln = eli = -1;
+			for(i = 0; i < 16; i += 2){
+				for(j = i; j < 16; j += 2)
+					if(p[j] != 0 || p[j+1] != 0)
+						break;
+				if(j > i && j - i > eln){
+					eli = i;	/* byte offset where the zero run starts */
+					eln = j - i;	/* its length in bytes */
+				}
+			}
+
+			/* print with possible elision */
+			n = 0;
+			for(i = 0; i < 16; i += 2){
+				if(i == eli){
+					n += sprint(buf+n, "::");
+					i += eln;
+					if(i >= 16)
+						break;
+				} else if(i != 0)
+					n += sprint(buf+n, ":");
+				s = (p[i]<<8) + p[i+1];
+				n += sprint(buf+n, "%ux", s);
+			}
+		}
+		break;
+	case 'i':		/* v6 address as 4 longs */
+		lp = va_arg(*arg, ulong*);
+		for(i = 0; i < 4; i++)
+			hnputl(ip+4*i, *lp++);
+		p = ip;
+		goto common;
+	case 'V':		/* v4 ip address */
+		p = va_arg(*arg, uchar*);
+		sprint(buf, ifmt, p[0], p[1], p[2], p[3]);
+		break;
+	case 'M':		/* ip mask */
+		p = va_arg(*arg, uchar*);
+
+		/* look for a prefix mask */
+		for(i = 0; i < 16; i++)
+			if(p[i] != 0xff)
+				break;
+		if(i < 16){
+			if((prefixvals[p[i]] & Isprefix) == 0)
+				goto common;	/* not a contiguous prefix: print it like an address */
+			for(j = i+1; j < 16; j++)
+				if(p[j] != 0)
+					goto common;
+			n = 8*i + (prefixvals[p[i]] & ~Isprefix);
+		} else
+			n = 8*16;
+
+		/* got one, use /xx format */
+		sprint(buf, "/%d", n);
+		break;
+	default:
+		strcpy(buf, "(eipconv)");
+	}
+	strconv(buf, f);
+	return sizeof(uchar*);	/* every verb handled here takes one pointer argument */
+}
+
+uchar testvec[11][16] =
+{
+ { 0,0,0,0, 0,0,0,0, 0,0,0xff,0xff, 1,3,4,5, },
+ { 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, },
+ { 0xff,0xff,0x80,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, },
+ { 0xff,0xff,0xff,0xc0, 0,0,0,0, 0,0,0,0, 0,0,0,0, },
+ { 0xff,0xff,0xff,0xff, 0xe0,0,0,0, 0,0,0,0, 0,0,0,0, },
+ { 0xff,0xff,0xff,0xff, 0xff,0xf0,0,0, 0,0,0,0, 0,0,0,0, },
+ { 0xff,0xff,0xff,0xff, 0xff,0xff,0xf8,0, 0,0,0,0, 0,0,0,0, },
+ { 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, },
+ { 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, },
+ { 0,0,0,0, 0,0x11,0,0, 0,0,0,0, 0,0,0,0, },
+ { 0,0,0,0x11, 0,0,0,0, 0,0,0,0, 0,0,0,0x12, },
+};
+
+void
+main(void)
+{
+	uchar (*v)[16];
+
+	fmtinstall('I', eipconv);	/* both verbs are served by the one converter */
+	fmtinstall('M', eipconv);
+	for(v = testvec; v < testvec+11; v++)
+		print("%I\n%M\n", *v, *v);
+	exits(0);
+}
--- /dev/null
+++ b/os/ip.original/esp.c
@@ -1,0 +1,866 @@
+#include	"u.h"
+#include	"../port/lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"../port/error.h"
+
+#include	"ip.h"
+
+#include	"libsec.h"
+
+typedef struct Esphdr Esphdr;
+typedef struct Esptail Esptail;
+typedef struct Userhdr Userhdr;
+typedef struct Esppriv Esppriv;
+typedef struct Espcb Espcb;
+typedef struct Algorithm Algorithm;
+typedef struct Esprc4 Esprc4;
+
+#define DPRINT if(0)print
+
+enum
+{
+	IP_ESPPROTO	= 50,
+	EsphdrSize	= 28,	// includes IP header
+	IphdrSize	= 20,	// options have been stripped
+	EsptailSize	= 2,	// does not include pad or auth data
+	UserhdrSize	= 4,	// user visible header size - if enabled
+};
+
+struct Esphdr
+{
+	/* ip header */
+	uchar	vihl;		/* Version and header length */
+	uchar	tos;		/* Type of service */
+	uchar	length[2];	/* packet length */
+	uchar	id[2];		/* Identification */
+	uchar	frag[2];	/* Fragment information */
+	uchar	Unused;	
+	uchar	espproto;	/* Protocol */
+	uchar	espplen[2];	/* Header plus data length */
+	uchar	espsrc[4];	/* Ip source */
+	uchar	espdst[4];	/* Ip destination */
+
+	/* esp header */
+	uchar	espspi[4];	/* Security parameter index */
+	uchar	espseq[4];	/* Sequence number */
+};
+
+struct Esptail
+{
+	uchar	pad;
+	uchar	nexthdr;
+};
+
+/* header as seen by the user */
+struct Userhdr
+{
+	uchar	nexthdr;	// next protocol
+	uchar	unused[3];
+};
+
+struct Esppriv
+{
+	ulong	in;
+	ulong	inerrors;
+};
+
+/*
+ *  protocol specific part of Conv
+ */
+struct Espcb
+{
+	int	incoming;
+	int	header;		// use user level header
+	ulong	spi;
+	ulong	seq;		// last seq sent
+	ulong	window;		// for replay attacks
+	char	*espalg;
+	void	*espstate;	// other state for esp
+	int	espivlen;	// in bytes
+	int	espblklen;
+	int	(*cipher)(Espcb*, uchar *buf, int len);
+	char	*ahalg;
+	void	*ahstate;	// other state for ah (the auth key)
+	int	ahlen;		// auth data length in bytes
+	int	ahblklen;
+	int	(*auth)(Espcb*, uchar *buf, int len, uchar *hash);
+};
+
+struct Algorithm
+{
+	char 	*name;
+	int	keylen;		// in bits
+	void	(*init)(Espcb*, char* name, uchar *key, int keylen);
+};
+
+
+enum {
+	RC4forward	= 10*1024*1024,	// maximum skip forward
+	RC4back = 100*1024,		// maximum look back
+};
+
+struct Esprc4
+{
+	ulong cseq;	// current byte sequence number
+	RC4state current;
+
+	int ovalid;	// old is valid
+	ulong lgseq; // last good sequence
+	ulong oseq;	// old byte sequence number
+	RC4state old;
+};
+
+static	Conv* convlookup(Proto *esp, ulong spi);
+static	char *setalg(Espcb *ecb, char **f, int n, Algorithm *alg);
+static	void nullespinit(Espcb*, char*, uchar *key, int keylen);
+static	void nullahinit(Espcb*, char*, uchar *key, int keylen);
+static	void shaahinit(Espcb*, char*, uchar *key, int keylen);
+static	void md5ahinit(Espcb*, char*, uchar *key, int keylen);
+static	void desespinit(Espcb *ecb, char *name, uchar *k, int n);
+static	void rc4espinit(Espcb *ecb, char *name, uchar *k, int n);
+static	void espkick(void *x);
+
+static Algorithm espalg[] =
+{
+	"null",			0,	nullespinit,
+	"des_56_cbc",		64,	desespinit,
+	"rc4_128",		128,	rc4espinit,
+	nil,			0,	nil,
+};
+
+static Algorithm ahalg[] =
+{
+	"null",			0,	nullahinit,
+	"hmac_sha1_96",		128,	shaahinit,
+	"hmac_md5_96",		128,	md5ahinit,
+	nil,			0,	nil,
+};
+
+static char*
+espconnect(Conv *c, char **argv, int argc)
+{
+	char *p, *pp;
+	char *e = nil;
+	ulong spi;
+	Espcb *ecb = (Espcb*)c->ptcl;
+	/* argv[1] is "addr!spi"; an spi of "*" picks an unused random one and marks the conversation incoming */
+	switch(argc) {
+	default:
+		e = "bad args to connect";
+		break;
+	case 2:
+		p = strchr(argv[1], '!');
+		if(p == nil){
+			e = "malformed address";
+			break;
+		}
+		*p++ = 0;	/* split argv[1] in place; p now points at the spi text */
+		parseip(c->raddr, argv[1]);
+		findlocalip(c->p->f, c->laddr, c->raddr);
+		ecb->incoming = 0;
+		ecb->seq = 0;
+		if(strcmp(p, "*") == 0) {
+			qlock(c->p);
+			for(;;) {
+				spi = nrand(1<<16) + 256;
+				if(convlookup(c->p, spi) == nil)
+					break;
+			}
+			qunlock(c->p);
+			ecb->spi = spi;
+			ecb->incoming = 1;
+			qhangup(c->wq, nil);	/* the incoming side hangs up its write queue */
+		} else {
+			spi = strtoul(p, &pp, 10);
+			if(pp == p) {
+				e = "malformed address";
+				break;
+			}
+			ecb->spi = spi;
+			qhangup(c->rq, nil);	/* the outgoing side hangs up its read queue */
+		}
+		nullespinit(ecb, "null", nil, 0);	/* start with the null cipher and null auth */
+		nullahinit(ecb, "null", nil, 0);
+	}
+	Fsconnected(c, e);
+
+	return e;
+}
+
+
+static int
+espstate(Conv *c, char *state, int n)
+{
+	return snprint(state, n, "%s", !c->inuse ? "Closed\n" : "Open\n");	/* one status line, chosen by c->inuse */
+}
+
+static void
+espcreate(Conv *c)
+{
+	c->rq = qopen(64*1024, Qmsg, 0, 0);	/* read queue, opened with Qmsg */
+	c->wq = qopen(64*1024, Qkick, espkick, c);	/* write queue; espkick is its kick routine, with c as argument */
+}
+
+static void
+espclose(Conv *c)
+{
+	Espcb *ecb;
+	/* close the conversation's queues, clear both addresses and drop the cipher/auth state */
+	qclose(c->rq);
+	qclose(c->wq);
+	qclose(c->eq);
+	ipmove(c->laddr, IPnoaddr);
+	ipmove(c->raddr, IPnoaddr);
+	/* free the algorithm state, then zero the control block (which clears the freed pointers too) */
+	ecb = (Espcb*)c->ptcl;
+	free(ecb->espstate);
+	free(ecb->ahstate);
+	memset(ecb, 0, sizeof(Espcb));
+}
+
+static void
+espkick(void *x)
+{
+	Conv *c = x;
+	Esphdr *eh;
+	Esptail *et;
+	Userhdr *uh;
+	Espcb *ecb;
+	Block *bp;
+	int nexthdr;
+	int payload;
+	int pad;
+	int align;
+	uchar *auth;
+	/* kick routine of c->wq: wrap one queued block in ip+esp header, pad, tail and auth data, then pass it to ipoput4 */
+	bp = qget(c->wq);
+	if(bp == nil)
+		return;
+
+	qlock(c);
+	ecb = c->ptcl;
+
+	if(ecb->header) {
+		/* make sure the message has a User header */
+		bp = pullupblock(bp, UserhdrSize);
+		if(bp == nil) {
+			qunlock(c);
+			return;
+		}
+		uh = (Userhdr*)bp->rp;
+		nexthdr = uh->nexthdr;
+		bp->rp += UserhdrSize;
+	} else {
+		nexthdr = 0;  // what should this be?
+	}
+
+	payload = BLEN(bp) + ecb->espivlen;	/* user data plus room for the iv */
+
+	/* Make space for the ip+esp header and the iv */
+	bp = padblock(bp, EsphdrSize + ecb->espivlen);
+
+	align = 4;
+	if(ecb->espblklen > align)
+		align = ecb->espblklen;
+	if(align % ecb->ahblklen != 0)
+		panic("espkick: ahblklen is important after all");
+	pad = (align-1) - (payload + EsptailSize-1)%align;	/* makes payload+pad+tail a multiple of align */
+
+	/*
+	 * Make space for tail
+	 * this is done by calling padblock with a negative size
+	 * Padblock does not change bp->wp!
+	 */
+	bp = padblock(bp, -(pad+EsptailSize+ecb->ahlen));
+	bp->wp += pad+EsptailSize+ecb->ahlen;
+
+	eh = (Esphdr *)(bp->rp);
+	et = (Esptail*)(bp->rp + EsphdrSize + payload + pad);
+
+	// fill in tail
+	et->pad = pad;
+	et->nexthdr = nexthdr;
+
+	ecb->cipher(ecb, bp->rp+EsphdrSize, payload+pad+EsptailSize);	/* iv, data, pad and tail */
+	auth = bp->rp + EsphdrSize + payload + pad + EsptailSize;	/* auth data follows the tail */
+
+	// fill in head
+	eh->vihl = IP_VER4;
+	hnputl(eh->espspi, ecb->spi);
+	hnputl(eh->espseq, ++ecb->seq);
+	v6tov4(eh->espsrc, c->laddr);
+	v6tov4(eh->espdst, c->raddr);
+	eh->espproto = IP_ESPPROTO;
+	eh->frag[0] = 0;
+	eh->frag[1] = 0;
+	/* authenticate everything from the spi up to, but not including, the auth data */
+	ecb->auth(ecb, bp->rp+IphdrSize, (EsphdrSize-IphdrSize)+payload+pad+EsptailSize, auth);
+
+	qunlock(c);
+	//print("esp: pass down: %uld\n", BLEN(bp));
+	ipoput4(c->p->f, bp, 0, c->ttl, c->tos, c);
+}
+
+void
+espiput(Proto *esp, Ipifc*, Block *bp)
+{
+	Esphdr *eh;
+	Esptail *et;
+	Userhdr *uh;
+	Conv *c;
+	Espcb *ecb;
+	uchar raddr[IPaddrlen], laddr[IPaddrlen];
+	Fs *f;
+	uchar *auth;
+	ulong spi;
+	int payload, nexthdr;
+	/* receive side: find the conversation by spi, check auth, decrypt, strip header/iv/pad/tail, queue the rest on c->rq */
+	f = esp->f;
+
+	bp = pullupblock(bp, EsphdrSize+EsptailSize);
+	if(bp == nil) {
+		netlog(f, Logesp, "esp: short packet\n");
+		return;
+	}
+
+	eh = (Esphdr*)(bp->rp);
+	spi = nhgetl(eh->espspi);
+	v4tov6(raddr, eh->espsrc);
+	v4tov6(laddr, eh->espdst);
+
+	qlock(esp);
+	/* Look for a conversation structure for this port */
+	c = convlookup(esp, spi);
+	if(c == nil) {
+		qunlock(esp);
+		netlog(f, Logesp, "esp: no conv %I -> %I!%d\n", raddr,
+			laddr, spi);
+		icmpnoconv(f, bp);
+		freeblist(bp);
+		return;
+	}
+
+	qlock(c);	/* take the conversation lock before the protocol lock is released */
+	qunlock(esp);
+
+	ecb = c->ptcl;
+	// too hard to do decryption/authentication on block lists
+	if(bp->next)
+		bp = concatblock(bp);
+
+	if(BLEN(bp) < EsphdrSize + ecb->espivlen + EsptailSize + ecb->ahlen) {
+		qunlock(c);
+		netlog(f, Logesp, "esp: short block %I -> %I!%d\n", raddr,
+			laddr, spi);
+		freeb(bp);
+		return;
+	}
+
+	eh = (Esphdr*)(bp->rp);	/* re-read: bp may have been replaced by concatblock */
+	auth = bp->wp - ecb->ahlen;	/* auth data is the last ahlen bytes */
+	if(!ecb->auth(ecb, eh->espspi, auth-eh->espspi, auth)) {
+		qunlock(c);
+print("esp: bad auth %I -> %I!%ld\n", raddr, laddr, spi);
+		netlog(f, Logesp, "esp: bad auth %I -> %I!%d\n", raddr,
+			laddr, spi);
+		freeb(bp);
+		return;
+	}
+
+	payload = BLEN(bp)-EsphdrSize-ecb->ahlen;	/* iv, data, pad and tail */
+	if(payload<=0 || payload%4 != 0 || payload%ecb->espblklen!=0) {
+		qunlock(c);
+		netlog(f, Logesp, "esp: bad length %I -> %I!%d payload=%d BLEN=%d\n", raddr,
+			laddr, spi, payload, BLEN(bp));
+		freeb(bp);
+		return;
+	}
+	if(!ecb->cipher(ecb, bp->rp+EsphdrSize, payload)) {
+		qunlock(c);
+print("esp: cipher failed %I -> %I!%ld: %r\n", raddr, laddr, spi);
+		netlog(f, Logesp, "esp: cipher failed %I -> %I!%d: %r\n", raddr,
+			laddr, spi);
+		freeb(bp);
+		return;
+	}
+
+	payload -= EsptailSize;
+	et = (Esptail*)(bp->rp + EsphdrSize + payload);	/* the tail sits just before the auth data */
+	payload -= et->pad + ecb->espivlen;	/* what remains is the user data length */
+	nexthdr = et->nexthdr;
+	if(payload <= 0) {
+		qunlock(c);
+		netlog(f, Logesp, "esp: short packet after decrypt %I -> %I!%d\n", raddr,
+			laddr, spi);
+		freeb(bp);
+		return;
+	}
+
+	// trim packet
+	bp->rp += EsphdrSize + ecb->espivlen;
+	bp->wp = bp->rp + payload;
+	if(ecb->header) {
+		// assume UserhdrSize < EsphdrSize
+		bp->rp -= UserhdrSize;	/* reuse the stripped header space for the user header */
+		uh = (Userhdr*)bp->rp;
+		memset(uh, 0, UserhdrSize);
+		uh->nexthdr = nexthdr;
+	}
+
+	if(qfull(c->rq)){
+		netlog(f, Logesp, "esp: qfull %I -> %I.%uld\n", raddr,
+			laddr, spi);
+		freeblist(bp);
+	}else {
+//print("esp: pass up: %uld\n", BLEN(bp));
+		qpass(c->rq, bp);
+	}
+
+	qunlock(c);
+}
+
+char*
+espctl(Conv *c, char **f, int n)
+{
+	Espcb *cb;
+	char *verb;
+
+	cb = c->ptcl;
+	verb = f[0];
+	if(strcmp(verb, "esp") == 0)
+		return setalg(cb, f, n, espalg);
+	if(strcmp(verb, "ah") == 0)
+		return setalg(cb, f, n, ahalg);
+	if(strcmp(verb, "header") != 0 && strcmp(verb, "noheader") != 0)
+		return "unknown control request";
+	/* only "header" and "noheader" get here: the first sets the flag, the second clears it */
+	cb->header = verb[0] != 'n';
+	return nil;
+}
+
+void
+espadvise(Proto *esp, Block *bp, char *msg)
+{
+	Esphdr *h;
+	Conv *c;
+	ulong spi;
+	/* hang up, with msg, both queues of the conversation whose spi is in the header at bp->rp; bp is always freed */
+	h = (Esphdr*)(bp->rp);
+	/* espspi is a 4-byte field (written with hnputl, read with nhgetl elsewhere); nhgets saw only half of it */
+	spi = nhgetl(h->espspi);
+	qlock(esp);
+	c = convlookup(esp, spi);
+	if(c != nil) {
+		qhangup(c->rq, msg);
+		qhangup(c->wq, msg);
+	}
+	qunlock(esp);
+	freeblist(bp);
+}
+
+int
+espstats(Proto *esp, char *buf, int len)
+{
+	Esppriv *st;
+	ulong npkt, nerr;
+
+	st = esp->priv;
+	npkt = st->in; nerr = st->inerrors;	/* the two counters kept in the private data */
+	return snprint(buf, len, "%lud %lud\n", npkt, nerr);
+}
+
+static int
+esplocal(Conv *c, char *buf, int len)
+{
+	Espcb *cb;
+	int nw;
+
+	cb = c->ptcl;
+	qlock(c);
+	/* the spi is shown with the local address only for incoming conversations */
+	nw = cb->incoming ? snprint(buf, len, "%I!%uld\n", c->laddr, cb->spi)
+		: snprint(buf, len, "%I\n", c->laddr);
+	qunlock(c);
+	return nw;
+}
+
+static int
+espremote(Conv *c, char *buf, int len)
+{
+	Espcb *cb;
+	int nw;
+
+	cb = c->ptcl;
+	qlock(c);
+	/* the spi is shown with the remote address only for outgoing conversations */
+	nw = cb->incoming ? snprint(buf, len, "%I\n", c->raddr)
+		: snprint(buf, len, "%I!%uld\n", c->raddr, cb->spi);
+	qunlock(c);
+	return nw;
+}
+
+static	Conv*
+convlookup(Proto *esp, ulong spi)
+{
+	Conv **slot;
+	Espcb *cb;
+
+	/* scan the nil-terminated conversation table for an incoming conversation with this spi */
+	for(slot = esp->conv; *slot != nil; slot++){
+		cb = (*slot)->ptcl;
+		if(cb->incoming && cb->spi == spi)
+			return *slot;
+	}
+	return nil;
+}
+
+static char *
+setalg(Espcb *ecb, char **f, int n, Algorithm *alg)
+{
+	uchar *key;
+	int i, nbyte, nchar;
+	int c;
+	/* ctl "esp|ah name hexkey": find f[1] in alg, decode the hex key f[2], init ecb; returns nil or an error string */
+	if(n < 2)
+		return "bad format";
+	for(; alg->name; alg++)
+		if(strcmp(f[1], alg->name) == 0)
+			break;
+	if(alg->name == nil)
+		return "unknown algorithm";
+	/* f[2] is validated and overwritten in place with the value (0-15) of each hex digit */
+	if(n != 3)
+		return "bad format";
+	nbyte = (alg->keylen + 7) >> 3;
+	nchar = strlen(f[2]);
+	for(i=0; i<nchar; i++) {
+		c = f[2][i];
+		if(c >= '0' && c <= '9')
+			f[2][i] -= '0';
+		else if(c >= 'a' && c <= 'f')
+			f[2][i] -= 'a'-10;
+		else if(c >= 'A' && c <= 'F')
+			f[2][i] -= 'A'-10;
+		else
+			return "bad character in key";
+	}
+	key = smalloc(nbyte);	/* assumed to come back zeroed, as the |= below needs — TODO confirm */
+	for(i=0; i<nchar && (i>>1)<nbyte; i++) {	/* i counts nibbles, last digit first; was i*2<nbyte, which filled only nbyte/4 bytes */
+		c = f[2][nchar-i-1];
+		if(i&1)
+			c <<= 4;
+		key[i>>1] |= c;
+	}
+	/* init takes the key length in bits, as listed in the algorithm table */
+	alg->init(ecb, alg->name, key, alg->keylen);
+	free(key);
+	return nil;
+}
+
+static int
+nullcipher(Espcb*, uchar*, int)
+{
+	return 1;	/* "null" cipher: leaves the buffer untouched and reports success */
+}
+
+static void
+nullespinit(Espcb *ecb, char *name, uchar*, int)
+{
+	ecb->espalg = name;
+	ecb->espblklen = 1;	/* no block alignment needed */
+	ecb->espivlen = 0;	/* no iv in the packet */
+	ecb->cipher = nullcipher;
+}
+
+static int
+nullauth(Espcb*, uchar*, int, uchar*)
+{
+	return 1;	/* "null" auth: accepts every packet and writes no auth data */
+}
+
+static void
+nullahinit(Espcb *ecb, char *name, uchar*, int)
+{
+	ecb->ahalg = name;
+	ecb->ahblklen = 1;
+	ecb->ahlen = 0;	/* no auth data in the packet */
+	ecb->auth = nullauth;
+}
+
+void
+seanq_hmac_sha1(uchar hash[SHA1dlen], uchar *t, long tlen, uchar *key, long klen)
+{
+	uchar ipad[65], opad[65];
+	int i;
+	DigestState *digest;
+	uchar innerhash[SHA1dlen];
+	/* keyed sha1 of t[0..tlen) into hash: hash = sha1(key^opad, sha1(key^ipad, t)) */
+	for(i=0; i<64; i++){
+		ipad[i] = 0x36;
+		opad[i] = 0x5c;
+	}
+	ipad[64] = opad[64] = 0;
+	for(i=0; i<klen; i++){	/* NOTE(review): klen must not exceed the pad size; the caller here passes 16 */
+		ipad[i] ^= key[i];
+		opad[i] ^= key[i];
+	}
+	digest = sha1(ipad, 64, nil, nil);	/* presumably starts a digest that the next call finishes */
+	sha1(t, tlen, innerhash, digest);
+	digest = sha1(opad, 64, nil, nil);
+	sha1(innerhash, SHA1dlen, hash, digest);
+}
+
+static int
+shaauth(Espcb *ecb, uchar *t, int tlen, uchar *auth)
+{
+	uchar hash[SHA1dlen];
+	int r;
+	/* returns non-zero when auth already matched the hash of t; auth is overwritten with the computed hash either way */
+	memset(hash, 0, SHA1dlen);
+	seanq_hmac_sha1(hash, t, tlen, (uchar*)ecb->ahstate, 16);	/* ahstate is read as a 16-byte key */
+	r = memcmp(auth, hash, ecb->ahlen) == 0;	/* only the first ahlen bytes are compared */
+	memmove(auth, hash, ecb->ahlen);
+	return r;
+}
+
+static void
+shaahinit(Espcb *ecb, char *name, uchar *key, int klen)
+{
+	if(klen != 128)
+		panic("shaahinit: bad keylen");
+	klen >>= 3;	// convert to bytes (was >>= 8, which gave a 0-byte key buffer)
+	/* hmac_sha1_96: 12 bytes of auth data, keyed with a private copy of the 16-byte key that shaauth reads */
+	ecb->ahalg = name;
+	ecb->ahblklen = 1;
+	ecb->ahlen = 12;
+	ecb->auth = shaauth;
+	ecb->ahstate = smalloc(klen);
+	memmove(ecb->ahstate, key, klen);
+}
+
+void
+seanq_hmac_md5(uchar hash[MD5dlen], uchar *t, long tlen, uchar *key, long klen)
+{
+	uchar ipad[65], opad[65];
+	int i;
+	DigestState *digest;
+	uchar innerhash[MD5dlen];
+	/* keyed md5 of t[0..tlen) into hash: hash = md5(key^opad, md5(key^ipad, t)) */
+	for(i=0; i<64; i++){
+		ipad[i] = 0x36;
+		opad[i] = 0x5c;
+	}
+	ipad[64] = opad[64] = 0;
+	for(i=0; i<klen; i++){	/* NOTE(review): klen must not exceed the pad size; the caller here passes 16 */
+		ipad[i] ^= key[i];
+		opad[i] ^= key[i];
+	}
+	digest = md5(ipad, 64, nil, nil);	/* presumably starts a digest that the next call finishes */
+	md5(t, tlen, innerhash, digest);
+	digest = md5(opad, 64, nil, nil);
+	md5(innerhash, MD5dlen, hash, digest);
+}
+
+static int
+md5auth(Espcb *ecb, uchar *t, int tlen, uchar *auth)
+{
+	uchar hash[MD5dlen];
+	int r;
+	/* returns non-zero when auth already matched the hash of t; auth is overwritten with the computed hash either way */
+	memset(hash, 0, MD5dlen);
+	seanq_hmac_md5(hash, t, tlen, (uchar*)ecb->ahstate, 16);	/* ahstate is read as a 16-byte key */
+	r = memcmp(auth, hash, ecb->ahlen) == 0;	/* only the first ahlen bytes are compared */
+	memmove(auth, hash, ecb->ahlen);
+	return r;
+}
+
+static void
+md5ahinit(Espcb *ecb, char *name, uchar *key, int klen)
+{
+	if(klen != 128)
+		panic("md5ahinit: bad keylen");
+	klen >>= 3;	// convert to bytes
+	/* hmac_md5_96: 12 bytes of auth data, keyed with a private copy of the 16-byte key */
+	/* NOTE(review): a previous ecb->ahstate is not freed before being replaced below */
+	ecb->ahalg = name;
+	ecb->ahblklen = 1;
+	ecb->ahlen = 12;
+	ecb->auth = md5auth;
+	ecb->ahstate = smalloc(klen);
+	memmove(ecb->ahstate, key, klen);
+}
+
+static int
+descipher(Espcb *ecb, uchar *p, int n)
+{
+	uchar tmp[8];
+	uchar *pp, *tp, *ip, *eip, *ep;
+	DESstate *ds = ecb->espstate;
+	/* des in chained-block mode over p[0..n): the first 8 bytes carry the iv; always returns 1 */
+	ep = p + n;
+	if(ecb->incoming) {
+		memmove(ds->ivec, p, 8);	/* incoming: take the iv from the packet */
+		p += 8;
+		while(p < ep){
+			memmove(tmp, p, 8);	/* keep the ciphertext: it chains into the next block */
+			block_cipher(ds->expanded, p, 1);	/* presumably 1 selects decryption */
+			tp = tmp;
+			ip = ds->ivec;
+			for(eip = ip+8; ip < eip; ){
+				*p++ ^= *ip;
+				*ip++ = *tp++;
+			}
+		}
+	} else {
+		memmove(p, ds->ivec, 8);	/* outgoing: send the current iv in front of the data */
+		for(p += 8; p < ep; p += 8){
+			pp = p;
+			ip = ds->ivec;
+			for(eip = ip+8; ip < eip; )
+				*pp++ ^= *ip++;
+			block_cipher(ds->expanded, p, 0);	/* presumably 0 selects encryption */
+			memmove(ds->ivec, p, 8);	/* the ciphertext becomes the next chaining value */
+		}
+	}
+	return 1;
+}
+	
+static void
+desespinit(Espcb *ecb, char *name, uchar *k, int n)
+{
+	uchar key[8];
+	uchar ivec[8];
+	int i;
+	/* select des: 8-byte blocks and iv, key k of n bits (at most 8 bytes used), random starting iv */
+	// bits to bytes
+	n = (n+7)>>3;
+	if(n > 8)
+		n = 8;
+	memset(key, 0, sizeof(key));
+	memmove(key, k, n);
+	for(i=0; i<8; i++)
+		ivec[i] = nrand(256);
+	ecb->espalg = name;
+	ecb->espblklen = 8;
+	ecb->espivlen = 8;
+	ecb->cipher = descipher;
+	ecb->espstate = smalloc(sizeof(DESstate));	/* NOTE(review): a previous espstate is not freed here */
+	setupDESstate(ecb->espstate, key, ivec);
+}
+
+static int
+rc4cipher(Espcb *ecb, uchar *p, int n)
+{
+	Esprc4 *esprc4;
+	RC4state tmpstate;
+	ulong seq;
+	long d, dd;
+	/* rc4 over p[4..n); the first 4 bytes carry the stream byte offset. returns 0 on failure, 1 otherwise */
+	if(n < 4)
+		return 0;
+
+	esprc4 = ecb->espstate;
+	if(ecb->incoming) {
+		seq = nhgetl(p);
+		p += 4;
+		n -= 4;
+		d = seq-esprc4->cseq;	/* distance between the packet's offset and where our stream is */
+		if(d == 0) {
+			rc4(&esprc4->current, p, n);
+			esprc4->cseq += n;
+			if(esprc4->ovalid) {
+				dd = esprc4->cseq - esprc4->lgseq;
+				if(dd > RC4back)
+					esprc4->ovalid = 0;
+			}
+		} else if(d > 0) {
+print("missing packet: %uld %ld\n", seq, d);
+			// this link is hosed
+			if(d > RC4forward) {
+				strcpy(up->errstr, "rc4cipher: skipped too much");
+				return 0;
+			}
+			esprc4->lgseq = seq;
+			if(!esprc4->ovalid) {
+				esprc4->ovalid = 1;	/* keep the pre-gap state so late packets can still be decoded */
+				esprc4->oseq = esprc4->cseq;
+				memmove(&esprc4->old, &esprc4->current, sizeof(RC4state));
+			}
+			rc4skip(&esprc4->current, d);
+			rc4(&esprc4->current, p, n);
+			esprc4->cseq = seq+n;
+		} else {
+print("reordered packet: %uld %ld\n", seq, d);
+			dd = seq - esprc4->oseq;
+			if(!esprc4->ovalid || -d > RC4back || dd < 0) {
+				strcpy(up->errstr, "rc4cipher: too far back");
+				return 0;
+			}
+			memmove(&tmpstate, &esprc4->old, sizeof(RC4state));	/* work on a copy so the old state is kept */
+			rc4skip(&tmpstate, dd);
+			rc4(&tmpstate, p, n);
+			return 1;
+		}
+
+		// move old state up
+		if(esprc4->ovalid) {
+			dd = esprc4->cseq - RC4back - esprc4->oseq;
+			if(dd > 0) {
+				rc4skip(&esprc4->old, dd);
+				esprc4->oseq += dd;
+			}
+		}
+	} else {
+		hnputl(p, esprc4->cseq);	/* outgoing: stamp the packet with the current stream offset */
+		p += 4;
+		n -= 4;
+		rc4(&esprc4->current, p, n);
+		esprc4->cseq += n;
+	}
+	return 1;
+}
+
+static void
+rc4espinit(Espcb *ecb, char *name, uchar *k, int n)
+{	
+	Esprc4 *esprc4;
+	/* select rc4: key k of n bits, 4-byte alignment, and a 4-byte field where des has its iv */
+	// bits to bytes
+	n = (n+7)>>3;
+	esprc4 = smalloc(sizeof(Esprc4));
+	memset(esprc4, 0, sizeof(Esprc4));
+	setupRC4state(&esprc4->current, k, n);
+	ecb->espalg = name;
+	ecb->espblklen = 4;
+	ecb->espivlen = 4;	/* rc4cipher stores the stream offset in these 4 bytes */
+	ecb->cipher = rc4cipher;
+	ecb->espstate = esprc4;	/* NOTE(review): a previous espstate is not freed here */
+}
+	
+void
+espinit(Fs *fs)
+{
+	Proto *esp;
+	/* build the "esp" protocol descriptor and hand it to Fsproto for fs */
+	esp = smalloc(sizeof(Proto));
+	esp->priv = smalloc(sizeof(Esppriv));
+	esp->name = "esp";
+	esp->connect = espconnect;
+	esp->announce = nil;	/* no announce operation is provided */
+	esp->ctl = espctl;
+	esp->state = espstate;
+	esp->create = espcreate;
+	esp->close = espclose;
+	esp->rcv = espiput;
+	esp->advise = espadvise;
+	esp->stats = espstats;
+	esp->local = esplocal;
+	esp->remote = espremote;
+	esp->ipproto = IP_ESPPROTO;
+	esp->nc = Nchans;
+	esp->ptclsize = sizeof(Espcb);	/* size of the per-conversation ptcl area */
+
+	Fsproto(fs, esp);
+}
--- /dev/null
+++ b/os/ip.original/ethermedium.c
@@ -1,0 +1,792 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+
+#include "ip.h"
+#include "ipv6.h"
+#include "kernel.h"
+
+typedef struct Etherhdr Etherhdr;
+struct Etherhdr
+{
+	uchar	d[6];		/* destination mac */
+	uchar	s[6];		/* source mac */
+	uchar	t[2];		/* ether type, most significant byte first */
+};
+
+static uchar ipbroadcast[IPaddrlen] = {
+	0xff,0xff,0xff,0xff,  
+	0xff,0xff,0xff,0xff,  
+	0xff,0xff,0xff,0xff,  
+	0xff,0xff,0xff,0xff,
+};
+
+static uchar etherbroadcast[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+
+static void	etherread4(void *a);
+static void	etherread6(void *a);
+static void	etherbind(Ipifc *ifc, int argc, char **argv);
+static void	etherunbind(Ipifc *ifc);
+static void	etherbwrite(Ipifc *ifc, Block *bp, int version, uchar *ip);
+static void	etheraddmulti(Ipifc *ifc, uchar *a, uchar *ia);
+static void	etherremmulti(Ipifc *ifc, uchar *a, uchar *ia);
+static Block*	multicastarp(Fs *f, Arpent *a, Medium*, uchar *mac);
+static void	sendarp(Ipifc *ifc, Arpent *a);
+static void	sendgarp(Ipifc *ifc, uchar*);
+static int	multicastea(uchar *ea, uchar *ip);
+static void	recvarpproc(void*);
+static void	resolveaddr6(Ipifc *ifc, Arpent *a);
+static void	etherpref2addr(uchar *pref, uchar *ea);
+
+Medium ethermedium =
+{
+.name=		"ether",
+.hsize=		14,		/* sizeof(Etherhdr): 6 + 6 + 2 */
+.mintu=		60,
+.maxtu=		1514,
+.maclen=	6,
+.bind=		etherbind,
+.unbind=	etherunbind,
+.bwrite=	etherbwrite,
+.addmulti=	etheraddmulti,
+.remmulti=	etherremmulti,
+.ares=		arpenter,
+.areg=		sendgarp,	/* gratuitous arp */
+.pref2addr=	etherpref2addr,
+};
+
+Medium gbemedium =
+{
+.name=		"gbe",
+.hsize=		14,		/* sizeof(Etherhdr): 6 + 6 + 2 */
+.mintu=		60,
+.maxtu=		9014,		/* apart from the name, the only difference from ethermedium */
+.maclen=	6,
+.bind=		etherbind,
+.unbind=	etherunbind,
+.bwrite=	etherbwrite,
+.addmulti=	etheraddmulti,
+.remmulti=	etherremmulti,
+.ares=		arpenter,
+.areg=		sendgarp,	/* gratuitous arp */
+.pref2addr=	etherpref2addr,
+};
+
+typedef struct	Etherrock Etherrock;
+struct Etherrock
+{
+	Fs	*f;		/* file system we belong to */
+	Proc	*arpp;		/* arp process; set by the kproc itself, zeroed when it exits */
+	Proc	*read4p;	/* reading process (v4); set and zeroed the same way */
+	Proc	*read6p;	/* reading process (v6); set and zeroed the same way */
+	Chan	*mchan4;	/* Data channel for v4 */
+	Chan	*achan;		/* Arp channel */
+	Chan	*cchan4;	/* Control channel for v4 */
+	Chan	*mchan6;	/* Data channel for v6 */
+	Chan	*cchan6;	/* Control channel for v6 */
+};
+
+/*
+ *  ether types and arp opcodes used by this medium
+ */
+enum
+{
+	ETARP		= 0x0806,	/* ether type of arp packets */
+	ETIP4		= 0x0800,	/* ether type of IPv4; also the arp protocol type */
+	ETIP6		= 0x86DD,	/* ether type of IPv6 */
+	ARPREQUEST	= 1,		/* Etherarp.op values */
+	ARPREPLY	= 2,
+};
+
+typedef struct Etherarp Etherarp;
+struct Etherarp
+{
+	uchar	d[6];		/* ether destination */
+	uchar	s[6];		/* ether source */
+	uchar	type[2];	/* ether type, ETARP */
+	uchar	hrd[2];		/* hardware type; senders here use 1 */
+	uchar	pro[2];		/* protocol type; senders here use ETIP4 */
+	uchar	hln;		/* sizeof(sha) */
+	uchar	pln;		/* sizeof(spa) */
+	uchar	op[2];		/* ARPREQUEST or ARPREPLY */
+	uchar	sha[6];		/* sender mac */
+	uchar	spa[4];		/* sender IPv4 address */
+	uchar	tha[6];		/* target mac */
+	uchar	tpa[4];		/* target IPv4 address */
+};
+
+static char *nbmsg = "nonblocking";
+
+/*
+ *  called to bind an IP ifc to the ethernet device named by argv[2];
+ *  called with ifc wlock'd
+ */
+static void
+etherbind(Ipifc *ifc, int argc, char **argv)
+{
+	Chan *mchan4, *cchan4, *achan, *mchan6, *cchan6;
+	char addr[Maxpath];	//char addr[2*KNAMELEN];
+	char dir[Maxpath];	//char dir[2*KNAMELEN];
+	char *buf;
+	int fd, cfd, n;
+	char *ptr;
+	Etherrock *er;
+
+	if(argc < 3)		/* argv[2] is dereferenced below */
+		error(Ebadarg);
+
+	mchan4 = cchan4 = achan = mchan6 = cchan6 = nil;
+	buf = nil;
+	if(waserror()){		/* release whatever was acquired before the error */
+		if(mchan4 != nil)
+			cclose(mchan4);
+		if(cchan4 != nil)
+			cclose(cchan4);
+		if(achan != nil)
+			cclose(achan);
+		if(mchan6 != nil)
+			cclose(mchan6);
+		if(cchan6 != nil)
+			cclose(cchan6);
+		if(buf != nil)
+			free(buf);
+		nexterror();
+	}
+
+	/*
+	 *  open ip conversation
+	 *
+	 *  the dial will fail if the type is already open on
+	 *  this device.
+	 */
+	snprint(addr, sizeof(addr), "%s!0x800", argv[2]);
+	fd = kdial(addr, nil, dir, &cfd);
+	if(fd < 0)
+		errorf("dial 0x800 failed: %s", up->env->errstr);
+	mchan4 = commonfdtochan(fd, ORDWR, 0, 1);
+	cchan4 = commonfdtochan(cfd, ORDWR, 0, 1);
+	kclose(fd);
+	kclose(cfd);
+
+	/*
+	 *  make it non-blocking
+	 */
+	devtab[cchan4->type]->write(cchan4, nbmsg, strlen(nbmsg), 0);
+
+	/*
+	 *  get mac address and speed
+	 */
+	snprint(addr, sizeof(addr), "%s/stats", dir);
+	fd = kopen(addr, OREAD);
+	if(fd < 0)
+		errorf("can't open ether stats: %s", up->env->errstr);
+
+	buf = smalloc(512);
+	n = kread(fd, buf, 511);	/* leave room for the terminating 0 */
+	kclose(fd);
+	if(n <= 0)
+		error(Eio);
+	buf[n] = 0;
+
+	ptr = strstr(buf, "addr: ");
+	if(!ptr)
+		error(Eio);
+	ptr += 6;
+	parsemac(ifc->mac, ptr, 6);
+
+	ptr = strstr(buf, "mbps: ");
+	if(ptr){
+		ptr += 6;
+		ifc->mbps = atoi(ptr);
+	} else
+		ifc->mbps = 100;	/* default when the driver reports no speed */
+
+	/*
+ 	 *  open arp conversation
+	 */
+	snprint(addr, sizeof(addr), "%s!0x806", argv[2]);
+	fd = kdial(addr, nil, nil, nil);
+	if(fd < 0)
+		errorf("dial 0x806 failed: %s", up->env->errstr);
+	achan = commonfdtochan(fd, ORDWR, 0, 1);
+	kclose(fd);
+
+	/*
+	 *  open ip conversation
+	 *
+	 *  the dial will fail if the type is already open on
+	 *  this device.
+	 */
+	snprint(addr, sizeof(addr), "%s!0x86DD", argv[2]);
+	fd = kdial(addr, nil, dir, &cfd);
+	if(fd < 0)
+		errorf("dial 0x86DD failed: %s", up->env->errstr);
+	mchan6 = commonfdtochan(fd, ORDWR, 0, 1);
+	cchan6 = commonfdtochan(cfd, ORDWR, 0, 1);
+	kclose(fd);
+	kclose(cfd);
+
+	/*
+	 *  make it non-blocking
+	 */
+	devtab[cchan6->type]->write(cchan6, nbmsg, strlen(nbmsg), 0);
+
+	er = smalloc(sizeof(*er));
+	er->mchan4 = mchan4;
+	er->cchan4 = cchan4;
+	er->achan = achan;
+	er->mchan6 = mchan6;
+	er->cchan6 = cchan6;
+	er->f = ifc->conv->p->f;
+	ifc->arg = er;
+
+	free(buf);
+	poperror();
+
+	/* each kproc records itself in er so that etherunbind can stop it */
+	kproc("etherread4", etherread4, ifc, 0);
+	kproc("recvarpproc", recvarpproc, ifc, 0);
+	kproc("etherread6", etherread6, ifc, 0);
+}
+
+/*
+ *  stop the three kprocs, then close the channels; called with ifc wlock'd
+ */
+static void
+etherunbind(Ipifc *ifc)
+{
+	Etherrock *er = ifc->arg;
+
+	if(er->read4p)
+		postnote(er->read4p, 1, "unbind", 0);
+	if(er->read6p)
+		postnote(er->read6p, 1, "unbind", 0);
+	if(er->arpp)
+		postnote(er->arpp, 1, "unbind", 0);
+
+	/* wait for readers to die; each one zeroes its own pointer on the way out */
+	while(er->arpp != 0 || er->read4p != 0 || er->read6p != 0)
+		tsleep(&up->sleep, return0, 0, 300);
+
+	if(er->mchan4 != nil)
+		cclose(er->mchan4);
+	if(er->achan != nil)
+		cclose(er->achan);
+	if(er->cchan4 != nil)
+		cclose(er->cchan4);
+	if(er->mchan6 != nil)
+		cclose(er->mchan6);
+	if(er->cchan6 != nil)
+		cclose(er->cchan6);
+
+	free(er);
+}
+
+/*
+ *  called by ipoput with a single block to write with ifc rlock'd
+ */
+static void
+etherbwrite(Ipifc *ifc, Block *bp, int version, uchar *ip)
+{
+	Etherhdr *eh;
+	Arpent *a;
+	uchar mac[6];
+	Etherrock *er = ifc->arg;
+
+	/* get mac address of destination; a non-nil result means it is not resolved yet */
+	a = arpget(er->f->arp, bp, version, ifc, ip, mac);
+	if(a){
+		/* check for broadcast or multicast */
+		bp = multicastarp(er->f, a, ifc->m, mac);
+		if(bp==nil){
+			/* neither: ask the network; both helpers release a */
+			switch(version){
+			case V4:
+				sendarp(ifc, a);
+				break;
+			case V6: 
+				resolveaddr6(ifc, a);
+				break;
+			default:
+				panic("etherbwrite: version %d", version);
+			}
+			return;
+		}
+	}
+
+	/* make it a single block with space for the ether header */
+	bp = padblock(bp, ifc->m->hsize);
+	if(bp->next)
+		bp = concatblock(bp);
+	if(BLEN(bp) < ifc->mintu)
+		bp = adjustblock(bp, ifc->mintu);
+	eh = (Etherhdr*)bp->rp;
+
+	/* copy in mac addresses and ether type */
+	memmove(eh->s, ifc->mac, sizeof(eh->s));
+	memmove(eh->d, mac, sizeof(eh->d));
+
+ 	switch(version){
+	case V4:
+		eh->t[0] = 0x08;	/* ETIP4 */
+		eh->t[1] = 0x00;
+		devtab[er->mchan4->type]->bwrite(er->mchan4, bp, 0);
+		break;
+	case V6:
+		eh->t[0] = 0x86;	/* ETIP6 */
+		eh->t[1] = 0xDD;
+		devtab[er->mchan6->type]->bwrite(er->mchan6, bp, 0);
+		break;
+	default:
+		panic("etherbwrite2: version %d", version);
+	}
+	ifc->out++;
+}
+
+
+/*
+ *  process to read IPv4 packets from the ethernet and pass them to ipiput4
+ */
+static void
+etherread4(void *a)
+{
+	Ipifc *ifc;
+	Block *bp;
+	Etherrock *er;
+
+	ifc = a;
+	er = ifc->arg;
+	er->read4p = up;	/* hide identity under a rock for unbind */
+	if(waserror()){
+		er->read4p = 0;	/* tells etherunbind we are gone */
+		pexit("hangup", 1);
+	}
+	for(;;){
+		bp = devtab[er->mchan4->type]->bread(er->mchan4, ifc->maxtu, 0);
+		if(!canrlock(ifc)){	/* lock not available: drop the packet */
+			freeb(bp);
+			continue;
+		}
+		if(waserror()){
+			runlock(ifc);
+			nexterror();
+		}
+		ifc->in++;
+		bp->rp += ifc->m->hsize;	/* step over the ether header */
+		if(ifc->lifc == nil)
+			freeb(bp);
+		else
+			ipiput4(er->f, ifc, bp);
+		runlock(ifc);
+		poperror();
+	}
+}
+
+
+/*
+ *  process to read IPv6 packets from the ethernet and pass them to ipiput6
+ */
+static void
+etherread6(void *a)
+{
+	Ipifc *ifc;
+	Block *bp;
+	Etherrock *er;
+
+	ifc = a;
+	er = ifc->arg;
+	er->read6p = up;	/* hide identity under a rock for unbind */
+	if(waserror()){
+		er->read6p = 0;	/* tells etherunbind we are gone */
+		pexit("hangup", 1);
+	}
+	for(;;){
+		bp = devtab[er->mchan6->type]->bread(er->mchan6, ifc->maxtu, 0);
+		if(!canrlock(ifc)){	/* lock not available: drop the packet */
+			freeb(bp);
+			continue;
+		}
+		if(waserror()){
+			runlock(ifc);
+			nexterror();
+		}
+		ifc->in++;
+		bp->rp += ifc->m->hsize;	/* step over the ether header */
+		if(ifc->lifc == nil)
+			freeb(bp);
+		else
+			ipiput6(er->f, ifc, bp);
+		runlock(ifc);
+		poperror();
+	}
+}
+
+static void
+etheraddmulti(Ipifc *ifc, uchar *a, uchar *)
+{
+	uchar mac[6];
+	char buf[64];
+	Etherrock *er = ifc->arg;
+	int version;
+	/* tell the driver to accept the ether multicast address that IP address a maps to */
+	version = multicastea(mac, a);
+	snprint(buf, sizeof(buf), "addmulti %E", mac);
+	switch(version){
+	case V4:
+		devtab[er->cchan4->type]->write(er->cchan4, buf, strlen(buf), 0);
+		break;
+	case V6:
+		devtab[er->cchan6->type]->write(er->cchan6, buf, strlen(buf), 0);
+		break;
+	default:
+		panic("etheraddmulti: version %d", version);
+	}
+}
+
+static void
+etherremmulti(Ipifc *ifc, uchar *a, uchar *)
+{
+	uchar mac[6];
+	char buf[64];
+	Etherrock *er = ifc->arg;
+	int version;
+	/* tell the driver to stop accepting the ether multicast address that IP address a maps to */
+	version = multicastea(mac, a);
+	snprint(buf, sizeof(buf), "remmulti %E", mac);
+	switch(version){
+	case V4:
+		devtab[er->cchan4->type]->write(er->cchan4, buf, strlen(buf), 0);
+		break;
+	case V6:
+		devtab[er->cchan6->type]->write(er->cchan6, buf, strlen(buf), 0);
+		break;
+	default:
+		panic("etherremmulti: version %d", version);
+	}
+}
+
+/*
+ *  send an ethernet arp request for a->ip; releases a on every path
+ *  (only v4, v6 uses the neighbor discovery, rfc1970)
+ */
+static void
+sendarp(Ipifc *ifc, Arpent *a)
+{
+	int n;
+	Block *bp;
+	Etherarp *e;
+	Etherrock *er = ifc->arg;
+
+	/* don't do anything if it's been less than a second since the last */
+	if(NOW - a->ctime < 1000){
+		arprelease(er->f->arp, a);
+		return;
+	}
+
+	/* remove all but the last message */
+	while((bp = a->hold) != nil){
+		if(bp == a->last)
+			break;
+		a->hold = bp->list;
+		freeblist(bp);
+	}
+
+	/* try to keep it around for a second more */
+	a->ctime = NOW;
+	arprelease(er->f->arp, a);
+
+	/* the request is padded with zeros up to the medium's mintu */
+	n = sizeof(Etherarp);
+	if(n < a->type->mintu)
+		n = a->type->mintu;
+	bp = allocb(n);
+	memset(bp->rp, 0, n);
+	e = (Etherarp*)bp->rp;
+	memmove(e->tpa, a->ip+IPv4off, sizeof(e->tpa));
+	ipv4local(ifc, e->spa);
+	memmove(e->sha, ifc->mac, sizeof(e->sha));
+	memset(e->d, 0xff, sizeof(e->d));		/* ethernet broadcast */
+	memmove(e->s, ifc->mac, sizeof(e->s));
+
+	hnputs(e->type, ETARP);
+	hnputs(e->hrd, 1);
+	hnputs(e->pro, ETIP4);
+	e->hln = sizeof(e->sha);
+	e->pln = sizeof(e->spa);
+	hnputs(e->op, ARPREQUEST);
+	bp->wp += n;
+
+	n = devtab[er->achan->type]->bwrite(er->achan, bp, 0);
+	if(n < 0)
+		print("arp: send: %r\n");
+}
+
+static void
+resolveaddr6(Ipifc *ifc, Arpent *a)
+{	/* v6 counterpart of sendarp: rate-limit, release a, then call icmpns */
+	int sflag;
+	Block *bp;
+	Etherrock *er = ifc->arg;
+	uchar ipsrc[IPaddrlen];
+
+	/* don't do anything if it's been less than ReTransTimer since the last */
+	if(NOW - a->ctime < ReTransTimer){
+		arprelease(er->f->arp, a);
+		return;
+	}
+
+	/* remove all but the last message */
+	while((bp = a->hold) != nil){
+		if(bp == a->last)
+			break;
+		a->hold = bp->list;
+		freeblist(bp);
+	}
+
+	/* restart the timers; give up once the retransmit budget is spent */
+	a->ctime = NOW;
+	a->rtime = NOW + ReTransTimer;
+	if(a->rxtsrem <= 0) {
+		arprelease(er->f->arp, a);
+		return;
+	}
+
+	a->rxtsrem--;
+	arprelease(er->f->arp, a);
+
+	if(sflag = ipv6anylocal(ifc, ipsrc)) 	/* only if ipv6anylocal returns nonzero */
+		icmpns(er->f, ipsrc, sflag, a->ip, TARG_MULTI, ifc->mac);
+}
+
+/*
+ *  send a gratuitous arp to refresh arp caches
+ */
+static void
+sendgarp(Ipifc *ifc, uchar *ip)
+{
+	int n;
+	Block *bp;
+	Etherarp *e;
+	Etherrock *er = ifc->arg;
+
+	/* don't arp for our initial non address */
+	if(ipcmp(ip, IPnoaddr) == 0)
+		return;
+
+	/* the request is padded with zeros up to the medium's mintu */
+	n = sizeof(Etherarp);
+	if(n < ifc->m->mintu)
+		n = ifc->m->mintu;
+	bp = allocb(n);
+	memset(bp->rp, 0, n);
+	e = (Etherarp*)bp->rp;
+	memmove(e->tpa, ip+IPv4off, sizeof(e->tpa));	/* target and sender are both ip */
+	memmove(e->spa, ip+IPv4off, sizeof(e->spa));
+	memmove(e->sha, ifc->mac, sizeof(e->sha));
+	memset(e->d, 0xff, sizeof(e->d));		/* ethernet broadcast */
+	memmove(e->s, ifc->mac, sizeof(e->s));
+
+	hnputs(e->type, ETARP);
+	hnputs(e->hrd, 1);
+	hnputs(e->pro, ETIP4);
+	e->hln = sizeof(e->sha);
+	e->pln = sizeof(e->spa);
+	hnputs(e->op, ARPREQUEST);
+	bp->wp += n;
+
+	n = devtab[er->achan->type]->bwrite(er->achan, bp, 0);
+	if(n < 0)
+		print("garp: send: %r\n");
+}
+
+static void
+recvarp(Ipifc *ifc)
+{
+	int n;
+	Block *ebp, *rbp;
+	Etherarp *e, *r;
+	uchar ip[IPaddrlen];
+	static uchar eprinted[4];
+	Etherrock *er = ifc->arg;
+	/* read one arp packet from the arp channel; learn from it and answer requests for us */
+	ebp = devtab[er->achan->type]->bread(er->achan, ifc->maxtu, 0);
+	if(ebp == nil) {
+		print("arp: rcv: %r\n");
+		return;
+	}
+
+	e = (Etherarp*)ebp->rp;
+	switch(nhgets(e->op)) {
+	default:
+		break;
+
+	case ARPREPLY:
+		/* check for machine using my ip address */
+		v4tov6(ip, e->spa);
+		if(iplocalonifc(ifc, ip) || ipproxyifc(er->f, ifc, ip)){
+			if(memcmp(e->sha, ifc->mac, sizeof(e->sha)) != 0){
+				print("arprep: 0x%E/0x%E also has ip addr %V\n",
+					e->s, e->sha, e->spa);
+				break;
+			}
+		}
+
+		/* make sure we're not entering broadcast addresses */
+		if(ipcmp(ip, ipbroadcast) == 0 ||
+			!memcmp(e->sha, etherbroadcast, sizeof(e->sha))){
+			/* e->spa is a 4-byte v4 address: print it with %V, not %I */
+			print("arprep: 0x%E/0x%E cannot register broadcast address %V\n",
+				e->s, e->sha, e->spa);
+			break;
+		}
+		arpenter(er->f, V4, e->spa, e->sha, sizeof(e->sha), 0);
+		break;
+
+	case ARPREQUEST:
+		/* don't answer arps till we know who we are */
+		if(ifc->lifc == 0)
+			break;
+
+		/* check for machine using my ip or ether address */
+		v4tov6(ip, e->spa);
+		if(iplocalonifc(ifc, ip) || ipproxyifc(er->f, ifc, ip)){
+			if(memcmp(e->sha, ifc->mac, sizeof(e->sha)) != 0){
+				if (memcmp(eprinted, e->spa, sizeof(e->spa))){
+					/* print only once */
+					print("arpreq: 0x%E also has ip addr %V\n", e->sha, e->spa);
+					memmove(eprinted, e->spa, sizeof(e->spa));
+				}
+			}
+		} else {
+			if(memcmp(e->sha, ifc->mac, sizeof(e->sha)) == 0){
+				print("arpreq: %V also has ether addr %E\n", e->spa, e->sha);
+				break;
+			}
+		}
+
+		/* refresh what we know about sender */
+		arpenter(er->f, V4, e->spa, e->sha, sizeof(e->sha), 1);
+
+		/* answer only requests for our address or systems we're proxying for */
+		v4tov6(ip, e->tpa);
+		if(!iplocalonifc(ifc, ip))
+		if(!ipproxyifc(er->f, ifc, ip))
+			break;
+
+		n = sizeof(Etherarp);
+		if(n < ifc->mintu)
+			n = ifc->mintu;
+		rbp = allocb(n);
+		r = (Etherarp*)rbp->rp;
+		memset(r, 0, n);	/* zero the padding too, so no stale memory goes out */
+		hnputs(r->type, ETARP);
+		hnputs(r->hrd, 1);
+		hnputs(r->pro, ETIP4);
+		r->hln = sizeof(r->sha);
+		r->pln = sizeof(r->spa);
+		hnputs(r->op, ARPREPLY);
+		memmove(r->tha, e->sha, sizeof(r->tha));
+		memmove(r->tpa, e->spa, sizeof(r->tpa));
+		memmove(r->sha, ifc->mac, sizeof(r->sha));
+		memmove(r->spa, e->tpa, sizeof(r->spa));
+		memmove(r->d, e->sha, sizeof(r->d));
+		memmove(r->s, ifc->mac, sizeof(r->s));
+		rbp->wp += n;
+
+		n = devtab[er->achan->type]->bwrite(er->achan, rbp, 0);
+		if(n < 0)
+			print("arp: write: %r\n");
+	}
+	freeb(ebp);
+}
+
+static void
+recvarpproc(void *v)
+{	/* kproc started by etherbind: handles arp packets until an error is raised */
+	Ipifc *ifc = v;
+	Etherrock *er = ifc->arg;
+
+	er->arpp = up;		/* lets etherunbind find us */
+	if(waserror()){
+		er->arpp = 0;	/* tells etherunbind we are gone */
+		pexit("hangup", 1);
+	}
+	for(;;)
+		recvarp(ifc);
+}
+
+static int
+multicastea(uchar *ea, uchar *ip)
+{
+	int version;
+
+	version = ipismulticast(ip);
+	if(version == V4){
+		/* 01:00:5e, then the low 23 bits of the address */
+		ea[0] = 0x01;
+		ea[1] = 0x00;
+		ea[2] = 0x5e;
+		ea[3] = ip[13] & 0x7f;
+		ea[4] = ip[14];
+		ea[5] = ip[15];
+	} else if(version == V6){
+		/* 33:33, then the last four bytes of the address */
+		ea[0] = 0x33;
+		ea[1] = 0x33;
+		ea[2] = ip[12];
+		ea[3] = ip[13];
+		ea[4] = ip[14];
+		ea[5] = ip[15];
+	}
+	return version;	/* whatever ipismulticast said; ea is untouched otherwise */
+}
+
+/*
+ *  fill in an arp entry whose address is a broadcast or
+ *  multicast one.  Return the first queued packet for the
+ *  IP address, or nil when arp has to resolve it.
+ */
+static Block*
+multicastarp(Fs *f, Arpent *a, Medium *medium, uchar *mac)
+{
+	int kind, version;
+
+	kind = ipforme(f, a->ip);
+	/* unicast: nothing to fill in here */
+	if(kind == Runi)
+		return nil;
+
+	/* broadcast goes to the all-ones ether address */
+	if(kind == Rbcast){
+		memset(mac, 0xff, 6);
+		return arpresolve(f->arp, a, medium, mac);
+	}
+
+	/* if multicast, fill in mac */
+	version = multicastea(mac, a->ip);
+	if(version == V4 || version == V6)
+		return arpresolve(f->arp, a, medium, mac);
+
+	/* let arp take care of it */
+	return nil;
+}
+
+void
+ethermediumlink(void)
+{	/* register both media defined in this file */
+	addipmedium(&ethermedium);
+	addipmedium(&gbemedium);
+}
+
+
+static void
+etherpref2addr(uchar *pref, uchar *ea)
+{
+	uchar *id;
+
+	/* last 8 bytes of pref: the mac split around ff:fe, with 0x2 or'ed into its first byte */
+	id = pref + 8;
+	id[0] = ea[0] | 0x2;
+	id[1] = ea[1]; id[2] = ea[2];
+	id[3] = 0xFF; id[4] = 0xFE;
+	id[5] = ea[3]; id[6] = ea[4]; id[7] = ea[5];
+}
--- /dev/null
+++ b/os/ip.original/gre.c
@@ -1,0 +1,282 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+
+#include "ip.h"
+
+#define DPRINT if(0)print
+
+enum
+{
+	GRE_IPONLY	= 12,		/* GREhdr bytes before src (vihl..cksum) */
+	GRE_IPPLUSGRE	= 12,		/* GREhdr bytes from src on: src, dst, flags, eproto */
+	IP_GREPROTO	= 47,
+
+	GRErxms		= 200,		/* these three are not referenced in this file */
+	GREtickms	= 100,
+	GREmaxxmit	= 10,
+};
+
+typedef struct GREhdr
+{
+	/* ip header */
+	uchar	vihl;		/* Version and header length */
+	uchar	tos;		/* Type of service */
+	uchar	len[2];		/* packet length (including headers) */
+	uchar	id[2];		/* Identification */
+	uchar	frag[2];	/* Fragment information */
+	uchar	Unused;	
+	uchar	proto;		/* Protocol */
+	uchar	cksum[2];	/* checksum */
+	uchar	src[4];		/* Ip source */
+	uchar	dst[4];		/* Ip destination */
+
+	/* gre header */
+	uchar	flags[2];
+	uchar	eproto[2];	/* encapsulation protocol */
+} GREhdr;
+
+typedef struct GREpriv GREpriv;
+struct GREpriv
+{
+	int		raw;			/* Raw GRE mode */
+
+	/* non-MIB stats */
+	ulong		csumerr;		/* checksum errors */
+	ulong		lenerr;			/* short packet */
+};
+
+static void grekick(void *x, Block *bp);
+
+static char*
+greconnect(Conv *c, char **argv, int argc)
+{
+	Proto *p;
+	char *err;
+	Conv *tc, **cp, **ecp;
+	/* standard connect, refused if another conversation has the same rport and raddr */
+	err = Fsstdconnect(c, argv, argc);
+	if(err != nil)
+		return err;
+
+	/* make sure noone's already connected to this other sys */
+	p = c->p;
+	qlock(p);
+	ecp = &p->conv[p->nc];
+	for(cp = p->conv; cp < ecp; cp++){
+		tc = *cp;
+		if(tc == nil)	/* first empty slot ends the scan */
+			break;
+		if(tc == c)
+			continue;
+		if(tc->rport == c->rport && ipcmp(tc->raddr, c->raddr) == 0){
+			err = "already connected to that addr/proto";
+			ipmove(c->laddr, IPnoaddr);	/* undo what the connect set */
+			ipmove(c->raddr, IPnoaddr);
+			break;
+		}
+	}
+	qunlock(p);
+
+	if(err != nil)
+		return err;
+	Fsconnected(c, nil);
+
+	return nil;
+}
+
+static void
+grecreate(Conv *c)
+{	/* set up the queues of a new conversation */
+	c->rq = qopen(64*1024, Qmsg, 0, c);
+	c->wq = qbypass(grekick, c);	/* writes go straight to grekick */
+}
+
+static int
+grestate(Conv *c, char *state, int n)
+{	/* the state text is always "Datagram"; c is not consulted */
+	USED(c);
+	return snprint(state, n, "%s", "Datagram");
+}
+
+static char*
+greannounce(Conv*, char**, int)
+{	/* always refused; the message now names this protocol rather than "pktifc" */
+	return "gre does not support announce";
+}
+
+static void
+greclose(Conv *c)
+{	/* close the conversation's queues and clear its addresses and ports */
+	qclose(c->rq);
+	qclose(c->wq);
+	qclose(c->eq);
+	ipmove(c->laddr, IPnoaddr);
+	ipmove(c->raddr, IPnoaddr);
+	c->lport = 0;
+	c->rport = 0;
+}
+
+int drop;
+
+static void
+grekick(void *x, Block *bp)
+{
+	Conv *c = x;
+	GREhdr *ghp;
+	uchar laddr[IPaddrlen], raddr[IPaddrlen];
+	/* write side, installed by grecreate: complete the ip header and send with ipoput4 */
+	if(bp == nil)
+		return;
+
+	/* Make space to fit ip header (gre header already there) */
+	bp = padblock(bp, GRE_IPONLY);
+	if(bp == nil)
+		return;
+
+	/* make sure the message has a GRE header */
+	bp = pullupblock(bp, GRE_IPONLY+GRE_IPPLUSGRE);
+	if(bp == nil)
+		return;
+
+	ghp = (GREhdr *)(bp->rp);
+	ghp->vihl = IP_VER4;
+
+	/* cooked mode: fill in addresses the writer left as zero, and the protocol */
+	if(!((GREpriv*)c->p->priv)->raw){
+		v4tov6(raddr, ghp->dst);
+		if(ipcmp(raddr, v4prefix) == 0)
+			memmove(ghp->dst, c->raddr + IPv4off, IPv4addrlen);
+		v4tov6(laddr, ghp->src);
+		if(ipcmp(laddr, v4prefix) == 0){
+			if(ipcmp(c->laddr, IPnoaddr) == 0)
+				findlocalip(c->p->f, c->laddr, raddr); /* pick interface closest to dest */
+			memmove(ghp->src, c->laddr + IPv4off, IPv4addrlen);
+		}
+		hnputs(ghp->eproto, c->rport);
+	}
+
+	ghp->proto = IP_GREPROTO;
+	ghp->frag[0] = 0;
+	ghp->frag[1] = 0;
+
+	ipoput4(c->p->f, bp, 0, c->ttl, c->tos, nil);
+}
+
+static void
+greiput(Proto *gre, Ipifc*, Block *bp)
+{
+	int len;
+	GREhdr *ghp;
+	Conv *c, **p;
+	ushort eproto;
+	uchar raddr[IPaddrlen];
+	GREpriv *gpriv;
+	/* receive side: queue the packet on the conversation that matches it, else drop it */
+	gpriv = gre->priv;
+	ghp = (GREhdr*)(bp->rp);
+
+	v4tov6(raddr, ghp->src);
+	eproto = nhgets(ghp->eproto);
+	qlock(gre);
+
+	/* Look for a conversation structure for this port and address */
+	c = nil;
+	for(p = gre->conv; *p; p++) {
+		c = *p;
+		if(c->inuse == 0)
+			continue;
+		if(c->rport == eproto && 
+			(gpriv->raw || ipcmp(c->raddr, raddr) == 0))
+			break;
+	}
+
+	if(*p == nil) {		/* ran off the end: no conversation matched */
+		qunlock(gre);
+		freeblist(bp);
+		return;
+	}
+
+	qunlock(gre);
+
+	/*
+	 * Trim the packet down to data size
+	 */
+	len = nhgets(ghp->len) - GRE_IPONLY;
+	if(len < GRE_IPPLUSGRE){	/* dropped without counting in lenerr */
+		freeblist(bp);
+		return;
+	}
+	bp = trimblock(bp, GRE_IPONLY, len);
+	if(bp == nil){
+		gpriv->lenerr++;
+		return;
+	}
+
+	/*
+	 *  Can't delimit packet so pull it all into one block.
+	 */
+	if(qlen(c->rq) > 64*1024)
+		freeblist(bp);
+	else{
+		bp = concatblock(bp);
+		if(bp == 0)
+			panic("greiput");
+		qpass(c->rq, bp);
+	}
+}
+
+int
+grestats(Proto *gre, char *buf, int len)
+{
+	GREpriv *gpriv;
+
+	gpriv = gre->priv;
+	/* only lenerr is reported; csumerr is not printed */
+	return snprint(buf, len, "gre: len %lud\n", gpriv->lenerr);
+}
+
+char*
+grectl(Conv *c, char **f, int n)
+{
+	GREpriv *priv = c->p->priv;
+
+	/* the only requests are the single words "raw" and "cooked"; */
+	/* the flag lives in the protocol's priv, not in conversation c */
+	if(n != 1)
+		return "unknown control request";
+	if(strcmp(f[0], "raw") == 0)
+		priv->raw = 1;
+	else if(strcmp(f[0], "cooked") == 0)
+		priv->raw = 0;
+	else
+		return "unknown control request";
+
+	return nil;
+}
+
+void
+greinit(Fs *fs)
+{
+	Proto *p;
+
+	/* build the gre protocol descriptor and hand it to Fsproto */
+	p = smalloc(sizeof(*p));
+	p->priv = smalloc(sizeof(GREpriv));
+	p->name = "gre";
+	p->ipproto = IP_GREPROTO;
+	p->nc = 64;
+	p->ptclsize = 0;
+	p->create = grecreate;
+	p->connect = greconnect;
+	p->announce = greannounce;
+	p->close = greclose;
+	p->ctl = grectl;
+	p->rcv = greiput;
+	p->advise = nil;
+	p->state = grestate;
+	p->stats = grestats;
+	Fsproto(fs, p);
+}
--- /dev/null
+++ b/os/ip.original/icmp.c
@@ -1,0 +1,496 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+
+#include "ip.h"
+
+typedef struct Icmp {
+	uchar	vihl;		/* Version and header length */
+	uchar	tos;		/* Type of service */
+	uchar	length[2];	/* packet length */
+	uchar	id[2];		/* Identification */
+	uchar	frag[2];	/* Fragment information */
+	uchar	ttl;		/* Time to live */
+	uchar	proto;		/* Protocol */
+	uchar	ipcksum[2];	/* Header checksum */
+	uchar	src[4];		/* Ip source */
+	uchar	dst[4];		/* Ip destination */
+	uchar	type;
+	uchar	code;
+	uchar	cksum[2];
+	uchar	icmpid[2];
+	uchar	seq[2];
+	uchar	data[1];
+} Icmp;
+
+enum {			/* Packet Types */
+	EchoReply	= 0,
+	Unreachable	= 3,
+	SrcQuench	= 4,
+	Redirect	= 5,
+	EchoRequest	= 8,
+	TimeExceed	= 11,
+	InParmProblem	= 12,
+	Timestamp	= 13,
+	TimestampReply	= 14,
+	InfoRequest	= 15,
+	InfoReply	= 16,
+	AddrMaskRequest = 17,
+	AddrMaskReply   = 18,
+
+	Maxtype		= 18,
+};
+
+enum
+{
+	MinAdvise	= 24,	/* minimum needed for us to advise another protocol */ 
+};
+
+char *icmpnames[Maxtype+1] =
+{
+[EchoReply]		"EchoReply",
+[Unreachable]		"Unreachable",
+[SrcQuench]		"SrcQuench",
+[Redirect]		"Redirect",
+[EchoRequest]		"EchoRequest",
+[TimeExceed]		"TimeExceed",
+[InParmProblem]		"InParmProblem",
+[Timestamp]		"Timestamp",
+[TimestampReply]	"TimestampReply",
+[InfoRequest]		"InfoRequest",
+[InfoReply]		"InfoReply",
+[AddrMaskRequest]	"AddrMaskRequest",
+[AddrMaskReply  ]	"AddrMaskReply  ",
+};
+
+enum {
+	IP_ICMPPROTO	= 1,
+	ICMP_IPSIZE	= 20,
+	ICMP_HDRSIZE	= 8,
+};
+
+enum
+{
+	InMsgs,
+	InErrors,
+	OutMsgs,
+	CsumErrs,
+	LenErrs,
+	HlenErrs,
+
+	Nstats,
+};
+
+static char *statnames[Nstats] =
+{
+[InMsgs]	"InMsgs",
+[InErrors]	"InErrors",
+[OutMsgs]	"OutMsgs",
+[CsumErrs]	"CsumErrs",
+[LenErrs]	"LenErrs",
+[HlenErrs]	"HlenErrs",
+};
+
+typedef struct Icmppriv Icmppriv;
+struct Icmppriv
+{
+	ulong	stats[Nstats];
+
+	/* message counts */
+	ulong	in[Maxtype+1];
+	ulong	out[Maxtype+1];
+};
+
+static void icmpkick(void *x, Block*);
+
+static void
+icmpcreate(Conv *c)
+{	/* set up the queues of a new conversation */
+	c->rq = qopen(64*1024, Qmsg, 0, c);
+	c->wq = qbypass(icmpkick, c);	/* writes go straight to icmpkick */
+}
+
+extern char*
+icmpconnect(Conv *c, char **argv, int argc)
+{
+	char *err;
+
+	/* standard connect; on success report the conversation connected */
+	err = Fsstdconnect(c, argv, argc);
+	if(err == nil)
+		Fsconnected(c, nil);
+
+	return err;
+}
+
+extern int
+icmpstate(Conv *c, char *state, int n)
+{	/* format "Datagram" plus the lengths of both queues (0 for a missing queue) */
+	USED(c);
+	return snprint(state, n, "%s qin %d qout %d",
+		"Datagram",
+		c->rq ? qlen(c->rq) : 0,
+		c->wq ? qlen(c->wq) : 0
+	);
+}
+
+extern char*
+icmpannounce(Conv *c, char **argv, int argc)
+{
+	char *err;
+
+	/* standard announce; on success report the conversation connected */
+	err = Fsstdannounce(c, argv, argc);
+	if(err == nil)
+		Fsconnected(c, nil);
+
+	return err;
+}
+
+extern void
+icmpclose(Conv *c)
+{	/* close both queues and clear the addresses and local port */
+	qclose(c->rq);
+	qclose(c->wq);
+	ipmove(c->laddr, IPnoaddr);
+	ipmove(c->raddr, IPnoaddr);
+	c->lport = 0;
+}
+
+static void
+icmpkick(void *x, Block *bp)
+{
+	Conv *c = x;
+	Icmp *p;
+	Icmppriv *ipriv;
+	/* write side, installed by icmpcreate: finish the headers and send with ipoput4 */
+	if(bp == nil)
+		return;
+
+	if(blocklen(bp) < ICMP_IPSIZE + ICMP_HDRSIZE){	/* too short to hold both headers */
+		freeblist(bp);
+		return;
+	}
+	p = (Icmp *)(bp->rp);
+	p->vihl = IP_VER4;
+	ipriv = c->p->priv;
+	if(p->type <= Maxtype)	
+		ipriv->out[p->type]++;
+	
+	v6tov4(p->dst, c->raddr);	/* addresses and id come from the conversation */
+	v6tov4(p->src, c->laddr);
+	p->proto = IP_ICMPPROTO;
+	hnputs(p->icmpid, c->lport);
+	memset(p->cksum, 0, sizeof(p->cksum));	/* must be zero while the sum is computed */
+	hnputs(p->cksum, ptclcsum(bp, ICMP_IPSIZE, blocklen(bp) - ICMP_IPSIZE));
+	ipriv->stats[OutMsgs]++;
+	ipoput4(c->p->f, bp, 0, c->ttl, c->tos, nil);
+}
+
+extern void
+icmpttlexceeded(Fs *f, uchar *ia, Block *bp)
+{	/* send a TimeExceed (code 0) about bp from address ia to bp's source; bp is not freed here */
+	Block	*nbp;
+	Icmp	*p, *np;
+
+	p = (Icmp *)bp->rp;
+
+	netlog(f, Logicmp, "sending icmpttlexceeded -> %V\n", p->src);
+	/* our ip and icmp headers, then the offender's ip header and 8 more bytes */
+	nbp = allocb(ICMP_IPSIZE + ICMP_HDRSIZE + ICMP_IPSIZE + 8);
+	nbp->wp += ICMP_IPSIZE + ICMP_HDRSIZE + ICMP_IPSIZE + 8;
+	np = (Icmp *)nbp->rp;
+	np->vihl = IP_VER4;
+	memmove(np->dst, p->src, sizeof(np->dst));
+	v6tov4(np->src, ia);
+	memmove(np->data, bp->rp, ICMP_IPSIZE + 8);
+	np->type = TimeExceed;
+	np->code = 0;
+	np->proto = IP_ICMPPROTO;
+	hnputs(np->icmpid, 0);
+	hnputs(np->seq, 0);
+	memset(np->cksum, 0, sizeof(np->cksum));
+	hnputs(np->cksum, ptclcsum(nbp, ICMP_IPSIZE, blocklen(nbp) - ICMP_IPSIZE));
+	ipoput4(f, nbp, 0, MAXTTL, DFLTTOS, nil);
+}
+
+static void
+icmpunreachable(Fs *f, Block *bp, int code, int seq)
+{	/* answer bp with an Unreachable carrying code, and seq in the seq field; bp is not freed here */
+	Block	*nbp;
+	Icmp	*p, *np;
+	int	i;
+	uchar	addr[IPaddrlen];
+
+	p = (Icmp *)bp->rp;
+
+	/* only do this for unicast sources and destinations */
+	v4tov6(addr, p->dst);
+	i = ipforme(f, addr);
+	if((i&Runi) == 0)
+		return;
+	v4tov6(addr, p->src);
+	i = ipforme(f, addr);
+	if(i != 0 && (i&Runi) == 0)	/* a source ipforme reports as 0 is accepted */
+		return;
+
+	netlog(f, Logicmp, "sending icmpnoconv -> %V\n", p->src);
+	nbp = allocb(ICMP_IPSIZE + ICMP_HDRSIZE + ICMP_IPSIZE + 8);
+	nbp->wp += ICMP_IPSIZE + ICMP_HDRSIZE + ICMP_IPSIZE + 8;
+	np = (Icmp *)nbp->rp;
+	np->vihl = IP_VER4;
+	memmove(np->dst, p->src, sizeof(np->dst));
+	memmove(np->src, p->dst, sizeof(np->src));
+	memmove(np->data, bp->rp, ICMP_IPSIZE + 8);	/* offender's ip header and 8 more bytes */
+	np->type = Unreachable;
+	np->code = code;
+	np->proto = IP_ICMPPROTO;
+	hnputs(np->icmpid, 0);
+	hnputs(np->seq, seq);
+	memset(np->cksum, 0, sizeof(np->cksum));
+	hnputs(np->cksum, ptclcsum(nbp, ICMP_IPSIZE, blocklen(nbp) - ICMP_IPSIZE));
+	ipoput4(f, nbp, 0, MAXTTL, DFLTTOS, nil);
+}
+
+extern void
+icmpnoconv(Fs *f, Block *bp)
+{
+	icmpunreachable(f, bp, 3, 0);	/* code 3: "port unreachable" in unreachcode */
+}
+
+extern void
+icmpcantfrag(Fs *f, Block *bp, int mtu)
+{
+	icmpunreachable(f, bp, 4, mtu);	/* code 4: "fragmentation needed and DF set"; mtu goes in seq */
+}
+
+static void
+goticmpkt(Proto *icmp, Block *bp)
+{
+	Conv	**c, *s;
+	Icmp	*p;
+	uchar	dst[IPaddrlen];
+	ushort	recid;
+	/* queue bp on the first conversation matching its icmpid and source, else free it */
+	p = (Icmp *) bp->rp;
+	v4tov6(dst, p->src);
+	recid = nhgets(p->icmpid);
+
+	for(c = icmp->conv; *c; c++) {
+		s = *c;
+		if(s->lport == recid)
+		if(ipcmp(s->raddr, dst) == 0){
+			bp = concatblock(bp);
+			if(bp != nil)
+				qpass(s->rq, bp);
+			return;
+		}
+	}
+	freeblist(bp);
+}
+
+static Block *
+mkechoreply(Block *bp)
+{
+	Icmp	*h;
+	uchar	saved[4];
+
+	/* turn the request into a reply in place: swap addresses, retype, re-checksum */
+	h = (Icmp *)bp->rp;
+	h->vihl = IP_VER4;
+	memmove(saved, h->src, sizeof(saved));
+	memmove(h->src, h->dst, sizeof(h->src));
+	memmove(h->dst, saved, sizeof(h->dst));
+	h->type = EchoReply;
+	h->cksum[0] = h->cksum[1] = 0;
+	hnputs(h->cksum, ptclcsum(bp, ICMP_IPSIZE, blocklen(bp) - ICMP_IPSIZE));
+	return bp;
+}
+
+static char *unreachcode[] =
+{
+[0]	"net unreachable",
+[1]	"host unreachable",
+[2]	"protocol unreachable",
+[3]	"port unreachable",
+[4]	"fragmentation needed and DF set",
+[5]	"source route failed",
+};
+
+static void
+icmpiput(Proto *icmp, Ipifc*, Block *bp)
+{
+	int	n, iplen;
+	Icmp	*p;
+	Block	*r;
+	Proto	*pr;
+	char	*msg;
+	char	m2[128];
+	Icmppriv *ipriv;
+	/* receive side: validate the packet, answer echoes, advise protocols of errors */
+	ipriv = icmp->priv;
+	
+	ipriv->stats[InMsgs]++;
+
+	p = (Icmp *)bp->rp;
+	netlog(icmp->f, Logicmp, "icmpiput %d %d\n", p->type, p->code);
+	n = blocklen(bp);
+	if(n < ICMP_IPSIZE+ICMP_HDRSIZE){
+		ipriv->stats[InErrors]++;
+		ipriv->stats[HlenErrs]++;
+		netlog(icmp->f, Logicmp, "icmp hlen %d\n", n);
+		goto raise;
+	}
+	iplen = nhgets(p->length);
+	/* the claimed length must fit the data and cover both headers */
+	if(iplen > n || iplen < ICMP_IPSIZE+ICMP_HDRSIZE){
+		ipriv->stats[LenErrs]++;
+		ipriv->stats[InErrors]++;
+		netlog(icmp->f, Logicmp, "icmp length %d\n", iplen);
+		goto raise;
+	}
+	if(ptclcsum(bp, ICMP_IPSIZE, iplen - ICMP_IPSIZE)){
+		ipriv->stats[InErrors]++;
+		ipriv->stats[CsumErrs]++;
+		netlog(icmp->f, Logicmp, "icmp checksum error\n");
+		goto raise;
+	}
+	if(p->type <= Maxtype)
+		ipriv->in[p->type]++;
+	switch(p->type) {
+	case EchoRequest:
+		if (iplen < n)
+			bp = trimblock(bp, 0, iplen);
+		r = mkechoreply(bp);
+		ipriv->out[EchoReply]++;
+		ipoput4(icmp->f, r, 0, MAXTTL, DFLTTOS, nil);
+		break;
+	case Unreachable:
+		if(p->code > 5)		/* codes beyond the table read as "host unreachable" */
+			msg = unreachcode[1];
+		else
+			msg = unreachcode[p->code];
+
+		/* look at the packet quoted inside the error */
+		bp->rp += ICMP_IPSIZE+ICMP_HDRSIZE;
+		if(blocklen(bp) < MinAdvise){
+			ipriv->stats[LenErrs]++;
+			goto raise;
+		}
+		p = (Icmp *)bp->rp;
+		pr = Fsrcvpcolx(icmp->f, p->proto);
+		if(pr != nil && pr->advise != nil) {
+			(*pr->advise)(pr, bp, msg);
+			return;
+		}
+		bp->rp -= ICMP_IPSIZE+ICMP_HDRSIZE;
+		goticmpkt(icmp, bp);
+		break;
+	case TimeExceed:
+		if(p->code == 0){
+			snprint(m2, sizeof(m2), "ttl exceeded at %V", p->src);
+
+			bp->rp += ICMP_IPSIZE+ICMP_HDRSIZE;
+			if(blocklen(bp) < MinAdvise){
+				ipriv->stats[LenErrs]++;
+				goto raise;
+			}
+			p = (Icmp *)bp->rp;
+			pr = Fsrcvpcolx(icmp->f, p->proto);
+			if(pr != nil && pr->advise != nil) {
+				(*pr->advise)(pr, bp, m2);
+				return;
+			}
+			bp->rp -= ICMP_IPSIZE+ICMP_HDRSIZE;
+		}
+
+		goticmpkt(icmp, bp);
+		break;
+	default:
+		goticmpkt(icmp, bp);
+		break;
+	}
+	return;
+
+raise:
+	freeblist(bp);
+}
+
+void
+icmpadvise(Proto *icmp, Block *bp, char *msg)
+{
+	Conv	**c, *s;
+	Icmp	*p;
+	uchar	dst[IPaddrlen];
+	ushort	recid;
+	/* hang up, with msg, the first conversation matching bp's icmpid and destination */
+	p = (Icmp *) bp->rp;
+	v4tov6(dst, p->dst);
+	recid = nhgets(p->icmpid);
+
+	for(c = icmp->conv; *c; c++) {
+		s = *c;
+		if(s->lport == recid)
+		if(ipcmp(s->raddr, dst) == 0){
+			qhangup(s->rq, msg);
+			qhangup(s->wq, msg);
+			break;
+		}
+	}
+	freeblist(bp);	/* bp is consumed whether or not anything matched */
+}
+
+int
+icmpstats(Proto *icmp, char *buf, int len)
+{
+	Icmppriv *priv;
+	char *p, *e;
+	int i;
+	/* format the counters into buf; returns p - buf after the last seprint */
+	priv = icmp->priv;
+	p = buf;
+	e = p+len;
+	for(i = 0; i < Nstats; i++)
+		p = seprint(p, e, "%s: %lud\n", statnames[i], priv->stats[i]);
+	for(i = 0; i <= Maxtype; i++){
+		if(icmpnames[i])	/* types without a name are printed by number */
+			p = seprint(p, e, "%s: %lud %lud\n", icmpnames[i], priv->in[i], priv->out[i]);
+		else
+			p = seprint(p, e, "%d: %lud %lud\n", i, priv->in[i], priv->out[i]);
+	}
+	return p - buf;
+}
+
+int
+icmpgc(Proto *icmp)
+{	/* gc hook installed by icmpinit: defers entirely to natgc */
+	return natgc(icmp->ipproto);
+}
+
+void
+icmpinit(Fs *fs)
+{
+	Proto *p;
+
+	/* build the icmp protocol descriptor (no ctl) and hand it to Fsproto */
+	p = smalloc(sizeof(*p));
+	p->priv = smalloc(sizeof(Icmppriv));
+	p->name = "icmp";
+	p->ipproto = IP_ICMPPROTO;
+	p->nc = 128;
+	p->ptclsize = 0;
+	p->create = icmpcreate;
+	p->connect = icmpconnect;
+	p->announce = icmpannounce;
+	p->close = icmpclose;
+	p->ctl = nil;
+	p->rcv = icmpiput;
+	p->advise = icmpadvise;
+	p->gc = icmpgc;
+	p->state = icmpstate;
+	p->stats = icmpstats;
+	Fsproto(fs, p);
+}
--- /dev/null
+++ b/os/ip.original/icmp6.c
@@ -1,0 +1,917 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+#include "ip.h"
+#include "ipv6.h"
+
+typedef struct ICMPpkt ICMPpkt;
+typedef struct IPICMP IPICMP;
+typedef struct Ndpkt Ndpkt;
+typedef struct NdiscC NdiscC;
+
+/* the 8 byte icmp header; IPICMP places it right after the IPv6 header */
+struct ICMPpkt {
+	uchar	type;
+	uchar	code;
+	uchar	cksum[2];
+	uchar	icmpid[2];
+	uchar	seq[2];
+};
+
+/* a whole packet as seen at bp->rp: IPv6 header followed by the icmp header */
+struct IPICMP {
+	Ip6hdr;
+	ICMPpkt;
+};
+
+/* neighbor discovery packet without options: headers plus the target address */
+struct NdiscC
+{
+	IPICMP;
+	uchar target[IPaddrlen];
+};
+
+/* neighbor discovery packet carrying one link-layer address option */
+struct Ndpkt
+{
+	NdiscC;
+	uchar otype;
+	uchar olen;	// length in units of 8 octets(incl type, code),
+				// 1 for IEEE 802 addresses
+	uchar lnaddr[6];	// link-layer address
+};
+
+/*
+ *  Message type numbers used to index the per-type counters and the
+ *  icmpnames6 table.  Note that SrcQuench and ParamProblemV6 share
+ *  the value 4.  Maxtype6 is the largest type that is counted; the
+ *  counter arrays and the name table have Maxtype6+1 entries.
+ */
+enum {	
+	// ICMPv6 types
+	EchoReply	= 0,
+	UnreachableV6	= 1,
+	PacketTooBigV6	= 2,
+	TimeExceedV6	= 3,
+	SrcQuench	= 4,
+	ParamProblemV6	= 4,
+	Redirect	= 5,
+	EchoRequest	= 8,
+	TimeExceed	= 11,
+	InParmProblem	= 12,
+	Timestamp	= 13,
+	TimestampReply	= 14,
+	InfoRequest	= 15,
+	InfoReply	= 16,
+	AddrMaskRequest = 17,
+	AddrMaskReply   = 18,
+	EchoRequestV6	= 128,
+	EchoReplyV6	= 129,
+	RouterSolicit	= 133,
+	RouterAdvert	= 134,
+	NbrSolicit	= 135,
+	NbrAdvert	= 136,
+	RedirectV6	= 137,
+
+	Maxtype6	= 137,
+};
+
+/*
+ *  Printable names indexed by message type; types without an entry
+ *  are nil.  Entry 4 is labelled "SrcQuench" although ParamProblemV6
+ *  has the same number.
+ */
+char *icmpnames6[Maxtype6+1] =
+{
+[EchoReply]		"EchoReply",
+[UnreachableV6]		"UnreachableV6",
+[PacketTooBigV6]	"PacketTooBigV6",
+[TimeExceedV6]		"TimeExceedV6",
+[SrcQuench]		"SrcQuench",
+[Redirect]		"Redirect",
+[EchoRequest]		"EchoRequest",
+[TimeExceed]		"TimeExceed",
+[InParmProblem]		"InParmProblem",
+[Timestamp]		"Timestamp",
+[TimestampReply]	"TimestampReply",
+[InfoRequest]		"InfoRequest",
+[InfoReply]		"InfoReply",
+[AddrMaskRequest]	"AddrMaskRequest",
+[AddrMaskReply]		"AddrMaskReply",
+[EchoRequestV6]		"EchoRequestV6",
+[EchoReplyV6]		"EchoReplyV6",
+[RouterSolicit]		"RouterSolicit",
+[RouterAdvert]		"RouterAdvert",
+[NbrSolicit]		"NbrSolicit",
+[NbrAdvert]		"NbrAdvert",
+[RedirectV6]		"RedirectV6",
+};
+
+/*
+ *  Indices into Icmppriv6.stats; Nstats6 is the number of counters.
+ *  The *Errs6 counters are bumped by the input validation code.
+ */
+enum
+{
+	InMsgs6,
+	InErrors6,
+	OutMsgs6,
+	CsumErrs6,
+	LenErrs6,
+	HlenErrs6,
+	HoplimErrs6,
+	IcmpCodeErrs6,
+	TargetErrs6,
+	OptlenErrs6,
+	AddrmxpErrs6,
+	RouterAddrErrs6,
+
+	Nstats6,
+};
+
+/* names printed by icmpstats6, indexed like Icmppriv6.stats */
+static char *statnames6[Nstats6] =
+{
+[InMsgs6]	"InMsgs",
+[InErrors6]	"InErrors",
+[OutMsgs6]	"OutMsgs",
+[CsumErrs6]	"CsumErrs",
+[LenErrs6]	"LenErrs",
+[HlenErrs6]	"HlenErrs",
+[HoplimErrs6]	"HoplimErrs",
+[IcmpCodeErrs6]	"IcmpCodeErrs",
+[TargetErrs6]	"TargetErrs",
+[OptlenErrs6]	"OptlenErrs",
+[AddrmxpErrs6]	"AddrmxpErrs",
+[RouterAddrErrs6]	"RouterAddrErrs",
+};
+
+/* per-protocol private data (Proto.priv): statistics only */
+typedef struct Icmppriv6
+{
+	ulong	stats[Nstats6];
+
+	/* message counts */
+	ulong	in[Maxtype6+1];
+	ulong	out[Maxtype6+1];
+} Icmppriv6;
+
+/*
+ *  per-conversation control block (Conv.ptcl).  headers is set to 6
+ *  by the "headers" ctl request and is compared against 6 when a
+ *  packet is written.
+ */
+typedef struct Icmpcb6 
+{
+	QLock;
+	uchar headers;
+} Icmpcb6;
+
+/*
+ *  Advisory text for UnreachableV6 messages, indexed by the icmp6_*
+ *  unreachable codes (declared outside this file).
+ */
+static char *unreachcode[] =
+{
+[icmp6_no_route]	"no route to destination",
+[icmp6_ad_prohib]	"comm with destination administratively prohibited",
+[icmp6_unassigned]	"icmp unreachable: unassigned error code (2)",
+[icmp6_adr_unreach]	"address unreachable",
+[icmp6_port_unreach]	"port unreachable",
+[icmp6_unkn_code]	"icmp unreachable: unknown code",
+};
+
+/* bytes of user supplied header pulled up by icmpkick6 in "headers" mode */
+enum {
+	ICMP_USEAD6	= 40,
+};
+
+/* flag bits of a neighbor advertisement (first byte after the checksum) */
+enum {
+	Oflag	= 1<<5,
+	Sflag	= 1<<6,
+	Rflag	= 1<<7,
+};
+
+/* neighbor discovery option types; only slladd is referenced in this file */
+enum {
+	slladd	= 1,
+	tlladd	= 2,
+	prfinfo	= 3,
+	redhdr	= 4,
+	mtuopt	= 5,
+};
+
+static void icmpkick6(void *x, Block *bp);
+
+/*
+ *  create hook: give the new conversation a 64k message read queue
+ *  and a write queue that bypasses queueing and calls icmpkick6
+ *  for every block written.
+ */
+static void
+icmpcreate6(Conv *c)
+{
+	c->rq = qopen(64*1024, Qmsg, 0, c);
+	c->wq = qbypass(icmpkick6, c);
+}
+
+/*
+ *  Compute and store the icmp checksum of the packet in bp.
+ *  The IPv6 header itself is rewritten to serve as the pseudo header:
+ *  vcf is zeroed, ploadlen is set to the length after the IPv6
+ *  header, proto is zeroed and the protocol number is parked in the
+ *  ttl byte.  On return proto is ICMPv6 again, but vcf is still zero
+ *  and ttl still holds ICMPv6, so callers must fill those in after
+ *  calling this.
+ */
+static void
+set_cksum(Block *bp)
+{
+	IPICMP *p = (IPICMP *)(bp->rp);
+
+	hnputl(p->vcf, 0);  // borrow IP header as pseudoheader
+	hnputs(p->ploadlen, blocklen(bp)-IPV6HDR_LEN);
+	p->proto = 0;
+	p->ttl = ICMPv6;	// ttl gets set later
+	/* the checksum field must be zero while the sum is computed */
+	hnputs(p->cksum, 0);
+	hnputs(p->cksum, ptclcsum(bp, 0, blocklen(bp)));
+	p->proto = ICMPv6;
+}
+
+/*
+ *  Return a freshly allocated block holding packetlen zeroed bytes
+ *  between rp and wp.
+ */
+static Block *
+newIPICMP(int packetlen)
+{
+	Block *b;
+
+	b = allocb(packetlen);
+	memset(b->rp, 0, packetlen);
+	b->wp += packetlen;
+	return b;
+}
+
+/*
+ *  Pass an advisory message to the conversation that sent the packet
+ *  quoted at bp->rp: the first conversation whose local port equals
+ *  the quoted icmp id and whose remote address equals the quoted
+ *  destination gets both queues hung up with msg.  bp is always freed.
+ */
+void
+icmpadvise6(Proto *icmp, Block *bp, char *msg)
+{
+	Conv **cp, *cv;
+	IPICMP *h;
+	ushort id;
+
+	h = (IPICMP*)bp->rp;
+	id = nhgets(h->icmpid);
+
+	for(cp = icmp->conv; *cp != nil; cp++){
+		cv = *cp;
+		if(cv->lport != id || ipcmp(cv->raddr, h->dst) != 0)
+			continue;
+		qhangup(cv->rq, msg);
+		qhangup(cv->wq, msg);
+		break;
+	}
+	freeblist(bp);
+}
+
+/*
+ *  Write-queue bypass routine: send the block a user wrote to
+ *  conversation x.
+ *
+ *  In "headers" mode (icb->headers == 6) the data starts with an
+ *  ICMP_USEAD6 byte prefix: 8 bytes that are skipped, then the local
+ *  and the remote address.  The prefix is consumed and room for an
+ *  IPv6 header is prepended instead.  Otherwise the addresses and the
+ *  icmp id come from the conversation and the data is used as is.
+ *  NOTE(review): without "headers" the written data apparently has to
+ *  leave room for the IPv6 header itself - confirm against the callers.
+ *
+ *  Packets shorter than an IPICMP are dropped.
+ */
+static void
+icmpkick6(void *x, Block *bp)
+{
+	Conv *c = x;
+	IPICMP *p;
+	uchar laddr[IPaddrlen], raddr[IPaddrlen];
+	Icmppriv6 *ipriv = c->p->priv;
+	Icmpcb6 *icb = (Icmpcb6*)c->ptcl;
+
+	if(bp == nil)
+		return;
+
+	if(icb->headers==6) {
+		/* get user specified addresses */
+		bp = pullupblock(bp, ICMP_USEAD6);
+		if(bp == nil)
+			return;
+		bp->rp += 8;
+		ipmove(laddr, bp->rp);
+		bp->rp += IPaddrlen;
+		ipmove(raddr, bp->rp);
+		bp->rp += IPaddrlen;
+		bp = padblock(bp, sizeof(Ip6hdr));
+	}
+
+	if(blocklen(bp) < sizeof(IPICMP)){
+		freeblist(bp);
+		return;
+	}
+	p = (IPICMP *)(bp->rp);
+	if(icb->headers == 6) {
+		ipmove(p->dst, raddr);
+		ipmove(p->src, laddr);
+	} else {
+		ipmove(p->dst, c->raddr);
+		ipmove(p->src, c->laddr);
+		hnputs(p->icmpid, c->lport);
+	}
+
+	/* set_cksum zeroes vcf, so the version nibble is written afterwards */
+	set_cksum(bp);
+	p->vcf[0] = 0x06 << 4;
+	if(p->type <= Maxtype6)	
+		ipriv->out[p->type]++;
+	ipoput6(c->p->f, bp, 0, c->ttl, c->tos, nil);
+}
+
+/*
+ *  ctl hook.  The only request understood is the single word
+ *  "headers", which switches the conversation to user supplied
+ *  address headers.  Returns nil on success, otherwise an error string.
+ */
+char*
+icmpctl6(Conv *c, char **argv, int argc)
+{
+	Icmpcb6 *cb;
+
+	cb = (Icmpcb6*)c->ptcl;
+	if(argc != 1 || strcmp(argv[0], "headers") != 0)
+		return "unknown control request";
+
+	cb->headers = 6;
+	return nil;
+}
+
+/*
+ *  Hand a received packet to the first conversation that matches.
+ *  With muxkey == 0 the match is on (icmp id, packet source);
+ *  otherwise it is on (muxkey, packet destination).  The packet is
+ *  concatenated into one block and passed to the read queue of the
+ *  match; if nobody matches it is freed.
+ */
+static void
+goticmpkt6(Proto *icmp, Block *bp, int muxkey)
+{
+	Conv **cp, *cv;
+	IPICMP *h;
+	uchar *match;
+	ushort key;
+
+	h = (IPICMP*)bp->rp;
+	if(muxkey != 0){
+		key = muxkey;
+		match = h->dst;
+	}else{
+		key = nhgets(h->icmpid);
+		match = h->src;
+	}
+
+	for(cp = icmp->conv; *cp != nil; cp++){
+		cv = *cp;
+		if(cv->lport != key || ipcmp(cv->raddr, match) != 0)
+			continue;
+		bp = concatblock(bp);
+		if(bp != nil)
+			qpass(cv->rq, bp);
+		return;
+	}
+
+	freeblist(bp);
+}
+
+/*
+ *  Turn the echo request in bp into an echo reply in place: swap
+ *  source and destination, change the type and redo the checksum.
+ *  Returns bp.
+ */
+static Block *
+mkechoreply6(Block *bp)
+{
+	IPICMP *h;
+	uchar tmp[IPaddrlen];
+
+	h = (IPICMP*)bp->rp;
+	ipmove(tmp, h->dst);
+	ipmove(h->dst, h->src);
+	ipmove(h->src, tmp);
+
+	h->type = EchoReplyV6;
+	set_cksum(bp);
+	return bp;
+}
+
+/*
+ * sends out an ICMPv6 neighbor solicitation for targ.
+ * 	suni == SRC_UNSPEC or SRC_UNI, 
+ *	tuni == TARG_MULTI => multicast for address resolution,
+ * 	and tuni == TARG_UNI => neighbor reachability.
+ *
+ * With SRC_UNSPEC the unspecified address is used as the source and
+ * no link-layer address option is sent (src and mac are not read);
+ * otherwise src is the source and mac is put into a source
+ * link-layer address option.
+ */
+
+extern void
+icmpns(Fs *f, uchar* src, int suni, uchar* targ, int tuni, uchar* mac)
+{
+	Block	*nbp;
+	Ndpkt *np;
+	Proto *icmp = f->t2p[ICMPv6];
+	Icmppriv6 *ipriv = icmp->priv;
+
+
+	nbp = newIPICMP(sizeof(Ndpkt));
+	np = (Ndpkt*) nbp->rp;
+
+
+	if(suni == SRC_UNSPEC) 
+		memmove(np->src, v6Unspecified, IPaddrlen);
+	else 
+		memmove(np->src, src, IPaddrlen);
+
+	/* unicast probe goes to targ itself, otherwise to the address ipv62smcast derives from targ */
+	if(tuni == TARG_UNI)
+		memmove(np->dst, targ, IPaddrlen);
+	else
+		ipv62smcast(np->dst, targ);
+
+	np->type = NbrSolicit;
+	np->code = 0;
+	memmove(np->target, targ, IPaddrlen);
+	if(suni != SRC_UNSPEC) {
+		np->otype = SRC_LLADDRESS;
+		np->olen = 1;	/* 1+1+6 = 8 = 1 8-octet */
+		memmove(np->lnaddr, mac, sizeof(np->lnaddr));
+	}
+	else {
+		/* no option: trim the option bytes off the end of the block */
+		int r = sizeof(Ndpkt)-sizeof(NdiscC);
+		nbp->wp -= r;
+	}
+
+	/* set_cksum clobbers ttl and vcf, so they are filled in afterwards */
+	set_cksum(nbp);
+	np = (Ndpkt*) nbp->rp;
+	np->ttl = HOP_LIMIT;
+	np->vcf[0] = 0x06 << 4;
+	ipriv->out[NbrSolicit]++;
+	netlog(f, Logicmp, "sending neighbor solicitation %I\n", targ);
+	ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil);
+}
+
+/*
+ * sends out an ICMPv6 neighbor advertisement for targ from src to dst.
+ * flags holds the RSO flags (Rflag, Sflag, Oflag) and is stored in the
+ * first byte after the checksum; mac is sent in a target link-layer
+ * address option.
+ */
+extern void
+icmpna(Fs *f, uchar* src, uchar* dst, uchar* targ, uchar* mac, uchar flags)
+{
+	Block	*nbp;
+	Ndpkt *np;
+	Proto *icmp = f->t2p[ICMPv6];
+	Icmppriv6 *ipriv = icmp->priv;
+
+	nbp = newIPICMP(sizeof(Ndpkt));
+	np = (Ndpkt*) nbp->rp;
+
+	memmove(np->src, src, IPaddrlen);
+	memmove(np->dst, dst, IPaddrlen);
+
+	np->type = NbrAdvert;
+	np->code = 0;
+	np->icmpid[0] = flags;
+	memmove(np->target, targ, IPaddrlen);
+
+	np->otype = TARGET_LLADDRESS;
+	np->olen = 1;	
+	memmove(np->lnaddr, mac, sizeof(np->lnaddr));
+
+	/* set_cksum clobbers ttl and vcf, so they are filled in afterwards */
+	set_cksum(nbp);
+	np = (Ndpkt*) nbp->rp;
+	np->ttl = HOP_LIMIT;
+	np->vcf[0] = 0x06 << 4;
+	ipriv->out[NbrAdvert]++;
+	netlog(f, Logicmp, "sending neighbor advertisement %I\n", src);
+	ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil);
+}
+
+/*
+ *  Answer the IPv6 packet in bp with an ICMPv6 destination
+ *  unreachable message carrying the given code.  The reply quotes as
+ *  much of the first block of bp as fits in v6MINTU bytes.
+ *
+ *  free != 0: the reply is fed back into ipiput6 on ifc and bp is
+ *  freed here.
+ *  free == 0: the reply is sent with ipoput6 and bp stays with the
+ *  caller.
+ *
+ *  Nothing is sent when the source of bp is multicast or when ifc has
+ *  no usable local address.
+ *
+ *  ifc is read-locked only around the address lookup and the send and
+ *  is released on every path; bp is freed only when free is set, so a
+ *  caller passing free == 0 can always free it itself.
+ */
+extern void
+icmphostunr(Fs *f, Ipifc *ifc, Block *bp, int code, int free)
+{
+	Block *nbp;
+	IPICMP *np;
+	Ip6hdr	*p;
+	int osz = BLEN(bp);
+	int sz = MIN(sizeof(IPICMP) + osz, v6MINTU);
+	Proto	*icmp = f->t2p[ICMPv6];
+	Icmppriv6 *ipriv = icmp->priv;
+
+	p = (Ip6hdr *) bp->rp;
+
+	/* never answer a multicast source; ifc is not locked yet */
+	if(isv6mcast(p->src))
+		goto clean;
+
+	nbp = newIPICMP(sz);
+	np = (IPICMP *) nbp->rp;
+
+	rlock(ifc);
+	if(ipv6anylocal(ifc, np->src)) {
+		netlog(f, Logicmp, "send icmphostunr -> s%I d%I\n", p->src, p->dst);
+	}
+	else {
+		netlog(f, Logicmp, "icmphostunr fail -> s%I d%I\n", p->src, p->dst);
+		runlock(ifc);
+		freeblist(nbp);
+		goto clean;
+	}
+
+	memmove(np->dst, p->src, IPaddrlen);
+	np->type = UnreachableV6;
+	np->code = code;
+	/* quote the offending packet after the icmp header */
+	memmove(nbp->rp + sizeof(IPICMP), bp->rp, sz - sizeof(IPICMP));
+	/* set_cksum clobbers ttl and vcf, so they are filled in afterwards */
+	set_cksum(nbp);
+	np->ttl = HOP_LIMIT;
+	np->vcf[0] = 0x06 << 4;
+	ipriv->out[UnreachableV6]++;
+
+	if(free)
+		ipiput6(f, ifc, nbp);
+	else
+		ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil);
+	runlock(ifc);
+
+clean:
+	if(free)
+		freeblist(bp);
+}
+
+/*
+ *  Answer the IPv6 packet in bp with an ICMPv6 time exceeded message
+ *  (code 0).  The reply quotes as much of the first block of bp as
+ *  fits in v6MINTU bytes.  Nothing is sent when the source of bp is
+ *  multicast or when ifc has no usable local address; in the latter
+ *  case the half-built reply is freed.  bp itself is never freed here.
+ */
+extern void
+icmpttlexceeded6(Fs *f, Ipifc *ifc, Block *bp)
+{
+	Block *nbp;
+	IPICMP *np;
+	Ip6hdr	*p;
+	int osz = BLEN(bp);
+	int sz = MIN(sizeof(IPICMP) + osz, v6MINTU);
+	Proto	*icmp = f->t2p[ICMPv6];
+	Icmppriv6 *ipriv = icmp->priv;
+
+	p = (Ip6hdr *) bp->rp;
+
+	if(isv6mcast(p->src))
+		return;
+
+	nbp = newIPICMP(sz);
+	np = (IPICMP *) nbp->rp;
+
+	if(ipv6anylocal(ifc, np->src)) {
+		netlog(f, Logicmp, "send icmpttlexceeded6 -> s%I d%I\n", p->src, p->dst);
+	}
+	else {
+		netlog(f, Logicmp, "icmpttlexceeded6 fail -> s%I d%I\n", p->src, p->dst);
+		/* no source address to reply from: drop the reply block */
+		freeblist(nbp);
+		return;
+	}
+
+	memmove(np->dst, p->src, IPaddrlen);
+	np->type = TimeExceedV6;
+	np->code = 0;
+	/* quote the offending packet after the icmp header */
+	memmove(nbp->rp + sizeof(IPICMP), bp->rp, sz - sizeof(IPICMP));
+	/* set_cksum clobbers ttl and vcf, so they are filled in afterwards */
+	set_cksum(nbp);
+	np->ttl = HOP_LIMIT;
+	np->vcf[0] = 0x06 << 4;
+	ipriv->out[TimeExceedV6]++;
+	ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil);
+}
+
+/*
+ *  Answer the IPv6 packet in bp with an ICMPv6 packet too big message.
+ *  The advertised MTU is ifc->maxtu minus the medium's header size and
+ *  is stored in the four bytes after the checksum.  The reply quotes
+ *  as much of the first block of bp as fits in v6MINTU bytes.
+ *  Nothing is sent when the source of bp is multicast or when ifc has
+ *  no usable local address; in the latter case the half-built reply is
+ *  freed.  bp itself is never freed here.
+ */
+extern void
+icmppkttoobig6(Fs *f, Ipifc *ifc, Block *bp)
+{
+	Block *nbp;
+	IPICMP *np;
+	Ip6hdr	*p;
+	int osz = BLEN(bp);
+	int sz = MIN(sizeof(IPICMP) + osz, v6MINTU);
+	Proto	*icmp = f->t2p[ICMPv6];
+	Icmppriv6 *ipriv = icmp->priv;
+
+	p = (Ip6hdr *) bp->rp;
+
+	if(isv6mcast(p->src))
+		return;
+
+	nbp = newIPICMP(sz);
+	np = (IPICMP *) nbp->rp;
+
+	if(ipv6anylocal(ifc, np->src)) {
+		netlog(f, Logicmp, "send icmppkttoobig6 -> s%I d%I\n", p->src, p->dst);
+	}
+	else {
+		netlog(f, Logicmp, "icmppkttoobig6 fail -> s%I d%I\n", p->src, p->dst);
+		/* no source address to reply from: drop the reply block */
+		freeblist(nbp);
+		return;
+	}
+
+	memmove(np->dst, p->src, IPaddrlen);
+	np->type = PacketTooBigV6;
+	np->code = 0;
+	/* 32 bit MTU field: overlays icmpid and seq */
+	hnputl(np->icmpid, ifc->maxtu - ifc->m->hsize);
+	/* quote the offending packet after the icmp header */
+	memmove(nbp->rp + sizeof(IPICMP), bp->rp, sz - sizeof(IPICMP));
+	/* set_cksum clobbers ttl and vcf, so they are filled in afterwards */
+	set_cksum(nbp);
+	np->ttl = HOP_LIMIT;
+	np->vcf[0] = 0x06 << 4;
+	ipriv->out[PacketTooBigV6]++;
+	ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil);
+}
+
+/*
+ * RFC 2461, pages 39-40, pages 57-58.
+ *
+ * Sanity check a received ICMPv6 packet.  Returns 1 if the packet may
+ * be processed and 0 if it must be dropped; every rejection bumps
+ * InErrors6 and most also bump a more specific counter.  The caller
+ * keeps ownership of bp.
+ *
+ * Checks, in order: minimum length, payload length against the block
+ * length, no extension headers, checksum, and for the neighbor
+ * discovery types (solicit/advert/router/redirect) the hop limit,
+ * the code and some per-type rules.
+ *
+ * The first four bytes of the IPv6 header (vcf) are zeroed as part of
+ * the checksum computation and stay zero.  If the checksum is wrong
+ * the proto and ttl bytes are left swapped as well.
+ */
+static int
+valid(Proto *icmp, Ipifc *ifc, Block *bp, Icmppriv6 *ipriv) {
+	int 	sz, osz, unsp, n, ttl, iplen;
+	int 	pktsz = BLEN(bp);
+	uchar	*packet = bp->rp;
+	IPICMP	*p = (IPICMP *) packet;
+	Ndpkt	*np;
+
+	USED(ifc);
+	n = blocklen(bp);
+	if(n < sizeof(IPICMP)) {
+		ipriv->stats[HlenErrs6]++;
+		netlog(icmp->f, Logicmp, "icmp hlen %d\n", n);
+		goto err;
+	}
+
+	iplen = nhgets(p->ploadlen);
+	/* NOTE(review): iplen % 1 is always 0, so the second test never fires */
+	if(iplen > n-IPV6HDR_LEN || (iplen % 1)) {
+		ipriv->stats[LenErrs6]++;
+		netlog(icmp->f, Logicmp, "icmp length %d\n", iplen);
+		goto err;
+	}
+
+	// Rather than construct explicit pseudoheader, overwrite IPv6 header
+	if(p->proto != ICMPv6) {
+		// This code assumes no extension headers!!!
+		netlog(icmp->f, Logicmp, "icmp error: extension header\n");
+		goto err;
+	}
+	/* zero vcf, park the protocol in the ttl byte and zero proto, as set_cksum does */
+	memset(packet, 0, 4);
+	ttl = p->ttl;
+	p->ttl = p->proto;
+	p->proto = 0;
+	if(ptclcsum(bp, 0, iplen + IPV6HDR_LEN)) {
+		ipriv->stats[CsumErrs6]++;
+		netlog(icmp->f, Logicmp, "icmp checksum error\n");
+		goto err;
+	}
+	/* restore proto and the received hop limit */
+	p->proto = p->ttl;
+	p->ttl = ttl;
+
+	/* additional tests for some pkt types */
+	if( (p->type == NbrSolicit) ||
+		(p->type == NbrAdvert) ||
+		(p->type == RouterAdvert) ||
+		(p->type == RouterSolicit) ||
+		(p->type == RedirectV6) ) {
+
+		if(p->ttl != HOP_LIMIT) {
+			ipriv->stats[HoplimErrs6]++; 
+			goto err; 
+		}
+		if(p->code != 0) {
+			ipriv->stats[IcmpCodeErrs6]++; 
+			goto err; 
+		}
+
+		switch (p->type) {
+		case NbrSolicit:
+		case NbrAdvert:
+			np = (Ndpkt*) p;
+			/* the target must not be multicast */
+			if(isv6mcast(np->target)) {
+				ipriv->stats[TargetErrs6]++; 
+				goto err; 
+			}
+			/* an option, if present, must not have length zero */
+			if(optexsts(np) && (np->olen == 0)) {
+				ipriv->stats[OptlenErrs6]++; 
+				goto err; 
+			}
+		
+			/*
+			 * a solicitation from the unspecified address must go
+			 * to a solicited-node multicast address and carry no option
+			 */
+			if(p->type == NbrSolicit) {
+				if(ipcmp(np->src, v6Unspecified) == 0) { 
+					if(!issmcast(np->dst) || optexsts(np))  {
+						ipriv->stats[AddrmxpErrs6]++; 
+						goto err;
+					}
+				}
+			}
+		
+			/* a multicast advertisement must not match the Sflag test below */
+			if(p->type == NbrAdvert) {
+				if((isv6mcast(np->dst))&&(nhgets(np->icmpid) & Sflag)){
+					ipriv->stats[AddrmxpErrs6]++; 
+					goto err; 
+				}
+			}
+			break;
+	
+		case RouterAdvert:
+			if(pktsz - sizeof(Ip6hdr) < 16) {
+				ipriv->stats[HlenErrs6]++; 
+				goto err; 
+			}
+			if(!islinklocal(p->src)) {
+				ipriv->stats[RouterAddrErrs6]++; 
+				goto err; 
+			}
+			/* walk the options; each length byte counts 8 byte units and must not be 0 */
+			sz = sizeof(IPICMP) + 8;
+			while ((sz+1) < pktsz) {
+				osz = *(packet+sz+1);
+				if(osz <= 0) {
+					ipriv->stats[OptlenErrs6]++; 
+					goto err; 
+				}	
+				sz += 8*osz;
+			}
+			break;
+	
+		case RouterSolicit:
+			if(pktsz - sizeof(Ip6hdr) < 8) {
+				ipriv->stats[HlenErrs6]++; 
+				goto err; 
+			}
+			unsp = (ipcmp(p->src, v6Unspecified) == 0);
+			/*
+			 * walk the options as above; a source link-layer address
+			 * option is also rejected when the source is unspecified
+			 */
+			sz = sizeof(IPICMP) + 8;
+			while ((sz+1) < pktsz) {
+				osz = *(packet+sz+1);
+				if((osz <= 0) ||
+					(unsp && (*(packet+sz) == slladd)) ) {
+					ipriv->stats[OptlenErrs6]++; 
+					goto err; 
+				}
+				sz += 8*osz;
+			}
+			break;
+	
+		case RedirectV6:
+			//to be filled in
+			break;
+	
+		default:
+			goto err;
+		}
+	}
+
+	return 1;
+
+err:
+	ipriv->stats[InErrors6]++; 
+	return 0;
+}
+
+/*
+ *  Classify a neighbor solicitation target with respect to ifc:
+ *  t_uniproxy if ipproxyifc() accepts the address for ifc, t_unitent
+ *  or t_unirany if it is a local address of ifc that is or is not
+ *  tentative, and 0 otherwise.  ifc is read-locked while looking.
+ */
+static int
+targettype(Fs *f, Ipifc *ifc, uchar *target)
+{
+	Iplifc *l;
+	int kind;
+
+	kind = 0;
+	rlock(ifc);
+	if(ipproxyifc(f, ifc, target))
+		kind = t_uniproxy;
+	else
+		for(l = ifc->lifc; l != nil; l = l->next){
+			if(ipcmp(l->local, target) != 0)
+				continue;
+			kind = l->tentative? t_unitent: t_unirany;
+			break;
+		}
+	runlock(ifc);
+	return kind;
+}
+
+/*
+ *  rcv hook: process an ICMPv6 packet that arrived on ipifc.
+ *  bp is consumed on every path: it is reused as the echo reply,
+ *  handed to another protocol's advise routine, queued to a
+ *  conversation by goticmpkt6, or freed.
+ *
+ *  - invalid packets and types above Maxtype6 are dropped
+ *  - echo requests are answered in place
+ *  - unreachable and (code 0) time exceeded messages are passed to
+ *    the advise routine of the protocol of the quoted packet, if any;
+ *    otherwise they go to the matching icmp conversation
+ *  - router solicitations/advertisements are queued to conversations
+ *    keyed on the message type
+ *  - neighbor solicitations for one of our addresses are answered
+ *    with a neighbor advertisement
+ *  - neighbor advertisements are entered into the arp table
+ *  - everything else goes to the matching icmp conversation
+ */
+static void
+icmpiput6(Proto *icmp, Ipifc *ipifc, Block *bp)
+{
+	uchar	*packet = bp->rp;
+	IPICMP	*p = (IPICMP *)packet;
+	Icmppriv6 *ipriv = icmp->priv;
+	Block	*r;
+	Proto	*pr;
+	char	*msg, m2[128];
+	Ndpkt* np;
+	uchar pktflags;
+	uchar lsrc[IPaddrlen];
+	int refresh = 1;
+	Iplifc *lifc;
+
+	if(!valid(icmp, ipifc, bp, ipriv))
+		goto raise;
+
+	if(p->type <= Maxtype6)
+		ipriv->in[p->type]++;
+	else
+		goto raise;
+
+	switch(p->type) {
+	case EchoRequestV6:
+		r = mkechoreply6(bp);
+		/* count the reply under the type that is actually sent */
+		ipriv->out[EchoReplyV6]++;
+		ipoput6(icmp->f, r, 0, MAXTTL, DFLTTOS, nil);
+		break;
+
+	case UnreachableV6:
+		if(p->code > 4)
+			msg = unreachcode[icmp6_unkn_code];
+		else
+			msg = unreachcode[p->code];
+
+		/* step over our headers to the quoted packet */
+		bp->rp += sizeof(IPICMP);
+		if(blocklen(bp) < 8){
+			ipriv->stats[LenErrs6]++;
+			goto raise;
+		}
+		p = (IPICMP *)bp->rp;
+		pr = Fsrcvpcolx(icmp->f, p->proto);
+		if(pr != nil && pr->advise != nil) {
+			/* bp now belongs to the advise routine */
+			(*pr->advise)(pr, bp, msg);
+			return;
+		}
+
+		bp->rp -= sizeof(IPICMP);
+		goticmpkt6(icmp, bp, 0);
+		break;
+
+	case TimeExceedV6:
+		if(p->code == 0){
+			sprint(m2, "ttl exceeded at %I", p->src);
+
+			/* step over our headers to the quoted packet */
+			bp->rp += sizeof(IPICMP);
+			if(blocklen(bp) < 8){
+				ipriv->stats[LenErrs6]++;
+				goto raise;
+			}
+			p = (IPICMP *)bp->rp;
+			pr = Fsrcvpcolx(icmp->f, p->proto);
+			if(pr != nil && pr->advise != nil) {
+				/* bp now belongs to the advise routine */
+				(*pr->advise)(pr, bp, m2);
+				return;
+			}
+			bp->rp -= sizeof(IPICMP);
+		}
+
+		goticmpkt6(icmp, bp, 0);
+		break;
+
+	case RouterAdvert:
+	case RouterSolicit:
+		/* conversations for these are keyed on the message type and our address */
+		goticmpkt6(icmp, bp, p->type);
+		break;
+
+	case NbrSolicit:
+		np = (Ndpkt*) p;
+		pktflags = 0;
+		switch (targettype(icmp->f, ipifc, np->target)) {
+		case t_unirany:
+			pktflags |= Oflag;
+			/* fall through */
+
+		case t_uniproxy:
+			if(ipcmp(np->src, v6Unspecified) != 0) {
+				arpenter(icmp->f, V6, np->src, np->lnaddr, 8*np->olen-2, 0);
+				pktflags |= Sflag;
+			}
+			/* icmpna copies what it needs out of np into a new block */
+			if(ipv6local(ipifc, lsrc)) {
+				icmpna(icmp->f, lsrc,
+				   (ipcmp(np->src, v6Unspecified)==0)?v6allnodesL:np->src,
+				   np->target, ipifc->mac, pktflags);
+			}
+			break;
+
+		case t_unitent:
+			/* not clear what needs to be done. send up
+			 * an icmp mesg saying don't use this address? */
+
+		default:
+			break;
+		}
+		/* the solicitation is done with, whether or not it was answered */
+		freeblist(bp);
+		break;
+
+	case NbrAdvert:
+		np = (Ndpkt*) p;
+
+		/* if the target address matches one of the local interface
+		 * address and the local interface address has tentative bit set,
+		 * then insert into ARP table. this is so the duplication address
+		 * detection part of ipconfig can discover duplication through
+		 * the arp table
+		 */
+		lifc = iplocalonifc(ipifc, np->target);
+		if(lifc && lifc->tentative)
+			refresh = 0;
+		arpenter(icmp->f, V6, np->target, np->lnaddr, 8*np->olen-2, refresh);
+		freeblist(bp);
+		break;
+
+	case PacketTooBigV6:
+
+	default:
+		goticmpkt6(icmp, bp, 0);
+		break;
+	}
+	return;
+
+raise:
+	freeblist(bp);
+
+}
+
+/*
+ *  Format the icmpv6 counters into buf (at most len bytes): first the
+ *  named statistics, then one "name: in out" line for every message
+ *  type that has a name in icmpnames6.  Types without a name are not
+ *  reported.  Returns the distance from buf to the end of the
+ *  formatted text.
+ */
+int
+icmpstats6(Proto *icmp6, char *buf, int len)
+{
+	Icmppriv6 *ip;
+	char *s, *end;
+	int t;
+
+	ip = icmp6->priv;
+	end = buf+len;
+	s = buf;
+	for(t = 0; t < Nstats6; t++)
+		s = seprint(s, end, "%s: %lud\n", statnames6[t], ip->stats[t]);
+	for(t = 0; t <= Maxtype6; t++){
+		if(icmpnames6[t] == nil)
+			continue;
+		s = seprint(s, end, "%s: %lud %lud\n", icmpnames6[t], ip->in[t], ip->out[t]);
+	}
+	return s - buf;
+}
+
+
+// need to import from icmp.c
+extern int	icmpstate(Conv *c, char *state, int n);
+extern char*	icmpannounce(Conv *c, char **argv, int argc);
+extern char*	icmpconnect(Conv *c, char **argv, int argc);
+extern void	icmpclose(Conv *c);
+
+/*
+ *  Allocate the icmpv6 protocol descriptor together with its private
+ *  statistics area, fill in the protocol hooks (connect, announce,
+ *  state and close are shared with icmp.c) and register the protocol
+ *  with fs.
+ */
+void
+icmp6init(Fs *fs)
+{
+	Proto *pr;
+
+	pr = smalloc(sizeof(Proto));
+	pr->priv = smalloc(sizeof(Icmppriv6));
+
+	/* identity and sizing */
+	pr->name = "icmpv6";
+	pr->ipproto = ICMPv6;
+	pr->nc = 16;
+	pr->ptclsize = sizeof(Icmpcb6);
+
+	/* conversation management */
+	pr->create = icmpcreate6;
+	pr->connect = icmpconnect;
+	pr->announce = icmpannounce;
+	pr->close = icmpclose;
+	pr->state = icmpstate;
+	pr->ctl = icmpctl6;
+
+	/* packet input, advice and statistics; no gc routine */
+	pr->rcv = icmpiput6;
+	pr->advise = icmpadvise6;
+	pr->stats = icmpstats6;
+	pr->gc = nil;
+
+	Fsproto(fs, pr);
+}
+
--- /dev/null
+++ b/os/ip.original/igmp.c
@@ -1,0 +1,291 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+
+#include "ip.h"
+
+/* header sizes, protocol number, message types and report timing */
+enum
+{
+	IGMP_IPHDRSIZE	= 20,		/* size of ip header */
+	IGMP_HDRSIZE	= 8,		/* size of IGMP header */
+	IP_IGMPPROTO	= 2,
+
+	/* low nibble of IGMPpkt.vertype */
+	IGMPquery	= 1,
+	IGMPreport	= 2,
+
+	MSPTICK		= 100,		/* milliseconds igmpproc sleeps between scans */
+	MAXTIMEOUT	= 10000/MSPTICK,	/* at most 10 secs for a response */
+};
+
+/* an IGMP packet as laid out at bp->rp: ip header followed by the igmp header */
+typedef struct IGMPpkt IGMPpkt;
+struct IGMPpkt
+{
+	/* ip header */
+	uchar	vihl;		/* Version and header length */
+	uchar	tos;		/* Type of service */
+	uchar	len[2];		/* packet length (including headers) */
+	uchar	id[2];		/* Identification */
+	uchar	frag[2];	/* Fragment information */
+	uchar	Unused;	
+	uchar	proto;		/* Protocol */
+	uchar	cksum[2];	/* checksum of ip portion */
+	uchar	src[IPaddrlen];		/* Ip source */
+	uchar	dst[IPaddrlen];		/* Ip destination */
+
+	/* igmp header */
+	uchar	vertype;	/* version and type */
+	uchar	unused;
+	uchar	igmpcksum[2];		/* checksum of igmp portion */
+	uchar	group[IPaddrlen];	/* multicast group */
+};
+
+/*
+ *  lists for group reports
+ *
+ *  One IGMPrep exists per medium that has been queried; multi is the
+ *  list of groups still to be reported on it and ticks counts the
+ *  scans igmpproc has made since the query arrived.
+ */
+typedef struct IGMPrep IGMPrep;
+struct IGMPrep
+{
+	IGMPrep		*next;
+	Media		*m;
+	int		ticks;
+	Multicast	*multi;
+};
+
+/* the lock protects the reports list; igmpproc sleeps on r until there are reports */
+typedef struct IGMP IGMP;
+struct IGMP
+{
+	Lock;
+	Rendez	r;
+	IGMPrep	*reports;
+};
+
+IGMP igmpalloc;
+
+	Proto	igmp;
+extern	Fs	fs;
+
+/* message counters reported by igmpstats */
+static struct Stats
+{
+	ulong 	inqueries;
+	ulong	outqueries;
+	ulong	inreports;
+	ulong	outreports;
+} stats;
+
+/*
+ *  Send an IGMP membership report for group addr on medium m.
+ *  The report goes to Ipallsys with a TTL of 1.  Nothing is sent if
+ *  no block can be allocated.
+ */
+void
+igmpsendreport(Media *m, uchar *addr)
+{
+	IGMPpkt *p;
+	Block *bp;
+
+	bp = allocb(sizeof(IGMPpkt));
+	if(bp == nil)
+		return;
+	p = (IGMPpkt*)bp->wp;
+	bp->wp += sizeof(IGMPpkt);
+	/* clear the packet first so that no field set below is wiped again */
+	memset(p, 0, sizeof(IGMPpkt));
+	p->vihl = IP_VER4;
+	hnputl(p->src, Mediagetaddr(m));
+	hnputl(p->dst, Ipallsys);
+	p->vertype = (1<<4) | IGMPreport;
+	p->proto = IP_IGMPPROTO;
+	memmove(p->group, addr, IPaddrlen);
+	/* the checksum field is still zero from the memset */
+	hnputs(p->igmpcksum, ptclcsum(bp, IGMP_IPHDRSIZE, IGMP_HDRSIZE));
+	netlog(Logigmp, "igmpreport %I\n", p->group);
+	stats.outreports++;
+	ipoput4(bp, 0, 1, DFLTTOS, nil);	/* TTL of 1 */
+}
+
+/* sleep condition for igmpproc: true while there are report lists to work on */
+static int
+isreport(void *a)
+{
+	USED(a);
+	return igmpalloc.reports != 0;
+}
+
+
+/*
+ *  Kernel process that sends the delayed membership reports.
+ *  It sleeps until igmpiput queues a report list, then scans the
+ *  lists about every MSPTICK ms.  Each scan advances the tick count
+ *  of every list it visits, sends at most one report (the first group
+ *  whose timeout has been reached) and discards lists that have
+ *  become empty.  Never returns.
+ */
+void
+igmpproc(void *a)
+{
+	IGMPrep *rp, **lrp;
+	Multicast *mp, **lmp;
+	uchar ip[IPaddrlen];
+
+	USED(a);
+
+	for(;;){
+		sleep(&igmpalloc.r, isreport, 0);
+		for(;;){
+			lock(&igmpalloc);
+
+			/* leaves the inner loop with the lock held; it is released below */
+			if(igmpalloc.reports == nil)
+				break;
+	
+			/* look for a single report */
+			lrp = &igmpalloc.reports;
+			mp = nil;
+			for(rp = *lrp; rp; rp = *lrp){
+				rp->ticks++;
+				lmp = &rp->multi;
+				for(mp = *lmp; mp; mp = *lmp){
+					if(rp->ticks >= mp->timeout){
+						/* due: unlink it, it is sent after unlocking */
+						*lmp = mp->next;
+						break;
+					}
+					lmp = &mp->next;
+				}
+				if(mp != nil)
+					break;
+
+				if(rp->multi != nil){
+					lrp = &rp->next;
+					continue;
+				} else {
+					/* nothing left to report on this medium */
+					*lrp = rp->next;
+					free(rp);
+				}
+			}
+			unlock(&igmpalloc);
+
+			if(mp){
+				/* do a single report and try again */
+				hnputl(ip, mp->addr);
+				igmpsendreport(rp->m, ip);
+				free(mp);
+				continue;
+			}
+
+			tsleep(&up->sleep, return0, 0, MSPTICK);
+		}
+		unlock(&igmpalloc);
+	}
+
+}
+
+/*
+ *  rcv hook: process an IGMP packet received on medium m.
+ *  Packets that are too short, are not version 1 or fail the checksum
+ *  are dropped.  A query starts a report list for m (unless one is
+ *  already pending) with a random timeout for each of our groups and
+ *  wakes igmpproc.  A report from someone else removes the reported
+ *  group from our pending list for m.  bp is always freed.
+ *
+ *  NOTE(review): the first netlog reads vertype and group before the
+ *  length of the packet has been checked.
+ */
+void
+igmpiput(Media *m, Ipifc *, Block *bp)
+{
+	int n;
+	IGMPpkt *ghp;
+	Ipaddr group;
+	IGMPrep *rp, **lrp;
+	Multicast *mp, **lmp;
+
+	ghp = (IGMPpkt*)(bp->rp);
+	netlog(Logigmp, "igmpiput: %d %I\n", ghp->vertype, ghp->group);
+
+	n = blocklen(bp);
+	if(n < IGMP_IPHDRSIZE+IGMP_HDRSIZE){
+		netlog(Logigmp, "igmpiput: bad len\n");
+		goto error;
+	}
+	if((ghp->vertype>>4) != 1){
+		netlog(Logigmp, "igmpiput: bad igmp type\n");
+		goto error;
+	}
+	if(ptclcsum(bp, IGMP_IPHDRSIZE, IGMP_HDRSIZE)){
+		netlog(Logigmp, "igmpiput: checksum error %I\n", ghp->src);
+		goto error;
+	}
+
+	group = nhgetl(ghp->group);
+	
+	lock(&igmpalloc);
+	switch(ghp->vertype & 0xf){
+	case IGMPquery:
+		/*
+		 *  start reporting groups that we're a member of.
+		 */
+		stats.inqueries++;
+		for(rp = igmpalloc.reports; rp; rp = rp->next)
+			if(rp->m == m)
+				break;
+		if(rp != nil)
+			break;	/* already reporting */
+
+		mp = Mediacopymulti(m);
+		if(mp == nil)
+			break;
+
+		/* NOTE(review): the list from Mediacopymulti is not released if this malloc fails */
+		rp = malloc(sizeof(*rp));
+		if(rp == nil)
+			break;
+
+		rp->m = m;
+		rp->multi = mp;
+		rp->ticks = 0;
+		/* spread the reports over up to MAXTIMEOUT ticks */
+		for(; mp; mp = mp->next)
+			mp->timeout = nrand(MAXTIMEOUT);
+		rp->next = igmpalloc.reports;
+		igmpalloc.reports = rp;
+
+		wakeup(&igmpalloc.r);
+
+		break;
+	case IGMPreport:
+		/*
+		 *  find report list for this medium
+		 */
+		stats.inreports++;
+		lrp = &igmpalloc.reports;
+		for(rp = *lrp; rp; rp = *lrp){
+			if(rp->m == m)
+				break;
+			lrp = &rp->next;
+		}
+		if(rp == nil)
+			break;
+
+		/*
+		 *  if someone else has reported a group,
+		 *  we don't have to.
+		 */
+		lmp = &rp->multi;
+		for(mp = *lmp; mp; mp = *lmp){
+			if(mp->addr == group){
+				*lmp = mp->next;
+				free(mp);
+				break;
+			}
+			lmp = &mp->next;
+		}
+
+		break;
+	}
+	unlock(&igmpalloc);
+
+error:
+	freeb(bp);
+}
+
+/*
+ *  Format the igmp message counters into buf (at most len bytes) and
+ *  return snprint's result.  The counters are ulongs, hence %lud.
+ */
+int
+igmpstats(char *buf, int len)
+{
+	return snprint(buf, len, "\trcvd %lud %lud\n\tsent %lud %lud\n",
+		stats.inqueries, stats.inreports,
+		stats.outqueries, stats.outreports);
+}
+
+/*
+ *  Fill in the static igmp protocol descriptor (receive and stats
+ *  hooks only, no conversations), install igmpsendreport as the
+ *  report function, start the igmpproc kernel process and register
+ *  the protocol with fs.
+ */
+void
+igmpinit(Fs *fs)
+{
+	igmp.name = "igmp";
+	igmp.connect = nil;
+	igmp.announce = nil;
+	igmp.ctl = nil;
+	igmp.state = nil;
+	igmp.close = nil;
+	igmp.rcv = igmpiput;
+	igmp.stats = igmpstats;
+	igmp.ipproto = IP_IGMPPROTO;
+	igmp.nc = 0;
+	igmp.ptclsize = 0;
+
+	igmpreportfn = igmpsendreport;
+	kproc("igmpproc", igmpproc, 0, 0);
+
+	Fsproto(fs, &igmp);
+}
--- /dev/null
+++ b/os/ip.original/ihbootp.c
@@ -1,0 +1,323 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+#include "kernel.h"
+#include "ip.h"
+
+/*
+ *  Results of the last bootp exchange, kept as host-order IPv4
+ *  addresses; 0 means not supplied.  ipaddr comes from the yiaddr
+ *  field of the reply, the others from its vendor area.
+ */
+static	ulong	fsip;		/* file server */
+static	ulong	auip;		/* authentication server */
+static	ulong	gwip;		/* gateway */
+static	ulong	ipmask;		/* subnet mask */
+static	ulong	ipaddr;		/* our own address */
+static	ulong	dnsip;		/* dns server */
+
+/* values of Bootp.op */
+enum
+{
+	Bootrequest = 1,
+	Bootreply   = 2,
+};
+
+/* a bootp message as read from and written to the udp data file */
+typedef struct Bootp
+{
+	/* udp.c oldheader */
+	uchar	raddr[IPaddrlen];
+	uchar	laddr[IPaddrlen];
+	uchar	rport[2];
+	uchar	lport[2];
+	/* bootp itself */
+	uchar	op;		/* opcode */
+	uchar	htype;		/* hardware type */
+	uchar	hlen;		/* hardware address len */
+	uchar	hops;		/* hops */
+	uchar	xid[4];		/* a random number */
+	uchar	secs[2];	/* elapsed since client started booting */
+	uchar	pad[2];
+	uchar	ciaddr[4];	/* client IP address (client tells server) */
+	uchar	yiaddr[4];	/* client IP address (server tells client) */
+	uchar	siaddr[4];	/* server IP address */
+	uchar	giaddr[4];	/* gateway IP address */
+	uchar	chaddr[16];	/* client hardware address */
+	uchar	sname[64];	/* server host name (optional) */
+	uchar	file[128];	/* boot file name */
+	uchar	vend[128];	/* vendor-specific goo */
+} Bootp;
+
+/*
+ * bootp returns:
+ *
+ * "fsip d.d.d.d
+ * auip d.d.d.d
+ * gwip d.d.d.d
+ * ipmask d.d.d.d
+ * ipaddr d.d.d.d
+ * dnsip d.d.d.d"
+ *
+ * where d.d.d.d is the IP address in dotted decimal notation, and each
+ * address is followed by a newline.
+ */
+
+static	Bootp	req;		/* the request that rbootp broadcasts */
+static	Proc*	rcvprocp;	/* the rcvbootp process while it runs, else nil */
+static	int	recv;		/* set by rcvbootp when it is finished */
+static	int	done;		/* set by rbootp to make rcvbootp stop reading */
+static	Rendez	bootpr;		/* rbootp waits here between requests */
+static	char	rcvbuf[512];	/* receive buffer of rcvbootp */
+static	int	bootpdebug;	/* non-zero: print the vendor fields as they are parsed */
+
+/*
+ * Parse the vendor specific fields according to RFC 1084.
+ * We are overloading the "cookie server" to be the Inferno 
+ * authentication server and the "resource location server"
+ * to be the Inferno file server.
+ *
+ * If the vendor specific field is formatted properly, it
+ * will begin with the four bytes 99.130.83.99 and end with
+ * an 0xFF byte.
+ *
+ * Each field is a tag byte, a length byte and that many bytes
+ * of data.  The results are stored in ipmask, gwip, dnsip, auip
+ * and fsip; fields with other tags are skipped.  Parsing stops
+ * at the first tag of 0 or 0xFF, or at a subnet mask field
+ * whose length is not 4.
+ */
+static void
+parsevend(uchar* vend)
+{
+	/* The field must start with 99.130.83.99 to be compliant */
+	if ((vend[0] != 99) || (vend[1] != 130) ||
+	    (vend[2] != 83) || (vend[3] != 99)){
+		if(bootpdebug)
+			print("bad bootp vendor field: %.2x%.2x%.2x%.2x", vend[0], vend[1], vend[2], vend[3]);
+		return;
+	}
+
+	/* Skip over the magic cookie */
+	vend += 4;
+
+	while ((vend[0] != 0) && (vend[0] != 0xFF)) {
+		if(bootpdebug){
+			int i;
+			print("vend %d [%d]", vend[0], vend[1]);
+			/* the data starts after the tag and length bytes */
+			for(i=0; i<vend[1]; i++)
+				print(" %2.2x", vend[2+i]);
+			print("\n");
+		}
+		switch (vend[0]) {
+		case 1:	/* Subnet mask field */
+			/* There must be only one subnet mask */
+			if (vend[1] != 4)
+				return;
+
+			ipmask = (vend[2]<<24)|
+				 (vend[3]<<16)|
+				 (vend[4]<<8)|
+				  vend[5];
+			break;
+
+		case 3:	/* Gateway/router field */
+			/* We are only concerned with first address */
+			if (vend[1] < 4)
+				break;
+
+			gwip =	(vend[2]<<24)|
+				(vend[3]<<16)|
+				(vend[4]<<8)|
+				 vend[5];
+			break;
+
+		case 6:	/* DNS server */
+			/* We are only concerned with first address */
+			if (vend[1] < 4)
+				break;
+
+			dnsip =	(vend[2]<<24)|
+				(vend[3]<<16)|
+				(vend[4]<<8)|
+				 vend[5];
+			break;
+
+		case 8:	/* "Cookie server" (auth server) field */
+			/* We are only concerned with first address */
+			if (vend[1] < 4)
+				break;
+
+			auip =	(vend[2]<<24)|
+				(vend[3]<<16)|
+				(vend[4]<<8)|
+				 vend[5];
+			break;
+
+		case 11:	/* "Resource loc server" (file server) field */
+			/* We are only concerned with first address */
+			if (vend[1] < 4)
+				break;
+
+			fsip =	(vend[2]<<24)|
+				(vend[3]<<16)|
+				(vend[4]<<8)|
+				 vend[5];
+			break;
+
+		default:	/* Ignore everything else */
+			break;
+		}
+
+		/* Skip over the field */
+		vend += vend[1] + 2;
+	}
+}
+
+/*
+ *  Kernel process started by rbootp to read replies.  a is the file
+ *  descriptor of the udp data file, passed as a pointer value.
+ *  It reads until rbootp sets done, a read fails, or a message
+ *  arrives whose hardware address matches the request; from that
+ *  message it takes our address and the vendor fields.  It then sets
+ *  recv, wakes rbootp and exits.  An error raised while reading makes
+ *  the process exit without setting recv.
+ */
+static void
+rcvbootp(void *a)
+{
+	int n, fd;
+	Bootp *rp;
+
+	if(waserror())
+		pexit("", 0);
+	rcvprocp = up;	/* store for postnote below */
+	fd = (int)a;
+	while(done == 0) {
+		n = kread(fd, rcvbuf, sizeof(rcvbuf));
+		if(n <= 0)
+			break;
+		rp = (Bootp*)rcvbuf;
+		/* only accept ethernet messages carrying our own hardware address */
+		if (memcmp(req.chaddr, rp->chaddr, 6) == 0 &&
+		   rp->htype == 1 && rp->hlen == 6) {
+			ipaddr = (rp->yiaddr[0]<<24)|
+				 (rp->yiaddr[1]<<16)|
+				 (rp->yiaddr[2]<<8)|
+				  rp->yiaddr[3];
+			parsevend(rp->vend);
+			break;
+		}
+	}
+	poperror();
+	rcvprocp = nil;
+
+	recv = 1;
+	wakeup(&bootpr);
+	pexit("", 0);
+}
+
+/*
+ *  Configure ifc by bootp.  Returns nil, or an error string if the
+ *  udp port could not be set up.
+ *
+ *  The interface is temporarily given the address 0.0.0.0, a request
+ *  is broadcast about once a second until rcvbootp reports completion
+ *  or more than 10 tries have been made, and the temporary address is
+ *  removed again.  The interface is then given the address and mask
+ *  held in ipaddr and ipmask and, if a gateway is known, a default
+ *  route through it.
+ *
+ *  NOTE(review): on the early error returns the temporary 0.0.0.0
+ *  address stays on the interface, and after a timeout the final
+ *  address is still installed from whatever ipaddr/ipmask hold.
+ *  NOTE(review): av[0] is never set; presumably ipifcadd and ipifcrem
+ *  ignore it - confirm.
+ */
+static char*
+rbootp(Ipifc *ifc)
+{
+	int cfd, dfd, tries, n;
+	char ia[5+3*16], im[16], *av[3];
+	uchar nipaddr[4], ngwip[4], nipmask[4];
+	char dir[Maxpath];
+	static uchar vend_rfc1048[] = { 99, 130, 83, 99 };
+
+	av[1] = "0.0.0.0";
+	av[2] = "0.0.0.0";
+	ipifcadd(ifc, av, 3, 0, nil);
+
+	/* listen on the bootp client port; messages carry the old udp header format */
+	cfd = kannounce("udp!*!68", dir);
+	if(cfd < 0)
+		return "bootp announce failed";
+	strcat(dir, "/data");
+	if(kwrite(cfd, "headers", 7) < 0){
+		kclose(cfd);
+		return "bootp ctl headers failed";
+	}
+	kwrite(cfd, "oldheaders", 10);
+	dfd = kopen(dir, ORDWR);
+	if(dfd < 0){
+		kclose(cfd);
+		return "bootp open data failed";
+	}
+	kclose(cfd);
+
+	/* create request */
+	memset(&req, 0, sizeof(req));
+	ipmove(req.raddr, IPv4bcast);
+	hnputs(req.rport, 67);
+	req.op = Bootrequest;
+	req.htype = 1;			/* ethernet (all we know) */
+	req.hlen = 6;			/* ethernet (all we know) */
+
+	/* Hardware MAC address */
+	memmove(req.chaddr, ifc->mac, 6);
+	/* Fill in the local IP address if we know it */
+	ipv4local(ifc, req.ciaddr);
+	memset(req.file, 0, sizeof(req.file));
+	memmove(req.vend, vend_rfc1048, 4);
+
+	done = 0;
+	recv = 0;
+
+	kproc("rcvbootp", rcvbootp, (void*)dfd, KPDUPFDG);
+
+	/*
+	 * broadcast bootp's till we get a reply,
+	 * or fixed number of tries
+	 */
+	tries = 0;
+	while(recv == 0) {
+		if(kwrite(dfd, &req, sizeof(req)) < 0)
+			print("bootp: write: %r");
+
+		tsleep(&bootpr, return0, 0, 1000);
+		if(++tries > 10) {
+			print("bootp: timed out\n");
+			break;
+		}
+	}
+	kclose(dfd);
+	done = 1;
+	/* interrupt the receiver if it is still running */
+	if(rcvprocp != nil){
+		postnote(rcvprocp, 1, "timeout", 0);
+		rcvprocp = nil;
+	}
+
+	/* drop the temporary address again */
+	av[1] = "0.0.0.0";
+	av[2] = "0.0.0.0";
+	ipifcrem(ifc, av, 3);
+
+	/* install the address and mask we were given */
+	hnputl(nipaddr, ipaddr);
+	sprint(ia, "%V", nipaddr);
+	hnputl(nipmask, ipmask);
+	sprint(im, "%V", nipmask);
+	av[1] = ia;
+	av[2] = im;
+	ipifcadd(ifc, av, 3, 0, nil);
+
+	/* default route through the gateway, if one was supplied */
+	if(gwip != 0) {
+		hnputl(ngwip, gwip);
+		n = sprint(ia, "add 0.0.0.0 0.0.0.0 %V", ngwip);
+		routewrite(ifc->conv->p->f, nil, ia, n);
+	}
+	return nil;
+}
+
+/*
+ *  Read routine for the bootp results: formats the six addresses
+ *  (fsip, auip, gwip, ipmask, ipaddr, dnsip), one per line, into a
+ *  temporary buffer and copies the part selected by offset and len to
+ *  bp with readstr.  Returns readstr's result.  The temporary buffer
+ *  is freed even if readstr raises an error.
+ */
+static int
+rbootpread(char *bp, ulong offset, int len)
+{
+	char *s;
+	uchar ip[4];
+	int used;
+
+	s = smalloc(READSTR);
+	if(waserror()){
+		free(s);
+		nexterror();
+	}
+
+	hnputl(ip, fsip);
+	used = snprint(s, READSTR, "fsip %15V\n", ip);
+
+	hnputl(ip, auip);
+	used += snprint(s+used, READSTR-used, "auip %15V\n", ip);
+
+	hnputl(ip, gwip);
+	used += snprint(s+used, READSTR-used, "gwip %15V\n", ip);
+
+	hnputl(ip, ipmask);
+	used += snprint(s+used, READSTR-used, "ipmask %15V\n", ip);
+
+	hnputl(ip, ipaddr);
+	used += snprint(s+used, READSTR-used, "ipaddr %15V\n", ip);
+
+	hnputl(ip, dnsip);
+	snprint(s+used, READSTR-used, "dnsip %15V\n", ip);
+
+	len = readstr(offset, bp, len, s);
+	poperror();
+	free(s);
+	return len;
+}
+
+char*	(*bootp)(Ipifc*) = rbootp;
+int	(*bootpread)(char*, ulong, int) = rbootpread;
--- /dev/null
+++ b/os/ip.original/il.c
@@ -1,0 +1,1414 @@
+#include	"u.h"
+#include	"../port/lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"../port/error.h"
+
+#include	"ip.h"
+
+enum				/* Connection state */
+{
+	Ilclosed,
+	Ilsyncer,
+	Ilsyncee,
+	Ilestablished,
+	Illistening,
+	Ilclosing,
+	Ilopening,		/* only for file server */
+};
+
+char	*ilstates[] = 
+{ 
+	"Closed",
+	"Syncer",
+	"Syncee",
+	"Established",
+	"Listen",
+	"Closing",
+	"Opening",		/* only for file server */
+};
+
+enum				/* Packet types */
+{
+	Ilsync,
+	Ildata,
+	Ildataquery,
+	Ilack,
+	Ilquery,
+	Ilstate,
+	Ilclose,
+};
+
+char	*iltype[] = 
+{	
+	"sync",
+	"data",
+	"dataquery",
+	"ack",
+	"query",
+	"state",
+	"close" 
+};
+
+enum
+{
+	Seconds		= 1000,
+	Iltickms 	= 50,		/* time base */
+	AckDelay	= 2*Iltickms,	/* max time twixt message rcvd & ack sent */
+	MaxTimeout 	= 30*Seconds,	/* max time between rexmit */
+	QueryTime	= 10*Seconds,	/* time between subsequent queries */
+	DeathTime	= 30*QueryTime,
+
+	MaxRexmit 	= 16,		/* max retransmissions before hangup */
+	Defaultwin	= 20,
+
+	LogAGain	= 3,
+	AGain		= 1<<LogAGain,
+	LogDGain	= 2,
+	DGain		= 1<<LogDGain,
+
+	DefByteRate	= 100,		/* assume a megabit link */
+	DefRtt		= 50,		/* cross country on a great day */
+
+	Maxrq		= 64*1024,
+};
+
+enum
+{
+	Nqt=	8,
+};
+
+typedef struct Ilcb Ilcb;
+struct Ilcb			/* Control block */
+{
+	int	state;		/* Connection state */
+	Conv	*conv;
+	QLock	ackq;		/* Unacknowledged queue */
+	Block	*unacked;
+	Block	*unackedtail;
+	ulong	unackedbytes;
+	QLock	outo;		/* Out of order packet queue */
+	Block	*outoforder;
+	ulong	next;		/* Id of next to send */
+	ulong	recvd;		/* Last packet received */
+	ulong	acksent;	/* Last packet acked */
+	ulong	start;		/* Local start id */
+	ulong	rstart;		/* Remote start id */
+	int	window;		/* Maximum receive window */
+	int	rxquery;	/* number of queries on this connection */
+	int	rxtot;		/* number of retransmits on this connection */
+	int	rexmit;		/* number of retransmits of *unacked */
+	ulong	qt[Nqt+1];	/* state table for query messages */
+	int	qtx;		/* ... index into qt */
+
+	/* if set, fasttimeout causes a connection request to terminate after 4*Iltickms */
+	int	fasttimeout;
+
+	/* timers */
+	ulong	lastxmit;	/* time of last xmit */
+	ulong	lastrecv;	/* time of last recv */
+	ulong	timeout;	/* retransmission time for *unacked */
+	ulong	acktime;	/* time to send next ack */
+	ulong	querytime;	/* time to send next query */
+
+	/* adaptive measurements */
+	int	delay;		/* Average of the fixed rtt delay */
+	int	rate;		/* Average uchar rate */
+	int	mdev;		/* Mean deviation of rtt */
+	int	maxrtt;		/* largest rtt seen */
+	ulong	rttack;		/* The ack we are waiting for */
+	int	rttlen;		/* Length of rttack packet */
+	uvlong	rttstart;	/* Time we issued rttack packet */
+};
+
+enum
+{
+	IL_IPSIZE 	= 20,
+	IL_HDRSIZE	= 18,	
+	IL_LISTEN	= 0,
+	IL_CONNECT	= 1,
+	IP_ILPROTO	= 40,
+};
+
+typedef struct Ilhdr Ilhdr;
+struct Ilhdr
+{
+	uchar	vihl;		/* Version and header length */
+	uchar	tos;		/* Type of service */
+	uchar	length[2];	/* packet length */
+	uchar	id[2];		/* Identification */
+	uchar	frag[2];	/* Fragment information */
+	uchar	ttl;		/* Time to live */
+	uchar	proto;		/* Protocol */
+	uchar	cksum[2];	/* Header checksum */
+	uchar	src[4];		/* Ip source */
+	uchar	dst[4];		/* Ip destination */
+	uchar	ilsum[2];	/* Checksum including header */
+	uchar	illen[2];	/* Packet length */
+	uchar	iltype;		/* Packet type */
+	uchar	ilspec;		/* Special */
+	uchar	ilsrc[2];	/* Src port */
+	uchar	ildst[2];	/* Dst port */
+	uchar	ilid[4];	/* Sequence id */
+	uchar	ilack[4];	/* Acked sequence */
+};
+
+enum
+{
+	InMsgs,
+	OutMsgs,
+	CsumErrs,		/* checksum errors */
+	HlenErrs,		/* header length error */
+	LenErrs,		/* short packet */
+	OutOfOrder,		/* out of order */
+	Retrans,		/* retransmissions */
+	DupMsg,
+	DupBytes,
+	DroppedMsgs,
+
+	Nstats,
+};
+
+static char *statnames[] =
+{
+[InMsgs]	"InMsgs",
+[OutMsgs]	"OutMsgs",
+[CsumErrs]	"CsumErrs",
+[HlenErrs]	"HlenErr",
+[LenErrs]	"LenErrs",
+[OutOfOrder]	"OutOfOrder",
+[Retrans]	"Retrans",
+[DupMsg]	"DupMsg",
+[DupBytes]	"DupBytes",
+[DroppedMsgs]	"DroppedMsgs",
+};
+
+typedef struct Ilpriv Ilpriv;
+struct Ilpriv
+{
+	Ipht	ht;
+
+	ulong	stats[Nstats];
+
+	ulong	csumerr;		/* checksum errors */
+	ulong	hlenerr;		/* header length error */
+	ulong	lenerr;			/* short packet */
+	ulong	order;			/* out of order */
+	ulong	rexmit;			/* retransmissions */
+	ulong	dup;
+	ulong	dupb;
+
+	/* keeping track of the ack kproc */
+	int	ackprocstarted;
+	QLock	apl;
+};
+
+/* state for query/dataquery messages */
+
+
+void	ilrcvmsg(Conv*, Block*);
+void	ilsendctl(Conv*, Ilhdr*, int, ulong, ulong, int);
+void	ilackq(Ilcb*, Block*);
+void	ilprocess(Conv*, Ilhdr*, Block*);
+void	ilpullup(Conv*);
+void	ilhangup(Conv*, char*);
+void	ilfreeq(Ilcb*);
+void	ilrexmit(Ilcb*);
+void	ilbackoff(Ilcb*);
+void	ilsettimeout(Ilcb*);
+char*	ilstart(Conv*, int, int);
+void	ilackproc(void*);
+void	iloutoforder(Conv*, Ilhdr*, Block*);
+void	iliput(Proto*, Ipifc*, Block*);
+void	iladvise(Proto*, Block*, char*);
+int	ilnextqt(Ilcb*);
+void	ilcbinit(Ilcb*);
+int	later(ulong, ulong, char*);
+void	ilreject(Fs*, Ilhdr*);
+void	illocalclose(Conv *c);
+	int 	ilcksum = 1;
+static 	int 	initseq = 25001;
+static	ulong	scalediv, scalemul;
+static	char	*etime = "connection timed out";
+
+/*
+ *  Connect conversation c to the address in argv and start the IL
+ *  handshake.  A "!fasttimeout" suffix in argv[1] is cut off in place
+ *  and turned into the fast-timeout flag handed to ilstart.  Returns
+ *  Fsstdconnect's error string if it fails, otherwise ilstart's result.
+ */
+static char*
+ilconnect(Conv *c, char **argv, int argc)
+{
+	char *err, *suffix;
+	int quick;
+
+	/* huge hack to quickly try an il connection */
+	quick = 0;
+	suffix = nil;
+	if(argc > 1)
+		suffix = strstr(argv[1], "!fasttimeout");
+	if(suffix != nil){
+		*suffix = 0;	/* truncate argv[1] before Fsstdconnect sees it */
+		quick = 1;
+	}
+
+	err = Fsstdconnect(c, argv, argc);
+	if(err != nil)
+		return err;
+	return ilstart(c, IL_CONNECT, quick);
+}
+
+/*
+ *  Format a one-line status summary of conversation c into state (at
+ *  most n bytes, via snprint): state name, read and write queue
+ *  lengths, the delay, rate and mdev estimates shifted down by their
+ *  gain, unacked byte count, retransmit and query counters and the
+ *  largest rtt seen.  Returns snprint's result.
+ */
+static int
+ilstate(Conv *c, char *state, int n)
+{
+	Ilcb *ic;
+
+	ic = (Ilcb*)(c->ptcl);
+	return snprint(state, n, "%s qin %d qout %d del %5.5d Br %5.5d md %5.5d una %5.5lud rex %5.5d rxq %5.5d max %5.5d",
+		ilstates[ic->state],
+		c->rq ? qlen(c->rq) : 0,
+		c->wq ? qlen(c->wq) : 0,
+		ic->delay>>LogAGain, ic->rate>>LogAGain, ic->mdev>>LogDGain,
+		ic->unackedbytes, ic->rxtot, ic->rxquery, ic->maxrtt);
+}
+
+/* report whether conversation c is in any state other than Ilclosed */
+static int
+ilinuse(Conv *c)
+{
+	return ((Ilcb*)(c->ptcl))->state != Ilclosed;
+}
+
+/*
+ *  called with c locked
+ *
+ *  Announce conversation c on the address in argv and put it in the
+ *  listening state; Fsconnected is called only when both steps succeed.
+ *  Returns the first error string encountered, or nil.
+ */
+static char*
+ilannounce(Conv *c, char **argv, int argc)
+{
+	char *err;
+
+	err = Fsstdannounce(c, argv, argc);
+	if(err == nil)
+		err = ilstart(c, IL_LISTEN, 0);
+	if(err == nil)
+		Fsconnected(c, nil);
+	return err;
+}
+
+/*
+ *  Mark conversation c closed locally: set the state to Ilclosed,
+ *  remove it from the protocol's hash table and clear its local
+ *  address and port.  Nothing is sent to the peer.
+ */
+void
+illocalclose(Conv *c)
+{
+	Ilpriv *priv = c->p->priv;
+
+	((Ilcb*)c->ptcl)->state = Ilclosed;
+	iphtrem(&priv->ht, c);
+	ipmove(c->laddr, IPnoaddr);
+	c->lport = 0;
+}
+
+/*
+ *  Close routine for conversation c: close its queues, then either
+ *  start the closing handshake (connection in progress or established)
+ *  or close locally (listener), and free any queued packets.
+ */
+static void
+ilclose(Conv *c)
+{
+	Ilcb *ic = (Ilcb*)c->ptcl;
+
+	qclose(c->rq);
+	qclose(c->wq);
+	qclose(c->eq);
+
+	if(ic->state == Illistening)
+		illocalclose(c);
+	else if(ic->state == Ilsyncer || ic->state == Ilsyncee || ic->state == Ilestablished){
+		/* tell the peer and wait in Ilclosing */
+		ic->state = Ilclosing;
+		ilsettimeout(ic);
+		ilsendctl(c, nil, Ilclose, ic->next, ic->recvd, 0);
+	}
+	/* Ilclosing, Ilclosed and any other state need no protocol action */
+
+	ilfreeq(ic);
+}
+
+/*
+ *  Write-queue bypass routine (installed by ilcreate through qbypass;
+ *  x is the Conv).  Wraps bp in an IP+IL header as an Ildata message,
+ *  keeps a copy on the unacked queue and hands the packet to ipoput4.
+ */
+void
+ilkick(void *x, Block *bp)
+{
+	Conv *c = x;
+	Ilhdr *ih;
+	Ilcb *ic;
+	int dlen;
+	ulong id, ack;
+	Fs *f;
+	Ilpriv *priv;
+
+	f = c->p->f;
+	priv = c->p->priv;
+	ic = (Ilcb*)c->ptcl;
+
+	if(bp == nil)
+		return;
+
+	/* no data may be sent in these states: drop it and hang up the read queue */
+	switch(ic->state) {
+	case Ilclosed:
+	case Illistening:
+	case Ilclosing:
+		freeblist(bp);
+		qhangup(c->rq, nil);
+		return;
+	}
+
+	dlen = blocklen(bp);
+
+	/* Make space to fit il & ip */
+	bp = padblock(bp, IL_IPSIZE+IL_HDRSIZE);
+	ih = (Ilhdr *)(bp->rp);
+	ih->vihl = IP_VER4;
+
+	/* Ip fields */
+	ih->frag[0] = 0;
+	ih->frag[1] = 0;
+	v6tov4(ih->dst, c->raddr);
+	v6tov4(ih->src, c->laddr);
+	ih->proto = IP_ILPROTO;
+
+	/* Il fields */
+	hnputs(ih->illen, dlen+IL_HDRSIZE);
+	hnputs(ih->ilsrc, c->lport);
+	hnputs(ih->ildst, c->rport);
+
+	/* id assignment and queueing of the copy happen under the ackq lock */
+	qlock(&ic->ackq);
+	id = ic->next++;
+	hnputl(ih->ilid, id);
+	ack = ic->recvd;
+	hnputl(ih->ilack, ack);
+	ic->acksent = ack;	/* this data message carries the ack */
+	ic->acktime = NOW + AckDelay;
+	ih->iltype = Ildata;
+	ih->ilspec = 0;
+	ih->ilsum[0] = 0;
+	ih->ilsum[1] = 0;
+
+	/* Checksum of ilheader plus data (not ip & no pseudo header) */
+	if(ilcksum)
+		hnputs(ih->ilsum, ptclcsum(bp, IL_IPSIZE, dlen+IL_HDRSIZE));
+
+	ilackq(ic, bp);
+	qunlock(&ic->ackq);
+
+	/* Start the round trip timer for this packet if the timer is free */
+	if(ic->rttack == 0) {
+		ic->rttack = id;
+		ic->rttstart = fastticks(nil);
+		ic->rttlen = dlen + IL_IPSIZE + IL_HDRSIZE;
+	}
+
+	/* re-arm the retransmit timer only if the current one has already passed */
+	if(later(NOW, ic->timeout, nil))
+		ilsettimeout(ic);
+	ipoput4(f, bp, 0, c->ttl, c->tos, c);
+	priv->stats[OutMsgs]++;
+}
+
+/*
+ *  Create the queues for a new conversation: a read queue limited to
+ *  Maxrq and a write queue that bypasses queueing and calls ilkick.
+ */
+static void
+ilcreate(Conv *c)
+{
+	c->rq = qopen(Maxrq, 0, 0, c);
+	c->wq = qbypass(ilkick, c);
+}
+
+/*
+ *  Format the protocol's statistics table into buf (len bytes), one
+ *  "name: value" line per counter.  Returns the number of bytes used.
+ */
+int
+ilxstats(Proto *il, char *buf, int len)
+{
+	Ilpriv *priv = il->priv;
+	char *cur, *end;
+	int k;
+
+	cur = buf;
+	end = buf + len;
+	k = 0;
+	while(k < Nstats){
+		cur = seprint(cur, end, "%s: %lud\n", statnames[k], priv->stats[k]);
+		k++;
+	}
+	return cur - buf;
+}
+
+/*
+ *  Append a copy of bp to the tail of ic's unacked queue, in case the
+ *  original gets lost, and add its length to unackedbytes.  ilkick
+ *  calls this with ic->ackq held.
+ */
+void
+ilackq(Ilcb *ic, Block *bp)
+{
+	Block *copy;
+	int nbytes;
+
+	nbytes = blocklen(bp);
+	copy = copyblock(bp, nbytes);
+	copy->list = nil;
+
+	if(ic->unacked == nil)
+		ic->unacked = copy;
+	else
+		ic->unackedtail->list = copy;
+	ic->unackedtail = copy;
+	ic->unackedbytes += nbytes;
+}
+
+/*
+ *  Fold the round trip time of the packet being timed (ic->rttstart
+ *  until now) into the connection's delay, rate and mdev estimates and
+ *  track the largest rtt seen.  bp is the block that carried the ack;
+ *  its length is added to ic->rttlen.
+ */
+static
+void
+ilrttcalc(Ilcb *ic, Block *bp)
+{
+	int rtt, tt, pt, delay, rate;
+
+	rtt = fastticks(nil) - ic->rttstart;
+	rtt = (rtt*scalemul)/scalediv;	/* scale fast ticks with the constants set by inittimescale */
+	delay = ic->delay;
+	rate = ic->rate;
+
+	/* Guard against zero wrap */
+	if(rtt > 120000 || rtt < 0)
+		return;
+
+	/* this block had to be transmitted after the one acked so count its size */
+	ic->rttlen += blocklen(bp)  + IL_IPSIZE + IL_HDRSIZE;
+
+	if(ic->rttlen < 256){
+		/* guess fixed delay as rtt of small packets */
+		delay += rtt - (delay>>LogAGain);
+		if(delay < AGain)
+			delay = AGain;
+		ic->delay = delay;
+	} else {
+		/* if packet took longer than avg rtt delay, recalc rate */
+		tt = rtt - (delay>>LogAGain);
+		if(tt > 0){
+			rate += ic->rttlen/tt - (rate>>LogAGain);
+			if(rate < AGain)
+				rate = AGain;	/* keeps rate>>LogAGain at least 1 for the divisions that use it */
+			ic->rate = rate;
+		}
+	}
+
+	/* mdev */
+	/* pt is the rtt predicted from the current rate and delay estimates */
+	pt = ic->rttlen/(rate>>LogAGain) + (delay>>LogAGain);
+	ic->mdev += abs(rtt-pt) - (ic->mdev>>LogDGain);
+
+	if(rtt > ic->maxrtt)
+		ic->maxrtt = rtt;
+}
+
+/*
+ *  Process an acknowledgement of everything up to id ackto: update the
+ *  rtt estimates if ackto is the id being timed, then free every block
+ *  at the head of the unacked queue whose id is <= ackto.  bp is the
+ *  received block, used only for the rtt calculation.
+ */
+void
+ilackto(Ilcb *ic, ulong ackto, Block *bp)
+{
+	Ilhdr *h;
+	ulong id;
+
+	if(ic->rttack == ackto)
+		ilrttcalc(ic, bp);
+
+	/* Cancel if we've passed the packet we were interested in */
+	if(ic->rttack <= ackto)
+		ic->rttack = 0;
+
+	qlock(&ic->ackq);
+	while(ic->unacked) {
+		h = (Ilhdr *)ic->unacked->rp;
+		id = nhgetl(h->ilid);
+		if(ackto < id)
+			break;	/* first block not covered by this ack */
+
+		/* note: bp is reused here for the queued copy being released */
+		bp = ic->unacked;
+		ic->unacked = bp->list;
+		bp->list = nil;
+		ic->unackedbytes -= blocklen(bp);
+		freeblist(bp);
+		ic->rexmit = 0;	/* progress was made: restart the retransmit count and timer */
+		ilsettimeout(ic);
+	}
+	qunlock(&ic->ackq);
+}
+
+/*
+ *  Receive routine (installed as il->rcv by ilinit) for an incoming IL
+ *  packet in bp.  Checks the lengths and checksum, finds the matching
+ *  conversation (creating a new one when a sync arrives for a
+ *  listener) and passes the packet to ilprocess with that conversation
+ *  locked.  Packets that fail a check are freed here.
+ */
+void
+iliput(Proto *il, Ipifc*, Block *bp)
+{
+	char *st;
+	Ilcb *ic;
+	Ilhdr *ih;
+	uchar raddr[IPaddrlen];
+	uchar laddr[IPaddrlen];
+	ushort sp, dp, csum;
+	int plen, illen;
+	Conv *new, *s;
+	Ilpriv *ipriv;
+
+	ipriv = il->priv;
+
+	ih = (Ilhdr *)bp->rp;
+	plen = blocklen(bp);
+	if(plen < IL_IPSIZE+IL_HDRSIZE){
+		netlog(il->f, Logil, "il: hlenerr\n");
+		ipriv->stats[HlenErrs]++;
+		goto raise;
+	}
+
+	/*
+	 * illen comes off the wire: it must cover at least the IL header
+	 * (ilpullup later subtracts IL_HDRSIZE from it) and must not claim
+	 * more data than actually arrived.
+	 */
+	illen = nhgets(ih->illen);
+	if(illen < IL_HDRSIZE || illen+IL_IPSIZE > plen){
+		netlog(il->f, Logil, "il: lenerr\n");
+		ipriv->stats[LenErrs]++;
+		goto raise;
+	}
+
+	/* sp is taken from the packet's destination port, dp from its source port */
+	sp = nhgets(ih->ildst);
+	dp = nhgets(ih->ilsrc);
+	v4tov6(raddr, ih->src);
+	v4tov6(laddr, ih->dst);
+
+	if((csum = ptclcsum(bp, IL_IPSIZE, illen)) != 0) {
+		if(ih->iltype > Ilclose)
+			st = "?";
+		else
+			st = iltype[ih->iltype];
+		ipriv->stats[CsumErrs]++;
+		/* one verb per argument: csum, type name, id, ack, address, ports */
+		netlog(il->f, Logil, "il: cksum %ux, pkt(%s id %lud ack %lud %I/%d->%d)\n",
+			csum, st, nhgetl(ih->ilid), nhgetl(ih->ilack), raddr, sp, dp);
+		goto raise;
+	}
+
+	qlock(il);
+	s = iphtlook(&ipriv->ht, raddr, dp, laddr, sp);
+	if(s == nil){
+		if(ih->iltype == Ilsync)
+			ilreject(il->f, ih);		/* no listener */
+		qunlock(il);
+		goto raise;
+	}
+
+	ic = (Ilcb*)s->ptcl;
+	if(ic->state == Illistening){
+		if(ih->iltype != Ilsync){
+			qunlock(il);
+			if(ih->iltype > Ilclose)
+				st = "?";
+			else
+				st = iltype[ih->iltype];
+			ilreject(il->f, ih);		/* no channel and not sync */
+			netlog(il->f, Logil, "il: no channel, pkt(%s id %lud ack %lud %I/%ud->%ud)\n",
+				st, nhgetl(ih->ilid), nhgetl(ih->ilack), raddr, sp, dp);
+			goto raise;
+		}
+
+		/* a sync for a listener: set up a new conversation for this call */
+		new = Fsnewcall(s, raddr, dp, laddr, sp, V4);
+		if(new == nil){
+			qunlock(il);
+			netlog(il->f, Logil, "il: bad newcall %I/%ud->%ud\n", raddr, sp, dp);
+			ilsendctl(s, ih, Ilclose, 0, nhgetl(ih->ilid), 0);
+			goto raise;
+		}
+		s = new;
+
+		ic = (Ilcb*)s->ptcl;
+
+		ic->conv = s;
+		ic->state = Ilsyncee;
+		ilcbinit(ic);
+		ic->rstart = nhgetl(ih->ilid);
+		iphtadd(&ipriv->ht, s);
+	}
+
+	/* take the conversation lock before letting go of the protocol lock */
+	qlock(s);
+	qunlock(il);
+	if(waserror()){
+		qunlock(s);
+		nexterror();
+	}
+	ilprocess(s, ih, bp);
+	qunlock(s);
+	poperror();
+	return;
+raise:
+	freeblist(bp);
+}
+
+/*
+ *  Protocol state machine for one received packet.  h points at the
+ *  header inside bp; s is the conversation it belongs to (iliput calls
+ *  this, through ilprocess, with s locked).  Every path either frees bp
+ *  or hands it to iloutoforder.
+ */
+void
+_ilprocess(Conv *s, Ilhdr *h, Block *bp)
+{
+	Ilcb *ic;
+	ulong id, ack;
+	Ilpriv *priv;
+
+	id = nhgetl(h->ilid);
+	ack = nhgetl(h->ilack);
+
+	ic = (Ilcb*)s->ptcl;
+
+	/* any packet counts as a sign of life and postpones the next query */
+	ic->lastrecv = NOW;
+	ic->querytime = NOW + QueryTime;
+	priv = s->p->priv;
+	priv->stats[InMsgs]++;
+
+	switch(ic->state) {
+	default:
+		netlog(s->p->f, Logil, "il: unknown state %d\n", ic->state);
+		/* fall through */
+	case Ilclosed:
+		freeblist(bp);
+		break;
+	case Ilsyncer:
+		switch(h->iltype) {
+		default:
+			break;
+		case Ilsync:
+			if(ack != ic->start)
+				ilhangup(s, "connection rejected");
+			else {
+				ic->recvd = id;
+				ic->rstart = id;
+				ilsendctl(s, nil, Ilack, ic->next, ic->recvd, 0);
+				ic->state = Ilestablished;
+				ic->fasttimeout = 0;
+				ic->rexmit = 0;
+				Fsconnected(s, nil);
+				ilpullup(s);
+			}
+			break;
+		case Ilclose:
+			if(ack == ic->start)
+				ilhangup(s, "connection rejected");
+			break;
+		}
+		freeblist(bp);
+		break;
+	case Ilsyncee:
+		switch(h->iltype) {
+		default:
+			break;
+		case Ilsync:
+			if(id != ic->rstart || ack != 0){
+				illocalclose(s);
+			} else {
+				ic->recvd = id;
+				ilsendctl(s, nil, Ilsync, ic->start, ic->recvd, 0);
+			}
+			break;
+		case Ilack:
+			if(ack == ic->start) {
+				ic->state = Ilestablished;
+				ic->fasttimeout = 0;
+				ic->rexmit = 0;
+				ilpullup(s);
+			}
+			break;
+		case Ildata:
+			/* data that acks our sync completes the handshake too */
+			if(ack == ic->start) {
+				ic->state = Ilestablished;
+				ic->fasttimeout = 0;
+				ic->rexmit = 0;
+				goto established;
+			}
+			break;
+		case Ilclose:
+			if(ack == ic->start)
+				ilhangup(s, "remote close");
+			break;
+		}
+		freeblist(bp);
+		break;
+	case Ilestablished:
+	established:
+		switch(h->iltype) {
+		default:
+			/*
+			 * iltype comes off the wire and may be none of the
+			 * known types; without this case such a packet was
+			 * never freed.
+			 */
+			freeblist(bp);
+			break;
+		case Ilsync:
+			if(id != ic->rstart)
+				ilhangup(s, "remote close");
+			else
+				ilsendctl(s, nil, Ilack, ic->next, ic->rstart, 0);
+			freeblist(bp);
+			break;
+		case Ildata:
+			/*
+			 * avoid consuming all the mount rpc buffers in the
+			 * system.  if the input queue is too long, drop this
+			 * packet.
+			 */
+			if (s->rq && qlen(s->rq) >= Maxrq) {
+				priv->stats[DroppedMsgs]++;
+				freeblist(bp);
+				break;
+			}
+
+			ilackto(ic, ack, bp);
+			iloutoforder(s, h, bp);
+			ilpullup(s);
+			break;
+		case Ildataquery:
+			ilackto(ic, ack, bp);
+			iloutoforder(s, h, bp);
+			ilpullup(s);
+			ilsendctl(s, nil, Ilstate, ic->next, ic->recvd, h->ilspec);
+			break;
+		case Ilack:
+			ilackto(ic, ack, bp);
+			freeblist(bp);
+			break;
+		case Ilquery:
+			ilackto(ic, ack, bp);
+			ilsendctl(s, nil, Ilstate, ic->next, ic->recvd, h->ilspec);
+			freeblist(bp);
+			break;
+		case Ilstate:
+			if(ack >= ic->rttack)
+				ic->rttack = 0;
+			ilackto(ic, ack, bp);
+			/* ilspec indexes qt[], which has Nqt+1 slots */
+			if(h->ilspec > Nqt)
+				h->ilspec = 0;
+			/* the peer has seen less than we had sent when we asked: resend */
+			if(ic->qt[h->ilspec] > ack){
+				ilrexmit(ic);
+				ilsettimeout(ic);
+			}
+			freeblist(bp);
+			break;
+		case Ilclose:
+			freeblist(bp);
+			if(ack < ic->start || ack > ic->next)
+				break;
+			ic->recvd = id;
+			ilsendctl(s, nil, Ilclose, ic->next, ic->recvd, 0);
+			ic->state = Ilclosing;
+			ilsettimeout(ic);
+			ilfreeq(ic);
+			break;
+		}
+		break;
+	case Illistening:
+		freeblist(bp);
+		break;
+	case Ilclosing:
+		switch(h->iltype) {
+		case Ilclose:
+			ic->recvd = id;
+			ilsendctl(s, nil, Ilclose, ic->next, ic->recvd, 0);
+			if(ack == ic->next)
+				ilhangup(s, nil);
+			break;
+		default:
+			break;
+		}
+		freeblist(bp);
+		break;
+	}
+}
+
+/*
+ *  Retransmit the oldest unacknowledged packet as an Ildataquery.  A
+ *  copy of the head of the unacked queue is re-stamped with the
+ *  current ack and a fresh query slot, re-checksummed and sent; the
+ *  retransmit timer is backed off.  Does nothing if the queue is empty.
+ */
+void
+ilrexmit(Ilcb *ic)
+{
+	Ilhdr *h;
+	Block *nb;
+	Conv *c;
+	ulong id;
+	Ilpriv *priv;
+
+	/* copy under the lock; the queued original stays until it is acked */
+	nb = nil;
+	qlock(&ic->ackq);
+	if(ic->unacked)
+		nb = copyblock(ic->unacked, blocklen(ic->unacked));
+	qunlock(&ic->ackq);
+
+	if(nb == nil)
+		return;
+
+	h = (Ilhdr*)nb->rp;
+	h->vihl = IP_VER4;
+
+	h->iltype = Ildataquery;
+	hnputl(h->ilack, ic->recvd);
+	h->ilspec = ilnextqt(ic);
+	h->ilsum[0] = 0;
+	h->ilsum[1] = 0;
+	hnputs(h->ilsum, ptclcsum(nb, IL_IPSIZE, nhgets(h->illen)));
+
+	c = ic->conv;
+	id = nhgetl(h->ilid);
+	netlog(c->p->f, Logil, "il: rexmit %d %ud: %d %d: %i %d/%d\n", id, ic->recvd,
+		ic->rexmit, ic->timeout,
+		c->raddr, c->lport, c->rport);
+
+	ilbackoff(ic);
+
+	ipoput4(c->p->f, nb, 0, c->ttl, c->tos, c);
+
+	/* statistics */
+	ic->rxtot++;
+	priv = c->p->priv;
+	priv->rexmit++;
+	/* also count it in the table ilxstats reports, which was never updated */
+	priv->stats[Retrans]++;
+}
+
+/*
+ *  DEBUG wrapper around _ilprocess: logs the connection state and the
+ *  packet before processing, and the resulting state afterwards.
+ */
+void
+ilprocess(Conv *s, Ilhdr *h, Block *bp)
+{
+	Ilcb *ic;
+	char *tn;
+
+	ic = (Ilcb*)s->ptcl;
+
+	/*
+	 * iltype comes off the wire; don't index the name table with a
+	 * value past its last entry (iliput guards its lookups the same way).
+	 */
+	if(h->iltype > Ilclose)
+		tn = "?";
+	else
+		tn = iltype[h->iltype];
+
+	USED(ic);
+	USED(tn);
+	netlog(s->p->f, Logilmsg, "%11s rcv %d/%d snt %d/%d pkt(%s id %d ack %d %d->%d) ",
+		ilstates[ic->state],  ic->rstart, ic->recvd, ic->start,
+		ic->next, tn, nhgetl(h->ilid),
+		nhgetl(h->ilack), nhgets(h->ilsrc), nhgets(h->ildst));
+
+	_ilprocess(s, h, bp);
+
+	netlog(s->p->f, Logilmsg, "%11s rcv %d snt %d\n", ilstates[ic->state], ic->recvd, ic->next);
+}
+
+/*
+ *  Tear down conversation s locally: log the reason, close it with
+ *  illocalclose and hang up both queues with msg.  If the connection
+ *  was still in Ilsyncer state, Fsconnected is also called with msg.
+ */
+void
+ilhangup(Conv *s, char *msg)
+{
+	Ilcb *ic = (Ilcb*)s->ptcl;
+	int wasdialing;
+
+	netlog(s->p->f, Logil, "il: hangup! %I %d/%d: %s\n", s->raddr,
+		s->lport, s->rport, msg?msg:"no reason");
+
+	/* sample the state first: illocalclose overwrites it */
+	wasdialing = (ic->state == Ilsyncer);
+	illocalclose(s);
+
+	qhangup(s->rq, msg);
+	qhangup(s->wq, msg);
+
+	if(wasdialing)
+		Fsconnected(s, msg);
+}
+
+/*
+ *  Deliver in-sequence messages from the out-of-order list to the
+ *  conversation's read queue.  Only runs when the connection is
+ *  established; stops at the first gap in the sequence.
+ */
+void
+ilpullup(Conv *s)
+{
+	Ilcb *ic;
+	Ilhdr *oh;
+	Block *bp;
+	ulong oid, dlen;
+	Ilpriv *ipriv;
+
+	ic = (Ilcb*)s->ptcl;
+	if(ic->state != Ilestablished)
+		return;
+
+	qlock(&ic->outo);
+	while(ic->outoforder) {
+		bp = ic->outoforder;
+		oh = (Ilhdr*)bp->rp;
+		oid = nhgetl(oh->ilid);
+		/* already delivered: discard */
+		if(oid <= ic->recvd) {
+			ic->outoforder = bp->list;
+			freeblist(bp);
+			continue;
+		}
+		/* a gap remains before this message: count it and wait */
+		if(oid != ic->recvd+1){
+			ipriv = s->p->priv;
+			ipriv->stats[OutOfOrder]++;
+			break;
+		}
+
+		ic->recvd = oid;
+		ic->outoforder = bp->list;
+
+		bp->list = nil;
+		/* strip the IP and IL headers, keeping only the data bytes */
+		dlen = nhgets(oh->illen)-IL_HDRSIZE;
+		bp = trimblock(bp, IL_IPSIZE+IL_HDRSIZE, dlen);
+		/*
+		 * Upper levels don't know about multiple-block
+		 * messages so copy all into one (yick).
+		 */
+		bp = concatblock(bp);
+		if(bp == 0)
+			panic("ilpullup");
+		bp = packblock(bp);
+		if(bp == 0)
+			panic("ilpullup2");
+		qpass(s->rq, bp);
+	}
+	qunlock(&ic->outo);
+}
+
+/*
+ *  Insert received message bp (header h) into the conversation's
+ *  out-of-order list, which is kept sorted by ascending id.  Messages
+ *  outside the receive window and duplicates of queued ids are freed.
+ */
+void
+iloutoforder(Conv *s, Ilhdr *h, Block *bp)
+{
+	Ilcb *ic;
+	uchar *lid;
+	Block *f, **l;
+	ulong id, newid;
+	Ilpriv *ipriv;
+
+	ipriv = s->p->priv;
+	ic = (Ilcb*)s->ptcl;
+	bp->list = nil;
+
+	id = nhgetl(h->ilid);
+	/* Window checks */
+	if(id <= ic->recvd || id > ic->recvd+ic->window) {
+		netlog(s->p->f, Logil, "il: message outside window %ud <%ud-%ud>: %i %d/%d\n",
+			id, ic->recvd, ic->recvd+ic->window, s->raddr, s->lport, s->rport);
+		freeblist(bp);
+		return;
+	}
+
+	/* Packet is acceptable so sort onto receive queue for pullup */
+	qlock(&ic->outo);
+	if(ic->outoforder == nil)
+		ic->outoforder = bp;
+	else {
+		/* l always points at the link that leads to f */
+		l = &ic->outoforder;
+		for(f = *l; f; f = f->list) {
+			lid = ((Ilhdr*)(f->rp))->ilid;
+			newid = nhgetl(lid);
+			if(id <= newid) {
+				if(id == newid) {
+					/* already queued: count the duplicate and drop it */
+					ipriv->stats[DupMsg]++;
+					ipriv->stats[DupBytes] += blocklen(bp);
+					qunlock(&ic->outo);
+					freeblist(bp);
+					return;
+				}
+				/* insert in front of the first entry with a larger id */
+				bp->list = f;
+				*l = bp;
+				qunlock(&ic->outo);
+				return;
+			}
+			l = &f->list;
+		}
+		/* larger than everything queued: append */
+		*l = bp;
+	}
+	qunlock(&ic->outo);
+}
+
+/*
+ *  Build and send a header-only control message of the given type.
+ *
+ *  If inih is non-nil the message is a reply to that header: addresses,
+ *  ports and id/ack are taken from it, swapped, and the id and ack
+ *  arguments are ignored; it goes out with MAXTTL and DFLTTOS.
+ *  Otherwise addressing comes from ipc, id and ack from the arguments,
+ *  and ipc's acksent/acktime are updated.
+ *
+ *  ipc must be non-nil in both cases: it is used for logging and is
+ *  passed to ipoput4.
+ */
+void
+ilsendctl(Conv *ipc, Ilhdr *inih, int type, ulong id, ulong ack, int ilspec)
+{
+	Ilhdr *ih;
+	Ilcb *ic;
+	Block *bp;
+	int ttl, tos;
+
+	/* sanity checks must come before the first use of ipc to be of any help */
+	if(ipc==nil)
+		panic("ipc is nil caller is %.8lux", getcallerpc(&ipc));
+	if(ipc->p==nil)
+		panic("ipc->p is nil");
+
+	bp = allocb(IL_IPSIZE+IL_HDRSIZE);
+	bp->wp += IL_IPSIZE+IL_HDRSIZE;
+
+	ih = (Ilhdr *)(bp->rp);
+	ih->vihl = IP_VER4;
+
+	/* Ip fields */
+	ih->proto = IP_ILPROTO;
+	hnputs(ih->illen, IL_HDRSIZE);
+	ih->frag[0] = 0;
+	ih->frag[1] = 0;
+	if(inih) {
+		/* reply: mirror the incoming header */
+		hnputl(ih->dst, nhgetl(inih->src));
+		hnputl(ih->src, nhgetl(inih->dst));
+		hnputs(ih->ilsrc, nhgets(inih->ildst));
+		hnputs(ih->ildst, nhgets(inih->ilsrc));
+		hnputl(ih->ilid, nhgetl(inih->ilack));
+		hnputl(ih->ilack, nhgetl(inih->ilid));
+		ttl = MAXTTL;
+		tos = DFLTTOS;
+	}
+	else {
+		v6tov4(ih->dst, ipc->raddr);
+		v6tov4(ih->src, ipc->laddr);
+		hnputs(ih->ilsrc, ipc->lport);
+		hnputs(ih->ildst, ipc->rport);
+		hnputl(ih->ilid, id);
+		hnputl(ih->ilack, ack);
+		/* every control message carries an ack, so record it as sent */
+		ic = (Ilcb*)ipc->ptcl;
+		ic->acksent = ack;
+		ic->acktime = NOW;
+		ttl = ipc->ttl;
+		tos = ipc->tos;
+	}
+	ih->iltype = type;
+	ih->ilspec = ilspec;
+	ih->ilsum[0] = 0;
+	ih->ilsum[1] = 0;
+
+	if(ilcksum)
+		hnputs(ih->ilsum, ptclcsum(bp, IL_IPSIZE, IL_HDRSIZE));
+
+	netlog(ipc->p->f, Logilmsg, "ctl(%s id %d ack %d %d->%d)\n",
+		iltype[ih->iltype], nhgetl(ih->ilid), nhgetl(ih->ilack),
+		nhgets(ih->ilsrc), nhgets(ih->ildst));
+
+	ipoput4(ipc->p->f, bp, 0, ttl, tos, ipc);
+}
+
+/*
+ *  Send an Ilclose in reply to inih when no conversation is available:
+ *  addresses, ports and id/ack are taken from inih and swapped, and the
+ *  packet is sent through f with nil in place of a conversation.
+ */
+void
+ilreject(Fs *f, Ilhdr *inih)
+{
+	Ilhdr *ih;
+	Block *bp;
+
+	bp = allocb(IL_IPSIZE+IL_HDRSIZE);
+	bp->wp += IL_IPSIZE+IL_HDRSIZE;
+
+	ih = (Ilhdr *)(bp->rp);
+	ih->vihl = IP_VER4;
+
+	/* Ip fields */
+	ih->proto = IP_ILPROTO;
+	hnputs(ih->illen, IL_HDRSIZE);
+	ih->frag[0] = 0;
+	ih->frag[1] = 0;
+	hnputl(ih->dst, nhgetl(inih->src));
+	hnputl(ih->src, nhgetl(inih->dst));
+	hnputs(ih->ilsrc, nhgets(inih->ildst));
+	hnputs(ih->ildst, nhgets(inih->ilsrc));
+	hnputl(ih->ilid, nhgetl(inih->ilack));
+	hnputl(ih->ilack, nhgetl(inih->ilid));
+	ih->iltype = Ilclose;
+	ih->ilspec = 0;
+	ih->ilsum[0] = 0;
+	ih->ilsum[1] = 0;
+
+	/* checksum covers the IL header only; there is no data */
+	if(ilcksum)
+		hnputs(ih->ilsum, ptclcsum(bp, IL_IPSIZE, IL_HDRSIZE));
+
+	ipoput4(f, bp, 0, MAXTTL, DFLTTOS, nil);
+}
+
+/*
+ *  Set the retransmission deadline from the current estimates: fixed
+ *  delay, time to send the unacked bytes at the estimated rate, twice
+ *  the mean deviation and the ack delay, capped at MaxTimeout.
+ */
+void
+ilsettimeout(Ilcb *ic)
+{
+	ulong wait;
+
+	wait = ic->delay>>LogAGain;
+	wait += ic->unackedbytes/(ic->rate>>LogAGain);
+	wait += ic->mdev>>(LogDGain-1);
+	wait += AckDelay;
+	if(wait > MaxTimeout)
+		wait = MaxTimeout;
+	ic->timeout = NOW + wait;
+}
+
+/*
+ *  Like ilsettimeout, but stretch the interval by half for every
+ *  retransmission already made, then count one more retransmission.
+ *  With fasttimeout set the deadline is always one tick away.
+ */
+void
+ilbackoff(Ilcb *ic)
+{
+	ulong wait;
+	int round;
+
+	wait = ic->delay>>LogAGain;
+	wait += ic->unackedbytes/(ic->rate>>LogAGain);
+	wait += ic->mdev>>(LogDGain-1);
+	wait += AckDelay;
+
+	round = 0;
+	while(round < ic->rexmit){
+		wait += wait>>1;
+		round++;
+	}
+	if(wait > MaxTimeout)
+		wait = MaxTimeout;
+	ic->timeout = NOW + wait;
+
+	if(ic->fasttimeout)
+		ic->timeout = NOW+Iltickms;
+
+	ic->rexmit++;
+}
+
+// complain if two numbers not within an hour of each other
+// (Tfuture is one hour expressed in milliseconds)
+#define Tfuture (1000*60*60)
+/*
+ *  Compare two tick values using their signed difference.  Returns 1
+ *  if t1 is after t2, and also if t1 is more than Tfuture before t2;
+ *  otherwise 0.  When x is non-nil it labels a printed complaint about
+ *  differences larger than Tfuture in either direction.
+ */
+int
+later(ulong t1, ulong t2, char *x)
+{
+	int dt;
+
+	dt = t1 - t2;	/* unsigned difference reinterpreted as signed */
+	if(dt > 0) {
+		if(x != nil && dt > Tfuture)
+			print("%s: way future %d\n", x, dt);
+		return 1;
+	}
+	if(dt < -Tfuture) {
+		if(x != nil)
+			print("%s: way past %d\n", x, -dt);
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ *  Timer kproc started by ilstart; x is the IL Proto.  Every Iltickms
+ *  it walks the protocol's conversations and acts on expired timers:
+ *  resending close and sync messages, sending delayed acks and queries,
+ *  and hanging up connections that stay silent or exceed MaxRexmit.
+ *  Never returns.
+ */
+void
+ilackproc(void *x)
+{
+	Ilcb *ic;
+	Conv **s, *p;
+	Proto *il;
+
+	il = x;
+
+loop:
+	tsleep(&up->sleep, return0, 0, Iltickms);
+	for(s = il->conv; s && *s; s++) {
+		p = *s;
+		ic = (Ilcb*)p->ptcl;
+
+		switch(ic->state) {
+		case Ilclosed:
+		case Illistening:
+			break;
+		case Ilclosing:
+			/* keep resending the close until it is answered or we give up */
+			if(later(NOW, ic->timeout, "timeout0")) {
+				if(ic->rexmit > MaxRexmit){
+					ilhangup(p, nil);
+					break;
+				}
+				ilsendctl(p, nil, Ilclose, ic->next, ic->recvd, 0);
+				ilbackoff(ic);
+			}
+			break;
+
+		case Ilsyncee:
+		case Ilsyncer:
+			/* handshake not finished: resend the sync */
+			if(later(NOW, ic->timeout, "timeout1")) {
+				if(ic->rexmit > MaxRexmit){
+					ilhangup(p, etime);
+					break;
+				}
+				ilsendctl(p, nil, Ilsync, ic->start, ic->recvd, 0);
+				ilbackoff(ic);
+			}
+			break;
+
+		case Ilestablished:
+			/* send an ack that has been owed for longer than the ack delay */
+			if(ic->recvd != ic->acksent)
+			if(later(NOW, ic->acktime, "acktime"))
+				ilsendctl(p, nil, Ilack, ic->next, ic->recvd, 0);
+
+			/* nothing received for a while: query the peer, or give up after DeathTime */
+			if(later(NOW, ic->querytime, "querytime")){
+				if(later(NOW, ic->lastrecv+DeathTime, "deathtime")){
+					netlog(il->f, Logil, "il: hangup: deathtime\n");
+					ilhangup(p, etime);
+					break;
+				}
+				ilsendctl(p, nil, Ilquery, ic->next, ic->recvd, ilnextqt(ic));
+				ic->querytime = NOW + QueryTime;
+			}
+
+			/* data still unacked past the retransmit deadline: ask the peer what it has */
+			if(ic->unacked != nil)
+			if(later(NOW, ic->timeout, "timeout2")) {
+				if(ic->rexmit > MaxRexmit){
+					netlog(il->f, Logil, "il: hangup: too many rexmits\n");
+					ilhangup(p, etime);
+					break;
+				}
+				ilsendctl(p, nil, Ilquery, ic->next, ic->recvd, ilnextqt(ic));
+				ic->rxquery++;
+				ilbackoff(ic);
+			}
+			break;
+		}
+	}
+	goto loop;
+}
+
+/*
+ *  Reset a control block for a new connection: pick a random start id,
+ *  clear the queues and counters, and seed the timing estimates with
+ *  the defaults.  The state field is left for the caller to set.
+ */
+void
+ilcbinit(Ilcb *ic)
+{
+	ic->start = nrand(0x1000000);
+	ic->next = ic->start+1;
+	ic->recvd = 0;
+	ic->window = Defaultwin;
+	ic->unackedbytes = 0;
+	ic->unacked = nil;
+	ic->outoforder = nil;
+	ic->rexmit = 0;
+	ic->rxtot = 0;
+	ic->rxquery = 0;
+	ic->qtx = 1;
+	ic->fasttimeout = 0;
+
+	/* timers */
+	/* the estimates are stored scaled up by their gain */
+	ic->delay = DefRtt<<LogAGain;
+	ic->mdev = DefRtt<<LogDGain;
+	ic->rate = DefByteRate<<LogAGain;
+	ic->querytime = NOW + QueryTime;
+	ic->lastrecv = NOW;	/* or we'll timeout right away */
+	ilsettimeout(ic);
+}
+
+/*
+ *  Start conversation c as a listener (IL_LISTEN) or as the calling
+ *  side of a connection (IL_CONNECT).  Makes sure the protocol's ack
+ *  kproc is running first.  If the control block is not in Ilclosed
+ *  state nothing else is done.  Always returns nil.
+ */
+char*
+ilstart(Conv *c, int type, int fasttimeout)
+{
+	Ilcb *ic;
+	Ilpriv *ipriv;
+	char kpname[KNAMELEN];
+
+	ipriv = c->p->priv;
+
+	/* the flag is checked again under apl so only one ack kproc is started */
+	if(ipriv->ackprocstarted == 0){
+		qlock(&ipriv->apl);
+		if(ipriv->ackprocstarted == 0){
+			sprint(kpname, "#I%dilack", c->p->f->dev);
+			kproc(kpname, ilackproc, c->p, 0);
+			ipriv->ackprocstarted = 1;
+		}
+		qunlock(&ipriv->apl);
+	}
+
+	ic = (Ilcb*)c->ptcl;
+	ic->conv = c;
+
+	if(ic->state != Ilclosed)
+		return nil;
+
+	ilcbinit(ic);
+
+	if(fasttimeout){
+		/* timeout if we can't connect quickly */
+		ic->fasttimeout = 1;
+		ic->timeout = NOW+Iltickms;
+		ic->rexmit = MaxRexmit - 4;	/* leaves only a few retries before ilackproc gives up */
+	};
+
+	switch(type) {
+	default:
+		netlog(c->p->f, Logil, "il: start: type %d\n", type);
+		break;
+	case IL_LISTEN:
+		ic->state = Illistening;
+		iphtadd(&ipriv->ht, c);
+		break;
+	case IL_CONNECT:
+		/* send the first sync; ilackproc resends it on timeout */
+		ic->state = Ilsyncer;
+		iphtadd(&ipriv->ht, c);
+		ilsendctl(c, nil, Ilsync, ic->start, ic->recvd, 0);
+		break;
+	}
+
+	return nil;
+}
+
+/*
+ *  Free everything queued on the control block: the unacknowledged
+ *  copies and the out-of-order list, each under its own lock.  The
+ *  unacked byte count and tail pointer are reset along with the queue,
+ *  so the timeout calculations and the status line no longer see bytes
+ *  that have been freed.
+ */
+void
+ilfreeq(Ilcb *ic)
+{
+	Block *bp, *next;
+
+	qlock(&ic->ackq);
+	for(bp = ic->unacked; bp; bp = next) {
+		next = bp->list;
+		freeblist(bp);
+	}
+	ic->unacked = nil;
+	ic->unackedtail = nil;
+	ic->unackedbytes = 0;	/* nothing is outstanding any more */
+	qunlock(&ic->ackq);
+
+	qlock(&ic->outo);
+	for(bp = ic->outoforder; bp; bp = next) {
+		next = bp->list;
+		freeblist(bp);
+	}
+	ic->outoforder = nil;
+	qunlock(&ic->outo);
+}
+
+/*
+ *  Advise routine (installed as il->advise by ilinit).  bp starts with
+ *  the header of a packet; the conversation whose local port and
+ *  address match that header's source, and whose remote address
+ *  matches its destination, is looked up.  If one is found and is
+ *  still in Ilsyncer state it is hung up with msg.  bp is always freed.
+ */
+void
+iladvise(Proto *il, Block *bp, char *msg)
+{
+	Ilhdr *h;
+	Ilcb *ic;		
+	uchar source[IPaddrlen], dest[IPaddrlen];
+	ushort psource;
+	Conv *s, **p;
+
+	h = (Ilhdr*)(bp->rp);
+
+	v4tov6(dest, h->dst);
+	v4tov6(source, h->src);
+	psource = nhgets(h->ilsrc);
+
+
+	/* Look for a connection, unfortunately the destination port is missing */
+	qlock(il);
+	for(p = il->conv; *p; p++) {
+		s = *p;
+		if(s->lport == psource)
+		if(ipcmp(s->laddr, source) == 0)
+		if(ipcmp(s->raddr, dest) == 0){
+			/* the protocol lock is released before acting on the match */
+			qunlock(il);
+			ic = (Ilcb*)s->ptcl;
+			switch(ic->state){
+			case Ilsyncer:
+				ilhangup(s, msg);
+				break;
+			}
+			freeblist(bp);
+			return;
+		}
+	}
+	qunlock(il);
+	freeblist(bp);
+}
+
+/*
+ *  Advance to the next query slot (cycling through 1..Nqt), record the
+ *  highest transmitted id in it and in slot 0, and return the slot
+ *  number.
+ */
+int
+ilnextqt(Ilcb *ic)
+{
+	int slot;
+
+	qlock(&ic->ackq);
+	slot = ic->qtx + 1;
+	if(slot > Nqt)
+		slot = 1;
+	ic->qtx = slot;
+	ic->qt[slot] = ic->next-1;	/* highest xmitted packet */
+	ic->qt[0] = ic->qt[slot];	/* compatibility with old implementations */
+	qunlock(&ic->ackq);
+
+	return slot;
+}
+
+/*
+ * calculate scale constants that converts fast ticks to ms (more or less)
+ *
+ * ilrttcalc computes ms as (ticks*scalemul)/scalediv.
+ * NOTE(review): a reported hz of 0 would divide by zero in the else
+ * branch; presumably fastticks never reports that -- confirm.
+ */
+static void
+inittimescale(void)
+{
+	uvlong hz;
+
+	fastticks(&hz);
+	if(hz > 1000){
+		/* more than one tick per ms: divide */
+		scalediv = hz/1000;
+		scalemul = 1;
+	} else {
+		/* at most one tick per ms: multiply */
+		scalediv = 1;
+		scalemul = 1000/hz;
+	}
+}
+
+/* gc routine (installed as il->gc by ilinit): defers to natgc for this protocol's number */
+int
+ilgc(Proto *il)
+{
+	return natgc(il->ipproto);
+}
+
+/*
+ *  Set up the IL protocol for Fs f: compute the time scale constants,
+ *  allocate the Proto and its private state, fill in the method table
+ *  and register it with Fsproto.
+ */
+void
+ilinit(Fs *f)
+{
+	Proto *il;
+
+	inittimescale();
+
+	il = smalloc(sizeof(Proto));
+	il->priv = smalloc(sizeof(Ilpriv));
+	il->name = "il";
+	il->connect = ilconnect;
+	il->announce = ilannounce;
+	il->state = ilstate;
+	il->create = ilcreate;
+	il->close = ilclose;
+	il->rcv = iliput;
+	il->ctl = nil;	/* no protocol-specific ctl routine */
+	il->advise = iladvise;
+	il->stats = ilxstats;
+	il->inuse = ilinuse;
+	il->gc = ilgc;
+	il->ipproto = IP_ILPROTO;
+	il->nc = scalednconv();
+	il->ptclsize = sizeof(Ilcb);	/* size of each conversation's ptcl area */
+	Fsproto(f, il);
+}
--- /dev/null
+++ b/os/ip.original/ip.c
@@ -1,0 +1,797 @@
+#include	"u.h"
+#include	"../port/lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"../port/error.h"
+
+#include	"ip.h"
+
+typedef struct IP		IP;
+typedef struct Fragment4	Fragment4;
+typedef struct Fragment6	Fragment6;
+typedef struct Ipfrag		Ipfrag;
+
+enum
+{
+	IP4HDR		= 20,		/* sizeof(Ip4hdr) */
+	IP6HDR		= 40,		/* sizeof(Ip6hdr) */
+	IP_HLEN4	= 0x05,		/* Header length in words */
+	IP_DF		= 0x4000,	/* Don't fragment */
+	IP_MF		= 0x2000,	/* More fragments */
+	IP6FHDR		= 8, 		/* sizeof(Fraghdr6) */
+	IP_MAX		= 64*1024,	/* Maximum Internet packet size */
+};
+
+#define BLKIPVER(xp)	(((Ip4hdr*)((xp)->rp))->vihl&0xF0)
+
+/* MIB II counters */
+enum
+{
+	Forwarding,
+	DefaultTTL,
+	InReceives,
+	InHdrErrors,
+	InAddrErrors,
+	ForwDatagrams,
+	InUnknownProtos,
+	InDiscards,
+	InDelivers,
+	OutRequests,
+	OutDiscards,
+	OutNoRoutes,
+	ReasmTimeout,
+	ReasmReqds,
+	ReasmOKs,
+	ReasmFails,
+	FragOKs,
+	FragFails,
+	FragCreates,
+
+	Nstats,
+};
+
+struct Fragment4
+{
+	Block*	blist;	/* fragments received so far, chained by Block.next in offset order */
+	Fragment4*	next;	/* next queue on ip->flisthead4 or ip->fragfree4 */
+	ulong 	src;	/* v4 source address, as read by nhgetl */
+	ulong 	dst;	/* v4 destination address, as read by nhgetl */
+	ushort	id;	/* ip identification the fragments are matched on */
+	ulong 	age;	/* NOW+30000 at allocation; queue is dropped once age < NOW */
+};
+
+struct Fragment6	/* NOTE(review): fields mirror Fragment4; the v6 users are not in this file - confirm semantics there */
+{
+	Block*	blist;
+	Fragment6*	next;	/* free-list link threaded by initfrag */
+	uchar 	src[IPaddrlen];
+	uchar 	dst[IPaddrlen];
+	uint	id;
+	ulong 	age;
+};
+
+struct Ipfrag	/* per-fragment bookkeeping that BKFG overlays on Block.base */
+{
+	ushort	foff;	/* data offset in bytes: frag field << 3, truncated to 16 bits */
+	ushort	flen;	/* data bytes in the fragment (ip length minus IP4HDR) */
+};
+
+/* an instance of IP */
+struct IP
+{
+	ulong		stats[Nstats];	/* MIB II counters, indexed by the enum above; printed by ipstats */
+
+	QLock		fraglock4;	/* held around all use of flisthead4/fragfree4 */
+	Fragment4*	flisthead4;	/* reassembly queues in use; ipfragallo4 pushes at the head */
+	Fragment4*	fragfree4;	/* unused reassembly queues */
+	Ref		id4;	/* incref'd to produce the id of each locally originated v4 datagram */
+
+	QLock		fraglock6;	/* presumably the v6 counterparts of the four fields above */
+	Fragment6*	flisthead6;
+	Fragment6*	fragfree6;	/* filled in by initfrag */
+	Ref		id6;
+
+	int		iprouting;	/* true if we route like a gateway */
+};
+
+static char *statnames[] =
+{
+[Forwarding]	"Forwarding",
+[DefaultTTL]	"DefaultTTL",
+[InReceives]	"InReceives",
+[InHdrErrors]	"InHdrErrors",
+[InAddrErrors]	"InAddrErrors",
+[ForwDatagrams]	"ForwDatagrams",
+[InUnknownProtos]	"InUnknownProtos",
+[InDiscards]	"InDiscards",
+[InDelivers]	"InDelivers",
+[OutRequests]	"OutRequests",
+[OutDiscards]	"OutDiscards",
+[OutNoRoutes]	"OutNoRoutes",
+[ReasmTimeout]	"ReasmTimeout",
+[ReasmReqds]	"ReasmReqds",
+[ReasmOKs]	"ReasmOKs",
+[ReasmFails]	"ReasmFails",
+[FragOKs]	"FragOKs",
+[FragFails]	"FragFails",
+[FragCreates]	"FragCreates",
+};
+
+#define BLKIP(xp)	((Ip4hdr*)((xp)->rp))
+/*
+ * This sleazy macro relies on the media header size being
+ * larger than sizeof(Ipfrag). ipreassemble checks this is true
+ */
+#define BKFG(xp)	((Ipfrag*)((xp)->base))
+
+ushort		ipcsum(uchar*);
+Block*		ip4reassemble(IP*, int, Block*, Ip4hdr*);
+void		ipfragfree4(IP*, Fragment4*);
+Fragment4*	ipfragallo4(IP*);
+
+
+void
+ip_init_6(Fs *f)
+{
+	V6params *p;
+	Routerparams *rp;
+
+	p = smalloc(sizeof(*p));
+	f->v6p = p;
+
+	/* router defaults */
+	rp = &p->rp;
+	rp->mflag = 0;			/* default not managed */
+	rp->oflag = 0;
+	rp->maxraint = 600000;		/* millisecs */
+	rp->minraint = 200000;
+	rp->routerlt = 3*rp->maxraint;
+	rp->linkmtu = 0;		/* no mtu sent */
+	rp->reachtime = 0;
+	rp->rxmitra = 0;
+	rp->ttl = MAXTTL;
+
+	p->hp.rxmithost = 1000;		/* v6 RETRANS_TIMER */
+	p->cdrouter = -1;
+}
+
+void
+initfrag(IP *ip, int size)
+{
+	Fragment4 *pool4;
+	Fragment6 *pool6;
+	int i;
+
+	/* one array of `size' queues per IP version, each threaded into a free list */
+	pool4 = malloc(sizeof(Fragment4) * size);
+	if(pool4 == nil)
+		panic("initfrag");
+
+	for(i = 0; i < size; i++)
+		pool4[i].next = &pool4[i+1];
+	pool4[size-1].next = nil;	/* last entry terminates the list */
+	ip->fragfree4 = pool4;
+
+	pool6 = malloc(sizeof(Fragment6) * size);
+	if(pool6 == nil)
+		panic("initfrag");
+
+	for(i = 0; i < size; i++)
+		pool6[i].next = &pool6[i+1];
+	pool6[size-1].next = nil;
+	ip->fragfree6 = pool6;
+}
+
+void
+ip_init(Fs *f)
+{
+	/*
+	 * allocate the per-stack IP state with 100 reassembly queues
+	 * per version, then fill in the v6 defaults
+	 */
+	f->ip = smalloc(sizeof(IP));
+	initfrag(f->ip, 100);
+	ip_init_6(f);
+}
+
+void
+iprouting(Fs *f, int on)
+{
+	IP *ip;
+
+	ip = f->ip;
+	ip->iprouting = on;
+	ip->stats[Forwarding] = on != 0 ? 1 : 2;	/* MIB II value: 1 when forwarding, 2 when not */
+}
+
+int
+ipoput4(Fs *f, Block *bp, int gating, int ttl, int tos, Conv *c)
+{
+	Ipifc *ifc;
+	uchar *gate;
+	ulong fragoff;
+	Block *xp, *nb;
+	Ip4hdr *eh, *feh;
+	int lid, len, seglen, chunk, dlen, blklen, offset, medialen;
+	Route *r, *sr;
+	IP *ip;
+	int rv = 0;	/* stays 0 except for the no-route case below */
+	/* send bp as a v4 datagram, fragmenting when it does not fit; gating is set when forwarding (see ipiput4) */
+	ip = f->ip;
+
+	/* Fill out the ip header */
+	eh = (Ip4hdr*)(bp->rp);
+
+	ip->stats[OutRequests]++;
+
+	/* Number of uchars in data and ip header to write */
+	len = blocklen(bp);
+
+	if(gating){
+		chunk = nhgets(eh->length);	/* trust the header's length, but only up to what we hold */
+		if(chunk > len){
+			ip->stats[OutDiscards]++;
+			netlog(f, Logip, "short gated packet\n");
+			goto free;
+		}
+		if(chunk < len)
+			len = chunk;
+	}
+	if(len >= IP_MAX){
+		ip->stats[OutDiscards]++;
+		netlog(f, Logip, "exceeded ip max size %V\n", eh->dst);
+		goto free;
+	}
+
+	r = v4lookup(f, eh->dst, c);
+	if(r == nil){
+		ip->stats[OutNoRoutes]++;
+		netlog(f, Logip, "no interface %V\n", eh->dst);
+		rv = -1;
+		goto free;
+	}
+
+	ifc = r->ifc;
+	if(r->type & (Rifc|Runi))
+		gate = eh->dst;	/* directly reachable: next hop is the destination itself */
+	else
+	if(r->type & (Rbcast|Rmulti)) {
+		gate = eh->dst;
+		sr = v4lookup(f, eh->src, nil);	/* prefer the interface that owns our source address */
+		if(sr != nil && (sr->type & Runi))
+			ifc = sr->ifc;
+	}
+	else
+		gate = r->v4.gate;
+
+	if(!gating)
+		eh->vihl = IP_VER4|IP_HLEN4;
+	eh->ttl = ttl;
+	if(!gating)
+		eh->tos = tos;
+
+	if(!canrlock(ifc))	/* could not get the read lock: drop, still returning rv == 0 */
+		goto free;
+	if(waserror()){
+		runlock(ifc);
+		nexterror();
+	}
+	if(ifc->m == nil)
+		goto raise;
+
+	/* Output NAT */
+	if(nato(bp, ifc, f) != 0)
+		goto raise;
+
+	/* If we dont need to fragment just send it */
+	medialen = ifc->maxtu - ifc->m->hsize;
+	if(len <= medialen) {
+		if(!gating)
+			hnputs(eh->id, incref(&ip->id4));
+		hnputs(eh->length, len);
+		if(!gating){
+			eh->frag[0] = 0;
+			eh->frag[1] = 0;
+		}
+		eh->cksum[0] = 0;
+		eh->cksum[1] = 0;
+		hnputs(eh->cksum, ipcsum(&eh->vihl));
+		ifc->m->bwrite(ifc, bp, V4, gate);	/* bp is handed to the medium here, so it is not freed below */
+		runlock(ifc);
+		poperror();
+		return 0;
+	}
+
+if((eh->frag[0] & (IP_DF>>8)) && !gating) print("%V: DF set\n", eh->dst);
+
+	if(eh->frag[0] & (IP_DF>>8)){
+		ip->stats[FragFails]++;
+		ip->stats[OutDiscards]++;
+		icmpcantfrag(f, bp, medialen);
+		netlog(f, Logip, "%V: eh->frag[0] & (IP_DF>>8)\n", eh->dst);
+		goto raise;
+	}
+
+	seglen = (medialen - IP4HDR) & ~7;	/* data bytes per fragment, rounded down to a multiple of 8 */
+	if(seglen < 8){
+		ip->stats[FragFails]++;
+		ip->stats[OutDiscards]++;
+		netlog(f, Logip, "%V seglen < 8\n", eh->dst);
+		goto raise;
+	}
+
+	dlen = len - IP4HDR;
+	xp = bp;
+	if(gating)
+		lid = nhgets(eh->id);
+	else
+		lid = incref(&ip->id4);
+
+	offset = IP4HDR;	/* step xp/rp past the original header to the first data byte */
+	while(xp != nil && offset && offset >= BLEN(xp)) {
+		offset -= BLEN(xp);
+		xp = xp->next;
+	}
+	xp->rp += offset;
+
+	if(gating)
+		fragoff = nhgets(eh->frag)<<3;	/* a gated packet may itself be a fragment; its flag bits ride along */
+	else
+		fragoff = 0;
+	dlen += fragoff;	/* dlen is now the end offset of the data, not its length */
+	for(; fragoff < dlen; fragoff += seglen) {
+		nb = allocb(IP4HDR+seglen);
+		feh = (Ip4hdr*)(nb->rp);
+
+		memmove(nb->wp, eh, IP4HDR);
+		nb->wp += IP4HDR;
+
+		if((fragoff + seglen) >= dlen) {
+			seglen = dlen - fragoff;	/* last piece: no IP_MF added here */
+			hnputs(feh->frag, fragoff>>3);
+		}
+		else	
+			hnputs(feh->frag, (fragoff>>3)|IP_MF);
+
+		hnputs(feh->length, seglen + IP4HDR);
+		hnputs(feh->id, lid);
+
+		/* Copy up the data area */
+		chunk = seglen;
+		while(chunk) {
+			if(!xp) {
+				ip->stats[OutDiscards]++;
+				ip->stats[FragFails]++;
+				freeblist(nb);
+				netlog(f, Logip, "!xp: chunk %d\n", chunk);
+				goto raise;
+			}
+			blklen = chunk;
+			if(BLEN(xp) < chunk)
+				blklen = BLEN(xp);
+			memmove(nb->wp, xp->rp, blklen);
+			nb->wp += blklen;
+			xp->rp += blklen;
+			chunk -= blklen;
+			if(xp->rp == xp->wp)
+				xp = xp->next;
+		} 
+
+		feh->cksum[0] = 0;
+		feh->cksum[1] = 0;
+		hnputs(feh->cksum, ipcsum(&feh->vihl));
+		ifc->m->bwrite(ifc, nb, V4, gate);
+		ip->stats[FragCreates]++;
+	}
+	ip->stats[FragOKs]++;
+raise:	/* reached with the ifc read lock held and the error label pushed; success falls through here too */
+	runlock(ifc);
+	poperror();
+free:
+	freeblist(bp);
+	return rv;
+}
+
+void
+ipiput4(Fs *f, Ipifc *ifc, Block *bp)
+{
+	int hl;
+	int hop, tos, proto, olen;
+	Ip4hdr *h;
+	Proto *p;
+	ushort frag;
+	int notforme;
+	uchar *dp, v6dst[IPaddrlen];
+	IP *ip;
+	Route *r;
+	/* input path for a packet received on ifc: hand to ipiput6, forward, or deliver to the protocol's rcv */
+	if(BLKIPVER(bp) != IP_VER4) {
+		ipiput6(f, ifc, bp);
+		return;
+	}
+
+	ip = f->ip;
+	ip->stats[InReceives]++;
+
+	/*
+	 *  Ensure we have all the header info in the first
+	 *  block.  Make life easier for other protocols by
+	 *  collecting up to the first 64 bytes in the first block.
+	 */
+	if(BLEN(bp) < 64) {
+		hl = blocklen(bp);
+		if(hl < IP4HDR)
+			hl = IP4HDR;
+		if(hl > 64)
+			hl = 64;
+		bp = pullupblock(bp, hl);
+		if(bp == nil)
+			return;
+	}
+
+	h = (Ip4hdr*)(bp->rp);
+
+	/* Input NAT */
+	nati(bp, ifc);
+
+	/* dump anything that whose header doesn't checksum */
+	if((bp->flag & Bipck) == 0 && ipcsum(&h->vihl)) {
+		ip->stats[InHdrErrors]++;
+		netlog(f, Logip, "ip: checksum error %V\n", h->src);
+		freeblist(bp);
+		return;
+	}
+	v4tov6(v6dst, h->dst);
+	notforme = ipforme(f, v6dst) == 0;
+
+	/* Check header length and version */
+	if((h->vihl&0x0F) != IP_HLEN4) {
+		hl = (h->vihl&0xF)<<2;
+		if(hl < (IP_HLEN4<<2)) {
+			ip->stats[InHdrErrors]++;
+			netlog(f, Logip, "ip: %V bad hivl %ux\n", h->src, h->vihl);
+			freeblist(bp);
+			return;
+		}
+		/* If this is not routed (it is for us) strip off the options */
+		if(notforme == 0) {
+			olen = nhgets(h->length);
+			dp = bp->rp + (hl - (IP_HLEN4<<2));
+			memmove(dp, h, IP_HLEN4<<2);	/* slide the fixed header up over the options */
+			bp->rp = dp;
+			h = (Ip4hdr*)(bp->rp);
+			h->vihl = (IP_VER4|IP_HLEN4);
+			hnputs(h->length, olen-hl+(IP_HLEN4<<2));
+		}
+	}
+
+	/* route */
+	if(notforme) {
+		Conv conv;
+
+		if(!ip->iprouting){
+			freeblist(bp);	/* bp may still be a chain; freeb would free only its first block */
+			return;
+		}
+
+		/* don't forward to source's network */
+		conv.r = nil;
+		r = v4lookup(f, h->dst, &conv);
+		if(r == nil || r->ifc == ifc){
+			ip->stats[OutDiscards]++;
+			freeblist(bp);
+			return;
+		}
+
+		/* don't forward if packet has timed out */
+		hop = h->ttl;
+		if(hop < 1) {
+			ip->stats[InHdrErrors]++;
+			icmpttlexceeded(f, ifc->lifc->local, bp);
+			freeblist(bp);
+			return;
+		}
+
+		/* reassemble if the interface expects it */
+if(r->ifc == nil) panic("nil route rfc");
+		if(r->ifc->reassemble){
+			frag = nhgets(h->frag);
+			if(frag) {
+				h->tos = 0;	/* tos is borrowed to tell ip4reassemble whether IP_MF was set */
+				if(frag & IP_MF)
+					h->tos = 1;
+				bp = ip4reassemble(ip, frag, bp, h);
+				if(bp == nil)
+					return;
+				h = (Ip4hdr*)(bp->rp);
+			}
+		}
+
+		ip->stats[ForwDatagrams]++;
+		tos = h->tos;
+		hop = h->ttl;
+		ipoput4(f, bp, 1, hop - 1, tos, &conv);	/* gating == 1: forwarded with the ttl decremented */
+		return;
+	}
+
+	frag = nhgets(h->frag);
+	if(frag) {
+		h->tos = 0;	/* same tos-as-MF-flag convention as in the forwarding path */
+		if(frag & IP_MF)
+			h->tos = 1;
+		bp = ip4reassemble(ip, frag, bp, h);
+		if(bp == nil)
+			return;
+		h = (Ip4hdr*)(bp->rp);
+	}
+
+	/* don't let any frag info go up the stack */
+	h->frag[0] = 0;
+	h->frag[1] = 0;
+
+	proto = h->proto;
+	p = Fsrcvpcol(f, proto);
+	if(p != nil && p->rcv != nil) {
+		ip->stats[InDelivers]++;
+		(*p->rcv)(p, ifc, bp);
+		return;
+	}
+	ip->stats[InDiscards]++;
+	ip->stats[InUnknownProtos]++;
+	freeblist(bp);
+}
+
+int
+ipstats(Fs *f, char *buf, int len)
+{
+	IP *ip;
+	char *s, *end;
+	int n;
+
+	ip = f->ip;
+	ip->stats[DefaultTTL] = MAXTTL;	/* not a counter: refreshed on every read */
+
+	end = buf + len;
+	s = buf;
+	for(n = 0; n < Nstats; n++)
+		s = seprint(s, end, "%s: %lud\n", statnames[n], ip->stats[n]);
+	return s - buf;
+}
+
+Block*
+ip4reassemble(IP *ip, int offset, Block *bp, Ip4hdr *ih)
+{
+	int fend;
+	ushort id;
+	Fragment4 *f, *fnext;
+	ulong src, dst;
+	Block *bl, **l, *last, *prev;
+	int ovlap, len, fragsize, pktposn;
+	/* offset is the raw frag field, flags included; returns a whole datagram, or nil once bp is queued or dropped */
+	src = nhgetl(ih->src);
+	dst = nhgetl(ih->dst);
+	id = nhgets(ih->id);
+
+	/*
+	 *  block lists are too hard, pullupblock into a single block
+	 */
+	if(bp->next){
+		bp = pullupblock(bp, blocklen(bp));
+		ih = (Ip4hdr*)(bp->rp);	/* the header may have moved with the data */
+	}
+
+	qlock(&ip->fraglock4);
+
+	/*
+	 *  find a reassembly queue for this fragment
+	 */
+	for(f = ip->flisthead4; f; f = fnext){
+		fnext = f->next;	/* because ipfragfree4 changes the list */
+		if(f->src == src && f->dst == dst && f->id == id)
+			break;
+		if(f->age < NOW){	/* expire stale queues met on the way */
+			ip->stats[ReasmTimeout]++;
+			ipfragfree4(ip, f);
+		}
+	}
+
+	/*
+	 *  if this isn't a fragmented packet, accept it
+	 *  and get rid of any fragments that might go
+	 *  with it.
+	 */
+	if(!ih->tos && (offset & ~(IP_MF|IP_DF)) == 0) {	/* ipiput4 sets tos to 1 iff IP_MF was present */
+		if(f != nil) {
+			ipfragfree4(ip, f);
+			ip->stats[ReasmFails]++;
+		}
+		qunlock(&ip->fraglock4);
+		return bp;
+	}
+
+	if(bp->base+sizeof(Ipfrag) >= bp->rp){	/* no room before rp for the Ipfrag that BKFG keeps at base */
+		bp = padblock(bp, sizeof(Ipfrag));
+		bp->rp += sizeof(Ipfrag);
+	}
+
+	BKFG(bp)->foff = offset<<3;	/* foff is a ushort, so the shifted-up flag bits are dropped */
+	BKFG(bp)->flen = nhgets(ih->length)-IP4HDR;
+
+	/* First fragment allocates a reassembly queue */
+	if(f == nil) {
+		f = ipfragallo4(ip);
+		f->id = id;
+		f->src = src;
+		f->dst = dst;
+
+		f->blist = bp;
+
+		qunlock(&ip->fraglock4);
+		ip->stats[ReasmReqds]++;
+		return nil;
+	}
+
+	/*
+	 *  find the new fragment's position in the queue
+	 */
+	prev = nil;
+	l = &f->blist;
+	bl = f->blist;
+	while(bl != nil && BKFG(bp)->foff > BKFG(bl)->foff) {
+		prev = bl;
+		l = &bl->next;
+		bl = bl->next;
+	}
+
+	/* Check overlap of a previous fragment - trim away as necessary */
+	if(prev) {
+		ovlap = BKFG(prev)->foff + BKFG(prev)->flen - BKFG(bp)->foff;
+		if(ovlap > 0) {
+			if(ovlap >= BKFG(bp)->flen) {	/* bp is wholly covered by prev: discard it */
+				freeblist(bp);
+				qunlock(&ip->fraglock4);
+				return nil;
+			}
+			BKFG(prev)->flen -= ovlap;	/* shorten prev rather than bp */
+		}
+	}
+
+	/* Link onto assembly queue */
+	bp->next = *l;
+	*l = bp;
+
+	/* Check to see if succeeding segments overlap */
+	if(bp->next) {
+		l = &bp->next;
+		fend = BKFG(bp)->foff + BKFG(bp)->flen;
+		/* Take completely covered segments out */
+		while(*l) {
+			ovlap = fend - BKFG(*l)->foff;
+			if(ovlap <= 0)
+				break;
+			if(ovlap < BKFG(*l)->flen) {	/* partial overlap: trim the front of the successor */
+				BKFG(*l)->flen -= ovlap;
+				BKFG(*l)->foff += ovlap;
+				/* move up ih hdrs */
+				memmove((*l)->rp + ovlap, (*l)->rp, IP4HDR);
+				(*l)->rp += ovlap;
+				break;
+			}
+			last = (*l)->next;
+			(*l)->next = nil;
+			freeblist(*l);
+			*l = last;
+		}
+	}
+
+	/*
+	 *  look for a complete packet.  if we get to a fragment
+	 *  without IP_MF set, we're done.
+	 */
+	pktposn = 0;
+	for(bl = f->blist; bl; bl = bl->next) {
+		if(BKFG(bl)->foff != pktposn)	/* hole before this fragment: not complete yet */
+			break;
+		if((BLKIP(bl)->frag[0]&(IP_MF>>8)) == 0) {
+			bl = f->blist;
+			len = nhgets(BLKIP(bl)->length);
+			bl->wp = bl->rp + len;
+
+			/* Pullup all the fragment headers and
+			 * return a complete packet
+			 */
+			for(bl = bl->next; bl; bl = bl->next) {
+				fragsize = BKFG(bl)->flen;
+				len += fragsize;
+				bl->rp += IP4HDR;	/* later fragments contribute data only */
+				bl->wp = bl->rp + fragsize;
+			}
+
+			bl = f->blist;
+			f->blist = nil;	/* detach the chain so ipfragfree4 does not free it */
+			ipfragfree4(ip, f);
+			ih = BLKIP(bl);
+			hnputs(ih->length, len);
+			qunlock(&ip->fraglock4);
+			ip->stats[ReasmOKs]++;
+			return bl;		
+		}
+		pktposn += BKFG(bl)->flen;
+	}
+	qunlock(&ip->fraglock4);
+	return nil;
+}
+
+/*
+ * ipfragfree4 - release a reassembly queue and whatever fragments it
+ * still holds - assume hold fraglock4
+ */
+void
+ipfragfree4(IP *ip, Fragment4 *frag)
+{
+	Fragment4 **link;
+
+	/* discard any fragments still queued */
+	if(frag->blist != nil)
+		freeblist(frag->blist);
+	frag->blist = nil;
+	frag->src = 0;
+	frag->id = 0;
+
+	/* unhook from the active list, if it is on it */
+	for(link = &ip->flisthead4; *link != nil; link = &(*link)->next){
+		if(*link == frag){
+			*link = frag->next;
+			break;
+		}
+	}
+
+	/* and push onto the free list */
+	frag->next = ip->fragfree4;
+	ip->fragfree4 = frag;
+}
+
+/*
+ * ipfragallo4 - get a reassembly queue, recycling one if none is free - assume hold fraglock4
+ */
+Fragment4 *
+ipfragallo4(IP *ip)
+{
+	Fragment4 *q, *tail;
+
+	while((q = ip->fragfree4) == nil){
+		/* none free: reclaim the entry at the tail of the active list */
+		tail = ip->flisthead4;
+		while(tail->next != nil)
+			tail = tail->next;
+		ipfragfree4(ip, tail);
+	}
+	/* move q from the free list to the head of the active list */
+	ip->fragfree4 = q->next;
+	q->next = ip->flisthead4;
+	ip->flisthead4 = q;
+	q->age = NOW + 30000;
+	return q;
+}
+
+ushort
+ipcsum(uchar *addr)
+{
+	int left;
+	ulong acc;
+
+	/* header length in bytes: the low nibble of the first byte, times four */
+	acc = 0;
+	for(left = (addr[0]&0xf)<<2; left > 0; left -= 2){
+		acc += addr[0]<<8 | addr[1];
+		addr += 2;
+	}
+
+	/* fold the carries back into 16 bits; twice, as the first fold may carry */
+	acc = (acc & 0xffff) + (acc >> 16);
+	acc = (acc & 0xffff) + (acc >> 16);
+
+	/* hand back the one's complement of the folded sum */
+	return (acc^0xffff);
+}
--- /dev/null
+++ b/os/ip.original/ip.h
@@ -1,0 +1,709 @@
+typedef struct	Conv	Conv;
+typedef struct	Fs	Fs;
+typedef union	Hwaddr	Hwaddr;
+typedef struct	IP	IP;
+typedef struct	IPaux	IPaux;
+typedef struct	Ipself	Ipself;
+typedef struct	Ipselftab	Ipselftab;
+typedef struct	Iplink	Iplink;
+typedef struct	Iplifc	Iplifc;
+typedef struct	Ipmulti	Ipmulti;
+typedef struct	IProuter IProuter;
+typedef struct	Ipifc	Ipifc;
+typedef struct	Iphash	Iphash;
+typedef struct	Ipht	Ipht;
+typedef struct	Netlog	Netlog;
+typedef struct	Ifclog	Ifclog;
+typedef struct	Medium	Medium;
+typedef struct	Proto	Proto;
+typedef struct	Arpent	Arpent;
+typedef struct	Arp Arp;
+typedef struct	Route	Route;
+
+typedef struct	Routerparams	Routerparams;
+typedef struct 	Hostparams	Hostparams;
+typedef struct 	V6router	V6router;
+typedef struct	V6params	V6params;
+
+typedef struct Ip4hdr     Ip4hdr;
+typedef struct Nat	Nat;
+
+#pragma incomplete Arp
+#pragma	incomplete Ifclog
+#pragma incomplete Ipself
+#pragma incomplete Ipselftab
+#pragma incomplete IP
+#pragma incomplete Netlog
+
+enum
+{
+	Addrlen=	64,
+	Maxproto=	20,
+	Nhash=		64,
+	Maxincall=	5,
+	Nchans=		16383,
+	MAClen=		16,		/* longest mac address */
+
+	MAXTTL=		255,
+	DFLTTOS=	0,
+
+	IPaddrlen=	16,
+	IPv4addrlen=	4,
+	IPv4off=	12,
+	IPllen=		4,
+
+	/* ip versions */
+	V4=		4,
+	V6=		6,
+	IP_VER4= 	0x40,
+	IP_VER6=	0x60,
+
+	/* 2^Lroot trees in the root table */
+	Lroot=		10,
+
+	Maxpath =	64,
+};
+
+enum
+{
+	Idle=		0,
+	Announcing=	1,
+	Announced=	2,
+	Connecting=	3,
+	Connected=	4,
+};
+
+/* on the wire packet header */
+struct Ip4hdr
+{
+	uchar	vihl;		/* Version and header length */
+	uchar	tos;		/* Type of service */
+	uchar	length[2];	/* packet length */
+	uchar	id[2];		/* ip->identification */
+	uchar	frag[2];	/* Fragment information */
+	uchar	ttl;      	/* Time to live */
+	uchar	proto;		/* Protocol */
+	uchar	cksum[2];	/* Header checksum */
+	uchar	src[4];		/* IP source */
+	uchar	dst[4];		/* IP destination */
+	uchar	data[1];	/* start of data */
+};
+
+/*
+ *  one per conversation directory
+ */
+struct Conv
+{
+	QLock;
+
+	int	x;			/* conversation index */
+	Proto*	p;
+
+	int	restricted;		/* remote port is restricted */
+	uint	ttl;			/* max time to live */
+	uint	tos;			/* type of service */
+	int	ignoreadvice;		/* don't terminate connection on icmp errors */
+
+	uchar	ipversion;
+	uchar	laddr[IPaddrlen];	/* local IP address */
+	uchar	raddr[IPaddrlen];	/* remote IP address */
+	ushort	lport;			/* local port number */
+	ushort	rport;			/* remote port number */
+
+	char	*owner;			/* protections */
+	int	perm;
+	int	inuse;			/* opens of listen/data/ctl */
+	int	length;
+	int	state;
+
+	/* udp specific */
+	int	headers;		/* data src/dst headers in udp */
+	int	reliable;		/* true if reliable udp */
+
+	Conv*	incall;			/* calls waiting to be listened for */
+	Conv*	next;
+
+	Queue*	rq;			/* queued data waiting to be read */
+	Queue*	wq;			/* queued data waiting to be written */
+	Queue*	eq;			/* returned error packets */
+	Queue*	sq;			/* snooping queue */
+	Ref	snoopers;		/* number of processes with snoop open */
+
+	Rendez	cr;
+	char	cerr[ERRMAX];
+
+	QLock	listenq;
+	Rendez	listenr;
+
+	Ipmulti	*multi;			/* multicast bindings for this interface */
+
+	void*	ptcl;			/* protocol specific stuff */
+
+	Route	*r;			/* last route used */
+	ulong	rgen;			/* routetable generation for *r */
+};
+
+struct Medium
+{
+	char	*name;
+	int	hsize;		/* medium header size */
+	int	mintu;		/* default min mtu */
+	int	maxtu;		/* default max mtu */
+	int	maclen;		/* mac address length  */
+	void	(*bind)(Ipifc*, int, char**);
+	void	(*unbind)(Ipifc*);
+	void	(*bwrite)(Ipifc *ifc, Block *b, int version, uchar *ip);
+
+	/* for arming interfaces to receive multicast */
+	void	(*addmulti)(Ipifc *ifc, uchar *a, uchar *ia);
+	void	(*remmulti)(Ipifc *ifc, uchar *a, uchar *ia);
+
+	/* process packets written to 'data' */
+	void	(*pktin)(Fs *f, Ipifc *ifc, Block *bp);
+
+	/* routes for router boards */
+	void	(*addroute)(Ipifc *ifc, int, uchar*, uchar*, uchar*, int);
+	void	(*remroute)(Ipifc *ifc, int, uchar*, uchar*);
+	void	(*flushroutes)(Ipifc *ifc);
+
+	/* for routing multicast groups */
+	void	(*joinmulti)(Ipifc *ifc, uchar *a, uchar *ia);
+	void	(*leavemulti)(Ipifc *ifc, uchar *a, uchar *ia);
+
+	/* address resolution */
+	void	(*ares)(Fs*, int, uchar*, uchar*, int, int);	/* resolve */
+	void	(*areg)(Ipifc*, uchar*);			/* register */
+
+	/* v6 address generation */
+	void	(*pref2addr)(uchar *pref, uchar *ea);
+
+	int	unbindonclose;	/* if non-zero, unbind on last close */
+};
+
+/* logical interface associated with a physical one */
+struct Iplifc
+{
+	uchar	local[IPaddrlen];
+	uchar	mask[IPaddrlen];
+	uchar	remote[IPaddrlen];
+	uchar	net[IPaddrlen];
+	uchar	tentative;	/* =1 => v6 dup disc on, =0 => confirmed unique */
+	uchar	onlink;		/* =1 => onlink, =0 offlink. */
+	uchar	autoflag;	/* v6 autonomous flag */
+	long 	validlt;	/* v6 valid lifetime */
+	long 	preflt;		/* v6 preferred lifetime */
+	long	origint;	/* time when addr was added */
+	Iplink	*link;		/* addresses linked to this lifc */
+	Iplifc	*next;
+};
+
+/* binding twixt Ipself and Iplifc */
+struct Iplink
+{
+	Ipself	*self;
+	Iplifc	*lifc;
+	Iplink	*selflink;	/* next link for this local address */
+	Iplink	*lifclink;	/* next link for this ifc */
+	ulong	expire;
+	Iplink	*next;		/* free list */
+	int	ref;
+};
+
+/* rfc 2461, pp.40--43. */
+
+/* default values, one per stack (ip_init_6 fills in the per-stack copy) */
+struct Routerparams {
+	int	mflag;	/* 0 = not managed (the default) */
+	int	oflag;	/* defaults to 0 */
+	int 	maxraint;	/* millisecs; defaults to 600000 */
+	int	minraint;	/* defaults to 200000; presumably millisecs like maxraint */
+	int	linkmtu;	/* 0 = no mtu sent */
+	int	reachtime;	/* defaults to 0 */
+	int	rxmitra;	/* defaults to 0 */
+	int	ttl;	/* defaults to MAXTTL */
+	int	routerlt;	/* defaults to 3*maxraint */
+};
+
+struct Hostparams {
+	int	rxmithost;	/* set to 1000 by ip_init_6, which labels it the v6 RETRANS_TIMER */
+};
+
+struct Ipifc
+{
+	RWlock;
+	
+	Conv	*conv;		/* link to its conversation structure */
+	char	dev[64];	/* device we're attached to */
+	Medium	*m;		/* Media pointer; ipoput4 will not send while it is nil */
+	int	maxtu;		/* Maximum transfer unit */
+	int	mintu;		/* Minimum transfer unit */
+	int	mbps;		/* megabits per second */
+	void	*arg;		/* medium specific */
+	int	reassemble;	/* reassemble IP packets before forwarding */
+
+	/* these are used so that we can unbind on the fly */
+	Lock	idlock;
+	uchar	ifcid;		/* incremented each 'bind/unbind/add/remove' */
+	int	ref;		/* number of procs using this ipifc */
+	Rendez	wait;		/* where unbinder waits for ref == 0 */
+	int	unbinding;
+
+	uchar	mac[MAClen];	/* MAC address */
+
+	Iplifc	*lifc;		/* logical interfaces on this physical one */
+
+	ulong	in, out;	/* message statistics */
+	ulong	inerr, outerr;	/* ... */
+
+	uchar	sendra6;	/* == 1 => send router advs on this ifc	*/
+	uchar	recvra6;	/* == 1 => recv router advs on this ifc */
+	Routerparams rp;	/* router parameters as in RFC 2461, pp.40--43;
+					used only if node is router */
+};
+
+/*
+ *  one per multicast-lifc pair used by a Conv
+ */
+struct Ipmulti
+{
+	uchar	ma[IPaddrlen];
+	uchar	ia[IPaddrlen];
+	Ipmulti	*next;
+};
+
+/*
+ *  hash table for 2 ip addresses + 2 ports
+ */
+enum
+{
+	Nipht=		521,	/* convenient prime */
+
+	IPmatchexact=	0,	/* match on 4 tuple */
+	IPmatchany,		/* *!* */
+	IPmatchport,		/* *!port */
+	IPmatchaddr,		/* addr!* */
+	IPmatchpa,		/* addr!port */
+};
+struct Iphash
+{
+	Iphash	*next;
+	Conv	*c;
+	int	match;
+};
+struct Ipht
+{
+	Lock;
+	Iphash	*tab[Nipht];
+};
+void iphtadd(Ipht*, Conv*);
+void iphtrem(Ipht*, Conv*);
+Conv* iphtlook(Ipht *ht, uchar *sa, ushort sp, uchar *da, ushort dp);
+
+/*
+ *  one per multiplexed protocol
+ */
+struct Proto
+{
+	QLock;
+	char*		name;		/* protocol name */
+	int		x;		/* protocol index */
+	int		ipproto;	/* ip protocol type */
+
+	char*		(*connect)(Conv*, char**, int);
+	char*		(*announce)(Conv*, char**, int);
+	char*		(*bind)(Conv*, char**, int);
+	int		(*state)(Conv*, char*, int);
+	void		(*create)(Conv*);
+	void		(*close)(Conv*);
+	void		(*rcv)(Proto*, Ipifc*, Block*);
+	char*		(*ctl)(Conv*, char**, int);
+	void		(*advise)(Proto*, Block*, char*);
+	int		(*stats)(Proto*, char*, int);
+	int		(*local)(Conv*, char*, int);
+	int		(*remote)(Conv*, char*, int);
+	int		(*inuse)(Conv*);
+	int		(*gc)(Proto*);	/* returns true if any conversations are freed */
+
+	Fs		*f;		/* file system this proto is part of */
+	Conv		**conv;		/* array of conversations */
+	int		ptclsize;	/* size of per protocol ctl block */
+	int		nc;		/* number of conversations */
+	int		ac;
+	Qid		qid;		/* qid for protocol directory */
+	ushort		nextport;
+	ushort		nextrport;
+
+	void		*priv;
+};
+
+/*
+ *  Stream for sending packets to user level
+ */
+struct IProuter {
+	QLock;
+	int	opens;
+	Queue	*q;
+};
+
+/*
+ *  one per IP protocol stack
+ */
+struct Fs
+{
+	RWlock;
+	int	dev;
+
+	int	np;
+	Proto*	p[Maxproto+1];		/* list of supported protocols */
+	Proto*	t2p[256];		/* vector of all protocols */
+	Proto*	ipifc;			/* kludge for ipifcremroute & ipifcaddroute */
+	Proto*	ipmux;			/* kludge for finding an ip multiplexor */
+
+	IP	*ip;
+	Ipselftab	*self;
+	Arp	*arp;
+	V6params	*v6p;
+	IProuter iprouter;
+
+	Route	*v4root[1<<Lroot];	/* v4 routing forest */
+	Route	*v6root[1<<Lroot];	/* v6 routing forest */
+	Route	*queue;			/* used as temp when reinjecting routes */
+
+	Netlog	*alog;
+	Ifclog	*ilog;
+
+	char	ndb[1024];		/* an ndb entry for this interface */
+	int	ndbvers;
+	long	ndbmtime;
+};
+
+/* one per default router known to host */
+struct V6router {
+	uchar	inuse;
+	Ipifc	*ifc;
+	int	ifcid;
+	uchar	routeraddr[IPaddrlen];
+	long	ltorigin;
+	Routerparams	rp;
+};
+
+struct V6params
+{
+	Routerparams	rp;		/* v6 params, one copy per node now */
+	Hostparams	hp;		/* host side; rxmithost is set to 1000 by ip_init_6 */
+	V6router	v6rlist[3];	/* max 3 default routers, currently */
+	int		cdrouter;	/* uses only v6rlist[cdrouter] if   */ 
+					/* cdrouter >= 0; ip_init_6 starts it at -1 */
+};
+
+
+int	Fsconnected(Conv*, char*);
+Conv*	Fsnewcall(Conv*, uchar*, ushort, uchar*, ushort, uchar);
+int	Fspcolstats(char*, int);
+int	Fsproto(Fs*, Proto*);
+int	Fsbuiltinproto(Fs*, uchar);
+Conv*	Fsprotoclone(Proto*, char*);
+Proto*	Fsrcvpcol(Fs*, uchar);
+Proto*	Fsrcvpcolx(Fs*, uchar);
+char*	Fsstdconnect(Conv*, char**, int);
+char*	Fsstdannounce(Conv*, char**, int);
+char*	Fsstdbind(Conv*, char**, int);
+ulong	scalednconv(void);
+void	closeconv(Conv*);
+
+/* 
+ *  logging
+ */
+enum
+{
+	Logip=		1<<1,
+	Logtcp=		1<<2,
+	Logfs=		1<<3,
+	Logil=		1<<4,
+	Logicmp=	1<<5,
+	Logudp=		1<<6,
+	Logcompress=	1<<7,
+	Logilmsg=	1<<8,
+	Loggre=		1<<9,
+	Logppp=		1<<10,
+	Logtcprxmt=	1<<11,
+	Logigmp=	1<<12,
+	Logudpmsg=	1<<13,
+	Logipmsg=	1<<14,
+	Logrudp=	1<<15,
+	Logrudpmsg=	1<<16,
+	Logesp=		1<<17,
+	Logtcpwin=	1<<18,
+	Lognat=		1<<19,
+};
+
+void	netloginit(Fs*);
+void	netlogopen(Fs*);
+void	netlogclose(Fs*);
+void	netlogctl(Fs*, char*, int);
+long	netlogread(Fs*, void*, ulong, long);
+void	netlog(Fs*, int, char*, ...);
+void	ifcloginit(Fs*);
+long	ifclogread(Fs*, Chan *,void*, ulong, long);
+void	ifclog(Fs*, uchar *, int);
+void	ifclogopen(Fs*, Chan*);
+void	ifclogclose(Fs*, Chan*);
+
+/*
+ *  iproute.c
+ */
+typedef	struct RouteTree RouteTree;
+typedef struct Routewalk Routewalk;
+typedef struct V4route V4route;
+typedef struct V6route V6route;
+
+enum
+{
+
+	/* type bits */
+	Rv4=		(1<<0),		/* this is a version 4 route */
+	Rifc=		(1<<1),		/* this route is a directly connected interface */
+	Rptpt=		(1<<2),		/* this route is a pt to pt interface */
+	Runi=		(1<<3),		/* a unicast self address */
+	Rbcast=		(1<<4),		/* a broadcast self address */
+	Rmulti=		(1<<5),		/* a multicast self address */
+	Rproxy=		(1<<6),		/* this route should be proxied */
+};
+
+struct Routewalk
+{
+	int	o;
+	int	h;
+	char*	p;
+	char*	e;
+	void*	state;
+	void	(*walk)(Route*, Routewalk*);
+};
+
+struct	RouteTree
+{
+	Route*	right;
+	Route*	left;
+	Route*	mid;
+	uchar	depth;
+	uchar	type;
+	uchar	ifcid;		/* must match ifc->id */
+	Ipifc	*ifc;
+	char	tag[4];
+	int	ref;
+};
+
+struct V4route
+{
+	ulong	address;
+	ulong	endaddress;
+	uchar	gate[IPv4addrlen];
+};
+
+struct V6route
+{
+	ulong	address[IPllen];
+	ulong	endaddress[IPllen];
+	uchar	gate[IPaddrlen];
+};
+
+struct Route
+{
+	RouteTree;
+
+	union {
+		V6route	v6;
+		V4route v4;
+	};
+};
+extern void	v4addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type);
+extern void	v6addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type);
+extern void	v4delroute(Fs *f, uchar *a, uchar *mask, int dolock);
+extern void	v6delroute(Fs *f, uchar *a, uchar *mask, int dolock);
+extern Route*	v4lookup(Fs *f, uchar *a, Conv *c);
+extern Route*	v6lookup(Fs *f, uchar *a, Conv *c);
+extern long	routeread(Fs *f, char*, ulong, int);
+extern long	routewrite(Fs *f, Chan*, char*, int);
+extern void	routetype(int, char*);
+extern void	ipwalkroutes(Fs*, Routewalk*);
+extern void	convroute(Route*, uchar*, uchar*, uchar*, char*, int*);
+
+/*
+ *  devip.c
+ */
+
+/*
+ *  Hanging off every ip channel's ->aux is the following structure.
+ *  It maintains the state used by devip and iproute.
+ */
+struct IPaux
+{
+	char	*owner;		/* the user that did the attach */
+	char	tag[4];
+};
+
+extern IPaux*	newipaux(char*, char*);
+extern void	setlport(Conv*);
+
+/*
+ *  arp.c
+ */
+struct Arpent
+{
+	uchar	ip[IPaddrlen];
+	uchar	mac[MAClen];
+	Medium	*type;			/* media type */
+	Arpent*	hash;
+	Block*	hold;
+	Block*	last;
+	uint	ctime;			/* time entry was created or refreshed */
+	uint	utime;			/* time entry was last used */
+	uchar	state;
+	Arpent	*nextrxt;		/* re-transmit chain */
+	uint	rtime;			/* time for next retransmission */
+	uchar	rxtsrem;
+	Ipifc	*ifc;
+	uchar	ifcid;			/* must match ifc->id */
+};
+
+extern void	arpinit(Fs*);
+extern int	arpread(Arp*, char*, ulong, int);
+extern int	arpwrite(Fs*, char*, int);
+extern Arpent*	arpget(Arp*, Block *bp, int version, Ipifc *ifc, uchar *ip, uchar *h);
+extern void	arprelease(Arp*, Arpent *a);
+extern Block*	arpresolve(Arp*, Arpent *a, Medium *type, uchar *mac);
+extern void	arpenter(Fs*, int version, uchar *ip, uchar *mac, int len, int norefresh);
+
+/*
+ * ipaux.c
+ */
+
+extern int	myetheraddr(uchar*, char*);
+extern ulong	parseip(uchar*, char*);
+extern ulong	parseipmask(uchar*, char*);
+extern char*	v4parseip(uchar*, char*);
+extern void	maskip(uchar *from, uchar *mask, uchar *to);
+extern int	parsemac(uchar *to, char *from, int len);
+extern uchar*	defmask(uchar*);
+extern int	isv4(uchar*);
+extern void	v4tov6(uchar *v6, uchar *v4);
+extern int	v6tov4(uchar *v4, uchar *v6);
+extern int	eipfmt(Fmt*);
+
+#define	ipmove(x, y) memmove(x, y, IPaddrlen)
+#define	ipcmp(x, y) ( (x)[IPaddrlen-1] != (y)[IPaddrlen-1] || memcmp(x, y, IPaddrlen) )
+ 
+#define	ip4move(x, y) memmove(x, y, IPv4addrlen)
+#define	ip4cmp(x, y) ( (x)[IPv4addrlen-1] != (y)[IPv4addrlen-1] || memcmp(x, y, IPv4addrlen) )
+
+extern uchar IPv4bcast[IPaddrlen];
+extern uchar IPv4bcastobs[IPaddrlen];
+extern uchar IPv4allsys[IPaddrlen];
+extern uchar IPv4allrouter[IPaddrlen];
+extern uchar IPnoaddr[IPaddrlen];
+extern uchar v4prefix[IPaddrlen];
+extern uchar IPallbits[IPaddrlen];
+
+#define	NOW	TK2MS(MACHP(0)->ticks)
+
+/*
+ *  media
+ */
+extern Medium	ethermedium;
+extern Medium	nullmedium;
+extern Medium	pktmedium;
+extern Medium	tripmedium;
+
+/*
+ *  ipifc.c
+ */
+extern Medium*	ipfindmedium(char *name);
+extern void	addipmedium(Medium *med);
+extern int	ipforme(Fs*, uchar *addr);
+extern int	iptentative(Fs*, uchar *addr);
+extern int	ipisbm(uchar *);
+extern int	ipismulticast(uchar *);
+extern Ipifc*	findipifc(Fs*, uchar *remote, int type);
+extern void	findprimaryip(Fs*, uchar*);
+extern void	findlocalip(Fs*, uchar *local, uchar *remote);
+extern int	ipv4local(Ipifc *ifc, uchar *addr);
+extern int	ipv6local(Ipifc *ifc, uchar *addr);
+extern int	ipv6anylocal(Ipifc *ifc, uchar *addr);
+extern Iplifc*	iplocalonifc(Ipifc *ifc, uchar *ip);
+extern int	ipproxyifc(Fs *f, Ipifc *ifc, uchar *ip);
+extern int	ipismulticast(uchar *ip);
+extern int	ipisbooting(void);
+extern int	ipifccheckin(Ipifc *ifc, Medium *med);
+extern void	ipifccheckout(Ipifc *ifc);
+extern int	ipifcgrab(Ipifc *ifc);
+extern void	ipifcaddroute(Fs*, int, uchar*, uchar*, uchar*, int);
+extern void	ipifcremroute(Fs*, int, uchar*, uchar*);
+extern void	ipifcremmulti(Conv *c, uchar *ma, uchar *ia);
+extern void	ipifcaddmulti(Conv *c, uchar *ma, uchar *ia);
+extern char*	ipifcrem(Ipifc *ifc, char **argv, int argc);
+extern char*	ipifcadd(Ipifc *ifc, char **argv, int argc, int tentative, Iplifc *lifcp);
+extern long	ipselftabread(Fs*, char *a, ulong offset, int n);
+extern char*	ipifcaddpref6(Ipifc *ifc, char**argv, int argc);
+extern void	ipsendra6(Fs *f, int on);
+
+/*
+ *  ip.c
+ */
+extern void	iprouting(Fs*, int);
+extern void	icmpnoconv(Fs*, Block*);
+extern void	icmpcantfrag(Fs*, Block*, int);
+extern void	icmpttlexceeded(Fs*, uchar*, Block*);
+extern ushort	ipcsum(uchar*);
+extern void	ipiput4(Fs*, Ipifc*, Block*);
+extern void	ipiput6(Fs*, Ipifc*, Block*);
+extern int	ipoput4(Fs*, Block*, int, int, int, Conv*);
+extern int	ipoput6(Fs*, Block*, int, int, int, Conv*);
+extern int	ipstats(Fs*, char*, int);
+extern ushort	ptclbsum(uchar*, int);
+extern ushort	ptclcsum(Block*, int, int);
+extern void	ip_init(Fs*);
+extern void	update_mtucache(uchar*, ulong);
+extern ulong	restrict_mtu(uchar*, ulong);
+
+/*
+ * bootp.c
+ */
+char*	(*bootp)(Ipifc*);
+int	(*bootpread)(char*, ulong, int);
+
+/*
+ *  iprouter.c
+ */
+void	useriprouter(Fs*, Ipifc*, Block*);
+void	iprouteropen(Fs*);
+void	iprouterclose(Fs*);
+long	iprouterread(Fs*, void*, int);
+
+/*
+ *  resolving inferno/plan9 differences
+ */
+Chan*		commonfdtochan(int, int, int, int);
+char*		commonuser(void);
+char*		commonerror(void);
+
+/*
+ * chandial.c
+ */
+extern Chan*	chandial(char*, char*, char*, Chan**);
+
+/*
+ *  global to all of the stack
+ */
+extern void	(*igmpreportfn)(Ipifc*, uchar*);
+
+/*
+ * nat.c
+ */
+extern int	nato(Block*, Ipifc*, Fs*);
+extern void	nati(Block*, Ipifc*);
+extern int	natgc(uchar);
+
+extern int	addnataddr(uchar*, uchar*, Iplifc*);
+extern int	removenataddr(uchar*, uchar*, Iplifc*);
+extern void	shownataddr(void);
+extern void flushnataddr(void);
--- /dev/null
+++ b/os/ip.original/ipaux.c
@@ -1,0 +1,729 @@
+#include	"u.h"
+#include	"../port/lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"../port/error.h"
+#include	"ip.h"
+#include  "ipv6.h"
+
+/*
+ *  well known IP addresses
+ *
+ *  All are IPaddrlen bytes long.  The v4 ones carry v4prefix
+ *  (bytes 10 and 11 are 0xff) with the v4 address in the last
+ *  4 bytes.
+ */
+uchar IPv4bcast[IPaddrlen] = {	/* 255.255.255.255 */
+	0, 0, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0xff, 0xff,
+	0xff, 0xff, 0xff, 0xff
+};
+uchar IPv4allsys[IPaddrlen] = {	/* 224.0.0.1 */
+	0, 0, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0xff, 0xff,
+	0xe0, 0, 0, 0x01
+};
+uchar IPv4allrouter[IPaddrlen] = {	/* 224.0.0.2 */
+	0, 0, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0xff, 0xff,
+	0xe0, 0, 0, 0x02
+};
+uchar IPallbits[IPaddrlen] = {	/* every bit set; also returned by defmask as the all-ones mask */
+	0xff, 0xff, 0xff, 0xff,
+	0xff, 0xff, 0xff, 0xff,
+	0xff, 0xff, 0xff, 0xff,
+	0xff, 0xff, 0xff, 0xff
+};
+
+uchar IPnoaddr[IPaddrlen];	/* no initializer: all zero bytes */
+
+/*
+ *  prefix of all v4 addresses
+ *  (isv4 compares the first IPv4off bytes of an address against it)
+ */
+uchar v4prefix[IPaddrlen] = {
+	0, 0, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0xff, 0xff,
+	0, 0, 0, 0
+};
+
+
+char *v6hdrtypes[Maxhdrtype] =	/* printable names, indexed by the header type constants below; unlisted slots are nil */
+{
+	[HBH]		"HopbyHop",
+	[ICMP]		"ICMP",
+	[IGMP]		"IGMP",
+	[GGP]		"GGP",
+	[IPINIP]		"IP",
+	[ST]		"ST",
+	[TCP]		"TCP",
+	[UDP]		"UDP",
+	[ISO_TP4]	"ISO_TP4",
+	[RH]		"Routinghdr",
+	[FH]		"Fraghdr",
+	[IDRP]		"IDRP",
+	[RSVP]		"RSVP",
+	[AH]		"Authhdr",
+	[ESP]		"ESP",
+	[ICMPv6]	"ICMPv6",
+	[NNH]		"Nonexthdr",
+	[ISO_IP]	"ISO_IP",
+	[IGRP]		"IGRP",
+	[OSPF]		"OSPF",
+};
+
+/*
+ *  well known IPv6 addresses
+ *
+ *  The *preflen values that defmask uses are byte counts: they
+ *  are passed to memcmp as the length of the prefix to compare.
+ */
+uchar v6Unspecified[IPaddrlen] = {	/* :: */
+	0, 0, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0, 0
+};
+uchar v6loopback[IPaddrlen] = {	/* ::1 */
+	0, 0, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0, 0x01
+};
+uchar v6linklocal[IPaddrlen] = {	/* fe80:: */
+	0xfe, 0x80, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0, 0
+};
+uchar v6linklocalmask[IPaddrlen] = {	/* first 8 bytes set */
+	0xff, 0xff, 0xff, 0xff,
+	0xff, 0xff, 0xff, 0xff,
+	0, 0, 0, 0,
+	0, 0, 0, 0
+};
+int v6llpreflen = 8;	// link-local prefix length, in bytes (memcmp length in defmask)
+uchar v6sitelocal[IPaddrlen] = {	/* fec0:: */
+	0xfe, 0xc0, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0, 0
+};
+uchar v6sitelocalmask[IPaddrlen] = {	/* first 8 bytes set */
+	0xff, 0xff, 0xff, 0xff,
+	0xff, 0xff, 0xff, 0xff,
+	0, 0, 0, 0,
+	0, 0, 0, 0
+};
+int v6slpreflen = 6;	// site-local prefix length, in bytes (memcmp length in defmask)
+uchar v6glunicast[IPaddrlen] = {	/* only the first byte (0x08) is non-zero */
+	0x08, 0, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0, 0
+};
+uchar v6multicast[IPaddrlen] = {	/* ff00:: */
+	0xff, 0, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0, 0
+};
+uchar v6multicastmask[IPaddrlen] = {	/* first byte set */
+	0xff, 0, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0, 0
+};
+int v6mcpreflen = 1;	// multicast prefix length, in bytes (memcmp length in defmask)
+uchar v6allnodesN[IPaddrlen] = {	/* ff01::1 */
+	0xff, 0x01, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0, 0x01
+};
+uchar v6allnodesNmask[IPaddrlen] = {	/* first 2 bytes set */
+	0xff, 0xff, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0, 0
+};
+int v6aNpreflen = 2;	// all nodes (N) prefix
+uchar v6allnodesL[IPaddrlen] = {	/* ff02::1 */
+	0xff, 0x02, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0, 0x01
+};
+uchar v6allnodesLmask[IPaddrlen] = {	/* first 2 bytes set */
+	0xff, 0xff, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0, 0
+};
+int v6aLpreflen = 2;	// all nodes (L) prefix
+uchar v6allroutersN[IPaddrlen] = {	/* ff01::2 */
+	0xff, 0x01, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0, 0x02
+};
+uchar v6allroutersL[IPaddrlen] = {	/* ff02::2 */
+	0xff, 0x02, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0, 0x02
+};
+uchar v6allroutersS[IPaddrlen] = {	/* ff05::2 */
+	0xff, 0x05, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0, 0x02
+};
+uchar v6solicitednode[IPaddrlen] = {	/* ff02::1:ff00:0; ipv62smcast fills in the last 3 bytes */
+	0xff, 0x02, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0, 0x01,
+	0xff, 0, 0, 0
+};
+uchar v6solicitednodemask[IPaddrlen] = {	/* first 13 bytes set */
+	0xff, 0xff, 0xff, 0xff,
+	0xff, 0xff, 0xff, 0xff,
+	0xff, 0xff, 0xff, 0xff,
+	0xff, 0x0, 0x0, 0x0
+};
+int v6snpreflen = 13;	/* solicited-node prefix length, in bytes (memcmp length in defmask) */
+
+
+
+
+/*
+ *  checksum len bytes of the block chain bp, starting offset bytes
+ *  into its data.  The per-block sums come from ptclbsum; the value
+ *  returned is the complement of the folded 16-bit total, or 0 when
+ *  offset runs off the end of the chain.
+ */
+ushort
+ptclcsum(Block *bp, int offset, int len)
+{
+	uchar *data;
+	ulong evensum, oddsum;
+	ushort part;
+	int phase, avail, take;
+
+	/* skip whole blocks that lie before the starting offset */
+	for(; bp != nil; bp = bp->next){
+		if(offset == 0 || offset < BLEN(bp))
+			break;
+		offset -= BLEN(bp);
+	}
+	if(bp == nil)
+		return 0;
+
+	data = bp->rp + offset;
+	avail = BLEN(bp) - offset;
+
+	/* last block of the chain: one call, clipped to what it holds */
+	if(bp->next == nil){
+		take = len;
+		if(avail < take)
+			take = avail;
+		return ~ptclbsum(data, take) & 0xffff;
+	}
+
+	/*
+	 *  several blocks: a piece that starts at an odd byte position
+	 *  is summed separately and byte-swapped into place at the end
+	 */
+	evensum = 0;
+	oddsum = 0;
+	phase = 0;
+	while(len != 0){
+		take = len < avail ? len : avail;
+
+		part = ptclbsum(data, take);
+		if(phase)
+			oddsum += part;
+		else
+			evensum += part;
+		phase = (phase + take) & 1;
+		len -= take;
+
+		bp = bp->next;
+		if(bp == nil)
+			break;
+		avail = BLEN(bp);
+		data = bp->rp;
+	}
+
+	/* swap the odd-aligned sum, then fold the carries back in */
+	evensum += oddsum >> 8;
+	evensum += (oddsum & 0xff) << 8;
+	for(part = evensum >> 16; part != 0; part = evensum >> 16)
+		evensum = part + (evensum & 0xffff);
+
+	return ~evensum & 0xffff;
+}
+
+enum
+{
+	Isprefix= 16,	/* flag in prefixvals[]: the byte is a run of high one bits followed by zeros */
+};
+
+static uchar prefixvals[256] =	/* for a mask byte: number of leading one bits, ored with Isprefix; other bytes map to 0 */
+{
+[0x00] 0 | Isprefix,
+[0x80] 1 | Isprefix,
+[0xC0] 2 | Isprefix,
+[0xE0] 3 | Isprefix,
+[0xF0] 4 | Isprefix,
+[0xF8] 5 | Isprefix,
+[0xFC] 6 | Isprefix,
+[0xFE] 7 | Isprefix,
+[0xFF] 8 | Isprefix,
+};
+
+/*
+ *  print-format routine for network addresses, selected by f->r:
+ *	E	6-byte Ethernet address as 12 hex digits
+ *	I	16-byte IP address: dotted quad if it carries v4prefix,
+ *		otherwise colon separated hex groups with the longest
+ *		run of zero groups written as ::
+ *	i	v6 address held as 4 ulongs, printed like I
+ *	V	4-byte v4 address as dotted quad
+ *	M	16-byte mask: /n if it is a contiguous prefix,
+ *		otherwise printed like I
+ *  Any other verb prints "(eipfmt)".
+ */
+int
+eipfmt(Fmt *f)
+{
+	char buf[5*8];
+	static char *efmt = "%.2lux%.2lux%.2lux%.2lux%.2lux%.2lux";
+	static char *ifmt = "%d.%d.%d.%d";
+	uchar *a, tmp[16];
+	ulong *words;
+	ushort grp;
+	int i, j, len, runlen, runat;
+
+	switch(f->r) {
+	case 'E':		/* Ethernet address */
+		a = va_arg(f->args, uchar*);
+		return fmtprint(f, efmt, a[0], a[1], a[2], a[3], a[4], a[5]);
+
+	case 'I':		/* Ip address */
+		a = va_arg(f->args, uchar*);
+common:
+		if(memcmp(a, v4prefix, 12) == 0)
+			return fmtprint(f, ifmt, a[12], a[13], a[14], a[15]);
+
+		/* find longest elision: first longest run of zero groups */
+		runat = -1;
+		runlen = -1;
+		for(i = 0; i < 16; i += 2){
+			j = i;
+			while(j < 16 && a[j] == 0 && a[j+1] == 0)
+				j += 2;
+			if(j > i && j - i > runlen){
+				runat = i;
+				runlen = j - i;
+			}
+		}
+
+		/* print with possible elision */
+		len = 0;
+		i = 0;
+		while(i < 16){
+			if(i == runat){
+				len += sprint(buf+len, "::");
+				i += runlen;
+				if(i >= 16)
+					break;
+			} else if(i != 0)
+				len += sprint(buf+len, ":");
+			grp = (a[i]<<8) + a[i+1];
+			len += sprint(buf+len, "%ux", grp);
+			i += 2;
+		}
+		return fmtstrcpy(f, buf);
+
+	case 'i':		/* v6 address as 4 longs */
+		words = va_arg(f->args, ulong*);
+		for(i = 0; i < 4; i++){
+			hnputl(tmp+4*i, *words);
+			words++;
+		}
+		a = tmp;
+		goto common;
+
+	case 'V':		/* v4 ip address */
+		a = va_arg(f->args, uchar*);
+		return fmtprint(f, ifmt, a[0], a[1], a[2], a[3]);
+
+	case 'M':		/* ip mask */
+		a = va_arg(f->args, uchar*);
+
+		/* count the leading all-ones bytes */
+		i = 0;
+		while(i < 16 && a[i] == 0xff)
+			i++;
+		if(i >= 16)
+			return fmtprint(f, "/%d", 8*16);
+
+		/* next byte must be a prefix byte and all the rest zero */
+		if((prefixvals[a[i]] & Isprefix) == 0)
+			goto common;
+		for(j = i+1; j < 16; j++)
+			if(a[j] != 0)
+				goto common;
+
+		/* got one, use /xx format */
+		len = 8*i + (prefixvals[a[i]] & ~Isprefix);
+		return fmtprint(f, "/%d", len);
+	}
+	return fmtstrcpy(f, "(eipfmt)");
+}
+
+#define CLASS(p) ((*(uchar*)(p))>>6)
+
+/*
+ *  parse up to four '.'-separated numbers from `from' into to[0..3]
+ *  (strtoul base 0, so 0x and leading-0 forms are accepted) and
+ *  return a pointer just past what was consumed.  When fewer than
+ *  four fields were given, the fields are moved into place
+ *  according to the class of the first byte.
+ */
+extern char*
+v4parseip(uchar *to, char *from)
+{
+	char *s;
+	int nfield, cls;
+
+	s = from;
+	nfield = 0;
+	while(nfield < 4 && *s != '\0'){
+		to[nfield] = strtoul(s, &s, 0);
+		if(*s == '.')
+			s++;
+		nfield++;
+	}
+
+	cls = CLASS(to);
+	if(cls == 0 || cls == 1){
+		/* class A - 1 uchar net */
+		if(nfield == 3){
+			to[3] = to[2];
+			to[2] = to[1];
+			to[1] = 0;
+		} else if(nfield == 2){
+			to[3] = to[1];
+			to[1] = 0;
+		}
+	} else if(cls == 2){
+		/* class B - 2 uchar net */
+		if(nfield == 3){
+			to[3] = to[2];
+			to[2] = 0;
+		}
+	}
+	return s;
+}
+
+/*
+ *  true (1) if the first IPv4off bytes of ip equal v4prefix
+ */
+int
+isv4(uchar *ip)
+{
+	if(memcmp(ip, v4prefix, IPv4off) != 0)
+		return 0;
+	return 1;
+}
+
+
+/*
+ *  the following routines are unrolled with no memset's to speed
+ *  up the usual case
+ *
+ *  v4tov6 builds the 16-byte form of a 4-byte v4 address: ten zero
+ *  bytes, two 0xff bytes, then the four address bytes.
+ */
+void
+v4tov6(uchar *v6, uchar *v4)
+{
+	v6[0] = 0;
+	v6[1] = 0;
+	v6[2] = 0;
+	v6[3] = 0;
+	v6[4] = 0;
+	v6[5] = 0;
+	v6[6] = 0;
+	v6[7] = 0;
+	v6[8] = 0;
+	v6[9] = 0;
+	v6[10] = 0xff;
+	v6[11] = 0xff;
+	v6[12] = v4[0];
+	v6[13] = v4[1];
+	v6[14] = v4[2];
+	v6[15] = v4[3];
+}
+
+/*
+ *  extract the v4 address from the 16-byte form.  Returns 0 and
+ *  fills v4 when v6 starts with ten zero bytes and two 0xff bytes;
+ *  otherwise zeroes v4 and returns -1.  The prefix test is spelled
+ *  out byte by byte rather than calling memcmp.
+ */
+int
+v6tov4(uchar *v4, uchar *v6)
+{
+	if(v6[0] != 0
+	|| v6[1] != 0
+	|| v6[2] != 0
+	|| v6[3] != 0
+	|| v6[4] != 0
+	|| v6[5] != 0
+	|| v6[6] != 0
+	|| v6[7] != 0
+	|| v6[8] != 0
+	|| v6[9] != 0
+	|| v6[10] != 0xff
+	|| v6[11] != 0xff){
+		/* not a v4 address in v6 clothing */
+		memset(v4, 0, 4);
+		return -1;
+	}
+
+	v4[0] = v6[12];
+	v4[1] = v6[13];
+	v4[2] = v6[14];
+	v4[3] = v6[15];
+	return 0;
+}
+
+/*
+ *  parse a textual v4 or v6 address from `from' into the
+ *  IPaddrlen-byte buffer `to'.
+ *
+ *  Groups of hex digits separated by ':' are stored two bytes at a
+ *  time; "::" marks where zero bytes are to be inserted; a '.' after
+ *  a number (or a lone number with nothing after it) hands the rest
+ *  to v4parseip.  If no ':' was seen the result is treated as v4:
+ *  bytes 10 and 11 are set to 0xff and the last four bytes are
+ *  returned via nhgetl.  Otherwise 6 is returned.
+ *
+ *  There is no error return; malformed input yields whatever bytes
+ *  were parsed.
+ */
+ulong
+parseip(uchar *to, char *from)
+{
+	int i, elipsis = 0, v4 = 1;
+	ulong x;
+	char *p, *op;
+
+	memset(to, 0, IPaddrlen);
+	p = from;
+	for(i = 0; i < 16 && *p; i+=2){
+		op = p;
+		x = strtoul(p, &p, 16);
+		/*
+		 *  v4parseip stores 4 bytes at to+i, so a dotted quad is
+		 *  only accepted where it still fits.  Without the bound,
+		 *  input such as "1:2:3:4:5:6:7:1.2.3.4" reached here with
+		 *  i == 14 and wrote two bytes past the end of `to'.
+		 */
+		if((*p == '.' && i <= IPaddrlen-4) || (*p == 0 && i == 0)){
+			p = v4parseip(to+i, op);
+			i += 4;
+			break;
+		} else {
+			to[i] = x>>8;
+			to[i+1] = x;
+		}
+		if(*p == ':'){
+			v4 = 0;
+			if(*++p == ':'){
+				/* remember where the elided zeros go */
+				elipsis = i+2;
+				p++;
+			}
+		}
+	}
+	if(i < 16){
+		/* slide what follows the :: to the end and zero the gap */
+		memmove(&to[elipsis+16-i], &to[elipsis], i-elipsis);
+		memset(&to[elipsis], 0, 16-i);
+	}
+	if(v4){
+		to[10] = to[11] = 0xff;
+		return nhgetl(to+12);
+	} else
+		return 6;
+}
+
+/*
+ *  parse an address mask into `to'.
+ *
+ *  "/n" sets the first n bits (n is clamped to 0..128) and returns
+ *  nhgetl of the mask bytes at IPv4off.  Anything else is parsed by
+ *  parseip as a straight bit mask and parseip's value is returned;
+ *  as a hack to allow ip v4 masks to be entered in the old style,
+ *  a result that starts with v4prefix has its first IPv4off bytes
+ *  set to all ones.
+ */
+ulong
+parseipmask(uchar *to, char *from)
+{
+	ulong v;
+	int nbits, k;
+
+	if(*from != '/'){
+		/* as a straight bit mask */
+		v = parseip(to, from);
+		if(memcmp(to, v4prefix, IPv4off) == 0)
+			memset(to, 0xff, IPv4off);
+		return v;
+	}
+
+	/* as a number of prefix bits */
+	nbits = atoi(from+1);
+	if(nbits < 0)
+		nbits = 0;
+	if(nbits > 128)
+		nbits = 128;
+	memset(to, 0, IPaddrlen);
+	k = 0;
+	while(nbits >= 8){
+		to[k++] = 0xff;
+		nbits -= 8;
+	}
+	if(nbits > 0)
+		to[k] = ~((1<<(8-nbits))-1);
+	v = nhgetl(to+IPv4off);
+	return v;
+}
+
+/*
+ *  to = from & mask, one byte at a time over IPaddrlen bytes
+ */
+void
+maskip(uchar *from, uchar *mask, uchar *to)
+{
+	uchar *end;
+
+	for(end = to + IPaddrlen; to < end; to++)
+		*to = *from++ & *mask++;
+}
+
+uchar classmask[4][16] = {	/* default masks; defmask indexes this by the top 2 bits of the first v4 byte */
+	0xff,0xff,0xff,0xff,  0xff,0xff,0xff,0xff,  0xff,0xff,0xff,0xff,  0xff,0x00,0x00,0x00,	/* 00: 255.0.0.0 */
+	0xff,0xff,0xff,0xff,  0xff,0xff,0xff,0xff,  0xff,0xff,0xff,0xff,  0xff,0x00,0x00,0x00,	/* 01: 255.0.0.0 */
+	0xff,0xff,0xff,0xff,  0xff,0xff,0xff,0xff,  0xff,0xff,0xff,0xff,  0xff,0xff,0x00,0x00,	/* 10: 255.255.0.0 */
+	0xff,0xff,0xff,0xff,  0xff,0xff,0xff,0xff,  0xff,0xff,0xff,0xff,  0xff,0xff,0xff,0x00,	/* 11: 255.255.255.0 */
+};
+
+/*
+ *  default mask for an address.  For v4 it is the classmask entry
+ *  chosen by the top two bits of the first v4 byte.  For v6 it is
+ *  all bits for the loopback address, else the link-local,
+ *  site-local, solicited-node or multicast mask when ip starts with
+ *  that prefix (tested in that order), else all bits.
+ *  The value returned points at a shared global table.
+ */
+uchar*
+defmask(uchar *ip)
+{
+	if(isv4(ip))
+		return classmask[ip[IPv4off]>>6];
+
+	if(ipcmp(ip, v6loopback) == 0)
+		return IPallbits;
+	if(memcmp(ip, v6linklocal, v6llpreflen) == 0)
+		return v6linklocalmask;
+	if(memcmp(ip, v6sitelocal, v6slpreflen) == 0)
+		return v6sitelocalmask;
+	if(memcmp(ip, v6solicitednode, v6snpreflen) == 0)
+		return v6solicitednodemask;
+	if(memcmp(ip, v6multicast, v6mcpreflen) == 0)
+		return v6multicastmask;
+	return IPallbits;
+}
+
+/*
+ *  build the solicited-node multicast address for a: the
+ *  v6solicitednode template with its last three bytes taken from a
+ */
+void
+ipv62smcast(uchar *smcast, uchar *a)
+{
+	int i;
+
+	assert(IPaddrlen == 16);
+	memmove(smcast, v6solicitednode, IPaddrlen);
+	for(i = 13; i < 16; i++)
+		smcast[i] = a[i];
+}
+
+
+/*
+ *  parse a hex mac address: pairs of hex digits, each pair
+ *  optionally followed by ':'.  to[0..len-1] is zeroed first.
+ *  Stops after len bytes or when fewer than two characters remain;
+ *  returns the number of bytes converted.
+ */
+int
+parsemac(uchar *to, char *from, int len)
+{
+	char pair[4];
+	char *s;
+	int n;
+
+	memset(to, 0, len);
+	s = from;
+	n = 0;
+	while(n < len && s[0] != '\0' && s[1] != '\0'){
+		pair[0] = s[0];
+		pair[1] = s[1];
+		pair[2] = '\0';
+		s += 2;
+
+		to[n] = strtoul(pair, 0, 16);
+		n++;
+		if(*s == ':')
+			s++;
+	}
+	return n;
+}
+
+/*
+ *  hashing tcp, udp, ... connections
+ *
+ *  Mixes the last byte of each address with the two ports and
+ *  reduces the result modulo Nhash, giving an index into an Ipht
+ *  table.  The arithmetic is done in ulong: the operands would
+ *  otherwise be promoted to int, where << 24 can set the sign bit
+ *  and a negative value % Nhash is not a usable index.
+ */
+ulong
+iphash(uchar *sa, ushort sp, uchar *da, ushort dp)
+{
+	ulong h;
+
+	h = (ulong)sa[IPaddrlen-1] << 24;
+	h ^= (ulong)sp << 16;
+	h ^= (ulong)da[IPaddrlen-1] << 8;
+	h ^= dp;
+	return h % Nhash;
+}
+
+/*
+ *  enter conversation c into hash table ht, hashed on its remote
+ *  and local address and port.  The entry records how specific the
+ *  conversation is: IPmatchexact when raddr is set, otherwise one
+ *  of IPmatchpa, IPmatchaddr, IPmatchport or IPmatchany depending
+ *  on whether laddr and lport are set.
+ */
+void
+iphtadd(Ipht *ht, Conv *c)
+{
+	Iphash *ent;
+	ulong slot;
+
+	slot = iphash(c->raddr, c->rport, c->laddr, c->lport);
+	ent = smalloc(sizeof(*ent));
+	if(ipcmp(c->raddr, IPnoaddr) != 0)
+		ent->match = IPmatchexact;
+	else if(ipcmp(c->laddr, IPnoaddr) != 0)
+		ent->match = c->lport == 0 ? IPmatchaddr : IPmatchpa;
+	else
+		ent->match = c->lport == 0 ? IPmatchany : IPmatchport;
+	ent->c = c;
+
+	/* push onto the front of the chain */
+	lock(ht);
+	ent->next = ht->tab[slot];
+	ht->tab[slot] = ent;
+	unlock(ht);
+}
+
+/*
+ *  remove the first entry for conversation c from hash table ht
+ *  and free it.  The chain searched is the one c hashes to now, so
+ *  c's addresses and ports must be unchanged since iphtadd.
+ */
+void
+iphtrem(Ipht *ht, Conv *c)
+{
+	Iphash **link, *ent;
+	ulong slot;
+
+	slot = iphash(c->raddr, c->rport, c->laddr, c->lport);
+	lock(ht);
+	link = &ht->tab[slot];
+	while((ent = *link) != nil){
+		if(ent->c == c){
+			*link = ent->next;
+			free(ent);
+			break;
+		}
+		link = &ent->next;
+	}
+	unlock(ht);
+}
+
+/* look for a matching conversation with the following precedence
+ *	connected && raddr,rport,laddr,lport
+ *	announced && laddr,lport
+ *	announced && *,lport
+ *	announced && laddr,*
+ *	announced && *,*
+ * Each stage rehashes with the wildcarded fields replaced by
+ * IPnoaddr or 0 and scans that chain.  Returns the first match,
+ * or nil.  ht is locked for the whole search.
+ */
+Conv*
+iphtlook(Ipht *ht, uchar *sa, ushort sp, uchar *da, ushort dp)
+{
+	Iphash *e;
+	Conv *cv;
+
+	lock(ht);
+
+	/* exact 4 pair match (connection) */
+	for(e = ht->tab[iphash(sa, sp, da, dp)]; e != nil; e = e->next){
+		if(e->match != IPmatchexact)
+			continue;
+		cv = e->c;
+		if(sp == cv->rport && dp == cv->lport
+		&& ipcmp(sa, cv->raddr) == 0 && ipcmp(da, cv->laddr) == 0)
+			goto found;
+	}
+
+	/* match local address and port */
+	for(e = ht->tab[iphash(IPnoaddr, 0, da, dp)]; e != nil; e = e->next){
+		if(e->match != IPmatchpa)
+			continue;
+		cv = e->c;
+		if(dp == cv->lport && ipcmp(da, cv->laddr) == 0)
+			goto found;
+	}
+
+	/* match just port */
+	for(e = ht->tab[iphash(IPnoaddr, 0, IPnoaddr, dp)]; e != nil; e = e->next){
+		if(e->match != IPmatchport)
+			continue;
+		cv = e->c;
+		if(dp == cv->lport)
+			goto found;
+	}
+
+	/* match local address */
+	for(e = ht->tab[iphash(IPnoaddr, 0, da, 0)]; e != nil; e = e->next){
+		if(e->match != IPmatchaddr)
+			continue;
+		cv = e->c;
+		if(ipcmp(da, cv->laddr) == 0)
+			goto found;
+	}
+
+	/* look for something that matches anything */
+	for(e = ht->tab[iphash(IPnoaddr, 0, IPnoaddr, 0)]; e != nil; e = e->next){
+		if(e->match != IPmatchany)
+			continue;
+		cv = e->c;
+		goto found;
+	}
+	cv = nil;
+
+found:
+	unlock(ht);
+	return cv;
+}
--- /dev/null
+++ b/os/ip.original/ipifc.c
@@ -1,0 +1,1767 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+
+#include "ip.h"
+#include "ipv6.h"
+
+#define DPRINT if(0)print
+
+enum {
+	Maxmedia = 32,		/* slots in media[]; also the ipifc protocol's nc (see ipifcinit) */
+	Nself = Maxmedia*5,
+	NHASH = (1<<6),		/* chains in Ipselftab.hash; modulus of hashipa */
+	NCACHE = 256,
+	QMAX = 64*1024-1,	/* queue limit handed to qopen in ipifccreate */
+};
+
+Medium *media[Maxmedia] =	/* registered media; addipmedium never fills the last slot, so the list stays nil terminated */
+{
+	0
+};
+
+/*
+ *  cache of local addresses (addresses we answer to)
+ *
+ *  One Ipselftab per Fs, allocated in ipifcinit as f->self.
+ */
+struct Ipself
+{
+	uchar	a[IPaddrlen];
+	Ipself	*hnext;		/* next address in the hash table */
+	Iplink	*link;		/* binding twixt Ipself and Ipifc */
+	ulong	expire;
+	uchar	type;		/* type of address */
+	int	ref;
+	Ipself	*next;		/* free list */
+};
+
+struct Ipselftab
+{
+	QLock;
+	int	inited;
+	int	acceptall;	/* true if an interface has the null address */
+	Ipself	*hash[NHASH];	/* hash chains */
+};
+
+/*
+ *  Multicast addresses are chained onto a Chan so that
+ *  we can remove them when the Chan is closed.
+ */
+typedef struct Ipmcast Ipmcast;
+struct Ipmcast
+{
+	Ipmcast	*next;
+	uchar	ma[IPaddrlen];	/* multicast address */
+	uchar	ia[IPaddrlen];	/* interface address */
+};
+
+/* quick hash for ip addresses */
+#define hashipa(a) ( ( ((a)[IPaddrlen-2]<<8) | (a)[IPaddrlen-1] )%NHASH )
+
+static char tifc[] = "ifc ";
+
+static void	addselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a, int type);
+static void	remselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a);
+static char*	ipifcjoinmulti(Ipifc *ifc, char **argv, int argc);
+static char*	ipifcleavemulti(Ipifc *ifc, char **argv, int argc);
+static void	ipifcregisterproxy(Fs*, Ipifc*, uchar*);
+static char*	ipifcremlifc(Ipifc*, Iplifc*);
+
+/*
+ *  link in a new medium: store med in the first free slot of
+ *  media[].  The last slot is never used, so the table stays nil
+ *  terminated; if no slot is free med is silently dropped.
+ */
+void
+addipmedium(Medium *med)
+{
+	Medium **slot, **last;
+
+	last = &media[nelem(media)-1];
+	for(slot = media; slot < last; slot++){
+		if(*slot != nil)
+			continue;
+		*slot = med;
+		return;
+	}
+}
+
+/*
+ *  find the medium with this name; returns nil if media[] has
+ *  no entry called name
+ */
+Medium*
+ipfindmedium(char *name)
+{
+	Medium **mp;
+
+	mp = media;
+	while(*mp != nil && strcmp((*mp)->name, name) != 0)
+		mp++;
+	return *mp;
+}
+
+/*
+ *  attach a device (or pkt driver) to the interface.
+ *  called with c locked
+ *
+ *  argv[1] names the medium; argv[2], if present, becomes the
+ *  device name, otherwise the name is the medium name followed by
+ *  c->x.  Returns nil on success or an error string.  An error
+ *  raised by the medium's bind routine is passed on with nexterror
+ *  after the interface lock has been released.
+ *
+ *  NOTE(review): ipifcunbind closes rq, wq and sq, while rq, eq and
+ *  sq are reopened here -- confirm that wq is reopened elsewhere.
+ */
+static char*
+ipifcbind(Conv *c, char **argv, int argc)
+{
+	Ipifc *ifc;
+	Medium *m;
+
+	if(argc < 2)
+		return Ebadarg;
+
+	ifc = (Ipifc*)c->ptcl;
+
+	/* bind the device to the interface */
+	m = ipfindmedium(argv[1]);
+	if(m == nil)
+		return "unknown interface type";
+
+	wlock(ifc);
+	if(ifc->m != nil){
+		wunlock(ifc);
+		return "interface already bound";	
+	}
+	if(waserror()){
+		wunlock(ifc);	/* an error below must not leave ifc locked */
+		nexterror();
+	}
+
+	/* do medium specific binding */
+	(*m->bind)(ifc, argc, argv);
+
+	/* set the bound device name */
+	if(argc > 2)
+		strncpy(ifc->dev, argv[2], sizeof(ifc->dev));
+	else
+		snprint(ifc->dev, sizeof ifc->dev, "%s%d", m->name, c->x);
+	ifc->dev[sizeof(ifc->dev)-1] = 0;	/* strncpy does not terminate a name that fills the buffer */
+
+	/* set up parameters */
+	ifc->m = m;
+	ifc->mintu = ifc->m->mintu;
+	ifc->maxtu = ifc->m->maxtu;
+	if(ifc->m->unbindonclose == 0)
+		ifc->conv->inuse++;	/* undone by ipifcunbind */
+	ifc->rp.mflag = 0;		// default not managed
+	ifc->rp.oflag = 0;
+	ifc->rp.maxraint = 600000;	// millisecs
+	ifc->rp.minraint = 200000;
+	ifc->rp.linkmtu = 0;		// no mtu sent
+	ifc->rp.reachtime = 0;
+	ifc->rp.rxmitra = 0;
+	ifc->rp.ttl = MAXTTL;
+	ifc->rp.routerlt = 3*(ifc->rp.maxraint);
+
+	/* any ancillary structures (like routes) no longer pertain */
+	ifc->ifcid++;
+
+	/* reopen all the queues closed by a previous unbind */
+	qreopen(c->rq);
+	qreopen(c->eq);
+	qreopen(c->sq);
+
+	wunlock(ifc);
+	poperror();
+
+	return nil;
+}
+
+/*
+ *  detach a device from an interface, close the interface
+ *  called with ifc->conv closed
+ *
+ *  Takes the interface write lock itself.  Calls the medium's
+ *  unbind routine if there is one, clears the device name, closes
+ *  the conversation's rq, wq and sq, removes every logical
+ *  interface and finally clears ifc->m.  Returns nil; a failure
+ *  removing a logical interface is raised with error() instead.
+ */
+static char*
+ipifcunbind(Ipifc *ifc)
+{
+	char *err;
+
+	if(waserror()){
+		wunlock(ifc);	/* an error below must not leave ifc locked */
+		nexterror();
+	}
+	wlock(ifc);
+
+	/* dissociate routes */
+	if(ifc->m != nil && ifc->m->unbindonclose == 0)
+		ifc->conv->inuse--;	/* matches the inuse++ in ipifcbind */
+	ifc->ifcid++;
+
+	/* disassociate device */
+	if(ifc->m != nil && ifc->m->unbind)
+		(*ifc->m->unbind)(ifc);
+	memset(ifc->dev, 0, sizeof(ifc->dev));
+	ifc->arg = nil;
+	ifc->reassemble = 0;
+
+	/* close queues to stop queuing of packets */
+	qclose(ifc->conv->rq);
+	qclose(ifc->conv->wq);
+	qclose(ifc->conv->sq);
+
+	/* disassociate logical interfaces */
+	while(ifc->lifc){
+		err = ipifcremlifc(ifc, ifc->lifc);	/* unlinks and frees the head of the list */
+		if(err)
+			error(err);
+	}
+
+	ifc->m = nil;
+	wunlock(ifc);
+	poperror();
+	return nil;
+}
+
+
+
+char sfixedformat[] = "device %s maxtu %d sendra %d recvra %d mflag %d oflag %d maxraint %d minraint %d linkmtu %d reachtime %d rxmitra %d ttl %d routerlt %d pktin %lud pktout %lud errin %lud errout %lud\n";	/* interface-wide line printed by ipifcstate */
+
+char slineformat[] = "	%-40I %-10M %-40I %-12lud %-12lud\n";	/* per-address line in ipifcstate: local, mask, remote, validlt, preflt */
+
+
+/*
+ *  fill state (n bytes) with the interface's status text: one line
+ *  of interface-wide parameters in sfixedformat, then one line per
+ *  logical interface in slineformat, or an empty line when there
+ *  is none.  Returns the sum of the snprint results.
+ */
+static int
+ipifcstate(Conv *c, char *state, int n)
+{
+	Ipifc *ifc;
+	Iplifc *lifc;
+	int len;
+
+	ifc = (Ipifc*)c->ptcl;
+
+	len = snprint(state, n, sfixedformat,
+		ifc->dev, ifc->maxtu, ifc->sendra6, ifc->recvra6,
+		ifc->rp.mflag, ifc->rp.oflag, ifc->rp.maxraint,
+		ifc->rp.minraint, ifc->rp.linkmtu, ifc->rp.reachtime,
+		ifc->rp.rxmitra, ifc->rp.ttl, ifc->rp.routerlt,
+		ifc->in, ifc->out, ifc->inerr, ifc->outerr);
+
+	rlock(ifc);
+	lifc = ifc->lifc;
+	while(lifc != nil && n > len){
+		len += snprint(state+len, n - len, slineformat,
+			lifc->local, lifc->mask, lifc->remote,
+			lifc->validlt, lifc->preflt);
+		lifc = lifc->next;
+	}
+	if(ifc->lifc == nil)
+		len += snprint(state+len, n - len, "\n");
+	runlock(ifc);
+	return len;
+}
+
+/*
+ *  fill state (n bytes) with one line per logical interface: its
+ *  local address, "->", then every address linked to it through
+ *  the lifc's link chain.  Returns the sum of the snprint results.
+ */
+static int
+ipifclocal(Conv *c, char *state, int n)
+{
+	Ipifc *ifc;
+	Iplifc *lifc;
+	Iplink *lk;
+	int len;
+
+	ifc = (Ipifc*)c->ptcl;
+	len = 0;
+
+	rlock(ifc);
+	lifc = ifc->lifc;
+	while(lifc != nil){
+		len += snprint(state+len, n - len, "%-40.40I ->", lifc->local);
+		lk = lifc->link;
+		while(lk != nil){
+			len += snprint(state+len, n - len, " %-40.40I", lk->self->a);
+			lk = lk->lifclink;
+		}
+		len += snprint(state+len, n - len, "\n");
+		lifc = lifc->next;
+	}
+	runlock(ifc);
+	return len;
+}
+
+/*
+ *  an interface counts as in use while a medium is bound to it
+ */
+static int
+ipifcinuse(Conv *c)
+{
+	Ipifc *ifc = (Ipifc*)c->ptcl;
+
+	if(ifc->m == nil)
+		return 0;
+	return 1;
+}
+
+/*
+ *  called when a process writes to an interface's 'data'
+ *  (installed on c->wq by the qopen call in ipifccreate).
+ *
+ *  Takes one block off c->wq and hands it to the bound medium's
+ *  pktin routine.  The block is freed instead if the interface
+ *  cannot be read-locked without waiting, or if no medium with a
+ *  pktin routine is bound.
+ */
+static void
+ipifckick(void *x)
+{
+	Conv *c = x;
+	Block *bp;
+	Ipifc *ifc;
+
+	bp = qget(c->wq);
+	if(bp == nil)
+		return;
+
+	ifc = (Ipifc*)c->ptcl;
+	if(!canrlock(ifc)){	/* don't wait for the lock; drop the packet */
+		freeb(bp);
+		return;
+	}
+	if(waserror()){
+		runlock(ifc);	/* an error in pktin must not leave ifc locked */
+		nexterror();
+	}
+	if(ifc->m == nil || ifc->m->pktin == nil)
+		freeb(bp);
+	else
+		(*ifc->m->pktin)(c->p->f, ifc, bp);
+	runlock(ifc);
+	poperror();
+}
+
+/*
+ *  called when a new ipifc structure is created:
+ *  marks the interface unbound and opens the conversation's
+ *  queues; wq gets ipifckick as its kick routine.
+ */
+static void
+ipifccreate(Conv *c)
+{
+	Ipifc *ifc;
+
+	/* queues first, in the same order as always */
+	c->rq = qopen(QMAX, 0, 0, 0);
+	c->sq = qopen(2*QMAX, 0, 0, 0);
+	c->wq = qopen(QMAX, Qkick, ipifckick, c);
+
+	/* then the interface starts out with no medium */
+	ifc = (Ipifc*)c->ptcl;
+	ifc->m = nil;
+	ifc->conv = c;
+	ifc->reassemble = 0;
+	ifc->unbinding = 0;
+}
+
+/*
+ *  called after last close of ipifc data or ctl.
+ *  Unbinds the interface when its medium has unbindonclose set.
+ *
+ *  NOTE(review): this comment used to read "called with c locked,
+ *  we must unlock", but nothing in this routine unlocks c --
+ *  confirm against the caller.
+ */
+static void
+ipifcclose(Conv *c)
+{
+	Ipifc *ifc;
+	Medium *m;
+
+	ifc = (Ipifc*)c->ptcl;
+	m = ifc->m;
+	if(m == nil || m->unbindonclose == 0)
+		return;
+	ipifcunbind(ifc);
+}
+
+/*
+ *  change an interface's mtu.  argv[1] is the new value; it must
+ *  lie within the bound medium's mintu..maxtu.  Returns nil on
+ *  success, Ebadarg if the argument is missing or out of range or
+ *  no medium is bound.
+ */
+char*
+ipifcsetmtu(Ipifc *ifc, char **argv, int argc)
+{
+	int want;
+
+	if(argc < 2 || ifc->m == nil)
+		return Ebadarg;
+
+	want = strtoul(argv[1], 0, 0);
+	if(want >= ifc->m->mintu && want <= ifc->m->maxtu){
+		ifc->maxtu = want;
+		return nil;
+	}
+	return Ebadarg;
+}
+
+/*
+ *  add an address to an interface.
+ *
+ *  argv is { verb, ip [, mask [, remote [, mtu [, "proxy" ]]]] }.
+ *  Without a mask, defmask(ip) is used; without a remote address,
+ *  the masked ip is used.  An mtu outside the medium's
+ *  mintu..maxtu is ignored.
+ *
+ *  tentative is stored in the logical interface (forced to 0 for a
+ *  v4 address).  lifcp, when not nil, supplies onlink, autoflag,
+ *  validlt, preflt and origint; otherwise defaults are used.
+ *
+ *  If ip is already a local address of ifc only those fields are
+ *  refreshed.  Otherwise a new Iplifc is appended, the local route
+ *  is added and the self cache is filled with the broadcast (v4)
+ *  or multicast (v6) addresses that go with it.
+ *
+ *  Takes the interface write lock itself.  Returns nil on success
+ *  or an error string.
+ */
+char*
+ipifcadd(Ipifc *ifc, char **argv, int argc, int tentative, Iplifc *lifcp)
+{
+	uchar ip[IPaddrlen], mask[IPaddrlen], rem[IPaddrlen];
+	uchar bcast[IPaddrlen], net[IPaddrlen];
+	Iplifc *lifc, **l;
+	int i, type, mtu;
+	Fs *f;
+	int sendnbrdisc = 0;
+
+	if(ifc->m == nil)
+		return "ipifc not yet bound to device";
+
+	f = ifc->conv->p->f;
+
+	type = Rifc;
+	memset(ip, 0, IPaddrlen);
+	memset(mask, 0, IPaddrlen);
+	memset(rem, 0, IPaddrlen);
+	switch(argc){
+	case 6:
+		if(strcmp(argv[5], "proxy") == 0)
+			type |= Rproxy;
+		/* fall through */
+	case 5:
+		mtu = strtoul(argv[4], 0, 0);
+		if(mtu >= ifc->m->mintu && mtu <= ifc->m->maxtu)
+			ifc->maxtu = mtu;
+		/* fall through */
+	case 4:
+		parseip(ip, argv[1]);
+		parseipmask(mask, argv[2]);
+		parseip(rem, argv[3]);
+		maskip(rem, mask, net);
+		break;
+	case 3:
+		parseip(ip, argv[1]);
+		parseipmask(mask, argv[2]);
+		maskip(ip, mask, rem);
+		maskip(rem, mask, net);
+		break;
+	case 2:
+		parseip(ip, argv[1]);
+		memmove(mask, defmask(ip), IPaddrlen);
+		maskip(ip, mask, rem);
+		maskip(rem, mask, net);
+		break;
+	default:
+		return Ebadarg;
+	}
+	if(isv4(ip))
+		tentative = 0;
+	wlock(ifc);
+
+	/* ignore if this is already a local address for this ifc */
+	for(lifc = ifc->lifc; lifc; lifc = lifc->next) {
+		if(ipcmp(lifc->local, ip) == 0) {
+			if(lifc->tentative != tentative)
+				lifc->tentative = tentative;
+			if(lifcp != nil) {
+				lifc->onlink = lifcp->onlink;
+				lifc->autoflag = lifcp->autoflag;
+				lifc->validlt = lifcp->validlt;
+				lifc->preflt = lifcp->preflt;
+				lifc->origint = lifcp->origint;
+			}
+			goto out;
+		}
+	}
+
+	/* add the address to the list of logical ifc's for this ifc */
+	lifc = smalloc(sizeof(Iplifc));
+	ipmove(lifc->local, ip);
+	ipmove(lifc->mask, mask);
+	ipmove(lifc->remote, rem);
+	ipmove(lifc->net, net);
+	lifc->tentative = tentative;
+	if(lifcp != nil) {
+		lifc->onlink = lifcp->onlink;
+		lifc->autoflag = lifcp->autoflag;
+		lifc->validlt = lifcp->validlt;
+		lifc->preflt = lifcp->preflt;
+		lifc->origint = lifcp->origint;
+	}
+	else {		// default values
+		lifc->onlink = 1;
+		lifc->autoflag = 1;
+		lifc->validlt = 0xffffffff;
+		lifc->preflt = 0xffffffff;
+		/*
+		 *  this used to read NOW / 10^3; in C ^ is exclusive or,
+		 *  so that was (NOW/10) xor 3, not a division by 1000
+		 */
+		lifc->origint = NOW / 1000;
+	}
+	lifc->next = nil;
+
+	/* append to the end of the interface's list */
+	for(l = &ifc->lifc; *l; l = &(*l)->next)
+		;
+	*l = lifc;
+
+	/* check for point-to-point interface */
+	if(ipcmp(ip, v6loopback))  /* skip v6 loopback, it's a special address */
+	if(ipcmp(mask, IPallbits) == 0)
+		type |= Rptpt;
+
+	/* add local routes */
+	if(isv4(ip))
+		v4addroute(f, tifc, rem+IPv4off, mask+IPv4off, rem+IPv4off, type);
+	else
+		v6addroute(f, tifc, rem, mask, rem, type);
+
+	addselfcache(f, ifc, lifc, ip, Runi);
+
+	/* a proxied point-to-point link registers the remote end and is done */
+	if((type & (Rproxy|Rptpt)) == (Rproxy|Rptpt)){
+		ipifcregisterproxy(f, ifc, rem);
+		goto out;
+	}
+
+	if(isv4(ip) || ipcmp(ip, IPnoaddr) == 0) {
+		/* add subnet directed broadcast address to the self cache */
+		for(i = 0; i < IPaddrlen; i++)
+			bcast[i] = (ip[i] & mask[i]) | ~mask[i];
+		addselfcache(f, ifc, lifc, bcast, Rbcast);
+
+		/* add subnet directed network address to the self cache */
+		for(i = 0; i < IPaddrlen; i++)
+			bcast[i] = (ip[i] & mask[i]) & mask[i];
+		addselfcache(f, ifc, lifc, bcast, Rbcast);
+
+		/* add network directed broadcast address to the self cache */
+		memmove(mask, defmask(ip), IPaddrlen);
+		for(i = 0; i < IPaddrlen; i++)
+			bcast[i] = (ip[i] & mask[i]) | ~mask[i];
+		addselfcache(f, ifc, lifc, bcast, Rbcast);
+
+		/* add network directed network address to the self cache */
+		memmove(mask, defmask(ip), IPaddrlen);
+		for(i = 0; i < IPaddrlen; i++)
+			bcast[i] = (ip[i] & mask[i]) & mask[i];
+		addselfcache(f, ifc, lifc, bcast, Rbcast);
+		
+		addselfcache(f, ifc, lifc, IPv4bcast, Rbcast);
+	}
+	else {
+		if(ipcmp(ip, v6loopback) == 0) {
+			/* add node-local mcast address */
+			addselfcache(f, ifc, lifc, v6allnodesN, Rmulti);
+
+			/* add route for all node multicast */
+			v6addroute(f, tifc, v6allnodesN, v6allnodesNmask, v6allnodesN, Rmulti);
+		}
+
+		/* add all nodes multicast address */
+		addselfcache(f, ifc, lifc, v6allnodesL, Rmulti);
+		
+		/* add route for all nodes multicast */
+		v6addroute(f, tifc, v6allnodesL, v6allnodesLmask, v6allnodesL, Rmulti);
+		
+		/* add solicited-node multicast address */
+		ipv62smcast(bcast, ip);
+		addselfcache(f, ifc, lifc, bcast, Rmulti);
+
+		sendnbrdisc = 1;
+	}
+
+	/* register the address on this network for address resolution */
+	if(isv4(ip) && ifc->m->areg != nil)
+		(*ifc->m->areg)(ifc, ip);
+
+out:
+	wunlock(ifc);
+	/* the neighbour solicitation for a tentative v6 address goes out after the lock is dropped */
+	if(tentative && sendnbrdisc)
+		icmpns(f, 0, SRC_UNSPEC, ip, TARG_MULTI, ifc->mac);
+	return nil;
+}
+
+/*
+ *  remove a logical interface from an ifc
+ *  always called with ifc wlock'd
+ *
+ *  Unlinks lifc from ifc's list, drops every self-cache entry
+ *  linked to it, deletes its route (and, for v6 loopback or
+ *  link-local addresses, the matching all-nodes multicast route)
+ *  and frees it.  Returns nil, or an error string if lifc is not
+ *  on the list (which includes lifc == nil).
+ */
+static char*
+ipifcremlifc(Ipifc *ifc, Iplifc *lifc)
+{
+	Iplifc **link;
+	Fs *f;
+
+	f = ifc->conv->p->f;
+
+	/* find lifc on this interface and remove it from the chain */
+	link = &ifc->lifc;
+	while(*link != nil && *link != lifc)
+		link = &(*link)->next;
+	if(*link == nil)
+		return "address not on this interface";
+	*link = lifc->next;
+
+	/* disassociate any addresses */
+	while(lifc->link)
+		remselfcache(f, ifc, lifc, lifc->link->self->a);
+
+	/* remove the route for this logical interface */
+	if(isv4(lifc->local)){
+		v4delroute(f, lifc->remote+IPv4off, lifc->mask+IPv4off, 1);
+	} else {
+		v6delroute(f, lifc->remote, lifc->mask, 1);
+		if(ipcmp(lifc->local, v6loopback) == 0){
+			/* remove route for all node multicast */
+			v6delroute(f, v6allnodesN, v6allnodesNmask, 1);
+		} else if(memcmp(lifc->local, v6linklocal, v6llpreflen) == 0){
+			/* remove route for all link multicast */
+			v6delroute(f, v6allnodesL, v6allnodesLmask, 1);
+		}
+	}
+
+	free(lifc);
+	return nil;
+}
+
+/*
+ *  remove an address from an interface.
+ *  called with c locked
+ *
+ *  argv is { verb, ip, mask [, remote] }; without a remote address
+ *  the masked ip is used.  The logical interface removed is the
+ *  one whose local address, mask and remote address all match.
+ *  Returns whatever ipifcremlifc returns; when nothing matches
+ *  that is its "address not on this interface" error.
+ */
+char*
+ipifcrem(Ipifc *ifc, char **argv, int argc)
+{
+	uchar ip[IPaddrlen];
+	uchar mask[IPaddrlen];
+	uchar rem[IPaddrlen];
+	Iplifc *lifc;
+	char *err;
+
+	if(argc < 3)
+		return Ebadarg;
+
+	parseip(ip, argv[1]);
+	parseipmask(mask, argv[2]);
+	if(argc >= 4)
+		parseip(rem, argv[3]);
+	else
+		maskip(ip, mask, rem);
+
+	wlock(ifc);
+
+	/* look for the logical interface with exactly these addresses */
+	lifc = ifc->lifc;
+	while(lifc != nil){
+		if(memcmp(ip, lifc->local, IPaddrlen) == 0
+		&& memcmp(mask, lifc->mask, IPaddrlen) == 0
+		&& memcmp(rem, lifc->remote, IPaddrlen) == 0)
+			break;
+		lifc = lifc->next;
+	}
+
+	err = ipifcremlifc(ifc, lifc);
+	wunlock(ifc);
+	return err;
+}
+
+/*
+ * distribute routes to active interfaces like the
+ * TRIP linecards: call the addroute routine of every bound
+ * medium that has one
+ */
+void
+ipifcaddroute(Fs *f, int vers, uchar *addr, uchar *mask, uchar *gate, int type)
+{
+	Medium *m;
+	Conv **cp, **end;
+	Ipifc *ifc;
+
+	end = &f->ipifc->conv[f->ipifc->nc];
+	for(cp = f->ipifc->conv; cp < end; cp++){
+		if(*cp == nil)
+			continue;
+		ifc = (Ipifc*)(*cp)->ptcl;
+		m = ifc->m;
+		if(m != nil && m->addroute != nil)
+			m->addroute(ifc, vers, addr, mask, gate, type);
+	}
+}
+
+/*
+ *  counterpart of ipifcaddroute: call the remroute routine of
+ *  every bound medium that has one
+ */
+void
+ipifcremroute(Fs *f, int vers, uchar *addr, uchar *mask)
+{
+	Medium *m;
+	Conv **cp, **end;
+	Ipifc *ifc;
+
+	end = &f->ipifc->conv[f->ipifc->nc];
+	for(cp = f->ipifc->conv; cp < end; cp++){
+		if(*cp == nil)
+			continue;
+		ifc = (Ipifc*)(*cp)->ptcl;
+		m = ifc->m;
+		if(m != nil && m->remroute != nil)
+			m->remroute(ifc, vers, addr, mask);
+	}
+}
+
+/*
+ *  associate an address with the interface.  This wipes out any previous
+ *  addresses.  This is a macro that means, remove all the old interfaces
+ *  and add a new one.
+ *
+ *  argv and argc are passed on to ipifcadd unchanged.  Returns nil
+ *  after marking the conversation connected, or an error string.
+ *  A failure removing an old address is raised with error().
+ */
+static char*
+ipifcconnect(Conv* c, char **argv, int argc)
+{
+	char *err;
+	Ipifc *ifc;
+
+	ifc = (Ipifc*)c->ptcl;
+
+	if(ifc->m == nil)
+		 return "ipifc not yet bound to device";
+
+	if(waserror()){
+		wunlock(ifc);	/* an error below must not leave ifc locked */
+		nexterror();
+	}
+	wlock(ifc);
+	while(ifc->lifc){
+		err = ipifcremlifc(ifc, ifc->lifc);	/* unlinks and frees the head of the list */
+		if(err)
+			error(err);
+	}
+	wunlock(ifc);
+	poperror();
+
+	err = ipifcadd(ifc, argv, argc, 0, nil);	/* takes the write lock itself */
+	if(err)
+		return err;
+
+	Fsconnected(c, nil);
+
+	return nil;
+}
+
+/*
+ *  set v6 interface parameters from "name value" pairs in
+ *  argv[1..].  Returns Ebadarg for an odd number of words or an
+ *  unknown name; pairs before an unknown name stay applied.  If
+ *  the result has maxraint < minraint, those two are put back to
+ *  the values they had on entry and Ebadarg is returned.
+ */
+char*
+ipifcsetpar6(Ipifc *ifc, char **argv, int argc)
+{
+	int i, v, oldmax, oldmin;
+	char *key;
+
+	oldmax = ifc->rp.maxraint;
+	oldmin = ifc->rp.minraint;
+
+	if((argc - 1) % 2 != 0)
+		return Ebadarg;
+
+	for(i = 1; argc - i > 1; i += 2){
+		key = argv[i];
+		v = atoi(argv[i+1]);
+
+		if(strcmp(key, "recvra") == 0)
+			ifc->recvra6 = (v != 0);
+		else if(strcmp(key, "sendra") == 0)
+			ifc->sendra6 = (v != 0);
+		else if(strcmp(key, "mflag") == 0)
+			ifc->rp.mflag = (v != 0);
+		else if(strcmp(key, "oflag") == 0)
+			ifc->rp.oflag = (v != 0);
+		else if(strcmp(key, "maxraint") == 0)
+			ifc->rp.maxraint = v;
+		else if(strcmp(key, "minraint") == 0)
+			ifc->rp.minraint = v;
+		else if(strcmp(key, "linkmtu") == 0)
+			ifc->rp.linkmtu = v;
+		else if(strcmp(key, "reachtime") == 0)
+			ifc->rp.reachtime = v;
+		else if(strcmp(key, "rxmitra") == 0)
+			ifc->rp.rxmitra = v;
+		else if(strcmp(key, "ttl") == 0)
+			ifc->rp.ttl = v;
+		else if(strcmp(key, "routerlt") == 0)
+			ifc->rp.routerlt = v;
+		else
+			return Ebadarg;	
+	}
+
+	// consistency check
+	if(ifc->rp.maxraint < ifc->rp.minraint) {
+		ifc->rp.maxraint = oldmax;
+		ifc->rp.minraint = oldmin;
+		return Ebadarg;
+	}
+
+	return nil;
+}
+
+/*
+ *  set the interface's sendra6 flag: 1 if argv[1] is present and
+ *  atoi of it is non-zero, else 0.  Always returns nil.
+ */
+char*
+ipifcsendra6(Ipifc *ifc, char **argv, int argc)
+{
+	int on;
+
+	on = argc > 1 ? atoi(argv[1]) : 0;
+	ifc->sendra6 = (on != 0);
+	return nil;
+}
+
+/*
+ *  set the interface's recvra6 flag: 1 if argv[1] is present and
+ *  atoi of it is non-zero, else 0.  Always returns nil.
+ */
+char*
+ipifcrecvra6(Ipifc *ifc, char **argv, int argc)
+{
+	int on;
+
+	on = argc > 1 ? atoi(argv[1]) : 0;
+	ifc->recvra6 = (on != 0);
+	return nil;
+}
+
+/*
+ *  "nat" control message.
+ *	nat show
+ *	nat flush
+ *	nat add src mask dst
+ *	nat remove src mask dst
+ *  dst must be a local address on ifc.  Returns nil on success,
+ *  Ebadip for an address problem, Ebadarg otherwise.
+ *
+ *  NOTE(review): the parse results are compared with -1, but the
+ *  parseip and parseipmask defined in ipaux.c return ulong and
+ *  have no error value; there the test is only true for a result
+ *  of 0xffffffff (e.g. 255.255.255.255) -- confirm the intended
+ *  declarations.
+ */
+char*
+ipifcnat(Ipifc *ifc, char **argv, int argc)
+{
+	uchar src[IPaddrlen], mask[IPaddrlen], dst[IPaddrlen];
+	Iplifc *lifc;
+
+	if(argc == 2){
+		if(strcmp(argv[1], "show") == 0)
+			shownataddr();
+		else if(strcmp(argv[1], "flush") == 0)
+			flushnataddr();
+		else
+			return Ebadarg;
+		return nil;
+	}
+
+	if(argc != 5)
+		return Ebadarg;
+
+	/* addresses are checked before the verb, in argument order */
+	if(parseip(src, argv[2]) == -1
+	|| parseipmask(mask, argv[3]) == -1
+	|| parseip(dst, argv[4]) == -1)
+		return Ebadip;
+
+	lifc = iplocalonifc(ifc, dst);
+	if(lifc == nil)
+		return Ebadip;
+
+	if(strcmp(argv[1], "add") == 0){
+		if(addnataddr(src, mask, lifc) != 0)
+			return Ebadarg;
+		return nil;
+	}
+	if(strcmp(argv[1], "remove") == 0){
+		if(removenataddr(src, mask, lifc) != 0)
+			return Ebadarg;
+		return nil;
+	}
+	return Ebadarg;
+}
+
+/*
+ *  non-standard control messages.
+ *  called with c locked.
+ *
+ *  argv[0] selects the operation; the rest of argv is passed to
+ *  the routine that implements it.  Returns that routine's result
+ *  (nil on success, else an error string), or "unsupported ctl"
+ *  for an unknown message.
+ */
+static char*
+ipifcctl(Conv* c, char**argv, int argc)
+{
+	Ipifc *ifc;
+	int i;
+
+	ifc = (Ipifc*)c->ptcl;
+	if(strcmp(argv[0], "add") == 0)
+		return ipifcadd(ifc, argv, argc, 0, nil);
+	else if(strcmp(argv[0], "bootp") == 0){
+		/*
+		 *  bootp is a global function pointer; it is nil unless
+		 *  something has installed a handler, so don't call
+		 *  through it blindly
+		 */
+		if(bootp == nil)
+			return "bootp not configured";
+		return bootp(ifc);
+	}
+	else if(strcmp(argv[0], "try") == 0)
+		return ipifcadd(ifc, argv, argc, 1, nil);	/* like add, but tentative */
+	else if(strcmp(argv[0], "remove") == 0)
+		return ipifcrem(ifc, argv, argc);
+	else if(strcmp(argv[0], "unbind") == 0)
+		return ipifcunbind(ifc);
+	else if(strcmp(argv[0], "joinmulti") == 0)
+		return ipifcjoinmulti(ifc, argv, argc);
+	else if(strcmp(argv[0], "leavemulti") == 0)
+		return ipifcleavemulti(ifc, argv, argc);
+	else if(strcmp(argv[0], "mtu") == 0)
+		return ipifcsetmtu(ifc, argv, argc);
+	else if(strcmp(argv[0], "reassemble") == 0){
+		ifc->reassemble = 1;
+		return nil;
+	}
+	else if(strcmp(argv[0], "iprouting") == 0){
+		/* with no argument, routing is turned on */
+		i = 1;
+		if(argc > 1)
+			i = atoi(argv[1]);
+		iprouting(c->p->f, i);
+		return nil;
+	}
+	else if(strcmp(argv[0], "addpref6") == 0)
+		return ipifcaddpref6(ifc, argv, argc);
+	else if(strcmp(argv[0], "setpar6") == 0)
+		return ipifcsetpar6(ifc, argv, argc);
+	else if(strcmp(argv[0], "sendra6") == 0)
+		return ipifcsendra6(ifc, argv, argc);
+	else if(strcmp(argv[0], "recvra6") == 0)
+		return ipifcrecvra6(ifc, argv, argc);
+	else if(strcmp(argv[0], "nat") == 0)
+		return ipifcnat(ifc, argv, argc);
+	return "unsupported ctl";
+}
+
+/*
+ *  stats entry point of the ipifc protocol: passes the request on
+ *  to ipstats() for the Fs that owns the protocol and returns its
+ *  result.  The return type is spelled out instead of relying on
+ *  implicit int.
+ */
+int
+ipifcstats(Proto *ipifc, char *buf, int len)
+{
+	return ipstats(ipifc->f, buf, len);
+}
+
+/*
+ *  Build the "ipifc" protocol descriptor, attach it (and a fresh
+ *  self table) to f, then register it with Fsproto().
+ */
+void
+ipifcinit(Fs *f)
+{
+	Proto *pr;
+
+	pr = smalloc(sizeof(Proto));
+
+	/* identity and sizing */
+	pr->name = "ipifc";
+	pr->ipproto = -1;
+	pr->nc = Maxmedia;
+	pr->ptclsize = sizeof(Ipifc);
+
+	/* operations this protocol implements */
+	pr->connect = ipifcconnect;
+	pr->bind = ipifcbind;
+	pr->state = ipifcstate;
+	pr->create = ipifccreate;
+	pr->close = ipifcclose;
+	pr->ctl = ipifcctl;
+	pr->stats = ipifcstats;
+	pr->inuse = ipifcinuse;
+	pr->local = ipifclocal;
+
+	/* operations it does not */
+	pr->announce = nil;
+	pr->rcv = nil;
+	pr->advise = nil;
+
+	f->ipifc = pr;				/* hack for ipifcremroute, findipifc, ... */
+	f->self = smalloc(sizeof(Ipselftab));	/* hack for ipforme */
+
+	Fsproto(f, pr);
+}
+
+/*
+ *  add to self routing cache
+ *	called with c locked
+ *
+ *  Finds or creates the Ipself entry for address a in f->self,
+ *  then finds or creates the Iplink that ties that entry to lifc.
+ *  An existing link only has its reference count incremented.
+ *  A new link is put on both the entry's and the lifc's list, a
+ *  host route for a is added, and for Rmulti addresses the
+ *  medium's addmulti hook is called when it exists.
+ *  f->self is qlocked for the whole operation.
+ */
+static void
+addselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a, int type)
+{
+	Ipself *p;
+	Iplink *lp;
+	int h;
+
+	qlock(f->self);
+
+	/* see if the address already exists */
+	h = hashipa(a);
+	for(p = f->self->hash[h]; p; p = p->next)
+		if(memcmp(a, p->a, IPaddrlen) == 0)
+			break;
+
+	/* allocate a local address and add to hash chain */
+	if(p == nil){
+		p = smalloc(sizeof(*p));
+		ipmove(p->a, a);
+		p->type = type;
+		p->next = f->self->hash[h];
+		f->self->hash[h] = p;
+
+		/* if the null address, accept all packets */
+		if(ipcmp(a, v4prefix) == 0 || ipcmp(a, IPnoaddr) == 0)
+			f->self->acceptall = 1;
+	}
+
+	/* look for a link for this lifc */
+	for(lp = p->link; lp; lp = lp->selflink)
+		if(lp->lifc == lifc)
+			break;
+
+	/* allocate a lifc-to-local link and link to both */
+	if(lp == nil){
+		lp = smalloc(sizeof(*lp));
+		lp->ref = 1;
+		lp->lifc = lifc;
+		lp->self = p;
+		lp->selflink = p->link;
+		p->link = lp;
+		lp->lifclink = lifc->link;
+		lifc->link = lp;
+
+		/* add to routing table (tifc is a route tag defined outside this block) */
+		if(isv4(a))
+			v4addroute(f, tifc, a+IPv4off, IPallbits+IPv4off, a+IPv4off, type);
+		else
+			v6addroute(f, tifc, a, IPallbits, a, type);
+
+		if((type & Rmulti) && ifc->m->addmulti != nil)
+			(*ifc->m->addmulti)(ifc, a, lifc->local);
+	} else {
+		/* same address already tied to this lifc: just count the extra user */
+		lp->ref++;
+	}
+
+	qunlock(f->self);
+}
+
+/*
+ *  These structures are unlinked from their chains while
+ *  other threads may be using them.  To avoid excessive locking,
+ *  just put them aside for a while before freeing them.
+ *	called with f->self locked
+ */
+static Iplink *freeiplink;
+static Ipself *freeipself;
+
+/*
+ *  Park p on the freeiplink list with a 5 second grace period and
+ *  free every parked entry whose grace period has already run out.
+ *  Entries that have not yet expired stay parked.
+ */
+static void
+iplinkfree(Iplink *p)
+{
+	Iplink **l, *np;
+	ulong now = NOW;
+
+	l = &freeiplink;
+	for(np = *l; np != nil; np = *l){
+		/*
+		 *  free only once the expiry time has been reached; the
+		 *  signed difference keeps the test valid if the ulong
+		 *  time value wraps.
+		 */
+		if((long)(now - np->expire) >= 0){
+			*l = np->next;
+			free(np);
+			continue;
+		}
+		l = &np->next;
+	}
+	p->expire = now + 5000;		/* give other threads 5 secs to get out */
+	p->next = nil;
+	*l = p;		/* l now addresses the tail of the list */
+}
+/*
+ *  Park p on the freeipself list with a 5 second grace period and
+ *  free every parked entry whose grace period has already run out.
+ *  Entries that have not yet expired stay parked.
+ */
+static void
+ipselffree(Ipself *p)
+{
+	Ipself **l, *np;
+	ulong now = NOW;
+
+	l = &freeipself;
+	for(np = *l; np != nil; np = *l){
+		/*
+		 *  free only once the expiry time has been reached; the
+		 *  signed difference keeps the test valid if the ulong
+		 *  time value wraps.
+		 */
+		if((long)(now - np->expire) >= 0){
+			*l = np->next;
+			free(np);
+			continue;
+		}
+		l = &np->next;
+	}
+	p->expire = now + 5000;		/* give other threads 5 secs to get out */
+	p->next = nil;
+	*l = p;		/* l now addresses the tail of the list */
+}
+
+/*
+ *  Decrement reference for this address on this link.
+ *  Unlink from selftab if this is the last ref.
+ *	called with c locked
+ *
+ *  Does nothing when a has no selftab entry or lifc holds no link
+ *  to that entry.  When the last reference on the link goes away
+ *  the link is removed from both lists and handed to iplinkfree();
+ *  when the entry then has no links left its host route is deleted
+ *  and the entry is handed to ipselffree().
+ */
+static void
+remselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a)
+{
+	Ipself *p, **l;
+	Iplink *link, **l_self, **l_lifc;
+
+	qlock(f->self);
+
+	/* find the unique selftab entry */
+	l = &f->self->hash[hashipa(a)];
+	for(p = *l; p; p = *l){
+		if(ipcmp(p->a, a) == 0)
+			break;
+		l = &p->next;
+	}
+
+	if(p == nil)
+		goto out;
+
+	/*
+	 *  walk down links from an ifc looking for one
+	 *  that matches the selftab entry
+	 */
+	l_lifc = &lifc->link;
+	for(link = *l_lifc; link; link = *l_lifc){
+		if(link->self == p)
+			break;
+		l_lifc = &link->lifclink;
+	}
+
+	if(link == nil)
+		goto out;
+
+	/*
+	 *  walk down the links from the selftab looking for
+	 *  the one we just found
+	 */
+	l_self = &p->link;
+	for(link = *l_self; link; link = *l_self){
+		if(link == *(l_lifc))
+			break;
+		l_self = &link->selflink;
+	}
+
+	/* a link reachable from the lifc must also be on the entry's list */
+	if(link == nil)
+		panic("remselfcache");
+
+	if(--(link->ref) != 0)
+		goto out;
+
+	if((p->type & Rmulti) && ifc->m->remmulti != nil)
+		(*ifc->m->remmulti)(ifc, a, lifc->local);
+
+	/* ref == 0, remove from both chains and free the link */
+	*l_lifc = link->lifclink;
+	*l_self = link->selflink;
+	iplinkfree(link);
+
+	/* other lifcs still use this address */
+	if(p->link != nil)
+		goto out;
+
+	/* remove from routing table */
+	if(isv4(a))
+		v4delroute(f, a+IPv4off, IPallbits+IPv4off, 1);
+	else
+		v6delroute(f, a, IPallbits, 1);
+	
+	/* no more links, remove from hash and free */
+	*l = p->next;
+	ipselffree(p);
+
+	/* if IPnoaddr, forget */
+	if(ipcmp(a, v4prefix) == 0 || ipcmp(a, IPnoaddr) == 0)
+		f->self->acceptall = 0;
+
+out:
+	qunlock(f->self);
+}
+
+/* one line per self address: address, number of links, route type string */
+static char *stformat = "%-44.44I %2.2d %4.4s\n";
+enum
+{
+	/* NOTE(review): not used in the code visible here, and it does not
+	 * look like the width stformat produces -- confirm before relying on it */
+	Nstformat= 41,
+};
+
+/*
+ *  Format the self table into cp (at most n bytes) and return the
+ *  number of bytes produced.  f->self is qlocked while the table
+ *  is walked.
+ *
+ *  While off is still positive each formatted entry is discarded
+ *  and its length subtracted from off, so output starts with the
+ *  first entry after the offset has been used up.
+ *  NOTE(review): this presumably expects offset to fall on an entry
+ *  boundary -- confirm against the callers.
+ */
+long
+ipselftabread(Fs *f, char *cp, ulong offset, int n)
+{
+	int i, m, nifc, off;
+	Ipself *p;
+	Iplink *link;
+	char state[8];
+
+	m = 0;
+	off = offset;
+	qlock(f->self);
+	for(i = 0; i < NHASH && m < n; i++){
+		for(p = f->self->hash[i]; p != nil && m < n; p = p->next){
+			/* count the logical interfaces using this address */
+			nifc = 0;
+			for(link = p->link; link; link = link->selflink)
+				nifc++;
+			routetype(p->type, state);
+			m += snprint(cp + m, n - m, stformat, p->a, nifc, state);
+			if(off > 0){
+				/* still skipping: drop what was just written */
+				off -= m;
+				m = 0;
+			}
+		}
+	}
+	qunlock(f->self);
+	return m;
+}
+
+/*
+ *  Look addr up in the self table and return the tentative flag of
+ *  the logical interface behind the entry's first link; 0 when the
+ *  address is not in the table.
+ */
+int
+iptentative(Fs *f, uchar *addr)
+{
+	Ipself *ent;
+
+	for(ent = f->self->hash[hashipa(addr)]; ent != nil; ent = ent->next)
+		if(ipcmp(addr, ent->a) == 0)
+			return ent->link->lifc->tentative;
+	return 0;
+}
+
+/*
+ *  Is addr one of our own addresses?
+ *  returns
+ *	0		- no match
+ *	the type stored in the matching self table entry, or
+ *	Runi when the table is flagged to accept everything
+ */
+int
+ipforme(Fs *f, uchar *addr)
+{
+	Ipself *ent;
+
+	for(ent = f->self->hash[hashipa(addr)]; ent != nil; ent = ent->next)
+		if(ipcmp(addr, ent->a) == 0)
+			return ent->type;
+
+	/* hack to say accept anything */
+	return f->self->acceptall ? Runi : 0;
+}
+
+/*
+ *  find the ifc on same net as the remote system.  If none,
+ *  return nil.
+ *
+ *  Among all logical interfaces whose network contains remote, the
+ *  one whose mask compares greatest with ipcmp wins.  When nothing
+ *  matches and type has Rbcast or Rmulti set, the first interface
+ *  that has any logical interface is returned instead.
+ */
+Ipifc*
+findipifc(Fs *f, uchar *remote, int type)
+{
+	Ipifc *ifc, *best;
+	Iplifc *lifc;
+	Conv **cp, **end;
+	uchar net[IPaddrlen];
+	uchar bestmask[IPaddrlen];
+
+	best = nil;
+	memset(bestmask, 0, IPaddrlen);
+	end = &f->ipifc->conv[f->ipifc->nc];
+
+	/* find most specific match */
+	for(cp = f->ipifc->conv; cp < end; cp++){
+		if(*cp == 0)
+			continue;
+		ifc = (Ipifc*)(*cp)->ptcl;
+		for(lifc = ifc->lifc; lifc; lifc = lifc->next){
+			maskip(remote, lifc->mask, net);
+			if(ipcmp(net, lifc->net) != 0)
+				continue;
+			if(best != nil && ipcmp(lifc->mask, bestmask) <= 0)
+				continue;
+			best = ifc;
+			ipmove(bestmask, lifc->mask);
+		}
+	}
+	if(best != nil)
+		return best;
+
+	if((type & (Rbcast|Rmulti)) == 0)
+		return nil;
+
+	/* for now for broadcast and multicast, just use first interface */
+	for(cp = f->ipifc->conv; cp < end; cp++){
+		if(*cp == 0)
+			continue;
+		ifc = (Ipifc*)(*cp)->ptcl;
+		if(ifc->lifc != nil)
+			return ifc;
+	}
+	return nil;
+}
+
+/*
+ *  v6 address classes.  The numeric order matters: the address
+ *  selection code in this file compares these values with `>' and
+ *  keeps the larger one.
+ */
+enum {
+	unknownv6,
+	multicastv6,
+	unspecifiedv6,
+	linklocalv6,
+	sitelocalv6,
+	globalv6,
+};
+
+/*
+ *  Classify a v6 address.  The tests are tried in the order
+ *  global, link-local, multicast, site-local; anything else is
+ *  unknownv6.
+ */
+int
+v6addrtype(uchar *addr)
+{
+	int class;
+
+	if(isv6global(addr))
+		class = globalv6;
+	else if(islinklocal(addr))
+		class = linklocalv6;
+	else if(isv6mcast(addr))
+		class = multicastv6;
+	else if(issitelocal(addr))
+		class = sitelocalv6;
+	else
+		class = unknownv6;
+	return class;
+}
+
+/*
+ *  true while origint + preflt has not been passed by the clock
+ *  expression below, or when preflt is 0xffffffff.
+ *  NOTE(review): in C `^' is exclusive-or, not exponentiation, so
+ *  NOW/10^3 evaluates as (NOW/10)^3, not NOW/1000.  If seconds were
+ *  intended, every place that stores origint has to be changed
+ *  together with this macro -- confirm before touching either.
+ */
+#define v6addrcurr(lifc) (( (lifc)->origint + (lifc)->preflt >= (NOW/10^3) ) || ( (lifc)->preflt == 0xffffffff ))
+
+/*
+ *  Store in local the "best" current v6 address over all interfaces
+ *  (global > sitelocal > link local > unspecified).  local is set
+ *  to v6Unspecified when nothing better is found; the search stops
+ *  at the first global address.
+ */
+static void
+findprimaryipv6(Fs *f, uchar *local)
+{
+	Conv **cp, **end;
+	Ipifc *ifc;
+	Iplifc *lifc;
+	int best, class;
+
+	ipmove(local, v6Unspecified);
+	best = unspecifiedv6;
+
+	end = &f->ipifc->conv[f->ipifc->nc];
+	for(cp = f->ipifc->conv; cp < end; cp++){
+		if(*cp == 0)
+			continue;
+		ifc = (Ipifc*)(*cp)->ptcl;
+		for(lifc = ifc->lifc; lifc; lifc = lifc->next){
+			class = v6addrtype(lifc->local);
+			/* must beat what we have and still be current */
+			if(class <= best || !v6addrcurr(lifc))
+				continue;
+			ipmove(local, lifc->local);
+			best = class;
+			if(best == globalv6)
+				return;
+		}
+	}
+}
+
+/*
+ *  returns first ip address configured: the local address of the
+ *  first logical interface of the first interface that has one.
+ *  local is left untouched when no interface has an address.
+ */
+static void
+findprimaryipv4(Fs *f, uchar *local)
+{
+	Conv **cp, **end;
+	Iplifc *first;
+
+	end = &f->ipifc->conv[f->ipifc->nc];
+	for(cp = f->ipifc->conv; cp < end; cp++){
+		if(*cp == 0)
+			continue;
+		first = ((Ipifc*)(*cp)->ptcl)->lifc;
+		if(first == nil)
+			continue;
+		ipmove(local, first->local);
+		return;
+	}
+}
+
+/*
+ *  find the local address 'closest' to the remote system and copy
+ *  it to local.  Nothing is returned.
+ *
+ *  The route for remote is looked up first; if one exists, the
+ *  addresses of its interface that share a network with the
+ *  route's gateway are tried.  When that yields nothing the
+ *  primary v4 or v6 address is used, depending on whether remote
+ *  carries the v4 prefix.  f->ipifc is qlocked throughout.
+ */
+void
+findlocalip(Fs *f, uchar *local, uchar *remote)
+{
+	Ipifc *ifc;
+	Iplifc *lifc;
+	Route *r;
+	uchar gate[IPaddrlen];
+	uchar gnet[IPaddrlen];
+	int version;
+	int atype = unspecifiedv6, atypel = unknownv6;
+
+	USED(atype);
+	USED(atypel);
+	qlock(f->ipifc);
+	r = v6lookup(f, remote, nil);
+ 	version = (memcmp(remote, v4prefix, IPv4off) == 0) ? V4 : V6;
+	
+	if(r != nil){
+		ifc = r->ifc;
+		if(r->type & Rv4)
+			v4tov6(gate, r->v4.gate);
+		else {
+			ipmove(gate, r->v6.gate);
+			ipmove(local, v6Unspecified);
+		}
+
+		/* find ifc address closest to the gateway to use */
+		switch(version) {
+		case V4:
+			/* first address on the gateway's network wins */
+			for(lifc = ifc->lifc; lifc; lifc = lifc->next){
+				maskip(gate, lifc->mask, gnet);
+				if(ipcmp(gnet, lifc->net) == 0){
+					ipmove(local, lifc->local);
+					goto out;
+				}
+			}
+			break;
+		case V6:
+			/* best-class current address on the gateway's network wins */
+			for(lifc = ifc->lifc; lifc; lifc = lifc->next){
+				atypel = v6addrtype(lifc->local);
+				maskip(gate, lifc->mask, gnet);
+				if(ipcmp(gnet, lifc->net) == 0)
+				if(atypel > atype)
+				if(v6addrcurr(lifc)) {
+					ipmove(local, lifc->local);
+					atype = atypel;
+					if(atype == globalv6)
+						break;
+				}
+			}
+			if(atype > unspecifiedv6)
+				goto out;
+			break;
+		default:
+			panic("findlocalip: version %d", version);
+		}
+	}
+
+	/* no route, or nothing usable on the route's interface */
+	switch(version){
+	case V4:
+		findprimaryipv4(f, local);
+		break;
+	case V6:
+		findprimaryipv6(f, local);
+		break;
+	default:
+		panic("findlocalip2: version %d", version);
+	}
+
+out:
+	qunlock(f->ipifc);
+}
+
+/*
+ *  return first v4 address associated with an interface:
+ *  copies its IPv4addrlen bytes to addr and returns 1, or
+ *  returns 0 when the interface has no v4 address.
+ */
+int
+ipv4local(Ipifc *ifc, uchar *addr)
+{
+	Iplifc *l;
+
+	for(l = ifc->lifc; l != nil; l = l->next){
+		if(!isv4(l->local))
+			continue;
+		memmove(addr, l->local+IPv4off, IPv4addrlen);
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ *  return first v6 address associated with an interface that is
+ *  not marked tentative: copies it to addr and returns 1, or
+ *  returns 0 when there is none.
+ */
+int
+ipv6local(Ipifc *ifc, uchar *addr)
+{
+	Iplifc *l;
+
+	for(l = ifc->lifc; l != nil; l = l->next){
+		if(isv4(l->local) || l->tentative)
+			continue;
+		ipmove(addr, l->local);
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ *  Like a v6 address lookup that also accepts tentative addresses:
+ *  copies the first non-v4 address of ifc to addr and returns
+ *  SRC_UNI, or returns SRC_UNSPEC when there is none.
+ */
+int
+ipv6anylocal(Ipifc *ifc, uchar *addr)
+{
+	Iplifc *l;
+
+	for(l = ifc->lifc; l != nil; l = l->next){
+		if(isv4(l->local))
+			continue;
+		ipmove(addr, l->local);
+		return SRC_UNI;
+	}
+	return SRC_UNSPEC;
+}
+
+/*
+ *  see if this address is bound to the interface; returns the
+ *  logical interface whose local address equals ip, or nil.
+ */
+Iplifc*
+iplocalonifc(Ipifc *ifc, uchar *ip)
+{
+	Iplifc *l;
+
+	l = ifc->lifc;
+	while(l != nil && ipcmp(ip, l->local) != 0)
+		l = l->next;
+	return l;
+}
+
+
+/*
+ *  See if we're proxying for this address on this interface.
+ *  Returns 1 when the route for ip has both Rifc and Rproxy set
+ *  and ip, masked by one of ifc's logical interface masks, equals
+ *  that logical interface's remote address; 0 otherwise.
+ */
+int
+ipproxyifc(Fs *f, Ipifc *ifc, uchar *ip)
+{
+	Route *rt;
+	Iplifc *l;
+	uchar masked[IPaddrlen];
+
+	/* see if this is a direct connected pt to pt address */
+	rt = v6lookup(f, ip, nil);
+	if(rt == nil || (rt->type & (Rifc|Rproxy)) != (Rifc|Rproxy))
+		return 0;
+
+	/* see if this is on the right interface */
+	for(l = ifc->lifc; l != nil; l = l->next){
+		maskip(ip, l->mask, masked);
+		if(ipcmp(masked, l->remote) == 0)
+			return 1;
+	}
+	return 0;
+}
+
+/*
+ *  return multicast version if any: V4 for a v4 address whose
+ *  first octet is in 0xe0..0xef, V6 for a v6 address starting
+ *  with 0xff, otherwise 0.
+ */
+int
+ipismulticast(uchar *ip)
+{
+	uchar first;
+
+	if(!isv4(ip))
+		return ip[0] == 0xff ? V6 : 0;
+
+	first = ip[IPv4off];
+	if(first >= 0xe0 && first < 0xf0)
+		return V4;
+	return 0;
+}
+
+/*
+ *  Broadcast-or-multicast test: V4 for a v4 multicast address
+ *  (first octet 0xe0..0xef) or IPv4bcast, V6 for a v6 address
+ *  starting with 0xff, otherwise 0.
+ */
+int
+ipisbm(uchar *ip)
+{
+	uchar first;
+
+	if(!isv4(ip))
+		return ip[0] == 0xff ? V6 : 0;
+
+	first = ip[IPv4off];
+	if((first >= 0xe0 && first < 0xf0) || ipcmp(ip, IPv4bcast) == 0)
+		return V4;
+	return 0;
+}
+
+
+/*
+ *  add a multicast address to an interface, called with c locked
+ *
+ *  Records the (ma, ia) pair on c->multi unless it is already
+ *  there, then adds ma to the self cache of every logical
+ *  interface, on any in-use ipifc conversation, whose local
+ *  address is ia.
+ */
+void
+ipifcaddmulti(Conv *c, uchar *ma, uchar *ia)
+{
+	Ipifc *ifc;
+	Iplifc *lifc;
+	Conv **p;
+	Ipmulti *multi, **l;
+	Fs *f;
+
+	f = c->p->f;
+	
+	for(l = &c->multi; *l; l = &(*l)->next)
+		if(ipcmp(ma, (*l)->ma) == 0)
+		if(ipcmp(ia, (*l)->ia) == 0)
+			return;		/* it's already there */
+
+	/* l is left at the tail of the list: append the new pair */
+	multi = *l = smalloc(sizeof(*multi));
+	ipmove(multi->ma, ma);
+	ipmove(multi->ia, ia);
+	multi->next = nil;
+
+	/* the walk stops at the first nil slot of the conversation array */
+	for(p = f->ipifc->conv; *p; p++){
+		if((*p)->inuse == 0)
+			continue;
+		ifc = (Ipifc*)(*p)->ptcl;
+		/* on error drop the interface lock before passing the error on */
+		if(waserror()){
+			wunlock(ifc);
+			nexterror();
+		}
+		wlock(ifc);
+		for(lifc = ifc->lifc; lifc; lifc = lifc->next)
+			if(ipcmp(ia, lifc->local) == 0)
+				addselfcache(f, ifc, lifc, ma, Rmulti);
+		wunlock(ifc);
+		poperror();
+	}
+}
+
+
+/*
+ *  remove a multicast address from an interface, called with c locked
+ *
+ *  Unlinks the (ma, ia) pair from c->multi, removes ma from the
+ *  self cache of every logical interface, on any in-use ipifc
+ *  conversation, whose local address is ia, and frees the pair.
+ *  Does nothing when the pair is not on c->multi.
+ */
+void
+ipifcremmulti(Conv *c, uchar *ma, uchar *ia)
+{
+	Ipmulti *multi, **l;
+	Iplifc *lifc;
+	Conv **p;
+	Ipifc *ifc;
+	Fs *f;
+
+	f = c->p->f;
+	
+	for(l = &c->multi; *l; l = &(*l)->next)
+		if(ipcmp(ma, (*l)->ma) == 0)
+		if(ipcmp(ia, (*l)->ia) == 0)
+			break;
+
+	multi = *l;
+	if(multi == nil)
+		return; 	/* we don't have it open */
+
+	*l = multi->next;
+
+	/* the walk stops at the first nil slot of the conversation array */
+	for(p = f->ipifc->conv; *p; p++){
+		if((*p)->inuse == 0)
+			continue;
+
+		ifc = (Ipifc*)(*p)->ptcl;
+		/* on error drop the interface lock before passing the error on */
+		if(waserror()){
+			wunlock(ifc);
+			nexterror();
+		}
+		wlock(ifc);
+		for(lifc = ifc->lifc; lifc; lifc = lifc->next)
+			if(ipcmp(ia, lifc->local) == 0)
+				remselfcache(f, ifc, lifc, ma);
+		wunlock(ifc);
+		poperror();
+	}
+
+	free(multi);
+}
+
+/*
+ *  make lifc's join and leave multicast groups
+ *
+ *  "joinmulti" handler.  Currently a stub: the arguments are
+ *  ignored and nil (success) is returned.
+ */
+static char*
+ipifcjoinmulti(Ipifc *ifc, char **argv, int argc)
+{
+	USED(ifc, argv, argc);
+	return nil;
+}
+
+/*
+ *  "leavemulti" handler.  Currently a stub: the arguments are
+ *  ignored and nil (success) is returned.
+ */
+static char*
+ipifcleavemulti(Ipifc *ifc, char **argv, int argc)
+{
+	USED(ifc, argv, argc);
+	return nil;
+}
+
+/*
+ *  Register ip on every other interface that will proxy for it,
+ *  i.e. every interface (other than ifc) with a logical interface
+ *  whose remote address equals ip masked by that logical
+ *  interface's mask.
+ *	v6: add the solicited-node multicast address to the self
+ *	    cache and enter ip in the arp table with the proxying
+ *	    interface's mac; only media with an addmulti hook qualify.
+ *	v4: call the medium's areg hook; only media that have one qualify.
+ *  Each candidate interface is read-locked while it is examined.
+ */
+static void
+ipifcregisterproxy(Fs *f, Ipifc *ifc, uchar *ip)
+{
+	Conv **cp, **e;
+	Ipifc *nifc;
+	Iplifc *lifc;
+	Medium *m;
+	uchar net[IPaddrlen];
+
+	/* register the address on any network that will proxy for us */
+	e = &f->ipifc->conv[f->ipifc->nc];
+
+	if(!isv4(ip)) { // V6
+		for(cp = f->ipifc->conv; cp < e; cp++){
+			if(*cp == nil)
+				continue;
+			nifc = (Ipifc*)(*cp)->ptcl;
+			if(nifc == ifc)
+				continue;
+	
+			rlock(nifc);
+			m = nifc->m;
+			if(m == nil || m->addmulti == nil) {
+				runlock(nifc);
+				continue;
+			}
+			for(lifc = nifc->lifc; lifc; lifc = lifc->next){
+				maskip(ip, lifc->mask, net);
+				if(ipcmp(net, lifc->remote) == 0) { /* add solicited-node multicast address */
+					ipv62smcast(net, ip);
+					addselfcache(f, nifc, lifc, net, Rmulti);
+					arpenter(f, V6, ip, nifc->mac, 6, 0);
+					//(*m->addmulti)(nifc, net, ip);
+					break;
+				}
+			}
+			runlock(nifc);
+		}
+		return;
+	}
+	else { // V4
+		for(cp = f->ipifc->conv; cp < e; cp++){
+			if(*cp == nil)
+				continue;
+			nifc = (Ipifc*)(*cp)->ptcl;
+			if(nifc == ifc)
+				continue;
+	
+			rlock(nifc);
+			m = nifc->m;
+			if(m == nil || m->areg == nil){
+				runlock(nifc);
+				continue;
+			}
+			for(lifc = nifc->lifc; lifc; lifc = lifc->next){
+				maskip(ip, lifc->mask, net);
+				if(ipcmp(net, lifc->remote) == 0){
+					(*m->areg)(nifc, ip);
+					break;
+				}
+			}
+			runlock(nifc);
+		}
+	}
+}
+
+
+/*
+ *  added for new v6 mesg types
+ *
+ *  Replace the route for v6Unspecified/v6Unspecified with one via
+ *  gate, tagged "ra".  Unless force is set, an existing route whose
+ *  tag is not "ra" is left alone: route entries generated by all
+ *  other means take precedence over router announcements.
+ */
+static void
+adddefroute6(Fs *f, uchar *gate, int force)
+{
+	Route *cur;
+
+	cur = v6lookup(f, v6Unspecified, nil);
+	if(cur != nil && !force && strcmp(cur->tag, "ra") != 0)
+		return;
+
+	v6delroute(f, v6Unspecified, v6Unspecified, 1);
+	v6addroute(f, "ra", v6Unspecified, v6Unspecified, gate, 0);
+}
+
+enum
+{
+	Ngates = 3,	/* NOTE(review): not referenced in the code visible here */
+};
+
+/*
+ *  "addpref6 prefix [plen [onlink [autoflag [validlt [preflt]]]]]"
+ *
+ *  Turn a v6 prefix into a full address with the medium's pref2addr
+ *  hook and add it to ifc through ipifcadd(), passing the prefix
+ *  parameters along in an Iplifc.  Defaults: plen 64, onlink and
+ *  autoflag set, both lifetimes 0xffffffff.
+ *
+ *  Returns Ebadarg for a wrong argument count, a prefix that is not
+ *  v6 or is link-local, validlt < preflt, plen outside 0..64, or an
+ *  interface whose medium is missing or has no pref2addr hook;
+ *  otherwise returns what ipifcadd() returns.
+ */
+char*
+ipifcaddpref6(Ipifc *ifc, char**argv, int argc)
+{
+	/* ints, so that an atoi() result is not truncated before the != 0 test */
+	int	onlink = 1;
+	int	autoflag = 1;
+	long 	validlt = 0xffffffff;
+	long 	preflt = 0xffffffff;
+	/*
+	 *  NOTE(review): `^' is exclusive-or, so this is (NOW/10)^3 and
+	 *  not NOW/1000.  Left as is because v6addrcurr compares against
+	 *  the very same expression; the two must be changed together.
+	 */
+	long	origint = NOW / 10^3;
+	uchar	prefix[IPaddrlen];
+	int	plen = 64;
+	Iplifc	*lifc;
+	char	addr[40], preflen[6];
+	char	*params[3];
+
+	switch(argc) {
+	case 7:
+		preflt = atoi(argv[6]);
+		/* fall through */
+	case 6:
+		validlt = atoi(argv[5]);
+		/* fall through */
+	case 5:
+		autoflag =  atoi(argv[4]);
+		/* fall through */
+	case 4:
+		onlink = atoi(argv[3]);
+		/* fall through */
+	case 3:
+		plen = atoi(argv[2]);
+		/* fall through */
+	case 2:
+		break;
+	default:
+		return Ebadarg;
+	}
+
+	if((parseip(prefix, argv[1])!=6) ||
+	 	(validlt < preflt) ||
+		(plen < 0) || (plen > 64) ||
+		(islinklocal(prefix))
+	)
+		return Ebadarg;
+
+	/*
+	 *  check the medium before allocating, so the error return
+	 *  neither leaks lifc nor dereferences a missing medium.
+	 */
+	if(ifc->m == nil || ifc->m->pref2addr == nil)
+		return Ebadarg;
+
+	lifc = smalloc(sizeof(Iplifc));
+	lifc->onlink = (onlink!=0);
+	lifc->autoflag = (autoflag!=0);
+	lifc->validlt = validlt;
+	lifc->preflt = preflt;
+	lifc->origint = origint;
+
+	/* fill in the interface part of the address from the mac */
+	ifc->m->pref2addr(prefix, ifc->mac);
+
+	sprint(addr, "%I", prefix);
+	sprint(preflen, "/%d", plen);
+	params[0] = "add";
+	params[1] = addr;
+	params[2] = preflen;
+
+	/* NOTE(review): lifc is not freed here; confirm whether ipifcadd keeps or copies it */
+	return ipifcadd(ifc, params, 3, 0, lifc);
+}
+
--- /dev/null
+++ b/os/ip.original/ipmux.c
@@ -1,0 +1,839 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+
+#include "ip.h"
+#define DPRINT if(0)print
+
+typedef struct Ipmuxrock  Ipmuxrock;
+typedef struct Ipmux      Ipmux;
+typedef struct Ip6hdr     Ip6hdr;
+
+enum
+{
+	IPHDR		= 20,		/* sizeof(Ip4hdr) */
+};
+
+/* fixed v6 header layout, used here to read the proto and ttl fields */
+struct Ip6hdr
+{
+	uchar vcf[4];		/* version, class label, and flow label */ 
+	uchar ploadlen[2];	/* payload length */
+	uchar proto;		/* next header, i.e. proto */
+	uchar ttl;		/* hop limit, i.e. ttl */
+	uchar src[16];		/* IP source */
+	uchar dst[16];		/* IP destination */
+};
+
+
+enum
+{
+	/* Txxxx: which field a filter node looks at (Ipmux.type) */
+	Tproto,
+	Tdata,
+	Tiph,
+	Tdst,
+	Tsrc,
+	Tifc,
+
+	/* Cxxxx: how a filter node compares (Ipmux.ctype) */
+	Cother = 0,
+	Cbyte,		/* single byte */
+	Cmbyte,		/* single byte with mask */
+	Cshort,		/* single short */
+	Cmshort,	/* single short with mask */
+	Clong,		/* single long */
+	Cmlong,		/* single long with mask */
+	Cifc,
+	Cmifc,
+};
+
+/* printable names of the Txxxx field types, indexed by type */
+char *ftname[] = 
+{
+[Tproto]	"proto",
+[Tdata]		"data",
+[Tiph]	 	"iph",
+[Tdst]		"dst",
+[Tsrc]		"src",
+[Tifc]		"ifc",
+};
+
+/*
+ *  a node in the decision tree
+ */
+struct Ipmux
+{
+	Ipmux	*yes;		/* followed when this node matches */
+	Ipmux	*no;		/* followed when it does not */
+	uchar	type;		/* type of field(Txxxx) */
+	uchar	ctype;		/* type of comparison(Cxxxx) */
+	uchar	len;		/* length in bytes of item to compare */
+	uchar	n;		/* number of items val points to */
+	short	off;		/* offset of comparison */
+	short	eoff;		/* end offset of comparison */
+	uchar	skiphdr;	/* should offset start after ipheader */
+	uchar	*val;
+	uchar	*mask;
+	uchar	*e;		/* val+n*len*/
+
+	int	ref;		/* so we can garbage collect */
+	Conv	*conv;
+};
+
+/*
+ *  someplace to hold per conversation data
+ */
+struct Ipmuxrock
+{
+	Ipmux	*chain;		/* the conversation's own filter chain; nil until connected */
+};
+
+static int	ipmuxsprint(Ipmux*, int, char*, int);
+static void	ipmuxkick(void *x);
+
+/* return the first character of p that is neither a blank nor a tab */
+static char*
+skipwhite(char *p)
+{
+	for(; *p == ' ' || *p == '\t'; p++)
+		;
+	return p;
+}
+
+/*
+ *  Split p at the first occurrence of c: the separator is
+ *  overwritten with a NUL and a pointer to the first non-white
+ *  character after it is returned.  Returns nil when c does not
+ *  occur or nothing but white space follows it (the separator has
+ *  already been overwritten in the latter case).
+ */
+static char*
+follows(char *p, char c)
+{
+	char *rest;
+
+	rest = strchr(p, c);
+	if(rest == nil)
+		return nil;
+	*rest = 0;
+	rest = skipwhite(rest+1);
+	return *rest != 0 ? rest : nil;
+}
+
+/*
+ *  Parse the operand at the start of *pp ("dst", "src", "ifc",
+ *  "proto", "data[off]", "data[off:end]", "iph[off]" or
+ *  "iph[off:end]") and return a fresh Ipmux node with type, len,
+ *  off and skiphdr filled in, n and ref set to 1 and no value or
+ *  mask yet.  Returns nil when the operand is not recognised or a
+ *  byte range is malformed.  *pp itself is not advanced.
+ */
+static Ipmux*
+parseop(char **pp)
+{
+	char *p = *pp;
+	int type, off, end, len;
+	Ipmux *f;
+
+	p = skipwhite(p);
+	if(strncmp(p, "dst", 3) == 0){
+		type = Tdst;
+		off = offsetof(Ip4hdr, dst[0]);
+		len = IPv4addrlen;
+		p += 3;
+	}
+	else if(strncmp(p, "src", 3) == 0){
+		type = Tsrc;
+		off = offsetof(Ip4hdr, src[0]);
+		len = IPv4addrlen;
+		p += 3;
+	}
+	else if(strncmp(p, "ifc", 3) == 0){
+		type = Tifc;
+		off = -IPv4addrlen;
+		len = IPv4addrlen;
+		p += 3;
+	}
+	else if(strncmp(p, "proto", 5) == 0){
+		type = Tproto;
+		off = offsetof(Ip4hdr, proto);
+		len = 1;
+		p += 5;
+	}
+	else if(strncmp(p, "data", 4) == 0 || strncmp(p, "iph", 3) == 0){
+		if(strncmp(p, "data", 4) == 0) {
+			type = Tdata;
+			p += 4;
+		}
+		else {
+			type = Tiph;
+			p += 3;
+		}
+		p = skipwhite(p);
+		if(*p != '[')
+			return nil;
+		p++;
+		off = strtoul(p, &p, 0);
+		if(off < 0 || off > (64-IPHDR))
+			return nil;
+		p = skipwhite(p);
+		if(*p != ':')
+			end = off;
+		else {
+			p++;
+			p = skipwhite(p);
+			end = strtoul(p, &p, 0);
+			if(end < off)
+				return nil;
+			p = skipwhite(p);
+		}
+		if(*p != ']')
+			return nil;
+		p++;
+		len = end - off + 1;
+		/*
+		 *  the length is stored in a uchar (Ipmux.len); refuse
+		 *  ranges that would be silently truncated there.
+		 */
+		if(len > 0xff)
+			return nil;
+	}
+	else
+		return nil;
+
+	f = smalloc(sizeof(*f));
+	f->type = type;
+	f->len = len;
+	f->off = off;
+	f->val = nil;
+	f->mask = nil;
+	f->n = 1;
+	f->ref = 1;
+	/* only "data" offsets are counted from the end of the ip header */
+	if(type == Tdata)
+		f->skiphdr = 1;
+	else
+		f->skiphdr = 0;
+
+	return f;
+}
+
+/* value of one hexadecimal digit; anything that is not a hex digit counts as 0 */
+static int
+htoi(char x)
+{
+	if('0' <= x && x <= '9')
+		return x - '0';
+	if('a' <= x && x <= 'f')
+		return x - 'a' + 10;
+	if('A' <= x && x <= 'F')
+		return x - 'A' + 10;
+	return 0;
+}
+
+/* value of the two hex digits p[0] p[1] taken as one byte; always reads both characters */
+static int
+hextoi(char *p)
+{
+	int hi, lo;
+
+	hi = htoi(p[0]);
+	lo = htoi(p[1]);
+	return (hi<<4) | lo;
+}
+
+/*
+ *  Convert the hex string p into at most len bytes at v, two
+ *  digits per byte.  Conversion stops at the end of the string or
+ *  after len bytes, whichever comes first; bytes of v beyond that
+ *  are left untouched.  A trailing single digit is taken as the
+ *  high nibble of one last byte.
+ */
+static void
+parseval(uchar *v, char *p, int len)
+{
+	while(*p && len-- > 0){
+		*v++ = hextoi(p);
+		/*
+		 *  odd number of digits: p[1] is the terminating NUL, so
+		 *  stepping two characters would run off the string.
+		 */
+		if(p[1] == 0)
+			break;
+		p += 2;
+	}
+}
+
+/*
+ *  Parse one filter of the form  operand=val[|val...][&mask]  into
+ *  an Ipmux node.  p is modified in place (separators are
+ *  overwritten with NULs).  Returns nil on a syntax error, after
+ *  releasing whatever had been allocated.
+ *
+ *  When no mask is given an all-ones mask of the operand's length
+ *  is installed.  ctype selects a single-item compare for one value
+ *  of length 1, 2 or 4 and stays Cother for everything else.
+ */
+static Ipmux*
+parsemux(char *p)
+{
+	int n, nomask;
+	Ipmux *f;
+	char *val;
+	char *mask;
+	char *vals[20];
+	uchar *v;
+
+	/* parse operand */
+	f = parseop(&p);
+	if(f == nil)
+		return nil;
+
+	/* find value */
+	val = follows(p, '=');
+	if(val == nil)
+		goto parseerror;
+
+	/* parse mask */
+	mask = follows(val, '&');
+	if(mask != nil){
+		switch(f->type){
+		case Tsrc:
+		case Tdst:
+		case Tifc:
+			f->mask = smalloc(f->len);
+			v4parseip(f->mask, mask);
+			break;
+		case Tdata:
+		case Tiph:
+			f->mask = smalloc(f->len);
+			parseval(f->mask, mask, f->len);
+			break;
+		default:
+			/* remaining field types (proto) do not accept a mask */
+			goto parseerror;
+		}
+		nomask = 0;
+	} else {
+		nomask = 1;
+		f->mask = smalloc(f->len);
+		memset(f->mask, 0xff, f->len);
+	}
+
+	/* parse vals */
+	f->n = getfields(val, vals, sizeof(vals)/sizeof(char*), 1, "|");
+	if(f->n == 0)
+		goto parseerror;
+	f->val = smalloc(f->n*f->len);
+	v = f->val;
+	for(n = 0; n < f->n; n++){
+		switch(f->type){
+		case Tsrc:
+		case Tdst:
+		case Tifc:
+			v4parseip(v, vals[n]);
+			break;
+		case Tproto:
+		case Tdata:
+		case Tiph:
+			/* these values are read as hexadecimal digits */
+			parseval(v, vals[n], f->len);
+			break;
+		}
+		v += f->len;
+	}
+
+	f->eoff = f->off + f->len;
+	f->e = f->val + f->n*f->len;
+	f->ctype = Cother;
+	if(f->n == 1){
+		switch(f->len){
+		case 1:
+			f->ctype = nomask ? Cbyte : Cmbyte;
+			break;
+		case 2:
+			f->ctype = nomask ? Cshort : Cmshort;
+			break;
+		case 4:
+			if(f->type == Tifc)
+				f->ctype = nomask ? Cifc : Cmifc;
+			else
+				f->ctype = nomask ? Clong : Cmlong;
+			break;
+		}
+	}
+	return f;
+
+parseerror:
+	if(f->mask)
+		free(f->mask);
+	if(f->val)
+		free(f->val);
+	free(f);
+	return nil;
+}
+
+/*
+ *  Order two ipmuxs by the fields they look at, not by their
+ *  values.  Compared in turn: field type (lesser first), effective
+ *  offset (earlier first), match length (longer first) and finally
+ *  the masks.
+ *
+ *  returns:	<0 if a is a more specific match
+ *		 0 if a and b are matching on the same fields
+ *		>0 if b is a more specific match
+ */
+static int
+ipmuxcmp(Ipmux *a, Ipmux *b)
+{
+	int d, aoff, boff;
+
+	/* compare types, lesser ones are more important */
+	d = a->type - b->type;
+	if(d != 0)
+		return d;
+
+	/* compare offsets, call earlier ones more specific */
+	aoff = a->off + ((int)a->skiphdr)*offsetof(Ip4hdr, data[0]);
+	boff = b->off + ((int)b->skiphdr)*offsetof(Ip4hdr, data[0]);
+	d = aoff - boff;
+	if(d != 0)
+		return d;
+
+	/* compare match lengths, longer ones are more specific */
+	d = b->len - a->len;
+	if(d != 0)
+		return d;
+
+	/*
+	 *  same bytes of the record on both sides: a node with a
+	 *  mask sorts before one without, two masks are compared
+	 *  bytewise.
+	 */
+	if(a->mask == nil)
+		return b->mask == nil ? 0 : 1;
+	if(b->mask == nil)
+		return -1;
+	return memcmp(b->mask, a->mask, a->len);
+}
+
+/*
+ *  Compare the values of two ipmuxs for which ipmuxcmp returned 0.
+ *  Value lists of different total size never compare equal; lists
+ *  of the same size are compared bytewise.
+ */
+static int
+ipmuxvalcmp(Ipmux *a, Ipmux *b)
+{
+	int asize, bsize;
+
+	asize = a->len*a->n;
+	bsize = b->len*b->n;
+	if(bsize != asize)
+		return bsize - asize;
+	return memcmp(a->val, b->val, asize);
+}
+
+/*
+ *  add onto an existing ipmux chain in the canonical comparison
+ *  order: f is linked, through the yes pointers, in front of the
+ *  first node it sorts strictly before.
+ */
+static void
+ipmuxchain(Ipmux **l, Ipmux *f)
+{
+	while(*l != nil && ipmuxcmp(f, *l) >= 0)
+		l = &(*l)->yes;
+	f->yes = *l;
+	*l = f;
+}
+
+/*
+ *  copy a tree: every node and its value array are duplicated;
+ *  all other members, the mask pointer included, are copied as
+ *  they are.
+ */
+static Ipmux*
+ipmuxcopy(Ipmux *f)
+{
+	Ipmux *dup;
+	int nbytes;
+
+	if(f == nil)
+		return nil;
+
+	dup = smalloc(sizeof *dup);
+	*dup = *f;
+	dup->no = ipmuxcopy(f->no);
+	dup->yes = ipmuxcopy(f->yes);
+
+	nbytes = f->n*f->len;
+	dup->val = smalloc(nbytes);
+	dup->e = dup->val + nbytes;
+	memmove(dup->val, f->val, nbytes);
+	return dup;
+}
+
+/*
+ *  Free a single node and its value array.  The yes/no children
+ *  are not touched and the mask is not freed here.
+ */
+static void
+ipmuxfree(Ipmux *f)
+{
+	if(f->val != nil)
+		free(f->val);
+	free(f);
+}
+
+/*
+ *  Free a whole tree: the node, and recursively everything
+ *  reachable through its yes and no pointers.  A nil tree is fine.
+ *  Recursing (rather than freeing only the two direct children)
+ *  keeps chains and subtrees deeper than one level from leaking.
+ */
+static void
+ipmuxtreefree(Ipmux *f)
+{
+	if(f == nil)
+		return;
+	ipmuxtreefree(f->no);
+	ipmuxtreefree(f->yes);
+	ipmuxfree(f);
+}
+
+/*
+ *  merge two trees
+ *
+ *  Returns the root of the merged tree, which is either a or b
+ *  (reused, not copied).  When one root tests an earlier field
+ *  than the other, the other tree is merged into both of its
+ *  branches, a copy being made for the no branch.  Roots that test
+ *  the same field with the same values are combined into a, whose
+ *  reference count is incremented; with different values b goes
+ *  down a's no branch.
+ */
+static Ipmux*
+ipmuxmerge(Ipmux *a, Ipmux *b)
+{
+	int n;
+	Ipmux *f;
+
+	if(a == nil)
+		return b;
+	if(b == nil)
+		return a;
+	n = ipmuxcmp(a, b);
+	if(n < 0){
+		/* a sorts first: b has to be tried whether or not a matches */
+		f = ipmuxcopy(b);
+		a->yes = ipmuxmerge(a->yes, b);
+		a->no = ipmuxmerge(a->no, f);
+		return a;
+	}
+	if(n > 0){
+		/* b sorts first: the mirror image of the case above */
+		f = ipmuxcopy(a);
+		b->yes = ipmuxmerge(b->yes, a);
+		b->no = ipmuxmerge(b->no, f);
+		return b;
+	}
+	if(ipmuxvalcmp(a, b) == 0){
+		/* identical test: share the node and count the extra user */
+		a->yes = ipmuxmerge(a->yes, b->yes);
+		a->no = ipmuxmerge(a->no, b->no);
+		a->ref++;
+		ipmuxfree(b);
+		return a;
+	}
+	/* same field, different values */
+	a->no = ipmuxmerge(a->no, b);
+	return a;
+}
+
+/*
+ *  remove a chain from a demux tree.  This is like merging except that
+ *  we remove instead of insert.
+ *
+ *  Returns 0 when the chain was removed completely and a negative
+ *  number when part of it could not be found in the tree.
+ */
+static int
+ipmuxremove(Ipmux **l, Ipmux *f)
+{
+	int n, rv;
+	Ipmux *ft;
+
+	if(f == nil)
+		return 0;		/* we've removed it all */
+	if(*l == nil)
+		return -1;
+
+	ft = *l;
+	n = ipmuxcmp(ft, f);
+	if(n < 0){
+		/* *l is matching an earlier field, descend both paths */
+		rv = ipmuxremove(&ft->yes, f);
+		rv += ipmuxremove(&ft->no, f);
+		return rv;
+	}
+	if(n > 0){
+		/* f represents an earlier field than *l, this should be impossible */
+		return -1;
+	}
+
+	/* if we get here f and *l are comparing the same fields */
+	if(ipmuxvalcmp(ft, f) != 0){
+		/* different values mean mutually exclusive */
+		return ipmuxremove(&ft->no, f);
+	}
+
+	/* we found a match */
+	if(--(ft->ref) == 0){
+		/*
+		 *  a dead node implies the whole yes side is also dead.
+		 *  since our chain is constrained to be on that side,
+		 *  we're done.
+		 */
+		ipmuxtreefree(ft->yes);
+		*l = ft->no;
+		ipmuxfree(ft);
+		return 0;
+	}
+
+	/*
+	 *  free the rest of the chain.  it is constrained to match the
+	 *  yes side.
+	 */
+	return ipmuxremove(&ft->yes, f->yes);
+}
+
+/*
+ *  connection request is a semicolon separated list of filters
+ *  e.g. proto=17;data[0:3]=11aa22bb;ifc=135.104.9.2&255.255.255.0
+ *
+ *  there's no protection against overlapping specs.
+ *
+ *  The filters are parsed into a chain kept in canonical order; the
+ *  chain itself is remembered in the conversation and a copy of it
+ *  is merged into the protocol's demultiplexing tree.  Returns
+ *  Ebadarg for a wrong argument count or an unparsable filter.
+ */
+static char*
+ipmuxconnect(Conv *c, char **argv, int argc)
+{
+	int i, n;
+	char *field[10];
+	Ipmux *mux, *chain;
+	Ipmuxrock *r;
+	Fs *f;
+
+	f = c->p->f;
+
+	if(argc != 2)
+		return Ebadarg;
+
+	n = getfields(argv[1], field, nelem(field), 1, ";");
+	if(n <= 0)
+		return Ebadarg;
+
+	chain = nil;
+	mux = nil;
+	for(i = 0; i < n; i++){
+		mux = parsemux(field[i]);
+		if(mux == nil){
+			ipmuxtreefree(chain);
+			return Ebadarg;
+		}
+		ipmuxchain(&chain, mux);
+	}
+	if(chain == nil)
+		return Ebadarg;
+	/*
+	 *  NOTE(review): mux is the filter parsed last, which after the
+	 *  ordered insert need not be the last node of the chain --
+	 *  confirm this is the node that should carry the conversation.
+	 */
+	mux->conv = c;
+
+	/* save a copy of the chain so we can later remove it */
+	mux = ipmuxcopy(chain);
+	r = (Ipmuxrock*)(c->ptcl);
+	r->chain = chain;
+
+	/* add the chain to the protocol demultiplexor tree */
+	wlock(f);
+	f->ipmux->priv = ipmuxmerge(f->ipmux->priv, mux);
+	wunlock(f);
+
+	Fsconnected(c, nil);
+	return nil;
+}
+
+/* format the conversation's own filter chain into state (at most n bytes) */
+static int
+ipmuxstate(Conv *c, char *state, int n)
+{
+	Ipmuxrock *rock;
+
+	rock = (Ipmuxrock*)c->ptcl;
+	return ipmuxsprint(rock->chain, 0, state, n);
+}
+
+/*
+ *  Set up a new conversation: 64K read and write queues (writes
+ *  are kicked through ipmuxkick) and an empty filter chain.
+ */
+static void
+ipmuxcreate(Conv *c)
+{
+	c->rq = qopen(64*1024, Qmsg, 0, c);
+	c->wq = qopen(64*1024, Qkick, ipmuxkick, c);
+	((Ipmuxrock*)c->ptcl)->chain = nil;
+}
+
+/* announce is not available for ipmux: every request is refused with this message */
+static char*
+ipmuxannounce(Conv*, char**, int)
+{
+	return "ipmux does not support announce";
+}
+
+/*
+ *  Close a conversation: close its queues, reset its addresses and
+ *  ports, take its filter chain out of the protocol's tree (under
+ *  the Fs write lock) and free the conversation's own chain.
+ */
+static void
+ipmuxclose(Conv *c)
+{
+	Ipmuxrock *r;
+	Fs *f = c->p->f;
+
+	r = (Ipmuxrock*)(c->ptcl);
+
+	qclose(c->rq);
+	qclose(c->wq);
+	qclose(c->eq);
+	ipmove(c->laddr, IPnoaddr);
+	ipmove(c->raddr, IPnoaddr);
+	c->lport = 0;
+	c->rport = 0;
+
+	wlock(f);
+	ipmuxremove(&(c->p->priv), r->chain);
+	wunlock(f);
+	ipmuxtreefree(r->chain);
+	r->chain = nil;
+}
+
+/*
+ *  takes a fully formed ip packet and just passes it down
+ *  the stack: one block is taken off the write queue and sent with
+ *  ipoput6 when its version nibble is 6, with ipoput4 otherwise.
+ */
+static void
+ipmuxkick(void *x)
+{
+	Conv *c = x;
+	Block *bp;
+	Ip4hdr *ih4;
+	Ip6hdr *ih6;
+
+	bp = qget(c->wq);
+	if(bp == nil)
+		return;
+
+	ih4 = (Ip4hdr*)(bp->rp);
+	/*
+	 *  the mask needs its own parentheses: != binds tighter than &,
+	 *  so without them the test would be vihl & (0xF0 != 0x60).
+	 */
+	if((ih4->vihl & 0xF0) != 0x60)
+		ipoput4(c->p->f, bp, 0, ih4->ttl, ih4->tos, nil);
+	else {
+		ih6 = (Ip6hdr*)(bp->rp);
+		ipoput6(c->p->f, bp, 0, ih6->ttl, 0, nil);
+	}
+}
+
+/*
+ *  Receive side: run the packet through the filter tree.  If a
+ *  conversation matches, the packet is queued on its read queue
+ *  with the receiving interface's first local address prepended;
+ *  otherwise the packet goes to the handler registered for its
+ *  protocol number, or is dropped when there is none.
+ *
+ *  For filters with skiphdr set, the ip header length is added to
+ *  the offset on every compare path and in the length check, so a
+ *  filter is never applied to bytes beyond the block.
+ */
+static void
+ipmuxiput(Proto *p, Ipifc *ifc, Block *bp)
+{
+	int len, hl, skip;
+	Fs *f = p->f;
+	uchar *m, *h, *v, *e, *ve, *hp;
+	Conv *c;
+	Ipmux *mux;
+	Ip4hdr *ip;
+	Ip6hdr *ip6;
+
+	ip = (Ip4hdr*)bp->rp;
+	hl = (ip->vihl&0x0F)<<2;
+
+	if(p->priv == nil)
+		goto nomatch;
+
+	h = bp->rp;
+	len = BLEN(bp);
+
+	/* run the v4 filter */
+	rlock(f);
+	c = nil;
+	mux = f->ipmux->priv;
+	while(mux != nil){
+		/* bytes of ip header to step over before mux->off applies */
+		skip = ((int)mux->skiphdr)*hl;
+		if(mux->eoff + skip > len){
+			/* packet too short for this test: treat as no match */
+			mux = mux->no;
+			continue;
+		}
+		hp = h + mux->off + skip;
+		switch(mux->ctype){
+		case Cbyte:
+			if(*mux->val == *hp)
+				goto yes;
+			break;
+		case Cmbyte:
+			if((*hp & *mux->mask) == *mux->val)
+				goto yes;
+			break;
+		case Cshort:
+			if(*((ushort*)mux->val) == *(ushort*)hp)
+				goto yes;
+			break;
+		case Cmshort:
+			if((*(ushort*)hp & (*((ushort*)mux->mask))) == *((ushort*)mux->val))
+				goto yes;
+			break;
+		case Clong:
+			if(*((ulong*)mux->val) == *(ulong*)hp)
+				goto yes;
+			break;
+		case Cmlong:
+			if((*(ulong*)hp & (*((ulong*)mux->mask))) == *((ulong*)mux->val))
+				goto yes;
+			break;
+		case Cifc:
+			if(*((ulong*)mux->val) == *(ulong*)(ifc->lifc->local + IPv4off))
+				goto yes;
+			break;
+		case Cmifc:
+			if((*(ulong*)(ifc->lifc->local + IPv4off) & (*((ulong*)mux->mask))) == *((ulong*)mux->val))
+				goto yes;
+			break;
+		default:
+			/*
+			 *  generic compare: try each of the n values in turn,
+			 *  byte by byte under the mask.
+			 *  NOTE(review): a multi-valued "ifc" filter also lands
+			 *  here with a negative offset -- confirm how that case
+			 *  is meant to be handled.
+			 */
+			v = mux->val;
+			for(e = mux->e; v < e; v = ve){
+				m = mux->mask;
+				hp = h + mux->off + skip;
+				for(ve = v + mux->len; v < ve; v++){
+					if((*hp++ & *m++) != *v)
+						break;
+				}
+				if(v == ve)
+					goto yes;
+			}
+		}
+		mux = mux->no;
+		continue;
+yes:
+		/* remember the conversation of the deepest matching node */
+		if(mux->conv != nil)
+			c = mux->conv;
+		mux = mux->yes;
+	}
+	runlock(f);
+
+	if(c != nil){
+		/* tack on interface address */
+		bp = padblock(bp, IPaddrlen);
+		ipmove(bp->rp, ifc->lifc->local);
+		bp = concatblock(bp);
+		if(bp != nil)
+			if(qpass(c->rq, bp) < 0)
+				print("Q");
+		return;
+	}
+
+nomatch:
+	/* doesn't match any filter, hand it to the specific protocol handler */
+	ip = (Ip4hdr*)bp->rp;
+	if((ip->vihl&0xF0)==0x40) {
+		p = f->t2p[ip->proto];
+	} else {
+		ip6 = (Ip6hdr*)bp->rp;
+		p = f->t2p[ip6->proto];
+	}
+	if(p && p->rcv)
+		(*p->rcv)(p, ifc, bp);
+	else
+		freeblist(bp);
+	return;
+}
+
+/*
+ *  Format the tree rooted at mux into buf (at most len bytes),
+ *  one node per line, indented by level blanks, as
+ *	h[first:last]&mask=val|val|...
+ *  followed by the no subtree and then the yes subtree, each one
+ *  level deeper.  A nil tree prints as an empty, indented line.
+ *  Returns the sum of the snprint results.
+ */
+static int
+ipmuxsprint(Ipmux *mux, int level, char *buf, int len)
+{
+	int i, j, n, first;
+	uchar *v;
+
+	n = 0;
+	for(i = 0; i < level; i++)
+		n += snprint(buf+n, len-n, " ");
+	if(mux == nil)
+		return n + snprint(buf+n, len-n, "\n");
+
+	/* offset of the first compared byte, counted from the start of the ip header */
+	first = mux->off + ((int)mux->skiphdr)*((int)offsetof(Ip4hdr, data[0]));
+	n += snprint(buf+n, len-n, "h[%d:%d]&", first, first+mux->len-1);
+
+	for(i = 0; i < mux->len; i++)
+		n += snprint(buf+n, len - n, "%2.2ux", mux->mask[i]);
+	n += snprint(buf+n, len-n, "=");
+
+	v = mux->val;
+	for(j = 0; j < mux->n; j++){
+		for(i = 0; i < mux->len; i++)
+			n += snprint(buf+n, len - n, "%2.2ux", *v++);
+		n += snprint(buf+n, len-n, "|");
+	}
+	n += snprint(buf+n, len-n, "\n");
+
+	n += ipmuxsprint(mux->no, level+1, buf+n, len-n);
+	n += ipmuxsprint(mux->yes, level+1, buf+n, len-n);
+	return n;
+}
+
+/* format the protocol's whole filter tree into buf while holding the Fs read lock */
+static int
+ipmuxstats(Proto *p, char *buf, int len)
+{
+	Fs *fs;
+	int used;
+
+	fs = p->f;
+	rlock(fs);
+	used = ipmuxsprint(p->priv, 0, buf, len);
+	runlock(fs);
+	return used;
+}
+
+/*
+ *  Build the "ipmux" protocol descriptor with an empty filter
+ *  tree, remember it in f and register it with Fsproto().
+ */
+void
+ipmuxinit(Fs *f)
+{
+	Proto *pr;
+
+	pr = smalloc(sizeof(Proto));
+
+	/* identity and sizing */
+	pr->name = "ipmux";
+	pr->priv = nil;
+	pr->ipproto = -1;
+	pr->nc = 64;
+	pr->ptclsize = sizeof(Ipmuxrock);
+
+	/* operations this protocol implements */
+	pr->connect = ipmuxconnect;
+	pr->announce = ipmuxannounce;
+	pr->state = ipmuxstate;
+	pr->create = ipmuxcreate;
+	pr->close = ipmuxclose;
+	pr->rcv = ipmuxiput;
+	pr->stats = ipmuxstats;
+
+	/* operations it does not */
+	pr->ctl = nil;
+	pr->advise = nil;
+
+	f->ipmux = pr;			/* hack for Fsrcvpcol */
+
+	Fsproto(f, pr);
+}
--- /dev/null
+++ b/os/ip.original/iproute.c
@@ -1,0 +1,852 @@
+#include	"u.h"
+#include	"../port/lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"../port/error.h"
+
+#include	"ip.h"
+
+static void	walkadd(Fs*, Route**, Route*);
+static void	addnode(Fs*, Route**, Route*);
+static void	calcd(Route*);
+
+/* these are used for all instances of IP */
+Route*	v4freelist;
+Route*	v6freelist;
+RWlock	routelock;
+ulong	v4routegeneration, v6routegeneration;
+/* return r to the free list for its address family; free nodes are chained through mid */
+static void
+freeroute(Route *r)
+{
+	Route **head;
+
+	/* Rv4 in the type selects the v4 list, anything else goes to the v6 list */
+	head = (r->type & Rv4) ? &v4freelist : &v6freelist;
+
+	/* drop the tree links and push onto the front of the list */
+	r->left = nil;
+	r->right = nil;
+	r->mid = *head;
+	*head = r;
+}
+/* get a route node from the matching free list (or malloc one); it is zeroed, then given type and ref 1 */
+static Route*
+allocroute(int type)
+{
+	Route *r;
+	int n;
+	Route **l;
+
+	if(type & Rv4){
+		n = sizeof(RouteTree) + sizeof(V4route);
+		l = &v4freelist;
+	} else {
+		n = sizeof(RouteTree) + sizeof(V6route);
+		l = &v6freelist;
+	}
+
+	r = *l;
+	if(r != nil){
+		*l = r->mid;	/* pop: free nodes are chained through mid */
+	} else {
+		r = malloc(n);
+		if(r == nil)
+			panic("out of routing nodes");
+	}
+	memset(r, 0, n);	/* clears the n bytes sized above, for recycled and fresh nodes alike */
+	r->type = type;
+	r->ifc = nil;
+	r->ref = 1;
+
+	return r;
+}
+/* push subtree r onto the queue *q, wrapped in a fresh holder node (holder->left = r, queue chained via mid) */
+static void
+addqueue(Route **q, Route *r)
+{
+	Route *holder;
+
+	if(r != nil){
+		/* the holder only carries the subtree; nil subtrees are never queued */
+		holder = allocroute(r->type);
+		holder->mid = *q;
+		*q = holder;
+		holder->left = r;
+	}
+}
+
+/*
+ *   compare 2 v6 addresses held as IPllen ulongs, word 0 first: returns 1, -1 or 0
+ */
+static int
+lcmp(ulong *a, ulong *b)
+{
+	ulong *end;
+
+	/* the first differing word decides the result */
+	for(end = a + IPllen; a < end; a++, b++){
+		if(*a != *b)
+			return *a > *b ? 1 : -1;
+	}
+
+	return 0;
+}
+
+/*
+ *  compare 2 v4 or v6 ranges: results of rangecompare(a, b)
+ */
+enum
+{
+	Rpreceeds,	/* a ends before b starts */
+	Rfollows,	/* a starts after b ends */
+	Requals,	/* same first and last address */
+	Rcontains,	/* a covers all of b and is not equal to it */
+	Rcontained,	/* none of the above: a inside b, or a partial overlap */
+};
+/* classify range a against range b, returning Rpreceeds..Rcontained; v4 or v6 fields are chosen by a->type alone */
+static int
+rangecompare(Route *a, Route *b)
+{
+	if(a->type & Rv4){
+		if(a->v4.endaddress < b->v4.address)
+			return Rpreceeds;
+
+		if(a->v4.address > b->v4.endaddress)
+			return Rfollows;
+
+		if(a->v4.address <= b->v4.address
+		&& a->v4.endaddress >= b->v4.endaddress){
+			if(a->v4.address == b->v4.address
+			&& a->v4.endaddress == b->v4.endaddress)
+				return Requals;
+			return Rcontains;
+		}
+		return Rcontained;	/* the ranges intersect and a does not cover b */
+	}
+
+	/* same tests for v6, using lcmp on the 4-word addresses */
+	if(lcmp(a->v6.endaddress, b->v6.address) < 0)
+		return Rpreceeds;
+
+	if(lcmp(a->v6.address, b->v6.endaddress) > 0)
+		return Rfollows;
+
+	if(lcmp(a->v6.address, b->v6.address) <= 0
+	&& lcmp(a->v6.endaddress, b->v6.endaddress) >= 0){
+		if(lcmp(a->v6.address, b->v6.address) == 0
+		&& lcmp(a->v6.endaddress, b->v6.endaddress) == 0)
+				return Requals;
+		return Rcontains;
+	}
+
+	return Rcontained;
+}
+/* copy new's gateway address into old (note the direction); the address family is taken from new->type */
+static void
+copygate(Route *old, Route *new)
+{
+	if(new->type & Rv4)
+		memmove(old->v4.gate, new->v4.gate, IPv4addrlen);
+	else
+		memmove(old->v6.gate, new->v6.gate, IPaddrlen);
+}
+
+/*
+ *  walk down a tree adding nodes back in: p is re-inserted (its mid link untouched), then its old left and right subtrees
+ */
+static void
+walkadd(Fs *f, Route **root, Route *p)
+{
+	Route *l, *r;
+
+	l = p->left;	/* save the children first: addnode may free p (Requals case) */
+	r = p->right;
+	p->left = 0;
+	p->right = 0;
+	addnode(f, root, p);
+	if(l)
+		walkadd(f, root, l);
+	if(r)
+		walkadd(f, root, r);
+}
+
+/*
+ *  calculate depth: p->depth becomes 1 + the depth picked from left, right and mid
+ */
+static void
+calcd(Route *p)
+{
+	int deepest;
+
+	if(p == nil)
+		return;
+
+	/* start from the left child's depth, then let right and mid raise it */
+	deepest = 0;
+	if(p->left != nil)
+		deepest = p->left->depth;
+	if(p->right != nil && p->right->depth > deepest)
+		deepest = p->right->depth;
+	if(p->mid != nil && p->mid->depth > deepest)
+		deepest = p->mid->depth;
+
+	/* a node without children ends up at depth 1 */
+	p->depth = deepest+1;
+}
+
+/*
+ *  balance the tree at the current node: at most one rotation, then depths are recomputed; *cur must not be nil
+ */
+static void
+balancetree(Route **cur)
+{
+	Route *p, *l, *r;
+	int dl, dr;
+
+	/*
+	 * if left and right are
+	 * too out of balance,
+	 * rotate tree node
+	 */
+	p = *cur;
+	dl = 0; if(l = p->left) dl = l->depth;
+	dr = 0; if(r = p->right) dr = r->depth;
+
+	if(dl > dr+1) {	/* left side deeper by 2 or more: l becomes the subtree root, p its right child */
+		p->left = l->right;
+		l->right = p;
+		*cur = l;
+		calcd(p);	/* p is now the lower node, so its depth is fixed first */
+		calcd(l);
+	} else
+	if(dr > dl+1) {	/* mirror case: r becomes the subtree root, p its left child */
+		p->right = r->left;
+		r->left = p;
+		*cur = r;
+		calcd(p);
+		calcd(r);
+	} else
+		calcd(p);
+}
+
+/*
+ *  add a new node to the tree at *cur; new may be freed here (Requals), and displaced nodes are left on f->queue
+ */
+static void
+addnode(Fs *f, Route **cur, Route *new)
+{
+	Route *p;
+
+	p = *cur;
+	if(p == 0) {
+		*cur = new;
+		new->depth = 1;
+		return;
+	}
+
+	switch(rangecompare(new, p)){
+	case Rpreceeds:
+		addnode(f, &p->left, new);
+		break;
+	case Rfollows:
+		addnode(f, &p->right, new);
+		break;
+	case Rcontains:
+		/*
+		 *  if new node is superset
+		 *  of tree node,
+		 *  replace tree node and
+		 *  queue tree node to be
+		 *  merged into root.
+		 */
+		*cur = new;
+		new->depth = 1;
+		addqueue(&f->queue, p);
+		break;
+	case Requals:
+		/*
+		 *  supercede the old entry if the old one isn't
+		 *  a local interface.
+		 */
+		if((p->type & Rifc) == 0){
+			p->type = new->type;
+			p->ifcid = -1;	/* presumably makes the lookups re-resolve p->ifc -- TODO confirm */
+			copygate(p, new);
+		} else if(new->type & Rifc)
+			p->ref++;	/* interface route added again: only counted */
+		freeroute(new);
+		break;
+	case Rcontained:
+		addnode(f, &p->mid, new);	/* ranges inside p's range hang off mid */
+		break;
+	}
+	
+	balancetree(cur);
+}
+
+#define	V4H(a)	(((a)&0x07ffffff)>>(32-Lroot-5))	/* hash bucket for a host-order v4 address */
+/* add v4 route a/mask via gate, with tag and type, to every hash bucket its range touches, then notify the interfaces */
+void
+v4addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type)
+{
+	Route *p;
+	ulong sa;
+	ulong m;
+	ulong ea;
+	int h, eh;
+
+	m = nhgetl(mask);
+	sa = nhgetl(a) & m;	/* first address of the range */
+	ea = sa | ~m;		/* last address of the range */
+
+	eh = V4H(ea);
+	for(h=V4H(sa); h<=eh; h++) {
+		/* allocroute pops the shared free lists, so it has to run under routelock as well */
+		wlock(&routelock);
+		p = allocroute(Rv4 | type);
+		p->v4.address = sa;
+		p->v4.endaddress = ea;
+		memmove(p->v4.gate, gate, sizeof(p->v4.gate));
+		memmove(p->tag, tag, sizeof(p->tag));
+		addnode(f, &f->v4root[h], p);
+		while(p = f->queue) {	/* re-insert the subtrees addnode displaced; p is a holder node now */
+			f->queue = p->mid;
+			walkadd(f, &f->v4root[h], p->left);
+			freeroute(p);
+		}
+		wunlock(&routelock);
+	}
+	v4routegeneration++;	/* cached routes in conversations are checked against this */
+
+	ipifcaddroute(f, Rv4, a, mask, gate, type);
+}
+
+#define	V6H(a)	(((a)[IPllen-1] & 0x07ffffff)>>(32-Lroot-5))
+#define ISDFLT(a, mask, tag) ((ipcmp((a),v6Unspecified)==0) && (ipcmp((mask),v6Unspecified)==0) && (strcmp((tag), "ra")!=0))
+/* add v6 route a/mask via gate, with tag and type, to every hash bucket its range touches, then notify the interfaces */
+void
+v6addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type)
+{
+	Route *p;
+	ulong sa[IPllen], ea[IPllen];
+	ulong x, y;
+	int h, eh;
+
+	/*
+	if(ISDFLT(a, mask, tag))
+		f->v6p->cdrouter = -1;
+	*/
+
+	/* sa and ea are the first and last address of the range, as host-order words */
+	for(h = 0; h < IPllen; h++){
+		x = nhgetl(a+4*h);
+		y = nhgetl(mask+4*h);
+		sa[h] = x & y;
+		ea[h] = x | ~y;
+	}
+
+	eh = V6H(ea);
+	for(h = V6H(sa); h <= eh; h++) {
+		/* allocroute pops the shared free lists, so it has to run under routelock as well */
+		wlock(&routelock);
+		p = allocroute(type);
+		memmove(p->v6.address, sa, IPaddrlen);
+		memmove(p->v6.endaddress, ea, IPaddrlen);
+		memmove(p->v6.gate, gate, IPaddrlen);
+		memmove(p->tag, tag, sizeof(p->tag));
+		addnode(f, &f->v6root[h], p);
+		while(p = f->queue) {	/* re-insert the subtrees addnode displaced; p is a holder node now */
+			f->queue = p->mid;
+			walkadd(f, &f->v6root[h], p->left);
+			freeroute(p);
+		}
+		wunlock(&routelock);
+	}
+	v6routegeneration++;	/* cached routes in conversations are checked against this */
+
+	ipifcaddroute(f, 0, a, mask, gate, type);
+}
+/* find the node whose range equals r's; returns the link that points at it (so it can be unlinked), or 0 if there is none */
+Route**
+looknode(Route **cur, Route *r)
+{
+	Route *p;
+
+	for(;;){
+		p = *cur;
+		if(p == 0)
+			return 0;
+	
+		switch(rangecompare(r, p)){
+		case Rcontains:
+			return 0;	/* r is wider than this node: the search gives up here */
+		case Rpreceeds:
+			cur = &p->left;
+			break;
+		case Rfollows:
+			cur = &p->right;
+			break;
+		case Rcontained:
+			cur = &p->mid;
+			break;
+		case Requals:
+			return cur;
+		}
+	}
+}
+/* drop one reference to v4 route a/mask in every bucket it spans; routelock is taken per bucket only when dolock is set */
+void
+v4delroute(Fs *f, uchar *a, uchar *mask, int dolock)
+{
+	Route **r, *p;
+	Route rt;
+	int h, eh;
+	ulong m;
+
+	m = nhgetl(mask);
+	rt.v4.address = nhgetl(a) & m;	/* rt is only a search key; just these three fields are set */
+	rt.v4.endaddress = rt.v4.address | ~m;
+	rt.type = Rv4;
+
+	eh = V4H(rt.v4.endaddress);
+	for(h=V4H(rt.v4.address); h<=eh; h++) {
+		if(dolock)
+			wlock(&routelock);
+		r = looknode(&f->v4root[h], &rt);
+		if(r) {
+			p = *r;
+			if(--(p->ref) == 0){
+				*r = 0;	/* cut the node out, then queue its three subtrees */
+				addqueue(&f->queue, p->left);
+				addqueue(&f->queue, p->mid);
+				addqueue(&f->queue, p->right);
+				freeroute(p);
+				while(p = f->queue) {	/* put the orphaned subtrees back into this bucket */
+					f->queue = p->mid;
+					walkadd(f, &f->v4root[h], p->left);
+					freeroute(p);
+				}
+			}
+		}
+		if(dolock)
+			wunlock(&routelock);
+	}
+	v4routegeneration++;
+
+	ipifcremroute(f, Rv4, a, mask);
+}
+/* drop one reference to v6 route a/mask in every bucket it spans; routelock is taken per bucket only when dolock is set */
+void
+v6delroute(Fs *f, uchar *a, uchar *mask, int dolock)
+{
+	Route **r, *p;
+	Route rt;
+	int h, eh;
+	ulong x, y;
+
+	for(h = 0; h < IPllen; h++){	/* build the search key: first and last address as host-order words */
+		x = nhgetl(a+4*h);
+		y = nhgetl(mask+4*h);
+		rt.v6.address[h] = x & y;
+		rt.v6.endaddress[h] = x | ~y;
+	}
+	rt.type = 0;	/* no Rv4 bit, so rangecompare uses the v6 fields */
+
+	eh = V6H(rt.v6.endaddress);
+	for(h=V6H(rt.v6.address); h<=eh; h++) {
+		if(dolock)
+			wlock(&routelock);
+		r = looknode(&f->v6root[h], &rt);
+		if(r) {
+			p = *r;
+			if(--(p->ref) == 0){
+				*r = 0;	/* cut the node out, then queue its three subtrees */
+				addqueue(&f->queue, p->left);
+				addqueue(&f->queue, p->mid);
+				addqueue(&f->queue, p->right);
+				freeroute(p);
+				while(p = f->queue) {	/* put the orphaned subtrees back into this bucket */
+					f->queue = p->mid;
+					walkadd(f, &f->v6root[h], p->left);
+					freeroute(p);
+				}
+			}
+		}
+		if(dolock)
+			wunlock(&routelock);
+	}
+	v6routegeneration++;
+
+	ipifcremroute(f, 0, a, mask);
+}
+/* find the most specific v4 route containing a (4 bytes, network order); c, if given, caches the result; nil if no route or no interface */
+Route*
+v4lookup(Fs *f, uchar *a, Conv *c)
+{
+	Route *p, *q;
+	ulong la;
+	uchar gate[IPaddrlen];
+	Ipifc *ifc;
+
+	/* reuse the conversation's cached route while the generation is unchanged */
+	if(c != nil && c->r != nil && c->r->ifc != nil && c->rgen == v4routegeneration)
+		return c->r;
+
+	la = nhgetl(a);
+	q = nil;
+	for(p=f->v4root[V4H(la)]; p;)
+		if(la >= p->v4.address) {
+			if(la <= p->v4.endaddress) {
+				q = p;	/* a match; mid holds the ranges nested inside it */
+				p = p->mid;
+			} else
+				p = p->right;
+		} else
+			p = p->left;
+
+	/* resolve (or re-resolve) the interface when none is recorded or the recorded id no longer matches */
+	if(q && (q->ifc == nil || q->ifcid != q->ifc->ifcid)){
+		if(q->type & Rifc) {
+			hnputl(gate+IPv4off, q->v4.address);
+			memmove(gate, v4prefix, IPv4off);
+		} else
+			v4tov6(gate, q->v4.gate);
+		ifc = findipifc(f, gate, q->type);
+		if(ifc == nil)
+			return nil;
+		q->ifc = ifc;
+		q->ifcid = ifc->ifcid;
+	}
+	if(c != nil){
+		c->r = q;
+		c->rgen = v4routegeneration;
+	}
+	return q;
+}
+/* find the most specific route containing the 16-byte address a, trying v4lookup first when a carries v4prefix; c caches the result */
+Route*
+v6lookup(Fs *f, uchar *a, Conv *c)
+{
+	Route *p, *q;
+	ulong la[IPllen];
+	int h;
+	ulong x, y;
+	uchar gate[IPaddrlen];
+	Ipifc *ifc;
+
+	if(memcmp(a, v4prefix, IPv4off) == 0){
+		q = v4lookup(f, a+IPv4off, c);
+		if(q != nil)
+			return q;
+	}
+
+	if(c != nil && c->r != nil && c->r->ifc != nil && c->rgen == v6routegeneration)
+		return c->r;
+
+	for(h = 0; h < IPllen; h++)
+		la[h] = nhgetl(a+4*h);
+
+	q = 0;
+	for(p=f->v6root[V6H(la)]; p;){
+		for(h = 0; h < IPllen; h++){	/* la below the node's first address: go left */
+			x = la[h];
+			y = p->v6.address[h];
+			if(x == y)
+				continue;
+			if(x < y){
+				p = p->left;
+				goto next;
+			}
+			break;
+		}
+		for(h = 0; h < IPllen; h++){	/* la above the node's last address: go right */
+			x = la[h];
+			y = p->v6.endaddress[h];
+			if(x == y)
+				continue;
+			if(x > y){
+				p = p->right;
+				goto next;
+			}
+			break;
+		}
+		q = p;	/* inside this node's range; look in mid for a narrower one */
+		p = p->mid;
+next:		;
+	}
+
+	if(q && (q->ifc == nil || q->ifcid != q->ifc->ifcid)){
+		if(q->type & Rifc) {
+			for(h = 0; h < IPllen; h++)
+				hnputl(gate+4*h, q->v6.address[h]);
+			ifc = findipifc(f, gate, q->type);
+		} else
+			ifc = findipifc(f, q->v6.gate, q->type);
+		if(ifc == nil)
+			return nil;
+		q->ifc = ifc;
+		q->ifcid = ifc->ifcid;
+	}
+	if(c != nil){
+		c->r = q;
+		c->rgen = v6routegeneration;
+	}
+	
+	return q;
+}
+/* render the route type flags into p as a blank-padded 4 character string plus NUL (p needs 5 bytes) */
+void
+routetype(int type, char *p)
+{
+	int i;
+
+	memset(p, ' ', 4);
+	p[4] = 0;
+	i = 0;
+	p[i++] = (type & Rv4) ? '4' : '6';	/* the address family always fills column 0 */
+	if(type & Rifc)
+		p[i++] = 'i';
+	if(type & Runi)
+		p[i++] = 'u';
+	else if(type & Rbcast)
+		p[i++] = 'b';
+	else if(type & Rmulti)
+		p[i++] = 'm';
+	if(type & Rptpt)
+		p[i] = 'p';
+}
+
+char *rformat = "%-15I %-4M %-15I %4.4s %4.4s %3s\n";
+/* expand r into 16-byte addr, mask and gate, a type string t (5 bytes, see routetype) and *nifc = r->ifc->conv->x, or -1 without ifc */
+void
+convroute(Route *r, uchar *addr, uchar *mask, uchar *gate, char *t, int *nifc)
+{
+	int i;
+
+	if(r->type & Rv4){
+		memmove(addr, v4prefix, IPv4off);
+		hnputl(addr+IPv4off, r->v4.address);
+		memset(mask, 0xff, IPv4off);
+		hnputl(mask+IPv4off, ~(r->v4.endaddress ^ r->v4.address));	/* mask rebuilt from the bits where first and last address agree */
+		memmove(gate, v4prefix, IPv4off);
+		memmove(gate+IPv4off, r->v4.gate, IPv4addrlen);
+	} else {
+		for(i = 0; i < IPllen; i++){
+			hnputl(addr + 4*i, r->v6.address[i]);
+			hnputl(mask + 4*i, ~(r->v6.endaddress[i] ^ r->v6.address[i]));
+		}
+		memmove(gate, r->v6.gate, IPaddrlen);
+	}
+
+	routetype(r->type, t);
+
+	if(r->ifc)
+		*nifc = r->ifc->conv->x;
+	else
+		*nifc = -1;
+}
+
+/*
+ *  this code is not in rr to reduce stack size.  prints one route at rw->p; while rw->o < 0, -rw->o bytes of output remain to be skipped
+ */
+static void
+sprintroute(Route *r, Routewalk *rw)
+{
+	int nifc, n;
+	char t[5], *iname, ifbuf[5];
+	uchar addr[IPaddrlen], mask[IPaddrlen], gate[IPaddrlen];
+	char *p;
+
+	convroute(r, addr, mask, gate, t, &nifc);
+	iname = "-";	/* shown when the route has no interface */
+	if(nifc != -1) {
+		iname = ifbuf;
+		snprint(ifbuf, sizeof ifbuf, "%d", nifc);
+	}
+	p = seprint(rw->p, rw->e, rformat, addr, mask, gate, t, r->tag, iname);
+	if(rw->o < 0){
+		n = p - rw->p;
+		if(n > -rw->o){	/* this line crosses the offset: slide the part past it down to rw->p */
+			memmove(rw->p, rw->p-rw->o, n+rw->o);
+			rw->p = p + rw->o;
+		}
+		rw->o += n;	/* otherwise the line is dropped: rw->p stays put and the next one overwrites it */
+	} else
+		rw->p = p;
+}
+
+/*
+ *  recurse descending tree (left, node, mid, right), applying the function in Routewalk; 0 means the buffer is full
+ */
+static int
+rr(Route *r, Routewalk *rw)
+{
+	int h;
+
+	if(rw->e <= rw->p)
+		return 0;
+	if(r == nil)
+		return 1;
+
+	if(rr(r->left, rw) == 0)
+		return 0;
+
+	if(r->type & Rv4)
+		h = V4H(r->v4.address);
+	else
+		h = V6H(r->v6.address);
+
+	if(h == rw->h)	/* only visit a node in the bucket of its first address, although it may sit in several */
+		rw->walk(r, rw);
+
+	if(rr(r->mid, rw) == 0)
+		return 0;
+
+	return rr(r->right, rw);
+}
+/* apply rw->walk to the routes of f, all v4 buckets then all v6 buckets, under the routelock read lock; stops when rw's buffer is full */
+void
+ipwalkroutes(Fs *f, Routewalk *rw)
+{
+	rlock(&routelock);
+	if(rw->e > rw->p) {
+		for(rw->h = 0; rw->h < nelem(f->v4root); rw->h++)
+			if(rr(f->v4root[rw->h], rw) == 0)
+				break;
+	}
+	if(rw->e > rw->p) {
+		for(rw->h = 0; rw->h < nelem(f->v6root); rw->h++)
+			if(rr(f->v6root[rw->h], rw) == 0)
+				break;
+	}
+	runlock(&routelock);
+}
+/* read of the route table as text: formats routes into p[0..n), skipping the first offset bytes of output; returns bytes stored */
+long
+routeread(Fs *f, char *p, ulong offset, int n)
+{
+	Routewalk rw;
+
+	rw.p = p;
+	rw.e = p+n;
+	rw.o = -offset;	/* negative while output is still being skipped (see sprintroute) */
+	rw.walk = sprintroute;
+
+	ipwalkroutes(f, &rw);
+
+	return rw.p - p;
+}
+
+/*
+ *  this code is not in routeflush to reduce stack size.  deletes r by converting it back to addr/mask form; dolock is passed through
+ */
+void
+delroute(Fs *f, Route *r, int dolock)
+{
+	uchar addr[IPaddrlen];
+	uchar mask[IPaddrlen];
+	uchar gate[IPaddrlen];
+	char t[5];
+	int nifc;
+
+	convroute(r, addr, mask, gate, t, &nifc);	/* only addr and mask are used below */
+	if(r->type & Rv4)
+		v4delroute(f, addr+IPv4off, mask+IPv4off, dolock);
+	else
+		v6delroute(f, addr, mask, dolock);
+}
+
+/*
+ *  recurse until one route is deleted (children first; Rifc routes are never touched; nil tag matches all)
+ *    returns 0 if nothing is deleted, 1 otherwise
+ */
+int
+routeflush(Fs *f, Route *r, char *tag)
+{
+	if(r == nil)
+		return 0;
+	if(routeflush(f, r->mid, tag))
+		return 1;
+	if(routeflush(f, r->left, tag))
+		return 1;
+	if(routeflush(f, r->right, tag))
+		return 1;
+	if((r->type & Rifc) == 0){
+		if(tag == nil || strncmp(tag, r->tag, sizeof(r->tag)) == 0){
+			delroute(f, r, 0);	/* dolock 0: delroute will not take routelock itself */
+			return 1;	/* the tree has changed: the caller starts over */
+		}
+	}
+	return 0;
+}
+/* write of a route command: "flush [tag]", "remove addr mask", "add addr mask gate" or "tag name"; returns n, raises Ebadarg */
+long
+routewrite(Fs *f, Chan *c, char *p, int n)
+{
+	int h, changed;
+	char *tag;
+	Cmdbuf *cb;
+	uchar addr[IPaddrlen], mask[IPaddrlen], gate[IPaddrlen];
+	IPaux *a, *na;
+
+	cb = parsecmd(p, n);
+	if(waserror()){
+		free(cb);
+		nexterror();
+	}
+	if(cb->nf < 1)		/* empty request: there is no cb->f[0] to compare */
+		error(Ebadarg);
+
+	if(strcmp(cb->f[0], "flush") == 0){
+		tag = cb->nf > 1 ? cb->f[1] : nil;	/* no tag given: flush every non-interface route */
+		for(h = 0; h < nelem(f->v4root); h++)
+			for(changed = 1; changed;){
+				wlock(&routelock);
+				changed = routeflush(f, f->v4root[h], tag);
+				wunlock(&routelock);
+			}
+		for(h = 0; h < nelem(f->v6root); h++)
+			for(changed = 1; changed;){
+				wlock(&routelock);
+				changed = routeflush(f, f->v6root[h], tag);
+				wunlock(&routelock);
+			}
+	} else if(strcmp(cb->f[0], "remove") == 0){
+		if(cb->nf < 3)
+			error(Ebadarg);
+		parseip(addr, cb->f[1]);
+		parseipmask(mask, cb->f[2]);
+		if(memcmp(addr, v4prefix, IPv4off) == 0)
+			v4delroute(f, addr+IPv4off, mask+IPv4off, 1);
+		else
+			v6delroute(f, addr, mask, 1);
+	} else if(strcmp(cb->f[0], "add") == 0){
+		if(cb->nf < 4)
+			error(Ebadarg);
+		parseip(addr, cb->f[1]);
+		parseipmask(mask, cb->f[2]);
+		parseip(gate, cb->f[3]);
+		tag = "none";	/* used when there is no channel to take a tag from */
+		if(c != nil){
+			a = c->aux;
+			tag = a->tag;
+		}
+		if(memcmp(addr, v4prefix, IPv4off) == 0)
+			v4addroute(f, tag, addr+IPv4off, mask+IPv4off, gate+IPv4off, 0);
+		else
+			v6addroute(f, tag, addr, mask, gate, 0);
+	} else if(strcmp(cb->f[0], "tag") == 0) {
+		if(c == nil || cb->nf < 2)	/* the tag lives in c->aux, so a channel is required */
+			error(Ebadarg);
+
+		a = c->aux;
+		na = newipaux(a->owner, cb->f[1]);
+		c->aux = na;
+		free(a);
+	}
+
+	poperror();	/* any other command word is accepted and ignored */
+	free(cb);
+	return n;
+}
--- /dev/null
+++ b/os/ip.original/iprouter.c
@@ -1,0 +1,56 @@
+#include	"u.h"
+#include	"../port/lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"../port/error.h"
+#include	"../ip/ip.h"
+
+IProuter iprouter;
+
+/*
+ *  User level routing.  Ip packets we don't know what to do with come here.
+ *  bp is consumed: queued on f->iprouter.q with ifc->lifc->local prepended, or freed.
+ */
+void
+useriprouter(Fs *f, Ipifc *ifc, Block *bp)
+{
+	qlock(&f->iprouter);
+	if(f->iprouter.q != nil){
+		bp = padblock(bp, IPaddrlen);
+		if(bp != nil){	/* on nil just fall through: every path must reach the qunlock below */
+			ipmove(bp->rp, ifc->lifc->local);
+			qpass(f->iprouter.q, bp);
+		}
+	}else
+		freeblist(bp);	/* nobody has the queue open: drop the packet, chained blocks included */
+	qunlock(&f->iprouter);
+}
+/* count an open of the router queue: it is created on first use and reopened when the count goes from 0 to 1 */
+void
+iprouteropen(Fs *f)
+{
+	qlock(&f->iprouter);
+	f->iprouter.opens++;
+	if(f->iprouter.q == nil)
+		f->iprouter.q = qopen(64*1024, 0, 0, 0);
+	else if(f->iprouter.opens == 1)
+		qreopen(f->iprouter.q);
+	qunlock(&f->iprouter);
+}
+/* drop one open of the router queue; the last close does qclose but keeps the queue pointer for iprouteropen to reopen */
+void
+iprouterclose(Fs *f)
+{
+	qlock(&f->iprouter);
+	f->iprouter.opens--;
+	if(f->iprouter.opens == 0)
+		qclose(f->iprouter.q);
+	qunlock(&f->iprouter);
+}
+/* read up to n bytes from the router queue into a; neither the f->iprouter qlock nor a nil-queue check is used here */
+long
+iprouterread(Fs *f, void *a, int n)
+{
+	return qread(f->iprouter.q, a, n);
+}
--- /dev/null
+++ b/os/ip.original/ipv6.c
@@ -1,0 +1,747 @@
+#include	"u.h"
+#include	"../port/lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"../port/error.h"
+
+#include	"ip.h"
+#include	"ipv6.h"
+/* header sizes, IPv4 header/flag constants and the packet size limit enforced by ipoput6 */
+enum
+{
+	IP4HDR		= 20,		/* sizeof(Ip4hdr) */
+	IP6HDR		= 40,		/* sizeof(Ip6hdr) */
+	IP_HLEN4	= 0x05,		/* Header length in words */
+	IP_DF		= 0x4000,	/* Don't fragment */
+	IP_MF		= 0x2000,	/* More fragments */
+	IP6FHDR		= 8, 		/* sizeof(Fraghdr6) */
+	IP_MAX		= (32*1024),	/* Maximum Internet packet size */
+};
+
+#define IPV6CLASS(hdr) (((hdr)->vcf[0]&0x0F)<<4 | ((hdr)->vcf[1]&0xF0)>>4)	/* 8-bit traffic class, the inverse of how ipoput6 stores tos */
+#define BLKIPVER(xp)	(((Ip6hdr*)((xp)->rp))->vcf[0]&0xF0)	/* version nibble, still in the high 4 bits */
+/*
+ * This sleazy macro is stolen shamelessly from ip.c, see comment there.  It treats the start of the block's buffer (base) as an Ipfrag.
+ */
+#define BKFG(xp)	((Ipfrag*)((xp)->base))
+
+typedef struct	IP	IP;
+typedef struct	Fragment4	Fragment4;
+typedef struct	Fragment6	Fragment6;
+typedef struct	Ipfrag	Ipfrag;
+
+Block*		ip6reassemble(IP*, int, Block*, Ip6hdr*);
+void		ipfragfree6(IP*, Fragment6*);
+Fragment6*	ipfragallo6(IP*);
+static Block*		procxtns(IP *ip, Block *bp, int doreasm);
+int		unfraglen(Block *bp, uchar *nexthdr, int setfh);
+Block*		procopts(Block *bp);
+
+/* MIB II counters */
+enum
+{
+	Forwarding,
+	DefaultTTL,
+	InReceives,
+	InHdrErrors,
+	InAddrErrors,
+	ForwDatagrams,
+	InUnknownProtos,
+	InDiscards,
+	InDelivers,
+	OutRequests,
+	OutDiscards,
+	OutNoRoutes,
+	ReasmTimeout,
+	ReasmReqds,
+	ReasmOKs,
+	ReasmFails,
+	FragOKs,
+	FragFails,
+	FragCreates,
+
+	Nstats,
+};
+/* printable names for the MIB II counters, indexed by the counter enum */
+static char *statnames[] =
+{
+[Forwarding]	"Forwarding",
+[DefaultTTL]	"DefaultTTL",
+[InReceives]	"InReceives",
+[InHdrErrors]	"InHdrErrors",
+[InAddrErrors]	"InAddrErrors",
+[ForwDatagrams]	"ForwDatagrams",
+[InUnknownProtos]	"InUnknownProtos",
+[InDiscards]	"InDiscards",
+[InDelivers]	"InDelivers",
+[OutRequests]	"OutRequests",
+[OutDiscards]	"OutDiscards",
+[OutNoRoutes]	"OutNoRoutes",
+[ReasmTimeout]	"ReasmTimeout",
+[ReasmReqds]	"ReasmReqds",
+[ReasmOKs]	"ReasmOKs",
+[ReasmFails]	"ReasmFails",
+[FragOKs]	"FragOKs",
+[FragFails]	"FragFails",
+[FragCreates]	"FragCreates",
+};
+/* IPv4 reassembly queue entry; only declared here -- presumably used like Fragment6, TODO confirm against ip.c */
+struct Fragment4
+{
+	Block*	blist;
+	Fragment4*	next;
+	ulong 	src;
+	ulong 	dst;
+	ushort	id;
+	ulong 	age;
+};
+/* reassembly queue for one IPv6 datagram, identified by (src, dst, id); lives on IP.flisthead6 or IP.fragfree6 */
+struct Fragment6
+{
+	Block*	blist;		/* fragments received so far, ordered by offset */
+	Fragment6*	next;	/* link in the active or the free list */
+	uchar 	src[IPaddrlen];
+	uchar 	dst[IPaddrlen];
+	uint	id;		/* id field of the fragment header */
+	ulong 	age;		/* set to NOW + 30000 on allocation; the queue is dropped once age < NOW */
+};
+/* per-fragment bookkeeping that ip6reassemble keeps at the base of each queued block (see BKFG) */
+struct Ipfrag
+{
+	ushort	foff;	/* offset of this fragment's data within the datagram */
+	ushort	flen;	/* bytes of fragment data still counted, after overlap trimming */
+};
+
+/* an instance of IP */
+struct IP
+{
+	ulong		stats[Nstats];	/* MIB II counters, indexed by the enum above */
+
+	QLock		fraglock4;
+	Fragment4*	flisthead4;
+	Fragment4*	fragfree4;
+	Ref		id4;
+
+	QLock		fraglock6;	/* held while the two v6 lists below are used */
+	Fragment6*	flisthead6;	/* active reassembly queues, newest first */
+	Fragment6*	fragfree6;	/* unused queue entries */
+	Ref		id6;		/* incremented to make the id of each datagram ipoput6 fragments */
+
+	int		iprouting;	/* true if we route like a gateway */
+};
+/* send v6 packet bp (Ip6hdr at bp->rp), fragmenting if it exceeds the medium; bp is passed on or freed on the normal paths; -1 only when no route */
+int
+ipoput6(Fs *f, Block *bp, int gating, int ttl, int tos, Conv *c)
+{
+	int tentative;
+	Ipifc *ifc;
+	uchar *gate, nexthdr;
+	Ip6hdr *eh;
+	int medialen, len, chunk, uflen, flen, seglen, lid, offset, fragoff, morefrags, blklen;
+	Route *r, *sr;
+	Fraghdr6 fraghdr;
+	Block *xp, *nb;
+	IP *ip;
+	int rv = 0;
+
+	ip = f->ip;
+
+	/* Fill out the ip header */
+	eh = (Ip6hdr*)(bp->rp);
+
+	ip->stats[OutRequests]++;
+
+	/* Number of uchars in data and ip header to write */
+	len = blocklen(bp);
+	
+	tentative = iptentative(f, eh->src);
+	if(tentative){
+		netlog(f, Logip, "reject tx of packet with tentative src address\n");
+		goto free;
+	}
+
+	if(gating){	/* forwarded packet: trust the payload length in its header, not the block length */
+		chunk = nhgets(eh->ploadlen);
+		if(chunk > len){
+			ip->stats[OutDiscards]++;
+			netlog(f, Logip, "short gated packet\n");
+			goto free;
+		}
+		if(chunk + IPV6HDR_LEN < len)
+			len = chunk + IPV6HDR_LEN;
+	}
+
+	if(len >= IP_MAX){
+//		print("len > IP_MAX, free\n");
+		ip->stats[OutDiscards]++;
+		netlog(f, Logip, "exceeded ip max size %I\n", eh->dst);
+		goto free;
+	}
+
+	r = v6lookup(f, eh->dst, c);
+	if(r == nil){
+//		print("no route for %I, src %I free\n", eh->dst, eh->src);
+		ip->stats[OutNoRoutes]++;
+		netlog(f, Logip, "no interface %I\n", eh->dst);
+		rv = -1;
+		goto free;
+	}
+
+	ifc = r->ifc;
+	if(r->type & (Rifc|Runi))
+		gate = eh->dst;
+	else
+	if(r->type & (Rbcast|Rmulti)) {
+		gate = eh->dst;
+		sr = v6lookup(f, eh->src, nil);	/* send on the interface that owns the source address, if it is unicast */
+		if(sr != nil && (sr->type & Runi))
+			ifc = sr->ifc;
+	}
+	else
+		gate = r->v6.gate;
+
+	if(!gating)	/* version, class and flow bits of a forwarded packet are left as received */
+		eh->vcf[0] = IP_VER6;
+	eh->ttl = ttl;
+	if(!gating) {
+		eh->vcf[0] |= (tos >> 4);
+		eh->vcf[1] = (tos << 4);
+	}
+
+	if(!canrlock(ifc)) {	/* interface lock busy: the packet is silently dropped, rv stays 0 */
+		goto free;
+	}
+
+	if(waserror()){
+		runlock(ifc);
+		nexterror();
+	}
+
+	if(ifc->m == nil) {
+		goto raise;
+	}
+
+	/* If we dont need to fragment just send it */
+	medialen = ifc->maxtu - ifc->m->hsize;
+	if(len <= medialen) {
+		hnputs(eh->ploadlen, len-IPV6HDR_LEN);
+		ifc->m->bwrite(ifc, bp, V6, gate);
+		runlock(ifc);
+		poperror();
+		return 0;
+	}
+
+	if(gating) 
+	if(ifc->reassemble <= 0) {
+
+		/* v6 intermediate nodes are not supposed to fragment pkts;
+		   we fragment if ifc->reassemble is turned on; an exception
+		   needed for nat.
+		 */
+
+		ip->stats[OutDiscards]++;
+		icmppkttoobig6(f, ifc, bp);
+		netlog(f, Logip, "%I: gated pkts not fragmented\n", eh->dst);
+		goto raise;
+	}
+		
+	/* start v6 fragmentation */
+	uflen = unfraglen(bp, &nexthdr, 1);	/* setfh = 1: the last unfragmentable header now names FH as next */
+	if(uflen > medialen) {
+		ip->stats[FragFails]++;
+		ip->stats[OutDiscards]++;
+		netlog(f, Logip, "%I: unfragmentable part too big\n", eh->dst);
+		goto raise;
+	}
+
+	flen = len - uflen;
+	seglen = (medialen - (uflen + IP6FHDR)) & ~7;	/* data per fragment, a multiple of 8 */
+	if(seglen < 8) {
+		ip->stats[FragFails]++;
+		ip->stats[OutDiscards]++;
+		netlog(f, Logip, "%I: seglen < 8\n", eh->dst);
+		goto raise;
+	}
+
+	lid = incref(&ip->id6);
+	fraghdr.nexthdr = nexthdr;
+	fraghdr.res = 0;
+	hnputl(fraghdr.id, lid);
+
+	xp = bp;
+	offset = uflen;
+	while (xp != nil && offset && offset >= BLEN(xp)) {	/* find the block holding the first fragmentable byte */
+		offset -= BLEN(xp);
+		xp = xp->next;
+	}
+	xp->rp += offset;	/* NOTE(review): xp is not checked for nil here -- relies on the list being at least uflen long */
+
+	fragoff = 0; 
+	morefrags = 1;
+
+	for(; fragoff < flen; fragoff += seglen) {
+		nb = allocb(uflen + IP6FHDR + seglen);
+
+		if(fragoff + seglen >= flen) {	/* last fragment: shorten it and clear the M flag */
+			seglen = flen - fragoff;
+			morefrags = 0;
+		}
+
+		hnputs(eh->ploadlen, seglen+IP6FHDR);	/* patched in the original header, which is then copied */
+		memmove(nb->wp, eh, uflen);
+		nb->wp += uflen;
+
+		hnputs(fraghdr.offsetRM, fragoff); // last 3 bits must be 0
+		fraghdr.offsetRM[1] |= morefrags;
+		memmove(nb->wp, &fraghdr, IP6FHDR);
+		nb->wp += IP6FHDR;
+
+		/* Copy data */
+		chunk = seglen;
+		while (chunk) {
+			if(!xp) {
+				ip->stats[OutDiscards]++;
+				ip->stats[FragFails]++;
+				freeblist(nb);
+				netlog(f, Logip, "!xp: chunk in v6%d\n", chunk);
+				goto raise;
+			}
+			blklen = chunk;
+			if(BLEN(xp) < chunk)
+				blklen = BLEN(xp);
+			memmove(nb->wp, xp->rp, blklen);
+
+			nb->wp += blklen;
+			xp->rp += blklen;
+			chunk -= blklen;
+			if(xp->rp == xp->wp)
+				xp = xp->next; 
+		}
+
+		ifc->m->bwrite(ifc, nb, V6, gate);
+		ip->stats[FragCreates]++;
+	}
+	ip->stats[FragOKs]++;
+
+raise:	/* reached with the interface read lock held and the error label pushed */
+	runlock(ifc);
+	poperror();
+free:	/* reached without the lock; the original packet is dropped or, after fragmenting, no longer needed */
+	freeblist(bp);	
+	return rv;
+}
+/* input of a v6 packet from ifc: forward it if it is not for us and routing is on, else reassemble and hand it to its protocol; bp is consumed */
+void
+ipiput6(Fs *f, Ipifc *ifc, Block *bp)
+{
+	int hl;
+	int hop, tos;
+	uchar proto;
+	Ip6hdr *h;
+	Proto *p;
+	int notforme;
+	int tentative;
+	uchar v6dst[IPaddrlen];
+	IP *ip;
+	Route *r, *sr;
+
+	ip = f->ip;
+	ip->stats[InReceives]++;
+
+	/*
+	 *  Ensure we have all the header info in the first
+	 *  block.  Make life easier for other protocols by
+	 *  collecting up to the first 64 bytes in the first block.
+	 */
+	if(BLEN(bp) < 64) {
+		hl = blocklen(bp);
+		if(hl < IP6HDR)
+			hl = IP6HDR;
+		if(hl > 64)
+			hl = 64;
+		bp = pullupblock(bp, hl);
+		if(bp == nil)
+			return;
+	}
+
+	h = (Ip6hdr *)(bp->rp);
+
+	memmove(&v6dst[0], &(h->dst)[0], IPaddrlen);
+	notforme = ipforme(f, v6dst) == 0;
+	tentative = iptentative(f, v6dst);
+
+	if(tentative && (h->proto != ICMPv6)) {
+		print("tentative addr, drop\n");
+		freeblist(bp);
+		return;
+	}
+
+	/* Check header version */
+	if(BLKIPVER(bp) != IP_VER6) {
+		ip->stats[InHdrErrors]++;
+		netlog(f, Logip, "ip: bad version %ux\n", (h->vcf[0]&0xF0)>>4);	/* the version is the top nibble */
+		freeblist(bp);
+		return;
+	}
+
+	/* route */
+	if(notforme) {
+		if(!ip->iprouting){
+			freeblist(bp);	/* bp may be a chain after the 64-byte pullup, so free all of it */
+			return;
+		}
+		/* don't forward to source's network */
+		sr = v6lookup(f, h->src, nil);
+		r = v6lookup(f, h->dst, nil);
+
+		if(r == nil || sr == r){
+			ip->stats[OutDiscards]++;
+			freeblist(bp);
+			return;
+		}
+
+		/* don't forward if packet has timed out */
+		hop = h->ttl;
+		if(hop < 1) {
+			ip->stats[InHdrErrors]++;
+			icmpttlexceeded6(f, ifc, bp);
+			freeblist(bp);
+			return;
+		}
+
+		/* process headers & reassemble if the interface expects it */
+		bp = procxtns(ip, bp, r->ifc->reassemble);
+
+		if(bp == nil)
+			return;
+
+		ip->stats[ForwDatagrams]++;
+		h = (Ip6hdr *) (bp->rp);	/* reassembly may have returned a different block */
+		tos = IPV6CLASS(h);
+		hop = h->ttl;
+		ipoput6(f, bp, 1, hop-1, tos, nil);	/* gating = 1: ipoput6 consumes bp */
+		return;
+	}
+
+	/* reassemble & process headers if needed */
+	bp = procxtns(ip, bp, 1);
+
+	if(bp == nil)
+		return;
+
+	h = (Ip6hdr *) (bp->rp);
+	proto = h->proto;
+	p = Fsrcvpcol(f, proto);
+	if(p != nil && p->rcv != nil) {
+		ip->stats[InDelivers]++;
+		(*p->rcv)(p, ifc, bp);
+		return;
+	}
+
+	ip->stats[InDiscards]++;
+	ip->stats[InUnknownProtos]++;
+	freeblist(bp);
+}
+
+/*
+ * ipfragfree6 - copied from ipfragfree4 - assume hold fraglock6.  frees held blocks, unlinks frag from the active list, pushes it on the free list
+ */
+void
+ipfragfree6(IP *ip, Fragment6 *frag)
+{
+	Fragment6 *fl, **l;
+
+	if(frag->blist)
+		freeblist(frag->blist);
+
+	memset(frag->src, 0, IPaddrlen);
+	frag->id = 0;
+	frag->blist = nil;
+
+	l = &ip->flisthead6;
+	for(fl = *l; fl; fl = fl->next) {
+		if(fl == frag) {
+			*l = frag->next;
+			break;
+		}
+		l = &fl->next;
+	}
+
+	frag->next = ip->fragfree6;
+	ip->fragfree6 = frag;
+
+}
+
+/*
+ * ipfragallo6 - copied from ipfragalloc4.  returns a queue entry moved to the head of the active list, with age set to NOW + 30000
+ */
+Fragment6*
+ipfragallo6(IP *ip)
+{
+	Fragment6 *f;
+
+	while(ip->fragfree6 == nil) {
+		/* free last entry on fraglist */
+		for(f = ip->flisthead6; f->next; f = f->next)	/* NOTE(review): assumes the active list is not empty when the free list is */
+			;
+		ipfragfree6(ip, f);
+	}
+	f = ip->fragfree6;
+	ip->fragfree6 = f->next;
+	f->next = ip->flisthead6;
+	ip->flisthead6 = f;
+	f->age = NOW + 30000;
+
+	return f;
+}
+/* process the extension headers of bp: reassemble when a fragment header is found and doreasm is set; returns nil while fragments are pending */
+static Block*
+procxtns(IP *ip, Block *bp, int doreasm) {
+
+	int offset;
+	uchar proto;
+	Ip6hdr *h;
+
+	h = (Ip6hdr *) (bp->rp);
+	offset = unfraglen(bp, &proto, 0);	/* length of the unfragmentable part; proto is the header type after it */
+
+	if((proto == FH) && (doreasm != 0)) {
+		bp = ip6reassemble(ip, offset, bp, h);
+		if(bp == nil) 
+			return nil; 
+		offset = unfraglen(bp, &proto, 0);
+	}
+
+	if(proto == DOH || offset > IP6HDR) 
+		bp = procopts(bp);	/* procopts returns bp unchanged */
+
+	return bp;
+}
+
+
+/*	returns length of "Unfragmentable part", i.e., sum of lengths of ipv6 hdr,
+ *	hop-by-hop & routing headers if present; *nexthdr is set to nexthdr value
+ *	of the last header in the "Unfragmentable part"; if setfh != 0, nexthdr
+ *	field of the last header in the "Unfragmentable part" is set to FH.
+ */
+int
+unfraglen(Block *bp, uchar *nexthdr, int setfh)
+{
+	uchar *p, *q;
+	int ufl, hs;
+
+	p = bp->rp;
+	q = p+6;	/* proto, = p+sizeof(Ip6hdr.vcf)+sizeof(Ip6hdr.ploadlen) */
+	*nexthdr = *q;
+	ufl = IP6HDR;
+	p += ufl;
+
+	for(;;) {	/* NOTE(review): nothing here checks that these headers lie inside the block */
+		if(*nexthdr == HBH || *nexthdr == RH) {
+			*nexthdr = *p;
+			hs = ((int)*(p+1) + 1) * 8;	/* the length byte counts 8-byte units beyond the first */
+			ufl += hs;
+			q = p;	/* q follows the most recent next-header field */
+			p += hs;
+		}
+		else
+			break;
+	}
+
+	if(*nexthdr == FH)	/* p is at a fragment header: overwrite the FH marker with what follows it */
+		*q = *p;
+
+	if(setfh)
+		*q = FH;
+
+	return ufl;
+}
+/* placeholder for option processing: bp is returned unchanged */
+Block*
+procopts(Block *bp)
+{
+	return bp;
+}
+/* queue fragment bp (fragment header at bp->rp+uflen); returns the rebuilt datagram once complete, else nil; bp is consumed either way */
+Block*
+ip6reassemble(IP* ip, int uflen, Block* bp, Ip6hdr* ih)
+{
+	int fend, offset;
+	uint id;
+	Fragment6 *f, *fnext;
+	Fraghdr6 *fraghdr;
+	uchar src[IPaddrlen], dst[IPaddrlen];
+	Block *bl, **l, *last, *prev;
+	int ovlap, len, fragsize, pktposn;
+
+	/*
+	 *  block lists are too hard, pullupblock into a single block (it may move the data, so header pointers are taken after it)
+	 */
+	if(bp->next){
+		bp = pullupblock(bp, blocklen(bp));
+		if(bp == nil)
+			return nil;
+		ih = (Ip6hdr *)(bp->rp);
+	}
+
+	fraghdr = (Fraghdr6 *) (bp->rp + uflen);
+	memmove(src, ih->src, IPaddrlen);
+	memmove(dst, ih->dst, IPaddrlen);
+	id = nhgetl(fraghdr->id);
+	offset = nhgets(fraghdr->offsetRM) & ~7;	/* the low 3 bits are reserved bits and the M flag */
+
+	qlock(&ip->fraglock6);
+
+	/*
+	 *  find a reassembly queue for this fragment
+	 */
+	for(f = ip->flisthead6; f; f = fnext){
+		fnext = f->next;
+		if(ipcmp(f->src, src)==0 && ipcmp(f->dst, dst)==0 && f->id == id)
+			break;
+		if(f->age < NOW){	/* expire stale queues while passing over them */
+			ip->stats[ReasmTimeout]++;
+			ipfragfree6(ip, f);
+		}
+	}
+
+	/*
+	 *  if this isn't a fragmented packet, accept it
+	 *  and get rid of any fragments that might go
+	 *  with it.
+	 */
+	if(nhgets(fraghdr->offsetRM)==0) {	// first frag is also the last
+		if(f != nil) {
+			ipfragfree6(ip, f);
+			ip->stats[ReasmFails]++;
+		}
+		qunlock(&ip->fraglock6);
+		return bp;
+	}
+
+	if(bp->base+sizeof(Ipfrag) >= bp->rp){
+		bp = padblock(bp, sizeof(Ipfrag));
+		bp->rp += sizeof(Ipfrag);
+		ih = (Ip6hdr *)(bp->rp);	/* padblock may have copied the packet into a new block */
+	}
+
+	BKFG(bp)->foff = offset;
+	BKFG(bp)->flen = nhgets(ih->ploadlen) + IP6HDR - uflen - IP6FHDR;
+
+	/* First fragment allocates a reassembly queue */
+	if(f == nil) {
+		f = ipfragallo6(ip);
+		f->id = id;
+		memmove(f->src, src, IPaddrlen);
+		memmove(f->dst, dst, IPaddrlen);
+
+		f->blist = bp;
+
+		qunlock(&ip->fraglock6);
+		ip->stats[ReasmReqds]++;
+		return nil;
+	}
+
+	/*
+	 *  find the new fragment's position in the queue
+	 */
+	prev = nil;
+	l = &f->blist;
+	bl = f->blist;
+	while(bl != nil && BKFG(bp)->foff > BKFG(bl)->foff) {
+		prev = bl;
+		l = &bl->next;
+		bl = bl->next;
+	}
+
+	/* Check overlap of a previous fragment - trim away as necessary */
+	if(prev) {
+		ovlap = BKFG(prev)->foff + BKFG(prev)->flen - BKFG(bp)->foff;
+		if(ovlap > 0) {
+			if(ovlap >= BKFG(bp)->flen) {
+				freeblist(bp);
+				qunlock(&ip->fraglock6);
+				return nil;
+			}
+			BKFG(prev)->flen -= ovlap;
+		}
+	}
+
+	/* Link onto assembly queue */
+	bp->next = *l;
+	*l = bp;
+
+	/* Check to see if succeeding segments overlap */
+	if(bp->next) {
+		l = &bp->next;
+		fend = BKFG(bp)->foff + BKFG(bp)->flen;
+
+		/* Take completely covered segments out */
+		/* and trim the front of the first one that is only partly covered */
+		while(*l) {
+			ovlap = fend - BKFG(*l)->foff;
+
+			if(ovlap <= 0)
+				break;
+			if(ovlap < BKFG(*l)->flen) {
+				BKFG(*l)->flen -= ovlap;
+				BKFG(*l)->foff += ovlap;
+				/* move up ih hdrs and the frag hdr: its M flag is read again below */
+				memmove((*l)->rp + ovlap, (*l)->rp, uflen + IP6FHDR);
+				(*l)->rp += ovlap;
+				break;
+			}
+			last = (*l)->next;
+			(*l)->next = nil;
+			freeblist(*l);
+			*l = last;
+		}
+	}
+
+	/*
+	 *  look for a complete packet.  if we get to a fragment
+	 *  with the trailing bit of fraghdr->offsetRM[1] set, we're done.
+	 */
+	pktposn = 0;
+	for(bl = f->blist; bl; bl = bl->next) {
+		if(BKFG(bl)->foff != pktposn)	/* a hole: keep waiting */
+			break;
+
+		fraghdr = (Fraghdr6 *) (bl->rp + uflen);
+		if((fraghdr->offsetRM[1] & 1) == 0) {
+			/* M flag clear: this is the last fragment and nothing before it is missing */
+			bl = f->blist;
+
+			/* get rid of frag header in first fragment */
+			/* by sliding the unfragmentable headers up over it */
+			memmove(bl->rp + IP6FHDR, bl->rp, uflen);
+			bl->rp += IP6FHDR;
+			len = nhgets(((Ip6hdr*)(bl->rp))->ploadlen) - IP6FHDR;
+			bl->wp = bl->rp + len + IP6HDR;
+
+			/* Pullup all the fragment headers and
+			 * return a complete packet
+			 */
+			for(bl = bl->next; bl; bl = bl->next) {
+				fragsize = BKFG(bl)->flen;
+				len += fragsize;
+				bl->rp += uflen + IP6FHDR;
+				bl->wp = bl->rp + fragsize;
+			}
+
+			bl = f->blist;
+			f->blist = nil;	/* detach first so ipfragfree6 does not free the datagram */
+			ipfragfree6(ip, f);
+			ih = (Ip6hdr*)(bl->rp);
+			hnputs(ih->ploadlen, len);
+			qunlock(&ip->fraglock6);
+			ip->stats[ReasmOKs]++;
+			return bl;
+		}
+		pktposn += BKFG(bl)->flen;
+	}
+	qunlock(&ip->fraglock6);
+	return nil;
+}
+
--- /dev/null
+++ b/os/ip.original/ipv6.h
@@ -1,0 +1,186 @@
+#undef MIN
+#define MIN(a, b) ((a) <= (b) ? (a) : (b))
+
+/* RFC 3513 defines the address prefixes */
+#define isv6mcast(addr)	  ((addr)[0] == 0xff)
+#define islinklocal(addr) ((addr)[0] == 0xfe && ((addr)[1] & 0xc0) == 0x80)
+#define issitelocal(addr) ((addr)[0] == 0xfe && ((addr)[1] & 0xc0) == 0xc0)
+#define isv6global(addr) (((addr)[0] & 0xe0) == 0x20)
+
+#define optexsts(np) (nhgets((np)->ploadlen) > 24)
+#define issmcast(addr) (memcmp((addr), v6solicitednode, 13) == 0)
+
+/* from RFC 2460 */
+
+typedef struct Ip6hdr     Ip6hdr;
+typedef struct Opthdr     Opthdr;
+typedef struct Routinghdr Routinghdr;
+typedef struct Fraghdr6    Fraghdr6;
+
+struct Ip6hdr {
+	uchar vcf[4];       	// version:4, traffic class:8, flow label:20
+	uchar ploadlen[2];  	// payload length: packet length - 40
+	uchar proto;		// next header type
+	uchar ttl;          	// hop limit
+	uchar src[IPaddrlen];
+	uchar dst[IPaddrlen];
+};
+
+struct Opthdr {
+	uchar nexthdr;
+	uchar len;
+};
+
+struct Routinghdr {
+	uchar nexthdr;
+	uchar len;
+	uchar rtetype;
+	uchar segrem;
+};
+
+struct Fraghdr6 {
+	uchar nexthdr;
+	uchar res;
+	uchar offsetRM[2];	// Offset, Res, M flag
+	uchar id[4];
+};
+
+
+enum {			/* Header Types */
+	HBH		= 0,	//?
+	ICMP		= 1,
+	IGMP		= 2,
+	GGP		= 3,
+	IPINIP		= 4,
+	ST		= 5,
+	TCP		= 6,
+	UDP		= 17,
+	ISO_TP4		= 29,
+	RH		= 43,
+	FH		= 44,
+	IDRP		= 45,
+	RSVP		= 46,
+	AH		= 51,
+	ESP		= 52,
+	ICMPv6		= 58,
+	NNH		= 59,
+	DOH		= 60,
+	ISO_IP		= 80,
+	IGRP		= 88,
+	OSPF		= 89,
+
+	Maxhdrtype	= 256,
+};
+
+
+enum {
+	//	multicast flgs and scop
+
+	well_known_flg				= 0,
+	transient_flg				= 1,
+
+	node_local_scop 			= 1,
+	link_local_scop 			= 2,
+	site_local_scop 			= 5,
+	org_local_scop				= 8,
+	global_scop				= 14,
+
+	//	various prefix lengths
+
+	SOLN_PREF_LEN				= 13,
+
+	//	icmpv6 unreach codes
+	icmp6_no_route				= 0,
+	icmp6_ad_prohib				= 1,
+	icmp6_unassigned			= 2,
+	icmp6_adr_unreach			= 3,
+	icmp6_port_unreach			= 4,
+	icmp6_unkn_code				= 5,
+
+	// 	various flags & constants
+
+	v6MINTU      				= 1280,
+	HOP_LIMIT    				= 255,
+	ETHERHDR_LEN 				= 14,
+	IPV6HDR_LEN  				= 40,
+	IPV4HDR_LEN  				= 20,
+
+	// 	option types
+
+	SRC_LLADDRESS    			= 1,
+	TARGET_LLADDRESS 			= 2,
+	PREFIX_INFO      			= 3,
+	REDIR_HEADER     			= 4,
+	MTU_OPTION       			= 5,
+
+	SRC_UNSPEC  				= 0,
+	SRC_UNI     				= 1,
+	TARG_UNI    				= 2,
+	TARG_MULTI  				= 3,
+
+	t_unitent   				= 1,
+	t_uniproxy  				= 2,
+	t_unirany   				= 3,
+
+	//	Router constants (all times in milliseconds)
+
+	MAX_INITIAL_RTR_ADVERT_INTERVAL 	= 16000,
+	MAX_INITIAL_RTR_ADVERTISEMENTS  	= 3,
+	MAX_FINAL_RTR_ADVERTISEMENTS    	= 3,
+	MIN_DELAY_BETWEEN_RAS 			= 3000,
+	MAX_RA_DELAY_TIME     			= 500,
+
+	//	Host constants
+
+	MAX_RTR_SOLICITATION_DELAY 		= 1000,
+	RTR_SOLICITATION_INTERVAL  		= 4000,
+	MAX_RTR_SOLICITATIONS      		= 3,
+
+	//	Node constants
+
+	MAX_MULTICAST_SOLICIT   		= 3,
+	MAX_UNICAST_SOLICIT     		= 3,
+	MAX_ANYCAST_DELAY_TIME  		= 1000,
+	MAX_NEIGHBOR_ADVERTISEMENT 		= 3,
+	REACHABLE_TIME 				= 30000,
+	RETRANS_TIMER  				= 1000,
+	DELAY_FIRST_PROBE_TIME 			= 5000,
+
+};
+
+extern void ipv62smcast(uchar *, uchar *);
+extern void icmpns(Fs *f, uchar* src, int suni, uchar* targ, int tuni, uchar* mac);
+extern void icmpna(Fs *f, uchar* src, uchar* dst, uchar* targ, uchar* mac, uchar flags);
+extern void icmpttlexceeded6(Fs *f, Ipifc *ifc, Block *bp);
+extern void icmppkttoobig6(Fs *f, Ipifc *ifc, Block *bp);
+extern void icmphostunr(Fs *f, Ipifc *ifc, Block *bp, int code, int free);
+
+extern uchar v6allnodesN[IPaddrlen];
+extern uchar v6allnodesL[IPaddrlen];
+extern uchar v6allroutersN[IPaddrlen];
+extern uchar v6allroutersL[IPaddrlen];
+extern uchar v6allnodesNmask[IPaddrlen];
+extern uchar v6allnodesLmask[IPaddrlen];
+extern uchar v6allroutersS[IPaddrlen];
+extern uchar v6solicitednode[IPaddrlen];
+extern uchar v6solicitednodemask[IPaddrlen];
+extern uchar v6Unspecified[IPaddrlen];
+extern uchar v6loopback[IPaddrlen];
+extern uchar v6loopbackmask[IPaddrlen];
+extern uchar v6linklocal[IPaddrlen];
+extern uchar v6linklocalmask[IPaddrlen];
+extern uchar v6sitelocal[IPaddrlen];
+extern uchar v6sitelocalmask[IPaddrlen];
+extern uchar v6glunicast[IPaddrlen];
+extern uchar v6multicast[IPaddrlen];
+extern uchar v6multicastmask[IPaddrlen];
+
+extern int v6llpreflen;
+extern int v6slpreflen;
+extern int v6lbpreflen;
+extern int v6mcpreflen;
+extern int v6snpreflen;
+extern int v6aNpreflen;
+extern int v6aLpreflen;
+
+extern int ReTransTimer;
--- /dev/null
+++ b/os/ip.original/kernel.h
@@ -1,0 +1,10 @@
+extern	int	kclose(int);
+extern	int	kdial(char*, char*, char*, int*);
+extern	int	kannounce(char*, char*);
+extern	void	kerrstr(char*);
+extern	void	kgerrstr(char*);
+extern	int	kopen(char*, int);
+extern	long	kread(int, void*, long);
+extern	long	kseek(int, vlong, int);
+extern	long	kwrite(int, void*, long);
+extern	void	kwerrstr(char *, ...);
--- /dev/null
+++ b/os/ip.original/loopbackmedium.c
@@ -1,0 +1,121 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+
+#include "ip.h"
+
+enum
+{
+	Maxtu=	16*1024,
+};
+
+typedef struct LB LB;
+struct LB
+{
+	Proc	*readp;
+	Queue	*q;
+	Fs	*f;
+};
+
+static void loopbackread(void *a);
+
+static void
+loopbackbind(Ipifc *ifc, int, char**)
+{
+	LB *lb;
+
+	lb = smalloc(sizeof(*lb));
+	lb->f = ifc->conv->p->f;
+	/* TO DO: make queue size a function of kernel memory */
+	lb->q = qopen(128*1024, Qmsg, nil, nil);
+	ifc->arg = lb;
+	ifc->mbps = 1000;
+
+	kproc("loopbackread", loopbackread, ifc, 0);
+
+}
+
+static void
+loopbackunbind(Ipifc *ifc)
+{
+	LB *lb = ifc->arg;	/* state stored by loopbackbind */
+
+	if(lb->readp)
+		postnote(lb->readp, 1, "unbind", 0);	/* ask the reader kproc to quit */
+
+	/* wait for reader to die: loopbackread clears readp on its way out */
+	while(lb->readp != 0)
+		tsleep(&up->sleep, return0, 0, 300);	/* re-check every 300 ms */
+
+	/* clean up: the queue and the rock are both owned by this medium */
+	qfree(lb->q);
+	free(lb);
+}
+
+static void
+loopbackbwrite(Ipifc *ifc, Block *bp, int, uchar*)
+{
+	LB *lb;
+
+	lb = ifc->arg;
+	if(qpass(lb->q, bp) < 0)
+		ifc->outerr++;
+	ifc->out++;
+}
+
+static void
+loopbackread(void *a)	/* kproc body; a is the Ipifc handed to kproc by loopbackbind */
+{
+	Ipifc *ifc;
+	Block *bp;
+	LB *lb;
+
+	ifc = a;
+	lb = ifc->arg;
+	lb->readp = up;	/* hide identity under a rock for unbind */
+	if(waserror()){
+		/* any error (including the note posted by unbind) ends the kproc */
+		lb->readp = 0;
+		pexit("hangup", 1);
+	}
+	for(;;){
+		bp = qbread(lb->q, Maxtu);	/* packets queued by loopbackbwrite */
+		if(bp == nil)
+			continue;
+		ifc->in++;
+		if(!canrlock(ifc)){
+			/* interface is write-locked: drop the packet rather than wait */
+			freeb(bp);
+			continue;
+		}
+		if(waserror()){
+			/* release the read lock before passing the error outward */
+			runlock(ifc);
+			nexterror();
+		}
+		if(ifc->lifc == nil)
+			freeb(bp);	/* no logical interface configured: discard */
+		else
+			ipiput4(lb->f, ifc, bp);	/* feed it back in as received IPv4 */
+		runlock(ifc);
+		poperror();
+	}
+}
+
+Medium loopbackmedium =
+{
+.hsize=		0,
+.mintu=		0,
+.maxtu=		Maxtu,
+.maclen=	0,
+.name=		"loopback",
+.bind=		loopbackbind,
+.unbind=	loopbackunbind,
+.bwrite=	loopbackbwrite,
+};
+
+void
+loopbackmediumlink(void)
+{
+	addipmedium(&loopbackmedium);
+}
--- /dev/null
+++ b/os/ip.original/nat.c
@@ -1,0 +1,549 @@
+#include		"u.h"
+#include		"../port/lib.h"
+#include		"mem.h"
+#include		"dat.h"
+#include		"fns.h"
+#include		"../port/error.h"
+
+#include		"ip.h"
+
+typedef struct NatProto NatProto;
+typedef struct NatAddr NatAddr;
+
+/*
+ * NAT.
+ */
+struct Nat
+{
+	uchar	src[IPv4addrlen];	/* Source address */
+	uchar	sport[2];		/* Source port */
+	uchar	lport[2];		/* Local port */
+	uchar	proto;			/* Protocol */
+	long	time;			/* Time */
+	Conv	*conv;			/* Conversation */
+	Nat	*next;			/* Next node */
+};
+
+/*
+ * Protocol list.
+ */
+struct NatProto
+{
+	uchar	proto;			/* Protocol */
+	int	sport;			/* Source port offset */
+	int	dport;			/* Destination port offset */
+	int	cksum;			/* Checksum offset */
+	int	timeout;		/* Timeout */
+};
+
+/*
+ * Address list.
+ */
+struct NatAddr
+{
+	uchar	src[IPaddrlen];		/* Source address */
+	uchar	mask[IPaddrlen];	/* Source address mask */
+	uchar	net[IPaddrlen];		/* Source network address */
+	Iplifc	*dst;			/* Destination interface */
+	NatAddr	*next;			/* Next node */
+};
+
+static Nat *head = nil;
+static NatAddr *addrhead = nil;
+
+/*
+ * Timeouts for ICMP, TCP and UDP are respectively confirmed
+ * in RFC 5508, RFC 5382 and RFC 4787.
+ */
+static NatProto prototab[] =
+{
+	{ 1, 4, 4, 2, 60*1000 },		/* ICMP */
+	{ 6, 0, 2, 16, (2*60*60+4*60)*1000 },	/* TCP */
+	{ 17, 0, 2, 6, 2*60*1000 },		/* UDP */
+	{ 40, 6, 8, 0, 10*30*1000 },		/* IL */
+	{ 255, 0, 2, 6, 2*60*1000 },		/* RUDP */
+	{ 0 }
+};
+
+NatProto*	parseproto(uchar);
+void		natprepend(Nat*);
+Nat*		natexistout(uchar*, uchar, uchar*);
+Nat*		natexistin(uchar, uchar*);
+int		natdelete(uchar*, uchar, uchar*);
+int		natpurge(uchar);
+Nat*		natlport(Proto*, Ip4hdr*, uchar*);
+int		natgc(uchar);
+void		checksumadjust(uchar*, uchar*, int, uchar*, int);
+Iplifc*		natonifco(Ipifc*, Ip4hdr*);
+Iplifc*		natonifci(Ipifc*);
+void		nataddrprepend(NatAddr*);
+NatAddr*	nataddrexist(uchar*, uchar*, Iplifc*);
+int		addnataddr(uchar*, uchar*, Iplifc*);
+int		removenataddr(uchar*, uchar*, Iplifc*);
+void		shownataddr(void);
+void		flushnataddr(void);
+
+/*
+ * Look up the translation attributes for an IP protocol number.
+ * Returns the matching prototab entry, or nil when the protocol
+ * is not listed there.
+ */
+NatProto*
+parseproto(uchar proto)
+{
+	NatProto *ent;
+
+	/* prototab is terminated by an entry whose proto is 0 */
+	ent = prototab;
+	while(ent->proto != 0){
+		if(ent->proto == proto)
+			return ent;
+		ent++;
+	}
+	return nil;
+}
+
+/*
+ * Output NAT.
+ * Rewrites, in place, the source address and source port of the
+ * IPv4 packet at b->rp and patches the transport and IP header
+ * checksums to match.
+ * Return 0 if the packet was translated or needs no translation.
+ * Return -1 if the packet must be NATed but the protocol is unknown,
+ * or if natlport returns no mapping.
+ * NOTE(review): the port and checksum offsets are taken from
+ * sizeof(Ip4hdr), so a header carrying IP options is presumably
+ * not expected here -- confirm.
+ */
+int
+nato(Block *b, Ipifc *ifc, Fs *f)
+{
+	Nat *n;		/* NAT table */
+	NatProto *np;	/* Protocol list */
+	Iplifc *lifc;	/* Logical interface */
+	Ip4hdr *h;	/* Source IPv4 header */
+	Proto *p;	/* New protocol */
+	uchar *laddr;	/* Local address on Iplifc */
+	uchar *sport;	/* Source port */
+	uchar *cksum;	/* Source checksum */
+
+	h = (Ip4hdr*)(b->rp);
+
+	/* Verify on which logical interface NAT is enabled,
+	   and if this source address must be translated */
+	if((lifc=natonifco(ifc, h)) == nil)
+		return 0;
+
+	laddr = lifc->local+IPv4off;
+	p = Fsrcvpcolx(f, h->proto);
+
+	if(ip4cmp(h->src, laddr) != 0){	/* packets already sourced from laddr pass untouched */
+		if((np=parseproto(h->proto)) != nil){
+			/* Protocol layer */
+			sport = (b->rp)+sizeof(Ip4hdr)+np->sport;
+			cksum = (b->rp)+sizeof(Ip4hdr)+np->cksum;
+			if((n = natlport(p, h, sport)) == nil)
+				return -1;
+			memmove(sport, n->lport, 2);
+			checksumadjust(cksum, n->sport, 2, n->lport, 2);
+			if(np->proto != 1)
+				/* ICMP checksum doesn't include IP header */
+				checksumadjust(cksum, n->src, IPv4addrlen,
+					laddr, IPv4addrlen);
+			/* IP layer */
+			ip4move(h->src, laddr);
+			checksumadjust(h->cksum, n->src, IPv4addrlen,
+				h->src, IPv4addrlen);
+			return 0;
+		}else{
+			netlog(f, Lognat, "nat: unknown protocol %d\n", h->proto);
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Input NAT.
+ * If NAT is enabled on ifc and the packet's protocol and destination
+ * port match an existing mapping, rewrites the destination port and
+ * destination address in place back to the mapping's original source
+ * port and address, patching the transport and IP checksums.
+ * Packets with no matching mapping are left untouched.
+ */
+void
+nati(Block *b, Ipifc *ifc)
+{
+	Nat *n;		/* NAT table */
+	NatProto *np;	/* Protocol list */
+	Ip4hdr *h;	/* Source IPv4 header */
+	uchar *lport;	/* Our local port, and dst port for the packet */
+	uchar *cksum;	/* Source checksum */
+
+	h = (Ip4hdr*)(b->rp);
+
+	/* Verify if NAT is enabled on this interface */
+	if(natonifci(ifc) == nil)
+		return;
+
+	if((np=parseproto(h->proto)) != nil){
+		lport = (b->rp)+sizeof(Ip4hdr)+np->dport;
+		if((n=natexistin(h->proto, lport)) != nil){
+			/* Protocol layer: fix the checksum before lport is overwritten */
+			cksum = (b->rp)+sizeof(Ip4hdr)+np->cksum;
+			checksumadjust(cksum, lport, 2, n->sport, 2);
+			memmove(lport, n->sport, 2);
+			if(np->proto != 1)
+				/* ICMP checksum doesn't include IP header */
+		   		checksumadjust(cksum, h->dst, IPv4addrlen,
+					n->src, IPv4addrlen);
+			/* IP layer: again adjust first, then replace the address */
+			checksumadjust(h->cksum, h->dst, IPv4addrlen,
+				n->src, IPv4addrlen);
+			ip4move(h->dst, n->src);
+		}
+	}
+}
+
+/*
+ * Add Nat to Nat list.
+ */
+void
+natprepend(Nat *n)
+{
+	n->next = head;
+	head = n;
+}
+
+/*
+ * Find the mapping for an outgoing flow, keyed by original source
+ * address, original source port and protocol.
+ * A hit refreshes the entry's timestamp; a miss returns nil.
+ */
+Nat*
+natexistout(uchar *src, uchar proto, uchar *sport)
+{
+	Nat *ent;
+
+	for(ent = head; ent != nil; ent = ent->next){
+		if(ip4cmp(src, ent->src) != 0)
+			continue;
+		if(memcmp(sport, ent->sport, 2) != 0)
+			continue;
+		if(proto != ent->proto)
+			continue;
+		ent->time = NOW;
+		return ent;
+	}
+	return nil;
+}
+
+/*
+ * Find the mapping for an incoming packet, keyed by the translated
+ * (local) port and protocol.
+ * A hit refreshes the entry's timestamp; a miss returns nil.
+ */
+Nat*
+natexistin(uchar proto, uchar *lport)
+{
+	Nat *ent;
+
+	ent = head;
+	while(ent != nil){
+		if(proto == ent->proto && memcmp(lport, ent->lport, 2) == 0){
+			ent->time = NOW;
+			return ent;
+		}
+		ent = ent->next;
+	}
+	return nil;
+}
+
+/*
+ * Remove the first mapping matching source address, protocol and
+ * source port: unlink it, close its conversation and free it.
+ * Return 0 on success, -1 if no such mapping exists.
+ */
+int
+natdelete(uchar src[IPv4addrlen], uchar proto, uchar sport[2])
+{
+	Nat **link;	/* Pointer to the link that holds the current node */
+	Nat *ent;	/* Current node */
+
+	for(link = &head; (ent = *link) != nil; link = &ent->next){
+		if(ip4cmp(src, ent->src) != 0)
+			continue;
+		if(memcmp(sport, ent->sport, 2) != 0)
+			continue;
+		if(proto != ent->proto)
+			continue;
+
+		*link = ent->next;
+		closeconv(ent->conv);
+		free(ent);
+		return 0;
+	}
+
+	return -1;
+}
+
+/*
+ * Purge Nat list.
+ * Removes every mapping whose protocol is proto, closing its
+ * conversation and freeing the node.  Mappings that belong to
+ * other protocols stay on the list.
+ * Returns the number of mappings removed.
+ */
+int
+natpurge(uchar proto)
+{
+	Nat **l;	/* Link that holds the current node */
+	Nat *c;		/* Current node */
+	int n;		/* Number of purged connections */
+
+	n = 0;
+	l = &head;
+	while((c = *l) != nil){
+		if(c->proto != proto){
+			/* different protocol: keep it and move on */
+			l = &c->next;
+			continue;
+		}
+		*l = c->next;
+		closeconv(c->conv);
+		free(c);
+		n++;
+	}
+
+	return n;
+}
+
+/*
+ * Return the mapping for this outgoing flow, creating one if necessary.
+ * A new mapping clones a conversation from p so that setlport can
+ * reserve a local port for it, then records the original source
+ * address/port and the reserved port at the head of the Nat list.
+ * Returns nil if p is nil or no memory is available for the node;
+ * raises Enodev if no conversation can be cloned.
+ */
+Nat*
+natlport(Proto *p, Ip4hdr *h, uchar *sport)
+{
+	Nat *n;		/* New NAT node */
+	Conv *s;	/* New conversation */
+
+	if((n=natexistout(h->src, h->proto, sport)) != nil)
+		return n;
+
+	/* without a protocol there is nothing to clone a port from */
+	if(p == nil)
+		return nil;
+
+	qlock(p);
+	s = Fsprotoclone(p, "network");
+	qunlock(p);
+	if(s == nil)
+		error(Enodev);
+	setlport(s);
+
+	n = malloc(sizeof(Nat));
+	if(n == nil){
+		/* give the conversation back rather than leak it */
+		closeconv(s);
+		return nil;
+	}
+	ip4move(n->src, h->src);
+	memmove(n->sport, sport, 2);
+	/*
+	 * NOTE(review): this copies the first two bytes of s->lport as
+	 * stored in memory; confirm whether a network-order store
+	 * (hnputs) is what is wanted here.
+	 */
+	memmove(n->lport, &s->lport, 2);
+	n->proto = h->proto;
+	n->time = NOW;
+	n->conv = s;
+	natprepend(n);
+
+	return n;
+}
+
+/*
+ * Nat list garbage collector.
+ * Removes every mapping that has been idle for longer than the
+ * timeout configured for proto, closing its conversation.
+ * Returns the number of removed mappings that belonged to proto;
+ * if none did, falls back to natpurge(proto) and returns its count.
+ * Returns 0 without touching the list when proto has no prototab entry.
+ * NOTE(review): proto's timeout is applied to mappings of every
+ * protocol, not only to those of proto -- confirm that is intended.
+ */
+int
+natgc(uchar proto)
+{
+	Nat **l;	/* Link that holds the current node */
+	Nat *c;		/* Current node */
+	NatProto *np;	/* Protocol list */
+	int n;		/* Number of garbage collected connections */
+
+	np = parseproto(proto);
+	if(np == nil)
+		return 0;
+
+	n = 0;
+	l = &head;
+	while((c = *l) != nil){
+		if(NOW - c->time <= np->timeout){
+			/* still fresh: keep it */
+			l = &c->next;
+			continue;
+		}
+		*l = c->next;
+		if(proto == c->proto)
+			n++;
+		closeconv(c->conv);
+		free(c);
+	}
+
+	if(n == 0)	/* Prevent Conv saturation */
+		n = natpurge(proto);
+
+	return n;
+}
+
+/*
+ * Function checksumadjust from RFC 3022.
+ * Incrementally updates the 16-bit one's-complement checksum stored
+ * big-endian at chksum[0..1]: the olen bytes at optr are taken out
+ * of the sum and the nlen bytes at nptr are added to it.
+ * Both lengths are consumed two bytes at a time, so they must be
+ * even; an odd length would never count down to zero.
+ */
+void
+checksumadjust(uchar *chksum, uchar *optr, int olen, uchar *nptr, int nlen)
+{
+	long x, old, new;
+
+	x=chksum[0]*256+chksum[1];
+	x=~x & 0xffff;	/* undo the final complement to get the raw sum */
+	while(olen){
+		old=optr[0]*256+optr[1];
+		optr+=2;
+		x-=old & 0xffff;
+		if(x<=0){	/* borrow: wrap around in one's-complement arithmetic */
+			x--;
+			x&=0xffff;
+		}
+		olen-=2;
+	}
+	while(nlen){
+		new=nptr[0]*256+nptr[1];
+		nptr+=2;
+		x+=new & 0xffff;
+		if(x & 0x10000){	/* carry out of bit 15: fold it back in */
+			x++;
+			x&=0xffff;
+		}
+		nlen-=2;
+	}
+	x=~x & 0xffff;	/* complement again and store big-endian */
+	chksum[0]=x/256;
+	chksum[1]=x & 0xff;
+}
+
+/*
+ * Add NatAddr to NatAddr list.
+ */
+void
+nataddrprepend(NatAddr *na)
+{
+	na->next = addrhead;
+	addrhead = na;
+}
+
+/*
+ * Look for a NatAddr with the same source address, mask and
+ * destination logical interface.  Returns it, or nil if absent.
+ */
+NatAddr*
+nataddrexist(uchar *src, uchar *mask, Iplifc *dst)
+{
+	NatAddr *ent;
+
+	for(ent = addrhead; ent != nil; ent = ent->next){
+		if(ipcmp(src, ent->src) != 0)
+			continue;
+		if(ipcmp(mask, ent->mask) != 0)
+			continue;
+		if(dst == ent->dst)
+			return ent;
+	}
+	return nil;
+}
+
+/*
+ * Create a new NatAddr for (src, mask) on logical interface dst and
+ * put it at the head of the NatAddr list.  The network address
+ * (src masked by mask) is precomputed and stored with it.
+ * Return 0 on success.
+ * Return -1 if it already exists or if no memory is available.
+ */
+int
+addnataddr(uchar *src, uchar *mask, Iplifc *dst)
+{
+	NatAddr *na;		/* New address node */
+	uchar net[IPaddrlen];	/* Network address */
+
+	maskip(src, mask, net);
+
+	if(nataddrexist(src, mask, dst) != nil)
+		return -1;
+
+	na = malloc(sizeof(NatAddr));
+	if(na == nil)
+		return -1;
+	ipmove(na->src, src);
+	ipmove(na->mask, mask);
+	ipmove(na->net, net);
+	na->dst = dst;
+
+	nataddrprepend(na);
+
+	return 0;
+}
+
+/*
+ * Remove the NatAddr matching (src, mask, dst): unlink it from the
+ * NatAddr list and free it.
+ * Return 0 on success, -1 if it doesn't exist.
+ */
+int
+removenataddr(uchar *src, uchar *mask, Iplifc *dst)
+{
+	NatAddr *c;	/* Current node */
+	NatAddr *p;	/* Precedent node */
+
+	for(p=nil, c=addrhead; c!=nil; p=c, c=c->next)
+		if(ipcmp(src, c->src) == 0 &&
+			ipcmp(mask, c->mask) == 0 &&
+			dst == c->dst)
+			break;
+
+	if(c == nil)
+		return -1;
+
+	if(p == nil)
+		addrhead = addrhead->next;
+	else
+		p->next = c->next;
+
+	/* the node was allocated by addnataddr; release it once unlinked */
+	free(c);
+
+	return 0;
+}
+
+/*
+ * Display NatAddr list.
+ */
+void
+shownataddr(void)
+{
+	NatAddr *c;	/* Current node */
+
+	for(c=addrhead; c!=nil; c=c->next)
+		print("%I %V %I\n", c->src, c->mask+IPv4off, c->dst->local);
+}
+
+/*
+ * Empty the NatAddr list, freeing every node.
+ */
+void
+flushnataddr(void)
+{
+	NatAddr *ent;	/* Node being released */
+	NatAddr *rest;	/* Remainder of the list */
+
+	for(ent = addrhead; ent != nil; ent = rest){
+		rest = ent->next;
+		addrhead = rest;
+		free(ent);
+	}
+}
+
+/*
+ * Return logical interface if NAT is enabled on this interface,
+ * and the source address must be translated.
+ * For each logical interface of ifc that appears as the destination
+ * of a NatAddr, the packet's IPv4 source is widened to 16 bytes,
+ * masked with that NatAddr's mask and compared with its stored
+ * network address; the first match wins.
+ * Return nil if no NatAddr covers the source address.
+ */
+Iplifc*
+natonifco(Ipifc *ifc, Ip4hdr* h)
+{
+	NatAddr *na;		/* Address list */
+	Iplifc *lifc;		/* Logical interface */
+	uchar src[IPaddrlen];	/* Source address */
+	uchar net[IPaddrlen];	/* Source network address */
+
+	for(lifc=ifc->lifc; lifc!=nil; lifc=lifc->next)
+		for(na=addrhead; na; na=na->next)
+			if(lifc == na->dst){
+				/* NAT enabled on this logical interface */
+				v4tov6(src, h->src);
+				maskip(src, na->mask, net);
+				if(ipcmp(net, na->net) == 0)
+					/* Source address must be translated */
+					return lifc;
+			}
+
+	return nil;
+}
+
+/*
+ * Return the first logical interface of ifc that is the destination
+ * of some NatAddr, i.e. on which NAT is enabled; nil if there is none.
+ */
+Iplifc*
+natonifci(Ipifc *ifc)
+{
+	Iplifc *lifc;		/* Logical interface */
+	NatAddr *na;		/* Address list */
+
+	lifc = ifc->lifc;
+	while(lifc != nil){
+		na = addrhead;
+		while(na != nil){
+			if(na->dst == lifc)
+				return lifc;
+			na = na->next;
+		}
+		lifc = lifc->next;
+	}
+
+	return nil;
+}
--- /dev/null
+++ b/os/ip.original/netdevmedium.c
@@ -1,0 +1,153 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+
+#include "ip.h"
+
+static void	netdevbind(Ipifc *ifc, int argc, char **argv);
+static void	netdevunbind(Ipifc *ifc);
+static void	netdevbwrite(Ipifc *ifc, Block *bp, int version, uchar *ip);
+static void	netdevread(void *a);
+
+typedef struct	Netdevrock Netdevrock;
+struct Netdevrock
+{
+	Fs	*f;		/* file system we belong to */
+	Proc	*readp;		/* reading process */
+	Chan	*mchan;		/* Data channel */
+};
+
+Medium netdevmedium =
+{
+.name=		"netdev",
+.hsize=		0,
+.mintu=	0,
+.maxtu=	64000,
+.maclen=	0,
+.bind=		netdevbind,
+.unbind=	netdevunbind,
+.bwrite=	netdevbwrite,
+.unbindonclose=	0,
+};
+
+/*
+ *  called to bind an IP ifc to a generic network device
+ *  called with ifc qlock'd
+ *
+ *  argv[2] names the device file; it is opened read/write and a
+ *  netdevread kproc is started to service it.
+ *  raises Ebadarg when fewer than three arguments are supplied.
+ */
+static void
+netdevbind(Ipifc *ifc, int argc, char **argv)
+{
+	Chan *mchan;
+	Netdevrock *er;
+
+	/* argv[2] is used below, so argc must be at least 3 */
+	if(argc < 3)
+		error(Ebadarg);
+
+	mchan = namec(argv[2], Aopen, ORDWR, 0);
+
+	er = smalloc(sizeof(*er));
+	er->mchan = mchan;
+	er->f = ifc->conv->p->f;
+
+	/* the rock must be in place before the reader starts looking for it */
+	ifc->arg = er;
+
+	kproc("netdevread", netdevread, ifc, 0);
+}
+
+/*
+ *  called with ifc wlock'd
+ *
+ *  stops the reader kproc, closes the device channel and frees
+ *  the per-interface rock.
+ */
+static void
+netdevunbind(Ipifc *ifc)
+{
+	Netdevrock *er = ifc->arg;
+
+	if(er->readp != nil)
+		postnote(er->readp, 1, "unbind", 0);	/* ask the reader to quit */
+
+	/* wait for readers to die: netdevread clears readp on its way out */
+	while(er->readp != nil)
+		tsleep(&up->sleep, return0, 0, 300);	/* re-check every 300 ms */
+
+	if(er->mchan != nil)
+		cclose(er->mchan);
+
+	free(er);
+}
+
+/*
+ *  called by ipoput with a single block to write
+ *
+ *  the packet is flattened into one Block, padded up to the
+ *  interface's mintu if shorter, and handed to the bwrite routine
+ *  of the device behind mchan.
+ */
+static void
+netdevbwrite(Ipifc *ifc, Block *bp, int, uchar*)
+{
+	Netdevrock *er = ifc->arg;
+
+	if(bp->next)
+		bp = concatblock(bp);	/* chained blocks: merge into one */
+	if(BLEN(bp) < ifc->mintu)
+		bp = adjustblock(bp, ifc->mintu);
+
+	devtab[er->mchan->type]->bwrite(er->mchan, bp, 0);
+	ifc->out++;
+}
+
+/*
+ *  process to read from the device
+ *
+ *  runs as a kproc started by netdevbind; a is the Ipifc.
+ *  each block read from the device is passed to ipiput4 while
+ *  holding the interface read lock.
+ */
+static void
+netdevread(void *a)
+{
+	Ipifc *ifc;
+	Block *bp;
+	Netdevrock *er;
+	char *argv[1];
+
+	ifc = a;
+	er = ifc->arg;
+	er->readp = up;	/* hide identity under a rock for unbind */
+	if(waserror()){
+		/* any error (including the note posted by unbind) ends the kproc */
+		er->readp = nil;
+		pexit("hangup", 1);
+	}
+	for(;;){
+		bp = devtab[er->mchan->type]->bread(er->mchan, ifc->maxtu, 0);
+		if(bp == nil){
+			/*
+			 * get here if mchan is a pipe and other side hangs up
+			 * clean up this interface & get out
+			 * ZZZ is this a good idea?
+			 */
+			poperror();
+			er->readp = nil;	/* so the unbind below does not wait for us */
+			argv[0] = "unbind";
+			if(!waserror())
+				ifc->conv->p->ctl(ifc->conv, argv, 1);
+			pexit("hangup", 1);
+		}
+		if(!canrlock(ifc)){
+			/* interface is write-locked: drop the packet rather than wait */
+			freeb(bp);
+			continue;
+		}
+		if(waserror()){
+			/* release the read lock before passing the error outward */
+			runlock(ifc);
+			nexterror();
+		}
+		ifc->in++;
+		if(ifc->lifc == nil)
+			freeb(bp);	/* no logical interface configured: discard */
+		else
+			ipiput4(er->f, ifc, bp);
+		runlock(ifc);
+		poperror();
+	}
+}
+
+void
+netdevmediumlink(void)
+{
+	addipmedium(&netdevmedium);
+}
--- /dev/null
+++ b/os/ip.original/netlog.c
@@ -1,0 +1,263 @@
+#include	"u.h"
+#include	"../port/lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"../port/error.h"
+#include	"../ip/ip.h"
+
+enum {
+	Nlog		= 4*1024,
+};
+
+/*
+ *  action log
+ */
+struct Netlog {
+	Lock;
+	int	opens;
+	char*	buf;
+	char	*end;
+	char	*rptr;
+	int	len;
+
+	int	logmask;			/* mask of things to debug */
+	uchar	iponly[IPaddrlen];		/* ip address to print debugging for */
+	int	iponlyset;
+
+	QLock;
+	Rendez;
+};
+
+typedef struct Netlogflag {
+	char*	name;
+	int	mask;
+} Netlogflag;
+
+static Netlogflag flags[] =
+{
+	{ "ppp",	Logppp, },
+	{ "ip",		Logip, },
+	{ "fs",		Logfs, },
+	{ "tcp",	Logtcp, },
+	{ "il",		Logil, },
+	{ "icmp",	Logicmp, },
+	{ "udp",	Logudp, },
+	{ "compress",	Logcompress, },
+	{ "ilmsg",	Logil|Logilmsg, },
+	{ "gre",	Loggre, },
+	{ "tcpwin",	Logtcp|Logtcpwin, },
+	{ "tcprxmt",	Logtcp|Logtcprxmt, },
+	{ "udpmsg",	Logudp|Logudpmsg, },
+	{ "ipmsg",	Logip|Logipmsg, },
+	{ "esp",	Logesp, },
+	{ nil,		0, },
+};
+
+char Ebadnetctl[] = "too few arguments for netlog control message";
+
+enum
+{
+	CMset,
+	CMclear,
+	CMonly,
+};
+
+static
+Cmdtab routecmd[] = {
+	CMset,		"set",		0,
+	CMclear,	"clear",	0,
+	CMonly,		"only",		0,
+};
+
+void
+netloginit(Fs *f)
+{
+	f->alog = smalloc(sizeof(Netlog));
+}
+
+/*
+ * Account for one more opener of the log.
+ * The first opener allocates the Nlog-byte circular buffer and
+ * resets the read pointer and length, so nothing is readable until
+ * netlog has written to it.
+ * Raises Enomem if the buffer cannot be allocated.
+ */
+void
+netlogopen(Fs *f)
+{
+	lock(f->alog);
+	if(waserror()){
+		unlock(f->alog);
+		nexterror();
+	}
+	if(f->alog->opens == 0){
+		if(f->alog->buf == nil){
+			f->alog->buf = malloc(Nlog);
+			if(f->alog->buf == nil)
+				error(Enomem);
+		}
+		f->alog->rptr = f->alog->buf;
+		f->alog->end = f->alog->buf + Nlog;
+		/* the buffer is fresh: forget any length left from an earlier session */
+		f->alog->len = 0;
+	}
+	f->alog->opens++;
+	unlock(f->alog);
+	poperror();
+}
+
+void
+netlogclose(Fs *f)	/* drop one opener; the last one out frees the buffer */
+{
+	lock(f->alog);
+	if(waserror()){
+		unlock(f->alog);
+		nexterror();
+	}
+	f->alog->opens--;
+	if(f->alog->opens == 0){
+		free(f->alog->buf);
+		f->alog->buf = nil;	/* netlogopen allocates again when it sees nil */
+	}
+	unlock(f->alog);
+	poperror();
+}
+
+static int
+netlogready(void *a)
+{
+	Fs *f = a;
+
+	return f->alog->len;
+}
+
+long
+netlogread(Fs *f, void *a, ulong, long n)	/* copy up to n logged bytes into a; returns the count */
+{
+	int i, d;
+	char *p, *rptr;
+
+	qlock(f->alog);	/* one reader at a time */
+	if(waserror()){
+		qunlock(f->alog);
+		nexterror();
+	}
+
+	for(;;){
+		lock(f->alog);	/* guards rptr and len against netlog */
+		if(f->alog->len){
+			if(n > f->alog->len)
+				n = f->alog->len;
+			d = 0;	/* bytes that wrapped past the end of the buffer */
+			rptr = f->alog->rptr;
+			f->alog->rptr += n;
+			if(f->alog->rptr >= f->alog->end){
+				d = f->alog->rptr - f->alog->end;
+				f->alog->rptr = f->alog->buf + d;
+			}
+			f->alog->len -= n;
+			unlock(f->alog);
+
+			i = n-d;	/* bytes available before the wrap point */
+			p = a;
+			memmove(p, rptr, i);
+			memmove(p+i, f->alog->buf, d);
+			break;
+		}
+		else
+			unlock(f->alog);
+
+		sleep(f->alog, netlogready, f);	/* wait until netlog adds data */
+	}
+
+	qunlock(f->alog);
+	poperror();
+
+	return n;
+}
+
+/*
+ * Handle a control message written to the log:
+ *	set flag ...	turn on logging for the named flags
+ *	clear flag ...	turn off logging for the named flags
+ *	only addr	parse addr into iponly; iponlyset is cleared
+ *			when the result equals IPnoaddr, set otherwise
+ * Flag names that are not in the flags table are ignored.
+ * Raises Ebadnetctl when fewer than two fields are given.
+ */
+void
+netlogctl(Fs *f, char* s, int n)
+{
+	int i, set;
+	Netlogflag *fp;
+	Cmdbuf *cb;
+	Cmdtab *ct;
+
+	cb = parsecmd(s, n);
+	if(waserror()){
+		free(cb);
+		nexterror();
+	}
+
+	if(cb->nf < 2)
+		error(Ebadnetctl);
+
+	ct = lookupcmd(cb, routecmd, nelem(routecmd));
+
+	SET(set);
+
+	switch(ct->index){
+	case CMset:
+		set = 1;
+		break;
+
+	case CMclear:
+		set = 0;
+		break;
+
+	case CMonly:
+		parseip(f->alog->iponly, cb->f[1]);
+		if(ipcmp(f->alog->iponly, IPnoaddr) == 0)
+			f->alog->iponlyset = 0;
+		else
+			f->alog->iponlyset = 1;
+		free(cb);
+		/* balance the waserror() above before leaving early */
+		poperror();
+		return;
+
+	default:
+		cmderror(cb, "unknown ip control message");
+	}
+
+	for(i = 1; i < cb->nf; i++){
+		for(fp = flags; fp->name; fp++)
+			if(strcmp(fp->name, cb->f[i]) == 0)
+				break;
+		if(fp->name == nil)
+			continue;
+		if(set)
+			f->alog->logmask |= fp->mask;
+		else
+			f->alog->logmask &= ~fp->mask;
+	}
+
+	free(cb);
+	poperror();
+}
+
+void
+netlog(Fs *f, int mask, char *fmt, ...)	/* append a formatted message if mask is enabled and the log is open */
+{
+	char buf[128], *t, *fp;
+	int i, n;
+	va_list arg;
+
+	if(!(f->alog->logmask & mask))
+		return;
+
+	if(f->alog->opens == 0)
+		return;
+
+	va_start(arg, fmt);
+	n = vseprint(buf, buf+sizeof(buf), fmt, arg) - buf;	/* message is limited to buf's 128 bytes */
+	va_end(arg);
+
+	lock(f->alog);
+	i = f->alog->len + n - Nlog;	/* bytes by which the message overflows the buffer */
+	if(i > 0){
+		/* make room by discarding the oldest i bytes */
+		f->alog->len -= i;
+		f->alog->rptr += i;
+		if(f->alog->rptr >= f->alog->end)
+			f->alog->rptr = f->alog->buf + (f->alog->rptr - f->alog->end);
+	}
+	t = f->alog->rptr + f->alog->len;	/* write position; may lie past end until wrapped below */
+	fp = buf;
+	f->alog->len += n;
+	while(n-- > 0){
+		if(t >= f->alog->end)
+			t = f->alog->buf + (t - f->alog->end);
+		*t++ = *fp++;
+	}
+	unlock(f->alog);
+
+	wakeup(f->alog);	/* rouse a reader sleeping in netlogread */
+}
--- /dev/null
+++ b/os/ip.original/nullmedium.c
@@ -1,0 +1,39 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+
+#include "ip.h"
+
+static void
+nullbind(Ipifc*, int, char**)
+{
+	error("cannot bind null device");
+}
+
+static void
+nullunbind(Ipifc*)
+{
+}
+
+static void
+nullbwrite(Ipifc*, Block*, int, uchar*)
+{
+	error("nullbwrite");
+}
+
+Medium nullmedium =
+{
+.name=		"null",
+.bind=		nullbind,
+.unbind=	nullunbind,
+.bwrite=	nullbwrite,
+};
+
+void
+nullmediumlink(void)
+{
+	addipmedium(&nullmedium);
+}
--- /dev/null
+++ b/os/ip.original/pktmedium.c
@@ -1,0 +1,79 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+
+#include "ip.h"
+
+
+static void	pktbind(Ipifc*, int, char**);
+static void	pktunbind(Ipifc*);
+static void	pktbwrite(Ipifc*, Block*, int, uchar*);
+static void	pktin(Fs*, Ipifc*, Block*);
+
+Medium pktmedium =
+{
+.name=		"pkt",
+.hsize=		14,
+.mintu=		40,
+.maxtu=		4*1024,
+.maclen=	6,
+.bind=		pktbind,
+.unbind=	pktunbind,
+.bwrite=	pktbwrite,
+.pktin=		pktin,
+.unbindonclose=	1,
+};
+
+/*
+ *  called to bind an IP ifc to the pkt medium; there is nothing to set up
+ *  called with ifc wlock'd
+ */
+static void
+pktbind(Ipifc*, int, char**)
+{
+}
+
+/*
+ *  called with ifc wlock'd
+ */
+static void
+pktunbind(Ipifc*)
+{
+}
+
+/*
+ *  called by ipoput with a single packet to write
+ *
+ *  the packet is delivered to the conversation's read queue; when
+ *  snoopers are attached a copy also goes to the snoop queue.
+ */
+static void
+pktbwrite(Ipifc *ifc, Block *bp, int, uchar*)
+{
+	/* enqueue onto the conversation's rq */
+	bp = concatblock(bp);
+	if(ifc->conv->snoopers.ref > 0)
+		qpass(ifc->conv->sq, copyblock(bp, BLEN(bp)));
+	qpass(ifc->conv->rq, bp);
+}
+
+/*
+ *  called with ifc rlocked when someone writes to 'data'
+ */
+static void
+pktin(Fs *f, Ipifc *ifc, Block *bp)
+{
+	if(ifc->lifc == nil)
+		freeb(bp);
+	else {
+		if(ifc->conv->snoopers.ref > 0)
+			qpass(ifc->conv->sq, copyblock(bp, BLEN(bp)));
+		ipiput4(f, ifc, bp);
+	}
+}
+
+void
+pktmediumlink(void)
+{
+	addipmedium(&pktmedium);
+}
--- /dev/null
+++ b/os/ip.original/plan9.c
@@ -1,0 +1,36 @@
+#include	"u.h"
+#include	"../port/lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"../port/error.h"
+#include	"ip.h"
+
+/*
+ *  some hacks for commonality twixt inferno and plan9
+ */
+
+char*
+commonuser(void)
+{
+	return up->env->user;
+}
+
+Chan*
+commonfdtochan(int fd, int mode, int a, int b)
+{
+	return fdtochan(up->env->fgrp, fd, mode, a, b);
+}
+
+char*
+commonerror(void)
+{
+	return up->env->errstr;
+}
+
+int
+postnote(Proc *p, int, char *, int)
+{
+	swiproc(p, 0);
+	return 0;
+}
--- /dev/null
+++ b/os/ip.original/ppp.c
@@ -1,0 +1,1656 @@
+#include	"u.h"
+#include	"../port/lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"../port/error.h"
+#include	<libcrypt.h>
+#include	<kernel.h>
+#include	"ip.h"
+#include	"ppp.h"
+
+int	nocompress;	/* nonzero: config() does not offer Oipcompress and getopts() rejects it */
+Ipaddr	pppdns[2];	/* copies of ppp->dns1 and ppp->dns2, stored by newstate() when IPCP reaches Sopened */
+
+/*
+ * Calculate FCS - rfc 1331.  Lookup table used by getframe() and putframe() as fcs = (fcs >> 8) ^ fcstab[(fcs ^ c) & 0xff]
+ */
+ushort fcstab[256] =
+{
+      0x0000, 0x1189, 0x2312, 0x329b, 0x4624, 0x57ad, 0x6536, 0x74bf,
+      0x8c48, 0x9dc1, 0xaf5a, 0xbed3, 0xca6c, 0xdbe5, 0xe97e, 0xf8f7,
+      0x1081, 0x0108, 0x3393, 0x221a, 0x56a5, 0x472c, 0x75b7, 0x643e,
+      0x9cc9, 0x8d40, 0xbfdb, 0xae52, 0xdaed, 0xcb64, 0xf9ff, 0xe876,
+      0x2102, 0x308b, 0x0210, 0x1399, 0x6726, 0x76af, 0x4434, 0x55bd,
+      0xad4a, 0xbcc3, 0x8e58, 0x9fd1, 0xeb6e, 0xfae7, 0xc87c, 0xd9f5,
+      0x3183, 0x200a, 0x1291, 0x0318, 0x77a7, 0x662e, 0x54b5, 0x453c,
+      0xbdcb, 0xac42, 0x9ed9, 0x8f50, 0xfbef, 0xea66, 0xd8fd, 0xc974,
+      0x4204, 0x538d, 0x6116, 0x709f, 0x0420, 0x15a9, 0x2732, 0x36bb,
+      0xce4c, 0xdfc5, 0xed5e, 0xfcd7, 0x8868, 0x99e1, 0xab7a, 0xbaf3,
+      0x5285, 0x430c, 0x7197, 0x601e, 0x14a1, 0x0528, 0x37b3, 0x263a,
+      0xdecd, 0xcf44, 0xfddf, 0xec56, 0x98e9, 0x8960, 0xbbfb, 0xaa72,
+      0x6306, 0x728f, 0x4014, 0x519d, 0x2522, 0x34ab, 0x0630, 0x17b9,
+      0xef4e, 0xfec7, 0xcc5c, 0xddd5, 0xa96a, 0xb8e3, 0x8a78, 0x9bf1,
+      0x7387, 0x620e, 0x5095, 0x411c, 0x35a3, 0x242a, 0x16b1, 0x0738,
+      0xffcf, 0xee46, 0xdcdd, 0xcd54, 0xb9eb, 0xa862, 0x9af9, 0x8b70,
+      0x8408, 0x9581, 0xa71a, 0xb693, 0xc22c, 0xd3a5, 0xe13e, 0xf0b7,
+      0x0840, 0x19c9, 0x2b52, 0x3adb, 0x4e64, 0x5fed, 0x6d76, 0x7cff,
+      0x9489, 0x8500, 0xb79b, 0xa612, 0xd2ad, 0xc324, 0xf1bf, 0xe036,
+      0x18c1, 0x0948, 0x3bd3, 0x2a5a, 0x5ee5, 0x4f6c, 0x7df7, 0x6c7e,
+      0xa50a, 0xb483, 0x8618, 0x9791, 0xe32e, 0xf2a7, 0xc03c, 0xd1b5,
+      0x2942, 0x38cb, 0x0a50, 0x1bd9, 0x6f66, 0x7eef, 0x4c74, 0x5dfd,
+      0xb58b, 0xa402, 0x9699, 0x8710, 0xf3af, 0xe226, 0xd0bd, 0xc134,
+      0x39c3, 0x284a, 0x1ad1, 0x0b58, 0x7fe7, 0x6e6e, 0x5cf5, 0x4d7c,
+      0xc60c, 0xd785, 0xe51e, 0xf497, 0x8028, 0x91a1, 0xa33a, 0xb2b3,
+      0x4a44, 0x5bcd, 0x6956, 0x78df, 0x0c60, 0x1de9, 0x2f72, 0x3efb,
+      0xd68d, 0xc704, 0xf59f, 0xe416, 0x90a9, 0x8120, 0xb3bb, 0xa232,
+      0x5ac5, 0x4b4c, 0x79d7, 0x685e, 0x1ce1, 0x0d68, 0x3ff3, 0x2e7a,
+      0xe70e, 0xf687, 0xc41c, 0xd595, 0xa12a, 0xb0a3, 0x8238, 0x93b1,
+      0x6b46, 0x7acf, 0x4854, 0x59dd, 0x2d62, 0x3ceb, 0x0e70, 0x1ff9,
+      0xf78f, 0xe606, 0xd49d, 0xc514, 0xb1ab, 0xa022, 0x92b9, 0x8330,
+      0x7bc7, 0x6a4e, 0x58d5, 0x495c, 0x3de3, 0x2c6a, 0x1ef1, 0x0f78
+};
+
+static char *snames[] =	/* state names for logging; newstate() indexes this with a Pstate state value */
+{
+	"Sclosed",
+	"Sclosing",
+	"Sreqsent",
+	"Sackrcvd",
+	"Sacksent",
+	"Sopened",
+};
+
+static void	init(PPP*);
+static void	setphase(PPP*, int);
+static void	pinit(PPP*, Pstate*);
+static void	ppptimer(void*);
+static void	ptimer(PPP*, Pstate*);
+static int	getframe(PPP*, Block**);
+static Block*	putframe(PPP*, int, Block*);
+static uchar*	escapebyte(PPP*, ulong, uchar*, ushort*);
+static void	config(PPP*, Pstate*, int);
+static int	getopts(PPP*, Pstate*, Block*);
+static void	rejopts(PPP*, Pstate*, Block*, int);
+static void	newstate(PPP*, Pstate*, int);
+static void	rcv(PPP*, Pstate*, Block*);
+static void	getchap(PPP*, Block*);
+static void	getpap(PPP*, Block*);
+static void	sendpap(PPP*);
+static void	getlqm(PPP*, Block*);
+static void	putlqm(PPP*);
+static void	hangup(PPP*);
+static void	remove(PPP*);
+
+static	int		validv4(Ipaddr);
+static	void		invalidate(Ipaddr);
+static	void		ipconnect(PPP *);
+static	void		setdefroute(PPP *, Ipaddr);
+static	void		printopts(PPP *, Pstate*, Block*, int);
+static	void		sendtermreq(PPP*, Pstate*);
+
+static void	/* queue err, followed by a newline, on the conversation's eq */
+errlog(PPP *ppp, char *err)
+{
+	char line[64];
+	int len;
+
+	len = snprint(line, sizeof(line), "%s\n", err);
+	qproduce(ppp->ifc->conv->eq, line, len);
+}
+
+static void	/* (re)start the link; the first call also allocates buffers, LCP/IPCP state and the timer kproc */
+init(PPP* ppp)
+{
+	if(ppp->inbuf == nil){	/* a nil inbuf marks a PPP that has not been set up yet */
+		ppp->inbuf = allocb(4096);
+		ppp->outbuf = allocb(4096);
+
+		ppp->lcp = malloc(sizeof(Pstate));
+		ppp->ipcp = malloc(sizeof(Pstate));
+		if(ppp->lcp == nil || ppp->ipcp == nil)
+			error("ppp init: malloc");	/* NOTE(review): inbuf is already set here, so a later init() would skip this branch */
+
+		ppp->lcp->proto = Plcp;
+		ppp->lcp->state = Sclosed;
+		ppp->ipcp->proto = Pipcp;
+		ppp->ipcp->state = Sclosed;
+
+		kproc("ppptimer", ppptimer, ppp, KPDUPPG|KPDUPFDG);	/* runs ppptimer(ppp) in its own kproc */
+	}
+
+	pinit(ppp, ppp->lcp);	/* resets LCP and sends its first configure request */
+	setphase(ppp, Plink);
+}
+
+static void	/* record the new link phase and run that phase's entry action */
+setphase(PPP *ppp, int phase)
+{
+	int oldphase;
+
+	oldphase = ppp->phase;
+
+	ppp->phase = phase;
+	switch(phase){
+	default:
+		panic("ppp: unknown phase %d", phase);
+	case Pdead:
+		/* restart or exit? (as written: always restart LCP and go back to Plink) */
+		pinit(ppp, ppp->lcp);
+		setphase(ppp, Plink);
+		break;
+	case Plink:
+		/* link down */
+		switch(oldphase) {
+		case Pnet:	/* coming from Pnet: mark IPCP closed */
+			newstate(ppp, ppp->ipcp, Sclosed);
+		}
+		break;
+	case Pauth:	/* send PAP if selected; with neither PAP nor CHAP go straight on to Pnet */
+		if(ppp->usepap)
+			sendpap(ppp);
+		else if(!ppp->usechap)
+			setphase(ppp, Pnet);
+		break;
+	case Pnet:	/* start IPCP negotiation */
+		pinit(ppp, ppp->ipcp);
+		break;
+	case Pterm:
+		/* what? */
+		break;
+	}
+}
+
+static void	/* reset one protocol's negotiation state, send a fresh configure request, enter Sreqsent */
+pinit(PPP *ppp, Pstate *p)
+{
+	p->timeout = 0;
+
+	switch(p->proto){
+	case Plcp:
+		ppp->magic = TK2MS(MACHP(0)->ticks);	/* magic number derived from the tick counter */
+		ppp->xctlmap = 0xffffffff;	/* putframe() escapes every byte < 0x20 until this changes */
+		ppp->period = 0;
+		p->optmask = 0xffffffff;	/* offer every option again */
+		ppp->rctlmap = 0;
+		ppp->ipcp->state = Sclosed;	/* restarting LCP also resets IPCP */
+		ppp->ipcp->optmask = 0xffffffff;
+
+		/* quality goo */
+		ppp->timeout = 0;
+		memset(&ppp->in, 0, sizeof(ppp->in));
+		memset(&ppp->out, 0, sizeof(ppp->out));
+		memset(&ppp->pin, 0, sizeof(ppp->pin));
+		memset(&ppp->pout, 0, sizeof(ppp->pout));
+		memset(&ppp->sin, 0, sizeof(ppp->sin));
+		break;
+	case Pipcp:
+		if(ppp->localfrozen == 0)	/* pppopen() freezes addresses it was given; those are kept */
+			invalidate(ppp->local);
+		if(ppp->remotefrozen == 0)
+			invalidate(ppp->remote);
+		p->optmask = 0xffffffff;
+		ppp->ctcp = compress_init(ppp->ctcp);
+		ppp->usedns = 3;	/* bit 0: request Oipdns, bit 1: request Oipdns2 (see config) */
+		invalidate(ppp->dns1);
+		invalidate(ppp->dns2);
+		break;
+	}
+	p->confid = p->rcvdconfid = -1;	/* no request outstanding, none received */
+	config(ppp, p, 1);
+	newstate(ppp, p, Sreqsent);
+}
+
+/*
+ *  change protocol to a new state: log it, run the LCP phase change or the IPCP-open actions, then record it.
+ */
+static void
+newstate(PPP *ppp, Pstate *p, int state)
+{
+	netlog(ppp->f, Logppp, "%ux %ux %s->%s ctlmap %lux/%lux flags %ux mtu %d mru %d\n", ppp, p->proto,
+		snames[p->state], snames[state], ppp->rctlmap, ppp->xctlmap, p->flags,
+		ppp->mtu, ppp->mru);
+
+	if(p->proto == Plcp) {	/* LCP transitions drive the link phase */
+		if(state == Sopened)
+			setphase(ppp, Pauth);
+		else if(state == Sclosed)
+			setphase(ppp, Pdead);
+		else if(p->state == Sopened)	/* leaving Sopened for any other state */
+			setphase(ppp, Plink);
+	}
+
+	if(p->proto == Pipcp && state == Sopened && validv4(ppp->local) && validv4(ppp->remote)){
+		netlog(ppp->f, Logppp, "pppnewstate: local %I remote %I\n", ppp->local, ppp->remote);
+		ipmove(pppdns[0], ppp->dns1);
+		ipmove(pppdns[1], ppp->dns2);
+		ipconnect(ppp);
+		/* if this is the only network, set up a default route */
+//		if(ppp->ifc->link==nil)		/* how??? */
+			setdefroute(ppp, ppp->remote);	/* unconditional while the test above is commented out */
+		errlog(ppp, Enoerror);
+	}
+
+	p->state = state;	/* stored last: everything above still sees the old state, and setphase() may re-enter newstate() */
+}
+
+static void	/* release everything ppp owns, then ppp itself */
+remove(PPP *ppp)
+{
+	/* protocol and compression state */
+	free(ppp->ipcp);
+	free(ppp->ctcp);
+	free(ppp->lcp);
+	ppp->ipcp = nil;
+	ppp->ctcp = nil;
+	ppp->lcp = nil;
+
+	/* framing buffers, if they were ever allocated */
+	if(ppp->inbuf != nil)
+		freeb(ppp->inbuf);
+	if(ppp->outbuf != nil)
+		freeb(ppp->outbuf);
+	ppp->inbuf = ppp->outbuf = nil;
+	free(ppp);
+}
+
+void	/* hang up the line, then free ppp; ppp must not be used afterwards */
+pppclose(PPP *ppp)
+{
+	hangup(ppp);
+	remove(ppp);	/* NOTE(review): the ppptimer kproc started by init() loops forever on this ppp and is not stopped here */
+}
+
+static void	/* debug aid: print the first bytes of b (at most 85) as two-digit hex, on one line */
+dumpblock(Block *b)
+{
+	char x[256];
+	int i;
+	x[0] = 0;	/* an empty block formats nothing: print an empty line, not stack garbage */
+	for(i = 0; i < (sizeof(x)-1)/3 && b->rp+i < b->wp; i++)
+		sprint(&x[3*i], "%2.2ux ", b->rp[i]);	/* 3 characters per byte, NUL included by sprint */
+	print("%s\n", x);
+}
+
+/* read one frame from dchan: returns its protocol with the payload in *info, or 0 with *info nil once a read yields nothing */
+static int
+getframe(PPP *ppp, Block **info)
+{
+	uchar *p, *from, *to;
+	int n, len, proto;
+	ulong c;
+	ushort fcs;
+	Block *buf, *b;
+
+	buf = ppp->inbuf;
+	for(;;){
+		/* read till we hit a frame byte or run out of room */
+		for(p = buf->rp; buf->wp < buf->lim;){
+			for(; p < buf->wp; p++)
+				if(*p == HDLC_frame)
+					goto break2;
+
+			len = buf->lim - buf->wp;
+			n = 0;
+			if(ppp->dchan != nil)
+				n = kchanio(ppp->dchan, buf->wp, len, OREAD);
+				netlog(ppp->f, Logppp, "ppp kchanio %d bytes\n", n);	/* NOTE: not governed by the if above, despite the indentation */
+			if(n <= 0){
+				buf->wp = buf->rp;	/* throw away any partial frame */
+//				if(n < 0)
+//					print("ppp kchanio(%s) returned %d: %r",
+//						ppp->dchan->path->elem, n);
+				*info = nil;
+				return 0;
+			}
+			buf->wp += n;
+		}
+break2:
+
+		/* copy into block, undoing escapes, and calculating fcs */
+		fcs = PPP_initfcs;
+		b = allocb(p - buf->rp);
+		to = b->wp;
+		for(from = buf->rp; from != p;){
+			c = *from++;
+			if(c == HDLC_esc){
+				if(from == p)	/* escape with nothing after it: stop */
+					break;
+				c = *from++ ^ 0x20;
+			} else if((c < 0x20) && (ppp->rctlmap & (1 << c)))	/* unescaped control byte listed in rctlmap: skip it */
+				continue;
+			*to++ = c;
+			fcs = (fcs >> 8) ^ fcstab[(fcs ^ c) & 0xff];
+		}
+
+		/* copy down what's left in buffer */
+		p++;	/* step over the byte the frame ended on */
+		memmove(buf->rp, p, buf->wp - p);
+		n = p - buf->rp;
+		buf->wp -= n;
+		b->wp = to - 2;	/* the last two decoded bytes (the FCS) are not part of the payload */
+
+		/* return to caller if checksum matches */
+		if(fcs == PPP_goodfcs){
+			if(b->rp[0] == PPP_addr && b->rp[1] == PPP_ctl)	/* address/control bytes are optional */
+				b->rp += 2;
+			proto = *b->rp++;
+			if((proto & 0x1) == 0)	/* even first byte: a second protocol byte follows */
+				proto = (proto<<8) | *b->rp++;
+			if(b->rp < b->wp){	/* only frames with a non-empty payload are returned */
+				ppp->in.bytes += n;
+				ppp->in.packets++;
+				*info = b;
+				return proto;
+			}
+		} else if(BLEN(b) > 0){
+			ppp->ifc->inerr++;
+			ppp->in.discards++;
+			netlog(ppp->f, Logppp, "len %d/%d cksum %ux (%ux %ux %ux %ux)\n",
+				BLEN(b), BLEN(buf), fcs, b->rp[0],
+				b->rp[1], b->rp[2], b->rp[3]);
+		}
+
+		freeblist(b);	/* bad or empty frame: drop it and look for the next one */
+	}
+	*info = nil;
+	return 0;
+}
+
+/* send a PPP frame: prepend address/control/protocol, byte-stuff and checksum into outbuf, write it to dchan; returns b for the caller to free */
+static Block *
+putframe(PPP *ppp, int proto, Block *b)
+{
+	Block *buf;
+	uchar *to, *from;
+	ushort fcs;
+	ulong ctlmap;
+	int c;
+	Block *bp;
+
+	if(ppp->dchan == nil){
+		netlog(ppp->f, Logppp, "putframe: dchan down\n");
+		errlog(ppp, Ehungup);
+		return b;
+	}
+	netlog(ppp->f, Logppp, "putframe %ux %d %d (%d bytes)\n", proto, b->rp[0], b->rp[1], BLEN(b));
+
+	ppp->out.packets++;
+
+	if(proto == Plcp)	/* LCP frames always escape every control character */
+		ctlmap = 0xffffffff;
+	else
+		ctlmap = ppp->xctlmap;
+
+	/* make sure we have head room */
+	if(b->rp - b->base < 4){
+		b = padblock(b, 4);
+		b->rp += 4;
+	}
+
+	/* add in the protocol and address, we'd better have left room */
+	from = b->rp;	/* the header is built below rp; b->rp itself is left alone */
+	*--from = proto;
+	if(!(ppp->lcp->flags&Fpc) || proto > 0x100 || proto == Plcp)
+		*--from = proto>>8;
+	if(!(ppp->lcp->flags&Fac) || proto == Plcp){
+		*--from = PPP_ctl;
+		*--from = PPP_addr;
+	}
+
+	qlock(&ppp->outlock);	/* outbuf is shared by every sender */
+	buf = ppp->outbuf;
+
+	/* escape and checksum the body */
+	fcs = PPP_initfcs;
+	to = buf->rp;
+
+	*to++ = HDLC_frame;
+
+	for(bp = b; bp; bp = bp->next){
+		if(bp != b)	/* the first block starts at the header built above */
+			from = bp->rp;
+		for(; from < bp->wp; from++){
+			c = *from;
+			if(c == HDLC_frame || c == HDLC_esc
+			   || (c < 0x20 && ((1<<c) & ctlmap))){
+				*to++ = HDLC_esc;
+				*to++ = c ^ 0x20;
+			} else 
+				*to++ = c;
+			fcs = (fcs >> 8) ^ fcstab[(fcs ^ c) & 0xff];
+		}
+	}
+
+	/* add on and escape the checksum */
+	fcs = ~fcs;
+	c = fcs & 0xff;	/* low byte only: testing all 16 bits left this byte unescaped unless the high byte was 0 */
+	if(c == HDLC_frame || c == HDLC_esc
+	   || (c < 0x20 && ((1<<c) & ctlmap))){
+		*to++ = HDLC_esc;
+		*to++ = c ^ 0x20;
+	} else 
+		*to++ = c;
+	c = fcs>>8;	/* high byte; fcs is 16 bits wide, so this is already 0..255 */
+	if(c == HDLC_frame || c == HDLC_esc
+	   || (c < 0x20 && ((1<<c) & ctlmap))){
+		*to++ = HDLC_esc;
+		*to++ = c ^ 0x20;
+	} else 
+		*to++ = c;
+
+	/* add frame marker and send */
+	*to++ = HDLC_frame;
+	buf->wp = to;
+	if(ppp->dchan == nil){	/* checked again: the line may have gone away since the test at the top */
+		netlog(ppp->f, Logppp, "putframe: dchan down\n");
+		errlog(ppp, Ehungup);
+	}else{
+		kchanio(ppp->dchan, buf->rp, BLEN(buf), OWRITE);
+		ppp->out.bytes += BLEN(buf);
+	}
+
+	qunlock(&ppp->outlock);
+	return b;
+}
+
+#define IPB2LCP(b) ((Lcpmsg*)((b)->wp-4))
+
+static Block*	/* new block of len bytes that starts with an Lcpmsg header carrying code and id */
+alloclcp(int code, int id, int len)
+{
+	Lcpmsg *hdr;
+	Block *blk;
+
+	blk = allocb(len);
+	hdr = (Lcpmsg*)blk->wp;
+
+	/*
+	 *  only code and id are filled in; the length field is left to the caller
+	 */
+	hdr->code = code;
+	hdr->id = id;
+
+	blk->wp += 4;	/* wp now sits 4 bytes in, which is what IPB2LCP() expects */
+	return blk;
+}
+
+static void	/* append a 5-byte option at wp: type, length 5, 16-bit aproto, 1-byte alg */
+putao(Block *b, int type, int aproto, int alg)
+{
+	b->wp[0] = type;
+	b->wp[1] = 5;
+	hnputs(b->wp+2, aproto);
+	b->wp[4] = alg;
+	b->wp += 5;
+}
+
+static void	/* append a 6-byte option at wp: type, length 6, 32-bit val */
+putlo(Block *b, int type, ulong val)
+{
+	b->wp[0] = type;
+	b->wp[1] = 6;
+	hnputl(b->wp+2, val);
+	b->wp += 6;
+}
+
+static void	/* append a 6-byte option at wp: type, length 6, then val converted by v6tov4() */
+putv4o(Block *b, int type, Ipaddr val)
+{
+	b->wp[0] = type;
+	b->wp[1] = 6;
+
+	/* a failed v6tov4() is deliberately not treated as an error */
+	v6tov4(b->wp+2, val);
+	b->wp += 6;
+}
+
+static void	/* append a 4-byte option at wp: type, length 4, 16-bit val */
+putso(Block *b, int type, ulong val)
+{
+	b->wp[0] = type;
+	b->wp[1] = 4;
+	hnputs(b->wp+2, val);
+	b->wp += 4;
+}
+
+static void	/* append a 2-byte option at wp: type and length 2, no data */
+puto(Block *b, int type)
+{
+	*b->wp++ = type;
+	*b->wp++ = 2;
+}
+
+/*
+ *  send configuration request
+ */
+static void
+config(PPP *ppp, Pstate *p, int newid)
+{
+	Lcpmsg *hdr;
+	Block *req;
+	int id;
+
+	if(!newid)
+		id = p->confid;		/* resend under the id that is already outstanding */
+	else {
+		id = ++(p->id);
+		p->confid = id;
+		p->timeout = Timeout;
+	}
+	req = alloclcp(Lconfreq, id, 256);
+	hdr = IPB2LCP(req);	/* taken now, while wp is still just past the header */
+	USED(hdr);
+	switch(p->proto){
+	case Plcp:
+		if(p->optmask & Fmagic)
+			putlo(req, Omagic, ppp->magic);
+		if(p->optmask & Fmtu)
+			putso(req, Omtu, ppp->mru);
+		if(p->optmask & Fac)
+			puto(req, Oac);
+		if(p->optmask & Fpc)
+			puto(req, Opc);
+		if(p->optmask & Fctlmap)
+			putlo(req, Octlmap, 0);	/* we don't want anything escaped */
+		break;
+	case Pipcp:
+		if((p->optmask & Fipaddr) /*&& validv4(ppp->local)*/)
+			putv4o(req, Oipaddr, ppp->local);
+		if(!nocompress && (p->optmask & Fipcompress)){
+			req->wp[0] = Oipcompress;
+			req->wp[1] = 6;
+			hnputs(req->wp+2, Pvjctcp);
+			req->wp[4] = MAX_STATES-1;
+			req->wp[5] = 1;
+			req->wp += 6;
+		}
+		if(ppp->usedns & 1)
+			putlo(req, Oipdns, 0);
+		if(ppp->usedns & 2)
+			putlo(req, Oipdns2, 0);
+		break;
+	}
+
+	hnputs(hdr->len, BLEN(req));
+	req = putframe(ppp, p->proto, req);
+	freeblist(req);
+}
+
+/*
+ *  parse configuration request, sends an ack or reject packet
+ *
+ *	returns:	-1 if request was syntactically incorrect
+ *			 0 if packet was accepted
+ *			 1 if packet was rejected
+ */
+static int
+getopts(PPP *ppp, Pstate *p, Block *b)
+{
+	Lcpmsg *m, *repm;	
+	Lcpopt *o;
+	uchar *cp;
+	ulong rejecting, nacking, flags, proto;
+	ulong mtu, ctlmap, period;
+	ulong x;
+	Block *repb;
+	Ipaddr ipaddr;
+
+	rejecting = 0;
+	nacking = 0;
+	flags = 0;
+
+	/* defaults */
+	invalidate(ipaddr);
+	mtu = ppp->mtu;
+
+	ctlmap = 0xffffffff;
+	period = 0;
+
+	m = (Lcpmsg*)b->rp;
+	repb = alloclcp(Lconfack, m->id, BLEN(b));	/* the reply starts out as an ack of everything */
+	repm = IPB2LCP(repb);
+
+	/* copy options into ack packet */
+	memmove(repm->data, m->data, b->wp - m->data);
+	repb->wp += b->wp - m->data;
+
+	/* look for options we don't recognize or like */
+	for(cp = m->data; cp < b->wp; cp += o->len){
+		o = (Lcpopt*)cp;
+		if(cp + 2 > b->wp || o->len < 2 || cp + o->len > b->wp){	/* an option is at least its type and length bytes */
+			freeblist(repb);
+			netlog(ppp->f, Logppp, "ppp %s: bad option length %ux\n", ppp->ifc->dev,
+				o->type);
+			return -1;
+		}
+
+		/* NOTE(review): the cases below read o->data without checking that o->len covers it */
+		switch(p->proto){
+		case Plcp:
+			switch(o->type){
+			case Oac:
+				flags |= Fac;
+				continue;
+			case Opc:
+				flags |= Fpc;
+				continue;
+			case Omtu:
+				mtu = nhgets(o->data);
+				if(mtu < ppp->ifc->m->mintu){
+					netlog(ppp->f, Logppp, "bogus mtu %d\n", mtu);
+					mtu = ppp->ifc->m->mintu;
+				}
+				continue;
+			case Omagic:
+				if(ppp->magic == nhgetl(o->data))
+					netlog(ppp->f, Logppp, "ppp: possible loop\n");
+				continue;
+			case Octlmap:
+				ctlmap = nhgetl(o->data);
+				continue;
+			case Oquality:
+				proto = nhgets(o->data);
+				if(proto != Plqm)
+					break;
+				x = nhgetl(o->data+2)*10;
+				period = (x+Period-1)/Period;
+				continue;
+			case Oauth:
+				proto = nhgets(o->data);
+				if(proto == Ppap && ppp->chapname[0] && ppp->secret[0]){
+					ppp->usepap = 1;
+					netlog(ppp->f, Logppp, "PPP %s: select PAP\n", ppp->ifc->dev);
+					continue;
+				}
+				if(proto != Pchap || o->data[2] != APmd5){
+					if(!nacking){	/* first nak: restart the reply as a nak */
+						nacking = 1;
+						repb->wp = repm->data;
+						repm->code = Lconfnak;
+					}
+					putao(repb, Oauth, Pchap, APmd5);
+				}
+				else
+					ppp->usechap = 1;
+				ppp->usepap = 0;
+				continue;
+			}
+			break;
+		case Pipcp:
+			switch(o->type){
+			case Oipaddr:	
+				v4tov6(ipaddr, o->data);
+				if(!validv4(ppp->remote))
+					continue;
+				if(!validv4(ipaddr) && !rejecting){
+					/* other side requesting an address */
+					if(!nacking){
+						nacking = 1;
+						repb->wp = repm->data;
+						repm->code = Lconfnak;
+					}
+					putv4o(repb, Oipaddr, ppp->remote);
+				}
+				continue;
+			case Oipcompress:
+				proto = nhgets(o->data);
+				if(nocompress || proto != Pvjctcp || compress_negotiate(ppp->ctcp, o->data+2) < 0)
+					break;
+				flags |= Fipcompress;
+				continue;
+			}
+			break;
+		}
+
+		/* come here if option is not recognized */
+		if(!rejecting){	/* first reject: restart the reply as a reject */
+			rejecting = 1;
+			repb->wp = repm->data;
+			repm->code = Lconfrej;
+		}
+		netlog(ppp->f, Logppp, "ppp %s: bad %ux option %d\n", ppp->ifc->dev, p->proto, o->type);
+		memmove(repb->wp, o, o->len);
+		repb->wp += o->len;
+	}
+
+	/* permanent changes only after we know that we liked the packet */
+	if(!rejecting && !nacking){
+		switch(p->proto){
+		case Plcp:
+			netlog(ppp->f, Logppp, "Plcp: mtu: %d %d x:%lux/r:%lux %lux\n", mtu, ppp->mtu, ppp->xctlmap, ppp->rctlmap, ctlmap);
+			ppp->period = period;
+			ppp->xctlmap = ctlmap;
+			if(mtu > Maxmtu)
+				mtu = Maxmtu;
+			if(mtu < Minmtu)
+				mtu = Minmtu;
+			ppp->mtu = mtu;
+			break;
+		case Pipcp:
+			if(validv4(ipaddr) && ppp->remotefrozen == 0)
+				ipmove(ppp->remote, ipaddr);
+			break;
+		}
+		p->flags = flags;
+	}
+
+	hnputs(repm->len, BLEN(repb));
+	repb = putframe(ppp, p->proto, repb);
+	freeblist(repb);
+
+	return rejecting || nacking;
+}
+
+/*
+ *  parse configuration rejection (code Lconfrej) or nak, just stop sending anything
+ *  that they don't like (except for ipcp address nak).
+ */
+static void
+rejopts(PPP *ppp, Pstate *p, Block *b, int code)
+{
+	Lcpmsg *m;
+	Lcpopt *o;
+
+	/* just give up trying what the other side doesn't like */
+	m = (Lcpmsg*)b->rp;
+	for(b->rp = m->data; b->rp < b->wp; b->rp += o->len){
+		o = (Lcpopt*)b->rp;
+		if(b->rp + 2 > b->wp || o->len < 2 || b->rp + o->len > b->wp){	/* an option is at least its type and length bytes */
+			netlog(ppp->f, Logppp, "ppp %s: bad roption length %ux\n", ppp->ifc->dev,
+				o->type);
+			return;
+		}
+
+		if(code == Lconfrej){	/* rejected outright: never offer the option again */
+			if(o->type < 8*sizeof(p->optmask))
+				p->optmask &= ~(1<<o->type);
+			if(o->type == Oipdns)
+				ppp->usedns &= ~1;
+			else if(o->type == Oipdns2)
+				ppp->usedns &= ~2;
+			netlog(ppp->f, Logppp, "ppp %s: %ux rejecting %d\n", ppp->ifc->dev, p->proto,
+				o->type);
+			continue;
+		}
+
+		switch(p->proto){
+		case Plcp:
+			switch(o->type){
+			case Octlmap:
+				ppp->rctlmap = nhgetl(o->data);
+				break;
+			default:
+				if(o->type < 8*sizeof(p->optmask))
+					p->optmask &= ~(1<<o->type);
+				break;
+			}
+			break;	/* LCP and IPCP option numbers overlap: an LCP nak must not reach the IPCP cases */
+		case Pipcp:
+			switch(o->type){
+			case Oipaddr:
+				if(!validv4(ppp->local))	/* take the address the peer suggests */
+					v4tov6(ppp->local, o->data);
+				/* the Oipaddr bit stays in optmask, so the address is still requested */
+				break;
+			case Oipdns:
+				if(!validv4(ppp->dns1))
+					v4tov6(ppp->dns1, o->data);
+				ppp->usedns &= ~1;
+				break;
+			case Oipdns2:
+				if(!validv4(ppp->dns2))
+					v4tov6(ppp->dns2, o->data);
+				ppp->usedns &= ~2;
+				break;
+			default:
+				if(o->type < 8*sizeof(p->optmask))
+					p->optmask &= ~(1<<o->type);
+				break;
+			}
+			break;
+		}
+	}
+}
+
+
+/*
+ *  put a message through the lcp or ipcp state machine.  They are
+ *  very similar.  b is always consumed.
+ */
+static void
+rcv(PPP *ppp, Pstate *p, Block *b)
+{
+	ulong len;
+	int err;
+	Lcpmsg *m;
+
+	if(BLEN(b) < 4){
+		netlog(ppp->f, Logppp, "ppp %s: short lcp message\n", ppp->ifc->dev);
+		freeblist(b);
+		return;
+	}
+	m = (Lcpmsg*)b->rp;
+	len = nhgets(m->len);
+	if(BLEN(b) < len){
+		netlog(ppp->f, Logppp, "ppp %s: short lcp message\n", ppp->ifc->dev);
+		freeblist(b);
+		return;
+	}
+
+	netlog(ppp->f, Logppp, "ppp: %ux rcv %d len %d id %d/%d/%d\n",
+		p->proto, m->code, len, m->id, p->confid, p->id);
+
+	if(p->proto != Plcp && ppp->lcp->state != Sopened){
+		netlog(ppp->f, Logppp, "ppp: non-lcp with lcp not open\n");
+		freeblist(b);
+		return;
+	}
+
+	qlock(ppp);
+	switch(m->code){
+	case Lconfreq:
+		/* flush the output queue (only when there is a control channel to ask) */
+		if(p->state == Sopened && p->proto == Plcp && ppp->cchan != nil)
+			kchanio(ppp->cchan, "f", 1, OWRITE);
+
+		printopts(ppp, p, b, 0);
+		err = getopts(ppp, p, b);	/* replies with ack, nak or reject */
+		if(err < 0)
+			break;
+
+		if(m->id == p->rcvdconfid)
+			break;			/* don't change state for duplicates */
+		p->rcvdconfid = m->id;
+
+		switch(p->state){
+		case Sackrcvd:
+			if(err)
+				break;
+			newstate(ppp, p, Sopened);
+			break;
+		case Sclosed:
+		case Sopened:
+			/* the peer is (re)negotiating: send a fresh request of our own */
+			config(ppp, p, 1);
+			if(err == 0)
+				newstate(ppp, p, Sacksent);
+			else
+				newstate(ppp, p, Sreqsent);
+			break;
+		case Sreqsent:
+		case Sacksent:
+			if(err == 0)
+				newstate(ppp, p, Sacksent);
+			else
+				newstate(ppp, p, Sreqsent);
+			break;
+		}
+		break;
+	case Lconfack:
+		if(p->confid != m->id){
+			/* ignore if it isn't the message we're sending */
+			netlog(ppp->f, Logppp, "ppp: dropping confack\n");
+			break;
+		}
+		p->confid = -1;		/* ignore duplicates */
+		p->id++;		/* avoid sending duplicates */
+
+		switch(p->state){
+		case Sopened:
+		case Sackrcvd:
+			config(ppp, p, 1);
+			newstate(ppp, p, Sreqsent);
+			break;
+		case Sreqsent:
+			newstate(ppp, p, Sackrcvd);
+			break;
+		case Sacksent:
+			newstate(ppp, p, Sopened);
+			break;
+		}
+		break;
+	case Lconfrej:
+	case Lconfnak:
+		if(p->confid != m->id) {
+			/* ignore if it isn't the message we're sending */
+			netlog(ppp->f, Logppp, "ppp: dropping confrej or confnak\n");
+			break;
+		}
+		p->confid = -1;		/* ignore duplicates */
+		p->id++;		/* avoid sending duplicates */
+
+		switch(p->state){
+		case Sopened:
+		case Sackrcvd:
+			config(ppp, p, 1);
+			newstate(ppp, p, Sreqsent);
+			break;
+		case Sreqsent:
+		case Sacksent:
+			printopts(ppp, p, b, 0);
+			rejopts(ppp, p, b, m->code);	/* adjust what we ask for, then ask again */
+			config(ppp, p, 1);
+			break;
+		}
+		break;
+	case Ltermreq:
+		m->code = Ltermack;	/* the request is turned into its own ack and sent back */
+		b = putframe(ppp, p->proto, b);
+
+		switch(p->state){
+		case Sackrcvd:
+		case Sacksent:
+			newstate(ppp, p, Sreqsent);
+			break;
+		case Sopened:
+			newstate(ppp, p, Sclosing);
+			break;
+		}
+		break;
+	case Ltermack:
+		if(p->termid != m->id)	/* ignore if it isn't the message we're sending */
+			break;
+
+		if(p->proto == Plcp)
+			ppp->ipcp->state = Sclosed;
+		switch(p->state){
+		case Sclosing:
+			newstate(ppp, p, Sclosed);
+			break;
+		case Sackrcvd:
+			newstate(ppp, p, Sreqsent);
+			break;
+		case Sopened:
+			config(ppp, p, 0);
+			newstate(ppp, p, Sreqsent);
+			break;
+		}
+		break;
+	case Lcoderej:
+		netlog(ppp->f, Logppp, "ppp %s: code reject %d\n", ppp->ifc->dev, m->data[0]);
+		break;
+	case Lprotorej:
+		netlog(ppp->f, Logppp, "ppp %s: proto reject %lux\n", ppp->ifc->dev, nhgets(m->data));
+		break;
+	case Lechoreq:
+		m->code = Lechoack;	/* echo the same block back as the reply */
+		b = putframe(ppp, p->proto, b);
+		break;
+	case Lechoack:
+	case Ldiscard:
+		/* nothing to do */
+		break;
+	}
+
+	qunlock(ppp);
+	freeblist(b);
+}
+
+/*
+ *  timer for protocol state machine: one tick for p; nothing happens in Sopened or Sclosed
+ */
+static void
+ptimer(PPP *ppp, Pstate *p)
+{
+	if(p->state == Sopened || p->state == Sclosed)
+		return;
+
+	p->timeout--;	/* config() with a new id reloads this with Timeout */
+	switch(p->state){
+	case Sclosing:
+		sendtermreq(ppp, p);
+		break;
+	case Sreqsent:
+	case Sacksent:
+		if(p->timeout <= 0){	/* out of retries: give up on this protocol */
+			if(p->proto && ppp->cchan != nil)	/* NOTE(review): p->proto is Plcp or Pipcp wherever this file sets it, so this test looks always true — confirm intent */
+				kchanio(ppp->cchan, "f", 1, OWRITE); /* flush output queue */
+			newstate(ppp, p, Sclosed);
+		} else {
+			config(ppp, p, 0);	/* resend the request under the same id */
+		}
+		break;
+	case Sackrcvd:
+		if(p->timeout <= 0){
+			if(p->proto && ppp->cchan != nil)
+				kchanio(ppp->cchan, "f", 1, OWRITE); /* flush output queue */
+			newstate(ppp, p, Sclosed);
+		}
+		else {
+			config(ppp, p, 0);
+			newstate(ppp, p, Sreqsent);
+		}
+		break;
+	}
+}
+
+/*
+ *  timer for ppp: kproc body started by init(); wakes every Period and never returns
+ */
+static void
+ppptimer(void *arg)
+{
+	PPP *ppp;
+
+	ppp = arg;
+	ppp->timep = up;	/* remember which process is the timer */
+	if(waserror()){	/* any error raised in the loop below ends this kproc */
+		netlog(ppp->f, Logppp, "ppptimer: %I: %s\n", ppp->local, up->env->errstr);
+		ppp->timep = 0;
+		pexit("hangup", 1);
+	}
+	for(;;){
+		tsleep(&up->sleep, return0, nil, Period);
+		if(ppp->pppup){	/* idle while the link is down */
+			qlock(ppp);
+
+			ptimer(ppp, ppp->lcp);
+			if(ppp->lcp->state == Sopened)	/* IPCP is only timed once LCP is open */
+				ptimer(ppp, ppp->ipcp);
+
+			if(ppp->period && --(ppp->timeout) <= 0){	/* periodic quality report; period is set in getopts() */
+				ppp->timeout = ppp->period;
+				putlqm(ppp);
+			}
+
+			qunlock(ppp);
+		}
+	}
+}
+
+static void	/* best effort: write "add 0 0 <gate>" to this stack's iproute file */
+setdefroute(PPP *ppp, Ipaddr gate)
+{
+	char file[128], cmd[128];
+	int rfd, len;
+
+	snprint(file, sizeof file, "#I%d/iproute", ppp->f->dev);
+	rfd = kopen(file, ORDWR);
+	if(rfd >= 0){
+		len = snprint(cmd, sizeof(cmd), "add 0 0 %I", gate);
+		kwrite(rfd, cmd, len);	/* result not checked */
+		kclose(rfd);
+	}
+}
+
+static void	/* write "connect <local> 255.255.255.255 <remote>" to the interface's ctl file */
+ipconnect(PPP *ppp)
+{
+	char file[128], cmd[128];
+	int cfd, len;
+
+	snprint(file, sizeof file, "#I%d/ipifc/%d/ctl", ppp->f->dev, ppp->ifc->conv->x);
+	cfd = kopen(file, ORDWR);
+	if(cfd >= 0){
+		len = snprint(cmd, sizeof(cmd), "connect %I 255.255.255.255 %I", ppp->local, ppp->remote);
+		if(kwrite(cfd, cmd, len) != len)	/* a short or failed write is only reported */
+			print("ppp ipconnect: %s: %r\n", cmd);
+		kclose(cfd);
+	}
+}
+
+PPP*	/* set up ppp, open the line dev (and dev+"ctl" when it exists), start negotiating; returns ppp, or nil if dev cannot be opened */
+pppopen(PPP *ppp, char *dev,
+	Ipaddr ipaddr, Ipaddr remip,
+	int mtu, int framing,
+	char *chapname, char *secret)
+{
+	int fd, cfd;
+	char ctl[Maxpath];
+
+	invalidate(ppp->remote);
+	invalidate(ppp->local);
+	invalidate(ppp->dns1);
+	invalidate(ppp->dns2);
+	ppp->mtu = Defmtu;
+	ppp->mru = mtu;
+	ppp->framing = framing;
+
+	if(remip != nil && validv4(remip)){	/* addresses supplied by the caller are frozen: pinit() keeps them */
+		ipmove(ppp->remote, remip);
+		ppp->remotefrozen = 1;
+	}
+	if(ipaddr != nil && validv4(ipaddr)){
+		ipmove(ppp->local, ipaddr);
+		ppp->localfrozen = 1;
+	}
+
+	/* authentication goo: both copies are always NUL-terminated, over-long values are truncated */
+	memset(ppp->secret, 0, sizeof(ppp->secret));
+	if(secret != nil)
+		strncpy(ppp->secret, secret, sizeof(ppp->secret)-1);
+	memset(ppp->chapname, 0, sizeof(ppp->chapname));
+	if(chapname != nil)
+		strncpy(ppp->chapname, chapname, sizeof(ppp->chapname)-1);
+
+	if(strchr(dev, '!'))	/* a '!' marks a dial string rather than a file name */
+		fd = kdial(dev, nil, nil, nil);
+	else
+		fd = kopen(dev, ORDWR);
+	if(fd < 0){
+		netlog(ppp->f, Logppp, "ppp: can't open %s\n", dev);
+		return nil;
+	}
+	ppp->dchan = fdtochan(up->env->fgrp, fd, ORDWR, 0, 1);	/* keep the channel, give the fd back */
+	kclose(fd);
+
+	/* set up serial line */
+/* XXX this stuff belongs in application, not driver */
+	snprint(ctl, sizeof(ctl), "%sctl", dev);	/* bounded: dev may be longer than Maxpath */
+	cfd = kopen(ctl, ORDWR);
+	if(cfd >= 0){	/* no ctl file is not an error; cchan then stays as it was */
+		ppp->cchan = fdtochan(up->env->fgrp, cfd, ORDWR, 0, 1);
+		kclose(cfd);
+		kchanio(ppp->cchan, "m1", 2, OWRITE);	/* cts/rts flow control (and fifos) on */
+		kchanio(ppp->cchan, "q64000", 6, OWRITE);/* increase q size to 64k */
+		kchanio(ppp->cchan, "n1", 2, OWRITE);	/* nonblocking writes on */
+		kchanio(ppp->cchan, "r1", 2, OWRITE);	/* rts on */
+		kchanio(ppp->cchan, "d1", 2, OWRITE);	/* dtr on */
+	}
+
+	ppp->pppup = 1;
+	init(ppp);
+	return ppp;
+}
+
+static void	/* drop the line: flush and hang up through cchan, close both channels, mark the link down */
+hangup(PPP *ppp)
+{
+	qlock(ppp);
+	if(waserror()){	/* release the lock if anything below raises an error */
+		qunlock(ppp);
+		nexterror();
+	}
+	netlog(ppp->f, Logppp, "PPP Hangup\n");
+	errlog(ppp, Ehungup);
+	if(ppp->pppup && ppp->cchan != nil){
+		kchanio(ppp->cchan, "f", 1, OWRITE);	/* flush */
+		kchanio(ppp->cchan, "h", 1, OWRITE);	/* hangup */
+	}
+	cclose(ppp->dchan);	/* NOTE(review): either channel may already be nil here; presumably cclose() tolerates that — confirm */
+	cclose(ppp->cchan);
+	ppp->dchan = nil;
+	ppp->cchan = nil;
+	ppp->pppup = 0;	/* ppptimer() skips its work while this is 0 */
+	qunlock(ppp);
+	poperror();
+}
+
+/* return next input IP packet; control frames are handled here on the way. returns nil when getframe() yields no block */
+Block*
+pppread(PPP *ppp)
+{
+	Block *b;
+	int proto;
+	Lcpmsg *m;
+
+	for(;;){
+		proto = getframe(ppp, &b);
+		if(b == nil)
+			return nil;
+		netlog(ppp->f, Logppp, "ppp: read proto %d len %d\n", proto, blocklen(b));
+		switch(proto){
+		case Plcp:
+			rcv(ppp, ppp->lcp, b);	/* rcv() consumes b */
+			break;
+		case Pipcp:
+			rcv(ppp, ppp->ipcp, b);
+			break;
+		case Pip:
+			if(ppp->ipcp->state == Sopened)
+				return b;
+			freeblist(b);	/* IP before IPCP is open is dropped */
+			break;
+		case Plqm:
+			getlqm(ppp, b);
+			break;
+		case Pchap:
+			getchap(ppp, b);
+			break;
+		case Ppap:
+			getpap(ppp, b);
+			break;
+		case Pvjctcp:
+		case Pvjutcp:
+			if(ppp->ipcp->state == Sopened){
+				b = tcpuncompress(ppp->ctcp, b, proto, ppp->f);
+				if(b != nil)
+					return b;
+			}
+			freeblist(b);	/* b is nil here if tcpuncompress() returned nil */
+			break;
+		default:
+			netlog(ppp->f, Logppp, "unknown proto %ux\n", proto);
+			if(ppp->lcp->state == Sopened){
+				/* reject the protocol */
+				b->rp -= 6;	/* room for the 4-byte header and the 2-byte protocol; NOTE(review): assumes 6 usable bytes before rp */
+				m = (Lcpmsg*)b->rp;
+				m->code = Lprotorej;
+				m->id = ++ppp->lcp->id;
+				hnputs(m->data, proto);
+				hnputs(m->len, BLEN(b));
+				b = putframe(ppp, Plcp, b);
+			}
+			freeblist(b);
+			break;
+		}
+	}
+	return nil;		/* compiler confused */
+}
+
+/* transmit an IP packet */
+int
+pppwrite(PPP *ppp, Block *b)
+{
+	int len;
+	ushort wire;
+
+	qlock(ppp);
+
+	/*
+	 *  the packet goes out only if IPCP is open and the data channel is still there;
+	 *  otherwise it is dropped, and its length is still returned below
+	 */
+	if(ppp->ipcp->state == Sopened && ppp->dchan != nil){
+		b = concatblock(b);		/* or else compression will barf */
+
+		if(ppp->ipcp->flags & Fipcompress)
+			wire = compress(ppp->ctcp, b, ppp->f);
+		else
+			wire = Pip;
+		b = putframe(ppp, wire, b);
+	}
+
+	qunlock(ppp);
+
+	/*
+	 *  report and free whatever block we are left holding
+	 */
+	len = blocklen(b);
+	netlog(ppp->f, Logppp, "ppp wrt len %d\n", len);
+
+	freeblist(b);
+	return len;
+}
+
+/*
+ *  link quality management: take in a quality report from the peer
+ */
+static void
+getlqm(PPP *ppp, Block *b)
+{
+	Qualpkt *rpt;
+
+	rpt = (Qualpkt*)b->rp;
+	if(BLEN(b) == sizeof(Qualpkt)){	/* a report of any other size is ignored */
+		/* the peer's "in" counters */
+		ppp->pin.reports = nhgetl(rpt->peerinreports);
+		ppp->pin.packets = nhgetl(rpt->peerinpackets);
+		ppp->pin.discards = nhgetl(rpt->peerindiscards);
+		ppp->pin.errors = nhgetl(rpt->peerinerrors);
+		ppp->pin.bytes = nhgetl(rpt->peerinbytes);
+		/* the peer's "out" counters */
+		ppp->pout.reports = nhgetl(rpt->peeroutreports);
+		ppp->pout.packets = nhgetl(rpt->peeroutpackets);
+		ppp->pout.bytes = nhgetl(rpt->peeroutbytes);
+		ppp->in.reports++;
+		/* save our numbers at time of reception */
+		memmove(&ppp->sin, &ppp->in, sizeof(Qualstats));
+	}
+	freeblist(b);
+	if(ppp->period == 0)	/* no periodic reports from ppptimer(): send one now */
+		putlqm(ppp);
+
+}
+static void	/* build a Qualpkt from the current counters and send it as a Plqm frame */
+putlqm(PPP *ppp)
+{
+	Block *blk;
+	Qualpkt *rpt;
+
+	blk = allocb(sizeof(Qualpkt));
+	blk->wp += sizeof(Qualpkt);
+	rpt = (Qualpkt*)blk->rp;
+	hnputl(rpt->magic, 0);
+
+	/* hearsay: the peer's last reported "out" counters, handed back */
+	hnputl(rpt->lastoutreports, ppp->pout.reports);
+	hnputl(rpt->lastoutpackets, ppp->pout.packets);
+	hnputl(rpt->lastoutbytes, ppp->pout.bytes);
+
+	/* our "in" counters as saved by getlqm() at the last report */
+	hnputl(rpt->peerinreports, ppp->sin.reports);
+	hnputl(rpt->peerinpackets, ppp->sin.packets);
+	hnputl(rpt->peerindiscards, ppp->sin.discards);
+	hnputl(rpt->peerinerrors, ppp->sin.errors);
+	hnputl(rpt->peerinbytes, ppp->sin.bytes);
+
+	/* our "out" counters, adjusted up front to include this report */
+	hnputl(rpt->peeroutreports, ppp->out.reports+1);
+	hnputl(rpt->peeroutpackets, ppp->out.packets+1);
+	hnputl(rpt->peeroutbytes, ppp->out.bytes+53/*hack*/);
+
+	blk = putframe(ppp, Plqm, blk);
+	freeblist(blk);
+	ppp->out.reports++;
+}
+
+/*
+ *  challenge response dialog: answers a challenge, acts on success/failure.  b is always consumed.
+ */
+static void
+getchap(PPP *ppp, Block *b)
+{
+	Lcpmsg *m;
+	int len, vlen, n;
+	char md5buf[512];
+
+	m = (Lcpmsg*)b->rp;
+	len = BLEN(b) >= 4 ? nhgets(m->len) : 0;	/* never read a header that is not all there */
+	if(len < 4 || BLEN(b) < len){
+		netlog(ppp->f, Logppp, "ppp %s: short chap message\n", ppp->ifc->dev);
+		freeblist(b);
+		return;
+	}
+
+	switch(m->code){
+	case Cchallenge:
+		vlen = m->data[0];
+		if(vlen > len - 5 || 1 + strlen(ppp->secret) + vlen > sizeof(md5buf)){	/* value must fit the message and md5buf */
+			netlog(ppp->f, Logppp, "PPP %s: bad challenge len\n", ppp->ifc->dev);
+			break;		/* b is freed once, after the switch */
+		}
+
+		/* the secret itself is deliberately kept out of the log */
+		netlog(ppp->f, Logppp, "PPP %s: CHAP Challenge\n", ppp->ifc->dev);
+		netlog(ppp->f, Logppp, "(chapname %s id %d)\n", ppp->chapname, m->id);
+		/* create string to hash */
+		md5buf[0] = m->id;
+		strcpy(md5buf+1, ppp->secret);
+		n = strlen(ppp->secret) + 1;
+		memmove(md5buf+n, m->data+1, vlen);
+		n += vlen;
+		freeblist(b);	/* done with the challenge; b is reused for the reply */
+
+		/* send reply */
+		len = 4 + 1 + 16 + strlen(ppp->chapname);
+		b = alloclcp(2, md5buf[0], len);
+		m = IPB2LCP(b);
+		m->data[0] = 16;
+		md5((uchar*)md5buf, n, m->data+1, 0);
+		memmove((char*)m->data+17, ppp->chapname, strlen(ppp->chapname));
+		hnputs(m->len, len);
+		b->wp += len-4;
+		b = putframe(ppp, Pchap, b);
+		break;
+	case Cresponse:
+		netlog(ppp->f, Logppp, "PPP %s: chap response?\n", ppp->ifc->dev);
+		break;
+	case Csuccess:
+		netlog(ppp->f, Logppp, "PPP %s: chap succeeded\n", ppp->ifc->dev);
+		setphase(ppp, Pnet);
+		break;
+	case Cfailure:
+		netlog(ppp->f, Logppp, "PPP %s: chap failed: %.*s\n", ppp->ifc->dev, len-4, m->data);
+		errlog(ppp, Eperm);
+		break;
+	default:
+		netlog(ppp->f, Logppp, "PPP %s: chap code %d?\n", ppp->ifc->dev, m->code);
+		break;
+	}
+	freeblist(b);
+}
+
+/*
+ *  password authentication protocol dialog
+ *	-- obsolete but all we know how to use with NT just now
+ */
+static void
+sendpap(PPP *ppp)
+{
+	Block *req;
+	Lcpmsg *hdr;
+	uchar *w;
+	int namelen, pwlen, total;
+
+	namelen = strlen(ppp->chapname);
+	pwlen = strlen(ppp->secret);
+	total = 4 + 1 + namelen + 1 + pwlen;	/* header, then two length-prefixed strings */
+	ppp->papid = ++ppp->lcp->id;
+	req = alloclcp(Cpapreq, ppp->papid, total);
+	hdr = IPB2LCP(req);
+	w = hdr->data;
+	*w++ = namelen;
+	memmove(w, ppp->chapname, namelen);
+	w += namelen;
+	*w++ = pwlen;
+	memmove(w, ppp->secret, pwlen);
+	hnputs(hdr->len, total);
+	req->wp += total-4;
+	req = putframe(ppp, Ppap, req);
+	netlog(ppp->f, Logppp, "PPP %s: sent pap auth req (%d)\n", ppp->ifc->dev, total);
+	freeblist(req);
+}
+
+static void
+getpap(PPP *ppp, Block *b)
+{
+	Lcpmsg *m;
+	int len;
+	/* handle one received PAP packet; b is freed on every path */
+	m = (Lcpmsg*)b->rp;
+	len = nhgets(m->len);	/* length claimed by the packet header */
+	if(BLEN(b) < len){
+		netlog(ppp->f, Logppp, "ppp %s: short pap message\n", ppp->ifc->dev);
+		freeblist(b);
+		return;
+	}
+
+	switch(m->code){
+	case Cpapreq:		/* only logged, otherwise ignored */
+		netlog(ppp->f, Logppp, "PPP %s: pap request?\n", ppp->ifc->dev);
+		break;
+	case Cpapack:		/* accepted: move on to the network phase */
+		netlog(ppp->f, Logppp, "PPP %s: PAP succeeded\n", ppp->ifc->dev);
+		setphase(ppp, Pnet);
+		break;
+	case Cpapnak:
+		if(m->data[0])	/* non-zero first data byte: log len-5 bytes of text from data+1 */
+			netlog(ppp->f, Logppp, "PPP %s: PAP failed: %.*s\n", ppp->ifc->dev, len-5, m->data+1);
+		else
+			netlog(ppp->f, Logppp, "PPP %s: PAP failed\n", ppp->ifc->dev);
+		errlog(ppp, Eperm);
+		break;
+	default:
+		netlog(ppp->f, Logppp, "PPP %s: pap code %d?\n", ppp->ifc->dev, m->code);
+		break;
+	}
+	freeblist(b);
+}
+
+/* log the options carried in configure packet b for protocol p (debug aid);
+ * send selects the "send"/"recv" tag; nothing but netlog output is produced */
+static void
+printopts(PPP *ppp, Pstate *p, Block *b, int send)
+{
+	Lcpmsg *m;
+	Lcpopt *o;
+	int proto, x, period;
+	uchar *cp;
+	char *code, *dir;
+
+	m = (Lcpmsg*)b->rp;
+	switch(m->code) {
+	default: code = "<unknown>"; break;
+	case Lconfreq: code = "confrequest"; break;
+	case Lconfack: code = "confack"; break;
+	case Lconfnak: code = "confnak"; break;
+	case Lconfrej: code = "confreject"; break;
+	}
+
+	dir = send ? "send" : "recv";
+
+	netlog(ppp->f, Logppp, "ppp: %s %s: id=%d\n", dir, code, m->id);
+
+	for(cp = m->data; cp < b->wp; cp += o->len){
+		o = (Lcpopt*)cp;
+		if(cp + o->len > b->wp || o->len == 0){
+			netlog(ppp->f, Logppp, "\tbad option length %ux\n", o->type);
+			return;
+		}
+
+		switch(p->proto){
+		case Plcp:
+			switch(o->type){
+			default:
+				netlog(ppp->f, Logppp, "\tunknown %d len=%d\n", o->type, o->len);
+				break;
+			case Omtu:
+				netlog(ppp->f, Logppp, "\tmtu = %d\n", nhgets(o->data));
+				break;
+			case Octlmap:
+				netlog(ppp->f, Logppp, "\tctlmap = %ux\n", nhgetl(o->data));
+				break;
+			case Oauth:
+				netlog(ppp->f, Logppp, "\tauth = ");
+				proto = nhgets(o->data);
+				switch(proto) {
+				default:
+					netlog(ppp->f, Logppp, "unknown auth proto %d\n", proto);
+					break;
+				case Ppap:
+					netlog(ppp->f, Logppp, "password\n");
+					break;
+				case Pchap:
+					netlog(ppp->f, Logppp, "chap %ux\n", o->data[2]);
+					break;
+				}
+				break;
+			case Oquality:
+				proto = nhgets(o->data);
+				switch(proto) {
+				default:
+					netlog(ppp->f, Logppp, "\tunknown quality proto %d\n", proto);
+					break;
+				case Plqm:
+					x = nhgetl(o->data+2)*10;
+					period = (x+Period-1)/Period;
+					netlog(ppp->f, Logppp, "\tlqm period = %d\n", period);
+					break;
+				}
+				break;	/* keep Oquality from falling into the Omagic case */
+			case Omagic:
+				netlog(ppp->f, Logppp, "\tmagic = %ux\n", nhgetl(o->data));
+				break;
+			case Opc:
+				netlog(ppp->f, Logppp, "\tprotocol compress\n");
+				break;
+			case Oac:
+				netlog(ppp->f, Logppp, "\taddr compress\n");
+				break;
+			}
+			break;
+		case Pccp:
+			switch(o->type){
+			default:
+				netlog(ppp->f, Logppp, "\tunknown %d len=%d\n", o->type, o->len);
+				break;
+			case Ocoui:	
+				netlog(ppp->f, Logppp, "\tOUI\n");
+				break;
+			case Ocstac:
+				netlog(ppp->f, Logppp, "\tstac LZS\n");
+				break;
+			case Ocmppc:	
+				netlog(ppp->f, Logppp, "\tMicrosoft PPC len=%d %ux\n", o->len, nhgetl(o->data));
+				break;
+			}
+			break;
+		case Pecp:
+			switch(o->type){
+			default:
+				netlog(ppp->f, Logppp, "\tunknown %d len=%d\n", o->type, o->len);
+				break;
+			case Oeoui:	
+				netlog(ppp->f, Logppp, "\tOUI\n");
+				break;
+			case Oedese:
+				netlog(ppp->f, Logppp, "\tDES\n");
+				break;
+			}
+			break;
+		case Pipcp:
+			switch(o->type){
+			default:
+				netlog(ppp->f, Logppp, "\tunknown %d len=%d\n", o->type, o->len);
+				break;
+			case Oipaddrs:	
+				netlog(ppp->f, Logppp, "\tip addrs - deprecated\n");
+				break;
+			case Oipcompress:
+				netlog(ppp->f, Logppp, "\tip compress\n");
+				break;
+			case Oipaddr:	
+				netlog(ppp->f, Logppp, "\tip addr %V\n", o->data);
+				break;
+			case Oipdns:
+				netlog(ppp->f, Logppp, "\tdns addr %V\n", o->data);
+				break;
+			case Oipwins:	
+				netlog(ppp->f, Logppp, "\twins addr %V\n", o->data);
+				break;
+			case Oipdns2:
+				netlog(ppp->f, Logppp, "\tdns2 addr %V\n", o->data);
+				break;
+			case Oipwins2:	
+				netlog(ppp->f, Logppp, "\twins2 addr %V\n", o->data);
+				break;
+			}
+			break;
+		}
+	}
+}
+
+static void
+sendtermreq(PPP *ppp, Pstate *p)
+{
+	Block *b;
+	Lcpmsg *m;
+	/* send a terminate request for protocol p and move it to Sclosing */
+	p->termid = ++(p->id);		/* remember the id of this terminate request */
+	b = alloclcp(Ltermreq, p->termid, 4);
+	m = IPB2LCP(b);
+	hnputs(m->len, 4);		/* header only, no data */
+	putframe(ppp, p->proto, b);	/* NOTE(review): other senders free the block putframe returns -- confirm ignoring it is safe */
+	freeb(b);
+	newstate(ppp, p, Sclosing);
+}
+
+static void
+sendechoreq(PPP *ppp, Pstate *p)
+{
+	Block *b;
+	Lcpmsg *m;
+	/* send an echo request (header only) for protocol p; state is unchanged */
+	p->termid = ++(p->id);		/* NOTE(review): termid is documented as the terminate-request id; storing the echo id here looks copied from sendtermreq -- confirm */
+	b = alloclcp(Lechoreq, p->id, 4);
+	m = IPB2LCP(b);
+	hnputs(m->len, 4);		/* header only, no data */
+	putframe(ppp, p->proto, b);	/* NOTE(review): return value ignored, unlike the chap/pap senders -- confirm */
+	freeb(b);
+}
+
+/*
+ *  1 if addr begins with the v4-in-v6 prefix (first IPv4off bytes), else 0
+ */
+static int
+validv4(Ipaddr addr)
+{
+	return !memcmp(addr, v4prefix, IPv4off);
+}
+
+static void
+invalidate(Ipaddr addr)
+{
+	ipmove(addr, IPnoaddr);	/* overwrite addr in place with IPnoaddr */
+}
--- /dev/null
+++ b/os/ip.original/ppp.h
@@ -1,0 +1,258 @@
+typedef struct PPP	PPP;
+typedef struct Pstate	Pstate;
+typedef struct Lcpmsg	Lcpmsg;
+typedef struct Lcpopt	Lcpopt;
+typedef struct Qualpkt	Qualpkt;
+typedef struct Qualstats Qualstats;
+typedef struct Tcpc	Tcpc;
+
+typedef uchar Ipaddr[IPaddrlen];
+
+enum
+{
+	HDLC_frame=	0x7e,		/* byte that delimits frames */
+	HDLC_esc=	0x7d,		/* escape byte inside a frame */
+
+	/* PPP frame fields */
+	PPP_addr=	0xff,
+	PPP_ctl=	0x3,
+	PPP_initfcs=	0xffff,		/* starting value of the frame check sequence */
+	PPP_goodfcs=	0xf0b8,		/* check sequence value of an undamaged frame */
+
+	/* PPP phases */
+	Pdead=		0,	
+	Plink,				/* doing LCP */
+	Pauth,				/* doing chap or pap */
+	Pnet,				/* doing IPCP, CCP */
+	Pterm,				/* closing down */
+
+	/* PPP protocol types */
+	Pip=		0x21,		/* internet */
+	Pvjctcp=	0x2d,		/* compressing van jacobson tcp */
+	Pvjutcp=	0x2f,		/* uncompressing van jacobson tcp */
+	Pcdata=		0xfd,		/* compressed datagram */
+	Pipcp=		0x8021,		/* ip control */
+	Pecp=		0x8053,		/* encryption control */
+	Pccp=		0x80fd,		/* compressed datagram control */
+	Plcp=		0xc021,		/* link control */
+	Ppap=		0xc023,		/* password auth. protocol */
+	Plqm=		0xc025,		/* link quality monitoring */
+	Pchap=		0xc223,		/* challenge/response */
+
+	/* LCP codes */
+	Lconfreq=	1,
+	Lconfack=	2,
+	Lconfnak=	3,
+	Lconfrej=	4,
+	Ltermreq=	5,
+	Ltermack=	6,
+	Lcoderej=	7,
+	Lprotorej=	8,
+	Lechoreq=	9,
+	Lechoack=	10,
+	Ldiscard=	11,
+
+	/* Lcp configure options */
+	Omtu=		1,
+	Octlmap=	2,
+	Oauth=		3,
+	Oquality=	4,
+	Omagic=		5,
+	Opc=		7,
+	Oac=		8,
+	Obad=		12,		/* for testing */
+
+	/* authentication protocols */
+	APmd5=		5,
+
+	/* lcp flags: one bit per option, at the option's number */
+	Fmtu=		1<<Omtu,
+	Fctlmap=	1<<Octlmap,
+	Fauth=		1<<Oauth,
+	Fquality=	1<<Oquality,
+	Fmagic=		1<<Omagic,
+	Fpc=		1<<Opc,
+	Fac=		1<<Oac,
+	Fbad=		1<<Obad,
+
+	/* Chap codes */
+	Cchallenge=	1,
+	Cresponse=	2,
+	Csuccess=	3,
+	Cfailure=	4,
+
+	/* Pap codes */
+	Cpapreq=		1,
+	Cpapack=		2,
+	Cpapnak=		3,
+
+	/* link states */
+	Sclosed=		0,
+	Sclosing,
+	Sreqsent,
+	Sackrcvd,
+	Sacksent,
+	Sopened,
+
+	/* ccp configure options */
+	Ocoui=		0,	/* proprietary compression */
+	Ocstac=		17,	/* stac electronics LZS */
+	Ocmppc=		18,	/* microsoft ppc */
+
+	/* ccp flags */
+	Fcoui=		1<<Ocoui,
+	Fcstac=		1<<Ocstac,
+	Fcmppc=		1<<Ocmppc,
+
+	/* ecp configure options */
+	Oeoui=		0,	/* proprietary encryption (OUI) */
+	Oedese=		1,	/* DES */
+
+	/* ecp flags */
+	Feoui=		1<<Oeoui,
+	Fedese=		1<<Oedese,
+
+	/* ipcp configure options */
+	Oipaddrs=	1,
+	Oipcompress=	2,
+	Oipaddr=	3,
+	Oipdns=		129,
+	Oipwins=	130,
+	Oipdns2=	131,
+	Oipwins2=	132,
+
+	/* ipcp flags */
+	Fipaddrs=	1<<Oipaddrs,
+	Fipcompress=	1<<Oipcompress,
+	Fipaddr=	1<<Oipaddr,
+
+	Period=		3*1000,	/* period of retransmit process (in ms) */
+	Timeout=	10,	/* xmit timeout (in Periods) */
+
+	MAX_STATES	= 16,		/* van jacobson compression states */
+	Defmtu=		1450,		/* default that we will ask for */
+	Minmtu=		128,		/* minimum that we will accept */
+	Maxmtu=		2000,		/* maximum that we will accept */
+};
+
+
+struct Pstate				/* negotiation state of one control protocol */
+{
+	int	proto;		/* protocol type */
+	int	timeout;		/* for current state */
+	int	rxtimeout;	/* for current retransmit */
+	ulong	flags;		/* options received */
+	uchar	id;		/* id of current message */
+	uchar	confid;		/* id of current config message */
+	uchar	termid;		/* id of current termination message */
+	uchar	rcvdconfid;	/* id of last conf message received */
+	uchar	state;		/* PPP link state */
+	ulong	optmask;		/* which options to request */
+	int	echoack;	/* received echo ack */
+	int	echotimeout;	/* echo timeout */
+};
+
+struct Qualstats			/* one set of link quality counters (see the in/out/pin/pout/sin members of PPP) */
+{
+	ulong	reports;
+	ulong	packets;
+	ulong	bytes;
+	ulong	discards;
+	ulong	errors;
+};
+
+struct PPP
+{
+	QLock;				/* unnamed lock member for the whole structure */
+
+	Chan*	dchan;			/* serial line */
+	Chan*	cchan;			/* serial line control */
+	int		framing;	/* non-zero to use framing characters */
+	Ipaddr	local;
+	int		localfrozen;
+	Ipaddr	remote;
+	int		remotefrozen;
+
+	int	pppup;
+	Fs	*f;		/* file system we belong to */
+	Ipifc*	ifc;
+	Proc*	readp;			/* reading process */
+	Proc*	timep;			/* timer process */
+	Block*	inbuf;			/* input buffer */
+	Block*	outbuf;			/* output buffer */
+	QLock	outlock;		/*  and its lock */
+
+	ulong	magic;			/* magic number to detect loop backs */
+	ulong	rctlmap;		/* map of chars to ignore in rcvr */
+	ulong	xctlmap;		/* map of chars to escape in xmit */
+	int		phase;		/* PPP phase */
+	Pstate*	lcp;			/* lcp state */
+	Pstate*	ipcp;			/* ipcp state */
+	char	secret[256];		/* md5 key */
+	char	chapname[256];		/* chap system name */
+	Tcpc*	ctcp;
+	ulong		mtu;		/* maximum xmit size */
+	ulong		mru;		/* maximum recv size */
+
+	int	baud;
+	int	usepap;	/* authentication is PAP in every sense, not CHAP */
+	int	papid;	/* id of the most recent pap request sent */
+	int	usechap;
+
+	/* rfc (presumably the ipcp name server options -- confirm) */
+	int	usedns;
+	Ipaddr	dns1;
+	Ipaddr	dns2;
+
+	/* link quality monitoring */
+	int		period;		/* lqm period */
+	int		timeout;	/* time to next lqm packet */
+	Qualstats	in;		/* local */
+	Qualstats	out;
+	Qualstats	pin;		/* peer */
+	Qualstats	pout;
+	Qualstats	sin;		/* saved */
+};
+
+PPP*		pppopen(PPP*, char*, Ipaddr, Ipaddr, int, int, char*, char*);
+Block*	pppread(PPP*);
+int		pppwrite(PPP*, Block*);
+void		pppclose(PPP*);
+
+struct Lcpmsg			/* header shared by lcp, chap and pap packets */
+{
+	uchar	code;		/* packet type, e.g. Lconfreq, Cchallenge, Cpapreq */
+	uchar	id;		/* matches replies to requests */
+	uchar	len[2];		/* packet length including this 4-byte header, read with nhgets */
+	uchar	data[1];	/* first byte of the variable-length payload */
+};
+
+struct Lcpopt			/* one configure option inside an Lcpmsg payload */
+{
+	uchar	type;		/* option number, e.g. Omtu, Oipaddr */
+	uchar	len;		/* option length counted from type; the next option starts len bytes on */
+	uchar	data[1];	/* first byte of the option value */
+};
+
+struct Qualpkt			/* body of a link quality report; each field is 4 bytes, written with hnputl */
+{
+	uchar	magic[4];
+
+	uchar	lastoutreports[4];
+	uchar	lastoutpackets[4];
+	uchar	lastoutbytes[4];
+	uchar	peerinreports[4];
+	uchar	peerinpackets[4];
+	uchar	peerindiscards[4];
+	uchar	peerinerrors[4];
+	uchar	peerinbytes[4];
+	uchar	peeroutreports[4];	/* the sender's own output counters go in the peerout fields */
+	uchar	peeroutpackets[4];
+	uchar	peeroutbytes[4];
+};
+
+ushort	compress(Tcpc*, Block*, Fs*);
+Tcpc*	compress_init(Tcpc*);
+int		compress_negotiate(Tcpc*, uchar*);
+ushort	tcpcompress(Tcpc*, Block*, Fs*);
+Block*	tcpuncompress(Tcpc*, Block*, ushort, Fs*);
--- /dev/null
+++ b/os/ip.original/pppmedium.c
@@ -1,0 +1,192 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+
+#include "ip.h"
+#include "kernel.h"
+#include "ppp.h"
+
+static void	pppreader(void *a);
+static void	pppbind(Ipifc *ifc, int argc, char **argv);
+static void	pppunbind(Ipifc *ifc);
+static void	pppbwrite(Ipifc *ifc, Block *bp, int version, uchar *ip);
+static void	deadremote(Ipifc *ifc);
+
+Medium pppmedium =		/* medium description handed to addipmedium by pppmediumlink */
+{
+.name=	"ppp",
+.hsize=	4,			/* NOTE(review): presumably header room reserved per packet -- confirm */
+.mintu=	Minmtu,
+.maxtu=	Maxmtu,
+.maclen=	0,
+.bind=	pppbind,
+.unbind=	pppunbind,
+.bwrite=	pppbwrite,
+.unbindonclose=	0,		/* don't unbind on last close */
+};
+
+/*
+ *  called to bind an IP ifc to the ppp device named by argv[2];
+ *  starts the pppreader kproc.  called with ifc wlock'd
+ */
+static void
+pppbind(Ipifc *ifc, int argc, char **argv)
+{
+	PPP *ppp;
+	Ipaddr ipaddr, remip;
+	int mtu, framing;
+	char *chapname, *secret;
+	/* optional argv: [3] local ip, [4] remote ip, [5] mtu, [6] framing, [7] chapname, [8] secret; a leading '-' keeps the default */
+	if(argc < 3)
+		error(Ebadarg);
+
+	ipmove(ipaddr, IPnoaddr);
+	ipmove(remip, IPnoaddr);
+	mtu = Defmtu;
+	framing = 1;
+	chapname = nil;
+	secret = nil;
+
+	switch(argc){
+	default:		/* more than 9 arguments: the extras are ignored */
+	case 9:			/* every case deliberately falls through to the ones below */
+		if(argv[8][0] != '-')
+			secret = argv[8];
+	case 8:
+		if(argv[7][0] != '-')
+			chapname = argv[7];
+	case 7:
+		if(argv[6][0] != '-')
+			framing = strtoul(argv[6], 0, 0);
+	case 6:
+		if(argv[5][0] != '-')
+			mtu = strtoul(argv[5], 0, 0);
+	case 5:
+		if(argv[4][0] != '-')
+			parseip(remip, argv[4]);
+	case 4:
+		if(argv[3][0] != '-')
+			parseip(ipaddr, argv[3]);
+	case 3:
+		break;
+	}
+
+	ppp = smalloc(sizeof(*ppp));
+	ppp->ifc = ifc;
+	ppp->f = ifc->conv->p->f;
+	ifc->arg = ppp;		/* set before pppopen so the error path can find it */
+	if(waserror()){
+		pppunbind(ifc);	/* undo the partial bind, then pass the error on */
+		nexterror();
+	}
+	if(pppopen(ppp, argv[2], ipaddr, remip, mtu, framing, chapname, secret) == nil)
+		error("ppp open failed");
+	poperror();
+	kproc("pppreader", pppreader, ifc, KPDUPPG|KPDUPFDG);
+}
+
+static void
+pppreader(void *a)
+{
+	Ipifc *ifc;
+	Block *bp;
+	PPP *ppp;
+	/* kproc body: read packets from the ppp link and hand them to ipiput until an error ends it */
+	ifc = a;
+	ppp = ifc->arg;
+	ppp->readp = up;	/* hide identity under a rock for unbind */
+	setpri(PriHi);
+
+	if(waserror()){		/* any error below ends the kproc */
+		netlog(ppp->f, Logppp, "pppreader: %I: %s\n", ppp->local, up->env->errstr);
+		ppp->readp = 0;	/* pppunbind polls this to see that we are gone */
+		deadremote(ifc);
+		pexit("hangup", 1);
+	}
+
+	for(;;){
+		bp = pppread(ppp);
+		if(bp == nil)
+			error("hungup");
+		if(!canrlock(ifc)){	/* interface is busy: drop the packet rather than wait */
+			freeb(bp);
+			continue;
+		}
+		if(waserror()){		/* release the read lock if ipiput raises an error */
+			runlock(ifc);
+			nexterror();
+		}
+		ifc->in++;
+		if(ifc->lifc == nil)	/* no logical interface to deliver to: discard */
+			freeb(bp);
+		else
+			ipiput(ppp->f, ifc, bp);
+		runlock(ifc);
+		poperror();
+	}
+}
+
+/*
+ *  stop the ppp kprocs and close the link; called with ifc wlock'd
+ */
+static void
+pppunbind(Ipifc *ifc)
+{
+	PPP *ppp = ifc->arg;
+
+	if(ppp == nil)		/* nothing bound, or already unbound */
+		return;
+	if(ppp->readp)
+		postnote(ppp->readp, 1, "unbind", 0);
+	if(ppp->timep)
+		postnote(ppp->timep, 1, "unbind", 0);
+
+	/* wait for kprocs to die */
+	while(ppp->readp != 0 || ppp->timep != 0)
+		tsleep(&up->sleep, return0, 0, 300);	/* poll again after 300 ms */
+
+	pppclose(ppp);
+	qclose(ifc->conv->eq);
+	ifc->arg = nil;		/* makes a second unbind a no-op */
+}
+
+/*
+ *  called by ipoput with a single packet to write with ifc rlock'd
+ */
+static void
+pppbwrite(Ipifc *ifc, Block *bp, int, uchar*)
+{
+	PPP *ppp = ifc->arg;
+
+	pppwrite(ppp, bp);	/* the version and next-hop arguments are unused; pppwrite's result is ignored */
+	ifc->out++;
+}
+
+/*
+ *	If the other end hangs up, we have to unbind the interface.  An extra
+ *	unbind (in the case where we are hanging up) won't do any harm.
+ */
+static void
+deadremote(Ipifc *ifc)
+{
+	int fd;
+	char path[128];
+	PPP *ppp;
+
+	ppp = ifc->arg;
+	snprint(path, sizeof path, "#I%d/ipifc/%d/ctl", ppp->f->dev, ifc->conv->x);	/* ctl file of this interface's conversation */
+	fd = kopen(path, ORDWR);
+	if(fd < 0)		/* cannot open the ctl file: give up silently */
+		return;
+	kwrite(fd, "unbind", sizeof("unbind")-1);	/* length excludes the terminating NUL */
+	kclose(fd);
+}
+
+void
+pppmediumlink(void)
+{
+	addipmedium(&pppmedium);	/* register the ppp medium with the ip stack */
+}
--- /dev/null
+++ b/os/ip.original/ptclbsum.c
@@ -1,0 +1,72 @@
+#include	"u.h"
+#include	"../port/lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"../port/error.h"
+#include	"ip.h"
+
+static	short	endian	= 1;
+static	uchar*	aendian	= (uchar*)&endian;
+#define	LITTLE	*aendian	/* first byte of the short 1: non-zero on a little-endian host */
+/* fold len bytes at addr into a 16-bit ones-complement sum of big-endian words; the result is not inverted */
+ushort
+ptclbsum(uchar *addr, int len)
+{
+	ulong losum, hisum, mdsum, x;
+	ulong t1, t2;
+
+	losum = 0;	/* values already in result byte order */
+	hisum = 0;	/* values whose two bytes are exchanged when merged at the end */
+	mdsum = 0;	/* 16-bit words as loaded in host byte order */
+
+	x = 0;		/* set when the buffer started on an odd address */
+	if((uintptr)addr & 1) {
+		if(len) {
+			hisum += addr[0];	/* take the odd leading byte separately so the word loads are even-aligned */
+			len--;
+			addr++;
+		}
+		x = 1;
+	}
+	while(len >= 16) {	/* eight words per pass, loads interleaved with adds */
+		t1 = *(ushort*)(addr+0);
+		t2 = *(ushort*)(addr+2);	mdsum += t1;
+		t1 = *(ushort*)(addr+4);	mdsum += t2;
+		t2 = *(ushort*)(addr+6);	mdsum += t1;
+		t1 = *(ushort*)(addr+8);	mdsum += t2;
+		t2 = *(ushort*)(addr+10);	mdsum += t1;
+		t1 = *(ushort*)(addr+12);	mdsum += t2;
+		t2 = *(ushort*)(addr+14);	mdsum += t1;
+		mdsum += t2;
+		len -= 16;
+		addr += 16;
+	}
+	while(len >= 2) {	/* remaining whole words */
+		mdsum += *(ushort*)addr;
+		len -= 2;
+		addr += 2;
+	}
+	if(x) {		/* odd start: every byte sits in the opposite half of its word */
+		if(len)
+			losum += addr[0];
+		if(LITTLE)
+			losum += mdsum;
+		else
+			hisum += mdsum;
+	} else {	/* even start: a trailing byte is the high half of a last word */
+		if(len)
+			hisum += addr[0];
+		if(LITTLE)
+			hisum += mdsum;
+		else
+			losum += mdsum;
+	}
+
+	losum += hisum >> 8;		/* merge hisum with its bytes exchanged */
+	losum += (hisum & 0xff) << 8;
+	while(hisum = losum>>16)	/* add carries above bit 15 back in until none remain */
+		losum = hisum + (losum & 0xffff);
+
+	return losum & 0xffff;
+}
--- /dev/null
+++ b/os/ip.original/rudp.c
@@ -1,0 +1,1092 @@
+/*
+ *  This protocol is compatible with UDP's packet format.
+ *  It could be done over UDP if need be.
+ */
+#include	"u.h"
+#include	"../port/lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"../port/error.h"
+
+#include	"ip.h"
+
+#define DEBUG	0	/* non-zero makes DPRINT print */
+#define DPRINT if(DEBUG)print
+/* sequence number helpers; 0 is skipped by NEXTSEQ, so it never names a packet */
+#define SEQDIFF(a,b) ( (a)>=(b)?\
+			(a)-(b):\
+			0xffffffffUL-((b)-(a)) )
+#define INSEQ(a,start,end) ( (start)<=(end)?\
+				((a)>(start)&&(a)<=(end)):\
+				((a)>(start)||(a)<=(end)) )
+#define UNACKED(r) SEQDIFF(r->sndseq, r->ackrcvd)	/* messages sent but not yet acked; r is used unparenthesized */
+#define NEXTSEQ(a) ( (a)+1 == 0 ? 1 : (a)+1 )	/* successor of a, wrapping to 1 rather than 0 */
+
+enum
+{
+	UDP_HDRSIZE	= 20,	/* pseudo header + udp header */
+	UDP_PHDRSIZE	= 12,	/* pseudo header */
+	UDP_RHDRSIZE	= 36,	/* pseudo header + udp header + rudp header */
+	UDP_IPHDR	= 8,	/* ip header bytes that precede the pseudo header */
+	IP_UDPPROTO	= 254,	/* ip protocol number registered for rudp in rudpinit */
+	UDP_USEAD7	= 52,	/* "headers++4" prefix: raddr, laddr, ifc addr, rport, lport */
+	UDP_USEAD6	= 36,	/* "headers" prefix: raddr, laddr, rport, lport */
+	UDP_USEAD4	= 12,
+
+	Rudprxms	= 200,	/* retransmit once timeout exceeds this many ms times xmits */
+	Rudptickms	= 50,	/* period of relackproc, in ms */
+	Rudpmaxxmit	= 10,
+	Maxunacked	= 100,	/* rudpkick blocks while more than this many messages are unacked */
+
+};
+
+#define Hangupgen	0xffffffff	/* used only in hangup messages */
+
+typedef struct Udphdr Udphdr;
+struct Udphdr
+{
+	/* ip header */
+	uchar	vihl;		/* Version and header length */
+	uchar	tos;		/* Type of service */
+	uchar	length[2];	/* packet length */
+	uchar	id[2];		/* Identification */
+	uchar	frag[2];	/* Fragment information */
+
+	/* pseudo header starts here */
+	uchar	Unused;		/* zeroed for the checksum; rudpiput saves and restores the received value */
+	uchar	udpproto;	/* Protocol */
+	uchar	udpplen[2];	/* Header plus data length */
+	uchar	udpsrc[4];	/* Ip source */
+	uchar	udpdst[4];	/* Ip destination */
+
+	/* udp header */
+	uchar	udpsport[2];	/* Source port */
+	uchar	udpdport[2];	/* Destination port */
+	uchar	udplen[2];	/* data length */
+	uchar	udpcksum[2];	/* Checksum */
+};
+
+typedef struct Rudphdr Rudphdr;
+struct Rudphdr			/* same leading fields as Udphdr, followed by the rudp header */
+{
+	/* ip header */
+	uchar	vihl;		/* Version and header length */
+	uchar	tos;		/* Type of service */
+	uchar	length[2];	/* packet length */
+	uchar	id[2];		/* Identification */
+	uchar	frag[2];	/* Fragment information */
+
+	/* pseudo header starts here */
+	uchar	Unused;
+	uchar	udpproto;	/* Protocol */
+	uchar	udpplen[2];	/* Header plus data length */
+	uchar	udpsrc[4];	/* Ip source */
+	uchar	udpdst[4];	/* Ip destination */
+
+	/* udp header */
+	uchar	udpsport[2];	/* Source port */
+	uchar	udpdport[2];	/* Destination port */
+	uchar	udplen[2];	/* data length (includes rudp header) */
+	uchar	udpcksum[2];	/* Checksum */
+
+	/* rudp header */
+	uchar	relseq[4];	/* id of this packet (or 0) */
+	uchar	relsgen[4];	/* generation/time stamp */
+	uchar	relack[4];	/* packet being acked (or 0) */
+	uchar	relagen[4];	/* generation/time stamp */
+};
+
+
+/*
+ *  one state structure per destination
+ */
+typedef struct Reliable Reliable;
+struct Reliable
+{
+	Ref;			/* reference count; relstate takes one for list membership */
+
+	Reliable *next;		/* next state record on the owning Rudpcb's list */
+
+	uchar	addr[IPaddrlen];	/* always V6 when put here */
+	ushort	port;
+
+	Block	*unacked;	/* unacked msg list */
+	Block	*unackedtail;	/*  and its tail */
+
+	int	timeout;	/* time since first unacked msg sent */
+	int	xmits;		/* number of times first unacked msg sent */
+
+	ulong	sndseq;		/* next packet to be sent */
+	ulong	sndgen;		/*  and its generation */
+
+	ulong	rcvseq;		/* last packet received */
+	ulong	rcvgen;		/*  and its generation */
+
+	ulong	acksent;	/* last ack sent */
+	ulong	ackrcvd;	/* last msg for which ack was rcvd */
+
+	/* flow control */
+	QLock	lock;		/* held by rudpkick around its flow-control sleep */
+	Rendez	vous;		/* rudpkick sleeps here while too many messages are unacked */
+	int	blocked;	/* set by rudpkick for the duration of that sleep */
+};
+
+
+
+/* MIB II counters */
+typedef struct Rudpstats Rudpstats;
+struct Rudpstats
+{
+	ulong	rudpInDatagrams;	/* counted for every packet entering rudpiput */
+	ulong	rudpNoPorts;		/* packets for which no conversation was found */
+	ulong	rudpInErrors;		/* packets dropped for a bad checksum */
+	ulong	rudpOutDatagrams;	/* packets queued for output by rudpkick */
+};
+
+typedef struct Rudppriv Rudppriv;
+struct Rudppriv			/* per-protocol private data, hung off Proto.priv */
+{
+	Ipht	ht;		/* conversations, looked up by address and port in rudpiput */
+
+	/* MIB counters */
+	Rudpstats	ustats;
+
+	/* non-MIB stats */
+	ulong	csumerr;		/* checksum errors */
+	ulong	lenerr;			/* short packet */
+	ulong	rxmits;			/* # of retransmissions */
+	ulong	orders;			/* # of out of order pkts */
+
+	/* keeping track of the ack kproc */
+	int	ackprocstarted;		/* set once relackproc has been started */
+	QLock	apl;			/* serializes starting it */
+};
+
+
+static ulong generation = 0;
+static Rendez rend;
+
+/*
+ *  protocol specific part of Conv
+ */
+typedef struct Rudpcb Rudpcb;
+struct Rudpcb
+{
+	QLock;			/* guards the list r and the records on it */
+	uchar	headers;	/* 0, 6 ("headers") or 7 ("headers++4"): address prefix on user data */
+	uchar	randdrop;	/* percentage of output packets to drop; 0 disables */
+	Reliable *r;		/* one state record per remote address/port */
+};
+
+/*
+ * local functions 
+ */
+void	relsendack(Conv*, Reliable*, int);
+int	reliput(Conv*, Block*, uchar*, ushort);
+Reliable *relstate(Rudpcb*, uchar*, ushort, char*);
+void	relput(Reliable*);
+void	relforget(Conv *, uchar*, int, int);
+void	relackproc(void *);
+void	relackq(Reliable *, Block*);
+void	relhangup(Conv *, Reliable*);
+void	relrexmit(Conv *, Reliable*);
+void	relput(Reliable*);
+void	rudpkick(void *x);
+
+static void
+rudpstartackproc(Proto *rudp)
+{
+	Rudppriv *rpriv;
+	char kpname[KNAMELEN];
+	/* start the relackproc kproc for this protocol instance, at most once */
+	rpriv = rudp->priv;
+	if(rpriv->ackprocstarted == 0){		/* cheap test without the lock */
+		qlock(&rpriv->apl);
+		if(rpriv->ackprocstarted == 0){	/* test again now that apl is held */
+			sprint(kpname, "#I%drudpack", rudp->f->dev);
+			kproc(kpname, relackproc, rudp, 0);
+			rpriv->ackprocstarted = 1;
+		}
+		qunlock(&rpriv->apl);
+	}
+}
+
+static char*
+rudpconnect(Conv *c, char **argv, int argc)
+{
+	char *e;
+	Rudppriv *upriv;
+	/* connect ctl request: returns nil on success or the error string from Fsstdconnect */
+	upriv = c->p->priv;
+	rudpstartackproc(c->p);
+	e = Fsstdconnect(c, argv, argc);
+	Fsconnected(c, e);
+	iphtadd(&upriv->ht, c);	/* NOTE(review): done even when e != nil, unlike rudpannounce -- confirm intended */
+
+	return e;
+}
+
+
+static int
+rudpstate(Conv *c, char *state, int n)
+{
+	Rudpcb *ucb;
+	Reliable *r;
+	int m;
+	/* format "Open"/"Closed" plus " addr/unacked" per destination into state; returns the summed snprint results */
+	m = snprint(state, n, "%s", c->inuse?"Open":"Closed");
+	ucb = (Rudpcb*)c->ptcl;
+	qlock(ucb);		/* the list of state records must not change underfoot */
+	for(r = ucb->r; r; r = r->next)
+		m += snprint(state+m, n-m, " %I/%ld", r->addr, UNACKED(r));
+	qunlock(ucb);
+	return m;
+}
+
+static char*
+rudpannounce(Conv *c, char** argv, int argc)
+{
+	char *e;
+	Rudppriv *upriv;
+	/* announce ctl request: returns nil on success or the error string from Fsstdannounce */
+	upriv = c->p->priv;
+	rudpstartackproc(c->p);
+	e = Fsstdannounce(c, argv, argc);
+	if(e != nil)		/* failed: the conversation is not added to the hash table */
+		return e;
+	Fsconnected(c, nil);
+	iphtadd(&upriv->ht, c);
+
+	return nil;
+}
+
+static void
+rudpcreate(Conv *c)
+{
+	c->rq = qopen(64*1024, Qmsg, 0, 0);		/* read queue: 64K limit, message mode */
+	c->wq = qopen(64*1024, Qkick, rudpkick, c);	/* write queue: rudpkick is its kick routine */
+}
+
+static void
+rudpclose(Conv *c)
+{
+	Rudpcb *ucb;
+	Reliable *r, *nr;
+	Rudppriv *upriv;
+	/* tear down conversation c: flush acks, close queues, reset addresses, drop all state records */
+	upriv = c->p->priv;
+	iphtrem(&upriv->ht, c);	/* no more input will be matched to c */
+
+	/* force out any delayed acks */
+	ucb = (Rudpcb*)c->ptcl;
+	qlock(ucb);
+	for(r = ucb->r; r; r = r->next){
+		if(r->acksent != r->rcvseq)
+			relsendack(c, r, 0);
+	}
+	qunlock(ucb);
+
+	qclose(c->rq);
+	qclose(c->wq);
+	qclose(c->eq);
+	ipmove(c->laddr, IPnoaddr);
+	ipmove(c->raddr, IPnoaddr);
+	c->lport = 0;
+	c->rport = 0;
+
+	ucb->headers = 0;	/* back to the defaults for the next user */
+	ucb->randdrop = 0;
+	qlock(ucb);
+	for(r = ucb->r; r; r = nr){
+		if(r->acksent != r->rcvseq)
+			relsendack(c, r, 0);
+		nr = r->next;	/* fetch the link before the reference is dropped */
+		relhangup(c, r);
+		relput(r);
+	}
+	ucb->r = 0;
+
+	qunlock(ucb);
+}
+
+/*
+ *  ip output that discards randdrop percent of the packets (testing aid)
+ */
+static void
+doipoput(Conv *c, Fs *f, Block *bp, int x, int ttl, int tos)
+{
+	Rudpcb *cb;
+
+	cb = (Rudpcb*)c->ptcl;
+	if(cb->randdrop == 0 || nrand(100) >= cb->randdrop)
+		ipoput4(f, bp, x, ttl, tos, nil);
+	else
+		freeblist(bp);
+}
+
+int
+flow(void *v)
+{
+	Reliable *rel = v;
+	/* sleep condition for rudpkick: true once the unacked count is within the limit */
+	return !(UNACKED(rel) > Maxunacked);
+}
+
+void
+rudpkick(void *x)
+{
+	Conv *c = x;
+	Udphdr *uh;
+	ushort rport;
+	uchar laddr[IPaddrlen], raddr[IPaddrlen];
+	Block *bp;
+	Rudpcb *ucb;
+	Rudphdr *rh;
+	Reliable *r;
+	int dlen, ptcllen;
+	Rudppriv *upriv;
+	Fs *f;
+	/* write-queue kick: wrap one queued message in ip/udp/rudp headers, keep a copy for retransmit, send it */
+	upriv = c->p->priv;
+	f = c->p->f;
+
+	netlog(c->p->f, Logrudp, "rudp: kick\n");
+	bp = qget(c->wq);
+	if(bp == nil)
+		return;
+
+	ucb = (Rudpcb*)c->ptcl;
+	switch(ucb->headers) {
+	case 7:
+		/* get user specified addresses */
+		bp = pullupblock(bp, UDP_USEAD7);
+		if(bp == nil)
+			return;
+		ipmove(raddr, bp->rp);
+		bp->rp += IPaddrlen;
+		ipmove(laddr, bp->rp);
+		bp->rp += IPaddrlen;
+		/* pick interface closest to dest */
+		if(ipforme(f, laddr) != Runi)
+			findlocalip(f, laddr, raddr);
+		bp->rp += IPaddrlen;		/* Ignore ifc address */
+		rport = nhgets(bp->rp);
+		bp->rp += 2+2;			/* Ignore local port */
+		break;
+	case 6:
+		/* get user specified addresses */
+		bp = pullupblock(bp, UDP_USEAD6);
+		if(bp == nil)
+			return;
+		ipmove(raddr, bp->rp);
+		bp->rp += IPaddrlen;
+		ipmove(laddr, bp->rp);
+		bp->rp += IPaddrlen;
+		/* pick interface closest to dest */
+		if(ipforme(f, laddr) != Runi)
+			findlocalip(f, laddr, raddr);
+		rport = nhgets(bp->rp);
+
+		bp->rp += 4;			/* Ignore local port */
+		break;
+	default:
+		ipmove(raddr, c->raddr);
+		ipmove(laddr, c->laddr);
+		rport = c->rport;
+
+		break;
+	}
+
+	dlen = blocklen(bp);
+
+	/* Make space to fit rudp & ip header */
+	bp = padblock(bp, UDP_IPHDR+UDP_RHDRSIZE);
+	if(bp == nil)
+		return;
+
+	uh = (Udphdr *)(bp->rp);
+	uh->vihl = IP_VER4;
+
+	rh = (Rudphdr*)uh;		/* same bytes, viewed with the rudp fields */
+
+	ptcllen = dlen + (UDP_RHDRSIZE-UDP_PHDRSIZE);	/* udp header + rudp header + data */
+	uh->Unused = 0;
+	uh->udpproto = IP_UDPPROTO;
+	uh->frag[0] = 0;
+	uh->frag[1] = 0;
+	hnputs(uh->udpplen, ptcllen);
+	switch(ucb->headers){
+	case 6:
+	case 7:
+		v6tov4(uh->udpdst, raddr);
+		hnputs(uh->udpdport, rport);
+		v6tov4(uh->udpsrc, laddr);
+		break;
+	default:
+		v6tov4(uh->udpdst, c->raddr);
+		hnputs(uh->udpdport, c->rport);
+		if(ipcmp(c->laddr, IPnoaddr) == 0)
+			findlocalip(f, c->laddr, c->raddr);
+		v6tov4(uh->udpsrc, c->laddr);
+		break;
+	}
+	hnputs(uh->udpsport, c->lport);
+	hnputs(uh->udplen, ptcllen);
+	uh->udpcksum[0] = 0;
+	uh->udpcksum[1] = 0;
+
+	qlock(ucb);
+	r = relstate(ucb, raddr, rport, "kick");
+	r->sndseq = NEXTSEQ(r->sndseq);
+	hnputl(rh->relseq, r->sndseq);
+	hnputl(rh->relsgen, r->sndgen);
+
+	hnputl(rh->relack, r->rcvseq);  /* ACK last rcvd packet */
+	hnputl(rh->relagen, r->rcvgen);
+
+	if(r->rcvseq != r->acksent)
+		r->acksent = r->rcvseq;	/* the ack rides on this data packet */
+
+	hnputs(uh->udpcksum, ptclcsum(bp, UDP_IPHDR, dlen+UDP_RHDRSIZE));
+
+	relackq(r, bp);			/* keep a copy until it is acked */
+	qunlock(ucb);
+
+	upriv->ustats.rudpOutDatagrams++;
+
+	DPRINT("sent: %lud/%lud, %lud/%lud\n", 
+		r->sndseq, r->sndgen, r->rcvseq, r->rcvgen);
+
+	doipoput(c, f, bp, 0, c->ttl, c->tos);
+
+	if(waserror()) {
+		qunlock(&r->lock);	/* unlock before the reference goes, as on the normal path below */
+		relput(r);
+		nexterror();
+	}
+
+	/* flow control of sorts */
+	qlock(&r->lock);
+	if(UNACKED(r) > Maxunacked){
+		r->blocked = 1;
+		sleep(&r->vous, flow, r);
+		r->blocked = 0;
+	}
+
+	qunlock(&r->lock);
+	relput(r);
+	poperror();
+}
+
+void
+rudpiput(Proto *rudp, Ipifc *ifc, Block *bp)
+{
+	int len, olen, ottl;
+	Udphdr *uh;
+	Conv *c;
+	Rudpcb *ucb;
+	uchar raddr[IPaddrlen], laddr[IPaddrlen];
+	ushort rport, lport;
+	Rudppriv *upriv;
+	Fs *f;
+	uchar *p;
+	/* input path: verify the checksum, find the conversation, run the reliability layer, queue the data */
+	upriv = rudp->priv;
+	f = rudp->f;
+
+	upriv->ustats.rudpInDatagrams++;
+
+	uh = (Udphdr*)(bp->rp);
+
+	/* Put back pseudo header for checksum 
+	 * (remember old values for icmpnoconv()) 
+	 */
+	ottl = uh->Unused;
+	uh->Unused = 0;
+	len = nhgets(uh->udplen);
+	olen = nhgets(uh->udpplen);
+	hnputs(uh->udpplen, len);
+
+	v4tov6(raddr, uh->udpsrc);
+	v4tov6(laddr, uh->udpdst);
+	lport = nhgets(uh->udpdport);
+	rport = nhgets(uh->udpsport);
+
+	if(nhgets(uh->udpcksum)) {	/* a zero checksum field skips verification */
+		if(ptclcsum(bp, UDP_IPHDR, len+UDP_PHDRSIZE)) {
+			upriv->ustats.rudpInErrors++;
+			upriv->csumerr++;
+			netlog(f, Logrudp, "rudp: checksum error %I\n", raddr);
+			DPRINT("rudp: checksum error %I\n", raddr);
+			freeblist(bp);
+			return;
+		}
+	}
+
+	qlock(rudp);
+
+	c = iphtlook(&upriv->ht, raddr, rport, laddr, lport);
+	if(c == nil){
+		/* no conversation found */
+		upriv->ustats.rudpNoPorts++;
+		qunlock(rudp);
+		netlog(f, Logudp, "udp: no conv %I!%d -> %I!%d\n", raddr, rport,
+			laddr, lport);
+		uh->Unused = ottl;
+		hnputs(uh->udpplen, olen);
+		icmpnoconv(f, bp);
+		freeblist(bp);
+		return;
+	}
+	ucb = (Rudpcb*)c->ptcl;
+	qlock(ucb);			/* take the conversation lock before letting go of the protocol lock */
+	qunlock(rudp);
+
+	if(reliput(c, bp, raddr, rport) < 0){
+		qunlock(ucb);
+		freeblist(bp);		/* bp may be a chain, so free all of it */
+		return;
+	}
+
+	/* Trim the packet down to data size */
+
+	len -= (UDP_RHDRSIZE-UDP_PHDRSIZE);
+	bp = trimblock(bp, UDP_IPHDR+UDP_RHDRSIZE, len);
+	if(bp == nil) {
+		netlog(f, Logrudp, "rudp: len err %I.%d -> %I.%d\n", 
+			raddr, rport, laddr, lport);
+		DPRINT("rudp: len err %I.%d -> %I.%d\n", 
+			raddr, rport, laddr, lport);
+		upriv->lenerr++;
+		/* bp is already nil here; only the conversation lock is left to release */
+		qunlock(ucb);
+		return;
+	}
+
+	netlog(f, Logrudpmsg, "rudp: %I.%d -> %I.%d l %d\n", 
+		raddr, rport, laddr, lport, len);
+
+	switch(ucb->headers){
+	case 7:
+		/* pass the src address */
+		bp = padblock(bp, UDP_USEAD7);
+		p = bp->rp;
+		ipmove(p, raddr); p += IPaddrlen;
+		ipmove(p, laddr); p += IPaddrlen;
+		ipmove(p, ifc->lifc->local); p += IPaddrlen;
+		hnputs(p, rport); p += 2;
+		hnputs(p, lport);
+		break;
+	case 6:
+		/* pass the src address */
+		bp = padblock(bp, UDP_USEAD6);
+		p = bp->rp;
+		ipmove(p, raddr); p += IPaddrlen;
+		ipmove(p, ipforme(f, laddr)==Runi ? laddr : ifc->lifc->local); p += IPaddrlen;
+		hnputs(p, rport); p += 2;
+		hnputs(p, lport);
+		break;
+	default:
+		/* connection oriented rudp */
+		if(ipcmp(c->raddr, IPnoaddr) == 0){
+			/* save the src address in the conversation */
+			ipmove(c->raddr, raddr);
+			c->rport = rport;
+
+			/* reply with the same ip address (if not broadcast) */
+			if(ipforme(f, laddr) == Runi)
+				ipmove(c->laddr, laddr);
+			else
+				v4tov6(c->laddr, ifc->lifc->local);
+		}
+		break;
+	}
+	if(bp->next)
+		bp = concatblock(bp);
+
+	if(qfull(c->rq)) {
+		netlog(f, Logrudp, "rudp: qfull %I.%d -> %I.%d\n", raddr, rport,
+			laddr, lport);
+		freeblist(bp);
+	}
+	else
+		qpass(c->rq, bp);
+
+	qunlock(ucb);
+}
+
+static char *rudpunknown = "unknown rudp ctl request";
+
+char*
+rudpctl(Conv *c, char **f, int n)
+{
+	Rudpcb *ucb;
+	uchar ip[IPaddrlen];
+	int x;
+	/* ctl requests: "headers++4", "headers", "hangup ip port", "randdrop [percent]"; nil on success, else an error string */
+	ucb = (Rudpcb*)c->ptcl;
+	if(n < 1)
+		return rudpunknown;
+
+	if(strcmp(f[0], "headers++4") == 0){
+		ucb->headers = 7;	/* user data carries raddr, laddr, ifc addr and ports */
+		return nil;
+	} else if(strcmp(f[0], "headers") == 0){
+		ucb->headers = 6;	/* user data carries raddr, laddr and ports */
+		return nil;
+	} else if(strcmp(f[0], "hangup") == 0){
+		if(n < 3)
+			return "bad syntax";
+		parseip(ip, f[1]);
+		x = atoi(f[2]);		/* port of the destination to forget */
+		qlock(ucb);
+		relforget(c, ip, x, 1);
+		qunlock(ucb);
+		return nil;
+	} else if(strcmp(f[0], "randdrop") == 0){
+		x = 10;		/* default is 10% */
+		if(n > 1)
+			x = atoi(f[1]);
+		if(x > 100 || x < 0)
+			return "illegal rudp drop rate";
+		ucb->randdrop = x;
+		return nil;
+	}
+	return rudpunknown;
+}
+
+void
+rudpadvise(Proto *rudp, Block *bp, char *msg)
+{
+	Udphdr *hdr;
+	uchar src[IPaddrlen], dst[IPaddrlen];
+	ushort sport, dport;
+	Conv *cv, **cp;
+
+	/* the packet's source is matched against our local end, its destination against the remote */
+	hdr = (Udphdr*)(bp->rp);
+	v4tov6(dst, hdr->udpdst);
+	v4tov6(src, hdr->udpsrc);
+	sport = nhgets(hdr->udpsport);
+	dport = nhgets(hdr->udpdport);
+
+	/* hang up the first conversation whose addresses and ports all match */
+	for(cp = rudp->conv; (cv = *cp) != nil; cp++) {
+		if(cv->rport != dport || cv->lport != sport)
+			continue;
+		if(ipcmp(cv->raddr, dst) != 0)
+			continue;
+		if(ipcmp(cv->laddr, src) != 0)
+			continue;
+		qhangup(cv->rq, msg);
+		qhangup(cv->wq, msg);
+		break;
+	}
+	freeblist(bp);
+}
+
+int
+rudpstats(Proto *rudp, char *buf, int len)
+{
+	Rudppriv *upriv;
+	/* format six counters on one line into buf; returns what snprint returns */
+	upriv = rudp->priv;
+	return snprint(buf, len, "%lud %lud %lud %lud %lud %lud\n",
+		upriv->ustats.rudpInDatagrams,
+		upriv->ustats.rudpNoPorts,
+		upriv->ustats.rudpInErrors,
+		upriv->ustats.rudpOutDatagrams,
+		upriv->rxmits,
+		upriv->orders);
+}
+
+int
+rudpgc(Proto *rudp)
+{
+	return natgc(rudp->ipproto);	/* NOTE(review): delegates entirely to natgc, whose contract is not visible here -- confirm */
+}
+
+void
+rudpinit(Fs *fs)
+{
+	/* build the rudp Proto (entry points, private data, sizes) and register it with fs */
+	Proto *rudp;
+
+	rudp = smalloc(sizeof(Proto));
+	rudp->priv = smalloc(sizeof(Rudppriv));
+	rudp->name = "rudp";
+	rudp->connect = rudpconnect;
+	rudp->announce = rudpannounce;
+	rudp->ctl = rudpctl;
+	rudp->state = rudpstate;
+	rudp->create = rudpcreate;
+	rudp->close = rudpclose;
+	rudp->rcv = rudpiput;
+	rudp->advise = rudpadvise;
+	rudp->stats = rudpstats;
+	rudp->gc = rudpgc;
+	rudp->ipproto = IP_UDPPROTO;
+	rudp->nc = 16;			/* NOTE(review): presumably the number of conversations -- confirm */
+	rudp->ptclsize = sizeof(Rudpcb);	/* per-conversation state reached through Conv.ptcl */
+
+	Fsproto(fs, rudp);
+}
+
+/*********************************************/
+/* Here start the reliable helper functions  */
+/*********************************************/
+/*
+ *  Append a private copy of bp to r's unacked list so it can be resent
+ */
+void
+relackq(Reliable *r, Block *bp)
+{
+	Block *copy;
+
+	copy = copyblock(bp, blocklen(bp));
+	copy->list = nil;
+	if(r->unacked == nil){
+		/* list was empty: the first entry restarts the timer */
+		r->timeout = 0;
+		r->xmits = 1;
+		r->unacked = copy;
+	} else
+		r->unackedtail->list = copy;
+
+	r->unackedtail = copy;
+}
+
+/*
+ *  never returns: every Rudptickms, resend overdue blocks and send owed acks
+ */
+void
+relackproc(void *a)
+{
+	Proto *proto = a;
+	Conv **cvp, *cv;
+	Rudpcb *cb;
+	Reliable *rel;
+
+	for(;;){
+		tsleep(&up->sleep, return0, 0, Rudptickms);
+
+		for(cvp = proto->conv; (cv = *cvp) != nil; cvp++){
+			cb = (Rudpcb*)cv->ptcl;
+			qlock(cb);
+
+			for(rel = cb->r; rel != nil; rel = rel->next){
+				/* age the unacked list; resend once it is overdue */
+				if(rel->unacked != nil){
+					rel->timeout += Rudptickms;
+					if(rel->timeout > Rudprxms*rel->xmits)
+						relrexmit(cv, rel);
+				}
+
+				/* rcvseq has moved since the last ack we sent */
+				if(rel->acksent != rel->rcvseq)
+					relsendack(cv, rel, 0);
+			}
+			qunlock(cb);
+		}
+	}
+}
+
+/*
+ *  return the state record for addr!port, creating it on first use;
+ *  every call hands the caller a reference of its own
+ */
+Reliable*
+relstate(Rudpcb *ucb, uchar *addr, ushort port, char *from)
+{
+	Reliable *rel, **link;
+
+	/* on a miss link is left at the nil slot that ends the list */
+	for(link = &ucb->r; (rel = *link) != nil; link = &rel->next)
+		if(memcmp(addr, rel->addr, IPaddrlen) == 0 && port == rel->port)
+			break;
+
+	if(rel == nil){
+		/* no state for this addr/port: seed generation if needed, then build */
+		while(generation == 0)
+			generation = rand();
+
+		DPRINT("from %s new state %lud for %I!%ud\n",
+		        from, generation, addr, port);
+
+		rel = smalloc(sizeof(Reliable));
+		memmove(rel->addr, addr, IPaddrlen);
+		rel->port = port;
+		rel->unacked = 0;
+
+		/* Hangupgen is what a hangup carries as its generation; skip it */
+		if(generation == Hangupgen)
+			generation++;
+		rel->sndgen = generation++;
+		rel->sndseq = 0;
+		rel->ackrcvd = 0;
+		rel->rcvgen = 0;
+		rel->rcvseq = 0;
+		rel->acksent = 0;
+		rel->xmits = 0;
+		rel->timeout = 0;
+
+		/* one reference for being in the list */
+		rel->ref = 0;
+		incref(rel);
+		*link = rel;
+	}
+	incref(rel);
+	return rel;
+}
+
+void
+relput(Reliable *r)		/* drop one reference to r */
+{
+	if(decref(r) == 0)	/* that was the last one */
+		free(r);
+}
+
+/*
+ *  forget a Reliable state; the originator also sends the peer a hangup
+ */
+void
+relforget(Conv *c, uchar *ip, int port, int originator)
+{
+	Rudpcb *cb = (Rudpcb*)c->ptcl;
+	Reliable *rel, **link;
+
+	/* find the record and the pointer that refers to it */
+	for(link = &cb->r; (rel = *link) != nil; link = &rel->next)
+		if(ipcmp(ip, rel->addr) == 0 && port == rel->port)
+			break;
+
+	if(rel == nil)
+		return;
+
+	/* unlink first, then notify, reset and drop the list's reference */
+	*link = rel->next;
+	if(originator)
+		relsendack(c, rel, 1);
+	relhangup(c, rel);
+	relput(rel);
+}
+
+/* 
+ *  process a rcvd reliable packet. return -1 if not to be passed to user process,
+ *  0 otherwise.  bp is only read here, never freed.
+ *
+ *  called with ucb locked.
+ */
+int
+reliput(Conv *c, Block *bp, uchar *addr, ushort port)
+{
+	Block *nbp;
+	Rudpcb *ucb;
+	Rudppriv *upriv;
+	Udphdr *uh;
+	Reliable *r;
+	Rudphdr *rh;
+	ulong seq, ack, sgen, agen, ackreal;
+	int rv = -1;
+
+	/* get fields: the reliable header is read through the same pointer as the udp header */
+	uh = (Udphdr*)(bp->rp);
+	rh = (Rudphdr*)uh;
+	seq = nhgetl(rh->relseq);
+	sgen = nhgetl(rh->relsgen);
+	ack = nhgetl(rh->relack);
+	agen = nhgetl(rh->relagen);
+
+	upriv = c->p->priv;
+	ucb = (Rudpcb*)c->ptcl;
+	r = relstate(ucb, addr, port, "input");	/* takes a reference, dropped at out: */
+
+	DPRINT("rcvd %lud/%lud, %lud/%lud, r->sndgen = %lud\n", 
+		seq, sgen, ack, agen, r->sndgen);
+
+	/* if acking an incorrect generation, ignore */
+	if(ack && agen != r->sndgen)
+		goto out;
+
+	/* Look for a hangup; it is honoured only if it names our current generation */
+	if(sgen == Hangupgen) {
+		if(agen == r->sndgen)
+			relforget(c, addr, port, 0);
+		goto out;
+	}
+
+	/* make sure we're not talking to a new remote side */
+	if(r->rcvgen != sgen){
+		if(seq != 0 && seq != 1)	/* a new generation must start at the beginning */
+			goto out;
+
+		/* new connection */
+		if(r->rcvgen != 0){
+			DPRINT("new con r->rcvgen = %lud, sgen = %lud\n", r->rcvgen, sgen);
+			relhangup(c, r);
+		}
+		r->rcvgen = sgen;
+	}
+
+	/* dequeue acked packets */
+	if(ack && agen == r->sndgen){
+		ackreal = 0;	/* set once this ack frees at least one block */
+		while(r->unacked != nil && INSEQ(ack, r->ackrcvd, r->sndseq)){
+			nbp = r->unacked;
+			r->unacked = nbp->list;
+			DPRINT("%lud/%lud acked, r->sndgen = %lud\n", 
+			       ack, agen, r->sndgen);
+			freeb(nbp);
+			r->ackrcvd = NEXTSEQ(r->ackrcvd);
+			ackreal = 1;
+		}
+
+		/* flow control: wake whoever sleeps on vous once the backlog is small */
+		if(UNACKED(r) < Maxunacked/8 && r->blocked)
+			wakeup(&r->vous);
+
+		/*
+		 *  retransmit next packet if the acked packet
+		 *  was transmitted more than once
+		 */
+		if(ackreal && r->unacked != nil){
+			r->timeout = 0;
+			if(r->xmits > 1){
+				r->xmits = 1;
+				relrexmit(c, r);
+			}
+		}
+		
+	}
+
+	/* no message or input queue full */
+	if(seq == 0 || qfull(c->rq))
+		goto out;
+
+	/* refuse out of order delivery */
+	if(seq != NEXTSEQ(r->rcvseq)){
+		relsendack(c, r, 0);	/* tell him we got it already */
+		upriv->orders++;
+		DPRINT("out of sequence %lud not %lud\n", seq, NEXTSEQ(r->rcvseq));
+		goto out;
+	}
+	r->rcvseq = seq;
+
+	rv = 0;		/* in sequence: the caller may deliver the packet */
+out:
+	relput(r);
+	return rv;
+}
+
+void
+relsendack(Conv *c, Reliable *r, int hangup)
+{
+	Udphdr *uh;
+	Block *bp;
+	Rudphdr *rh;
+	int ptcllen;
+	Fs *f;
+
+	/* build a header-only packet: an ack, or a hangup when hangup is non-zero */
+	bp = allocb(UDP_IPHDR + UDP_RHDRSIZE);
+	if(bp == nil)
+		return;
+	bp->wp += UDP_IPHDR + UDP_RHDRSIZE;
+	f = c->p->f;
+	uh = (Udphdr *)(bp->rp);
+	uh->vihl = IP_VER4;
+	rh = (Rudphdr*)uh;
+
+	ptcllen = (UDP_RHDRSIZE-UDP_PHDRSIZE);	/* used for both length fields; no data follows */
+	uh->Unused = 0;
+	uh->udpproto = IP_UDPPROTO;
+	uh->frag[0] = 0;
+	uh->frag[1] = 0;
+	hnputs(uh->udpplen, ptcllen);
+
+	/* addresses and ports; a local address is chosen first if none is set */
+	v6tov4(uh->udpdst, r->addr);
+	hnputs(uh->udpdport, r->port);
+	hnputs(uh->udpsport, c->lport);
+	if(ipcmp(c->laddr, IPnoaddr) == 0)
+		findlocalip(f, c->laddr, c->raddr);
+	v6tov4(uh->udpsrc, c->laddr);
+	hnputs(uh->udplen, ptcllen);
+
+	/* a hangup carries Hangupgen as our generation; seq 0 means no message */
+	if(hangup)
+		hnputl(rh->relsgen, Hangupgen);
+	else
+		hnputl(rh->relsgen, r->sndgen);
+	hnputl(rh->relseq, 0);
+	hnputl(rh->relagen, r->rcvgen);
+	hnputl(rh->relack, r->rcvseq);
+
+	/* remember the highest sequence number acknowledged so far */
+	if(r->acksent < r->rcvseq)
+		r->acksent = r->rcvseq;
+
+	/* the checksum field is zeroed before the checksum is computed */
+	uh->udpcksum[0] = 0;
+	uh->udpcksum[1] = 0;
+	hnputs(uh->udpcksum, ptclcsum(bp, UDP_IPHDR, UDP_RHDRSIZE));
+
+	DPRINT("sendack: %lud/%lud, %lud/%lud\n", 0L, r->sndgen, r->rcvseq, r->rcvgen);
+	doipoput(c, f, bp, 0, c->ttl, c->tos);
+}
+
+
+/*
+ *  called with ucb locked (and c locked if user initiated close)
+ */
+void
+relhangup(Conv *c, Reliable *r)
+{
+	char msg[ERRMAX];
+	Block *b;
+	int len;
+
+	/* post the hangup on the conversation's eq queue */
+	len = snprint(msg, sizeof(msg), "hangup %I!%d", r->addr, r->port);
+	qproduce(c->eq, msg, len);
+
+	/* dump any unacked outgoing messages */
+	while((b = r->unacked) != nil){
+		r->unacked = b->list;
+		b->list = nil;
+		freeb(b);
+	}
+	/* receive side starts from scratch */
+	r->rcvgen = 0;
+	r->rcvseq = 0;
+	r->acksent = 0;
+	/* send side restarts under a fresh generation, skipping Hangupgen */
+	if(generation == Hangupgen)
+		generation++;
+	r->sndgen = generation++;
+	r->sndseq = 0;
+	r->ackrcvd = 0;
+	r->xmits = 0;
+	r->timeout = 0;
+	wakeup(&r->vous);
+}
+
+/*
+ *  resend the block at the head of r->unacked, or give up; called with ucb locked
+ */
+void
+relrexmit(Conv *c, Reliable *r)
+{
+	Rudppriv *priv = c->p->priv;
+	Fs *fs = c->p->f;
+	Block *copy;
+
+	r->timeout = 0;
+	/* tried too often already: hang up instead of sending again */
+	if(r->xmits++ > Rudpmaxxmit){
+		relhangup(c, r);
+		return;
+	}
+	priv->rxmits++;
+
+	/* the queued block stays on the list; a copy goes out */
+	copy = copyblock(r->unacked, blocklen(r->unacked));
+	DPRINT("rxmit r->ackrvcd+1 = %lud\n", r->ackrcvd+1);
+	doipoput(c, fs, copy, 0, c->ttl, c->tos);
+}
--- /dev/null
+++ b/os/ip.original/tcp.c
@@ -1,0 +1,3194 @@
+#include	"u.h"
+#include	"../port/lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"../port/error.h"
+
+#include	"ip.h"
+
+enum
+{
+	QMAX		= 64*1024-1,	/* default receive window and read queue limit */
+	IP_TCPPROTO	= 6,
+
+	TCP4_IPLEN	= 8,		/* bytes of Tcp4hdr ahead of the pseudo header */
+	TCP4_PHDRSIZE	= 12,		/* v4 pseudo header: Unused..tcpdst */
+	TCP4_HDRSIZE	= 20,		/* tcp header without options */
+	TCP4_TCBPHDRSZ	= 40,		/* bytes copied from the prototype header */
+	TCP4_PKT	= TCP4_IPLEN+TCP4_PHDRSIZE,
+
+	TCP6_IPLEN	= 0,
+	TCP6_PHDRSIZE	= 40,		/* v6 pseudo header: vcf..tcpdst */
+	TCP6_HDRSIZE	= 20,		/* tcp header without options */
+	TCP6_TCBPHDRSZ	= 60,		/* bytes copied from the prototype header */
+	TCP6_PKT	= TCP6_IPLEN+TCP6_PHDRSIZE,
+
+	TcptimerOFF	= 0,
+	TcptimerON	= 1,
+	TcptimerDONE	= 2,
+	MAX_TIME 	= (1<<20),	/* Forever */
+	TCP_ACK		= 50,		/* Timed ack sequence in ms */
+	MAXBACKMS	= 9*60*1000,	/* longest backoff time (ms) before hangup */
+
+	URG		= 0x20,		/* Data marked urgent */
+	ACK		= 0x10,		/* Acknowledge is valid */
+	PSH		= 0x08,		/* Whole data pipe is pushed */
+	RST		= 0x04,		/* Reset connection */
+	SYN		= 0x02,		/* Pkt. is synchronise */
+	FIN		= 0x01,		/* Start close down */
+
+	EOLOPT		= 0,
+	NOOPOPT		= 1,
+	MSSOPT		= 2,
+	MSS_LENGTH	= 4,		/* length in bytes of the MSS option */
+	WSOPT		= 3,
+	WS_LENGTH	= 3,		/* length in bytes of the window scale option */
+	MSL2		= 10,
+	MSPTICK		= 50,		/* Milliseconds per timer tick */
+	DEF_MSS		= 1460,		/* Default maximum segment size */
+	DEF_MSS6	= 1280,		/* Default maximum segment size (min) for v6 */
+	DEF_RTT		= 500,		/* Default round trip */
+	DEF_KAT		= 120000,	/* Default time (ms) between keep alives */
+	TCP_LISTEN	= 0,		/* Listen connection */
+	TCP_CONNECT	= 1,		/* Outgoing connection */
+	SYNACK_RXTIMER	= 250,		/* ms between SYNACK retransmits */
+
+	TCPREXMTTHRESH	= 3,		/* dupack threshold for rxt */
+	/* bit flags; FORCE, CLONE and ACTIVE are or'ed into Tcpctl.flags in this file */
+	FORCE		= 1,
+	CLONE		= 2,
+	RETRAN		= 4,
+	ACTIVE		= 8,
+	SYNACK		= 16,
+
+	LOGAGAIN	= 3,		/* srtt is kept shifted left by this much */
+	LOGDGAIN	= 2,
+
+	Closed		= 0,		/* Connection states */
+	Listen,
+	Syn_sent,
+	Syn_received,
+	Established,
+	Finwait1,
+	Finwait2,
+	Close_wait,
+	Closing,
+	Last_ack,
+	Time_wait,
+
+	Maxlimbo	= 1000,		/* maximum procs waiting for response to SYN ACK */
+	NLHT		= 256,		/* hash table size, must be a power of 2 */
+	LHTMASK		= NLHT-1,
+
+	HaveWS		= 1<<8,		/* or'ed into a ws value when the option is present */
+};
+
+/* Indexed by connection state; must stay in the order of Closed..Time_wait above */
+char *tcpstates[] =
+{
+	"Closed", 	"Listen", 	"Syn_sent", "Syn_received",
+	"Established", 	"Finwait1",	"Finwait2", "Close_wait",
+	"Closing", 	"Last_ack", 	"Time_wait"
+};
+
+typedef struct Tcptimer Tcptimer;
+struct Tcptimer
+{
+	Tcptimer	*next;		/* next/prev link the Tcppriv.timers list while TcptimerON */
+	Tcptimer	*prev;
+	Tcptimer	*readynext;	/* chains the timers that expired in one tcpackproc pass */
+	int	state;			/* TcptimerOFF, TcptimerON or TcptimerDONE */
+	int	start;			/* interval in ticks; tcpgo ignores a timer with start 0 */
+	int	count;			/* ticks left; reloaded from start by tcpgo */
+	void	(*func)(void*);		/* called with arg by tcpackproc on expiry, if not nil */
+	void	*arg;
+};
+
+/*
+ *  v4 and v6 pseudo headers used for
+ *  checksumming tcp
+ */
+typedef struct Tcp4hdr Tcp4hdr;
+struct Tcp4hdr
+{
+	uchar	vihl;		/* Version and header length */
+	uchar	tos;		/* Type of service */
+	uchar	length[2];	/* packet length */
+	uchar	id[2];		/* Identification */
+	uchar	frag[2];	/* Fragment information */
+	uchar	Unused;
+	uchar	proto;
+	uchar	tcplen[2];	/* tcp header plus data length (set by htontcp4) */
+	uchar	tcpsrc[4];
+	uchar	tcpdst[4];
+	uchar	tcpsport[2];
+	uchar	tcpdport[2];
+	uchar	tcpseq[4];
+	uchar	tcpack[4];
+	uchar	tcpflag[2];	/* header length in the top bits, flag bits below */
+	uchar	tcpwin[2];
+	uchar	tcpcksum[2];
+	uchar	tcpurg[2];
+	/* Options segment */
+	uchar	tcpopt[1];
+};
+
+typedef struct Tcp6hdr Tcp6hdr;
+struct Tcp6hdr
+{
+	uchar	vcf[4];		/* htontcp6 keeps the length here while checksumming */
+	uchar	ploadlen[2];	/* payload length: tcp header plus data */
+	uchar	proto;
+	uchar	ttl;		/* htontcp6 keeps the protocol here while checksumming */
+	uchar	tcpsrc[IPaddrlen];
+	uchar	tcpdst[IPaddrlen];
+	uchar	tcpsport[2];
+	uchar	tcpdport[2];
+	uchar	tcpseq[4];
+	uchar	tcpack[4];
+	uchar	tcpflag[2];	/* header length in the top bits, flag bits below */
+	uchar	tcpwin[2];
+	uchar	tcpcksum[2];
+	uchar	tcpurg[2];
+	/* Options segment */
+	uchar	tcpopt[1];
+};
+
+/*
+ *  this represents the control info
+ *  for a single packet.  It is derived from
+ *  a packet in ntohtcp{4,6}() and stuck into
+ *  a packet in htontcp{4,6}().
+ */
+typedef struct Tcp Tcp;
+struct	Tcp
+{
+	ushort	source;	/* source port */
+	ushort	dest;	/* destination port */
+	ulong	seq;
+	ulong	ack;
+	uchar	flags;	/* URG, ACK, PSH, RST, SYN, FIN bits */
+	ushort	ws;	/* window scale option (if not zero) */
+	ulong	wnd;
+	ushort	urg;
+	ushort	mss;	/* max segment size option (if not zero) */
+	ushort	len;	/* size of data */
+};
+
+/*
+ *  this header is malloc'd to thread together fragments
+ *  waiting to be coalesced
+ */
+typedef struct Reseq Reseq;
+struct Reseq
+{
+	Reseq	*next;		/* next entry on Tcpctl.reseq */
+	Tcp	seg;		/* presumably the control info of the queued segment - confirm in addreseq */
+	Block	*bp;		/* the queued data; freed with freeblist by localclose */
+	ushort	length;
+};
+
+/*
+ *  the qlock in the Conv locks this structure
+ */
+typedef struct Tcpctl Tcpctl;
+struct Tcpctl
+{
+	uchar	state;			/* Connection state */
+	uchar	type;			/* Listening or active connection */
+	uchar	code;			/* Icmp code */
+	struct {
+		ulong	una;		/* Unacked data pointer */
+		ulong	nxt;		/* Next sequence expected */
+		ulong	ptr;		/* Data pointer */
+		ulong	wnd;		/* Tcp send window */
+		ulong	urg;		/* Urgent data pointer */
+		ulong	wl2;
+		int	scale;		/* how much to right shift window in xmitted packets */
+		/* to implement tahoe and reno TCP */
+		ulong	dupacks;	/* number of duplicate acks rcvd */
+		int	recovery;	/* loss recovery flag */
+		ulong	rxt;		/* right window marker for recovery */
+	} snd;
+	struct {
+		ulong	nxt;		/* Receive pointer to next uchar slot */
+		ulong	wnd;		/* Receive window incoming */
+		ulong	urg;		/* Urgent pointer */
+		int	blocked;
+		int	una;		/* unacked data segs */
+		int	scale;		/* how much to left shift window in rcved packets */
+	} rcv;
+	ulong	iss;			/* Initial sequence number */
+	int	sawwsopt;		/* true if we saw a wsopt on the incoming SYN */
+	ulong	cwind;			/* Congestion window */
+	int	scale;			/* desired snd.scale */
+	ushort	ssthresh;		/* Slow start threshold */
+	int	resent;			/* Bytes just resent */
+	int	irs;			/* Initial received sequence */
+	ushort	mss;			/* Maximum segment size */
+	int	rerecv;			/* Overlap of data re-received */
+	ulong	window;			/* Receive window */
+	uchar	backoff;		/* Exponential backoff counter */
+	int	backedoff;		/* ms we've backed off for rexmits */
+	uchar	flags;			/* State flags */
+	Reseq	*reseq;			/* Resequencing queue */
+	Tcptimer	timer;			/* Activity timer */
+	Tcptimer	acktimer;		/* Acknowledge timer */
+	Tcptimer	rtt_timer;		/* Round trip timer */
+	Tcptimer	katimer;		/* keep alive timer */
+	ulong	rttseq;			/* Round trip sequence */
+	int	srtt;			/* Shortened round trip */
+	int	mdev;			/* Mean deviation of round trip */
+	int	kacounter;		/* count down for keep alive */
+	uint	sndsyntime;		/* time syn sent */
+	ulong	time;			/* time Finwait2 or Syn_received was sent */
+	int	nochecksum;		/* non-zero means don't send checksums */
+	int	flgcnt;			/* number of flags in the sequence (FIN,SYN) */
+
+	union {
+		Tcp4hdr	tcp4hdr;
+		Tcp6hdr	tcp6hdr;
+	} protohdr;		/* prototype header */
+};
+
+/*
+ *  New calls are put in limbo rather than having a conversation structure
+ *  allocated.  Thus, a SYN attack results in lots of limbo'd calls but not
+ *  any real Conv structures mucking things up.  Calls in limbo rexmit their
+ *  SYN ACK every SYNACK_RXTIMER ms up to 4 times, i.e., they disappear after 1 second.
+ *
+ *  In particular they aren't on a listener's queue so that they don't figure
+ *  in the input queue limit.
+ *
+ *  If 1/2 of a T3 was attacking SYN packets, we'd have a permanent queue
+ *  of 70000 limbo'd calls.  Not great for a linear list but doable.  Therefore
+ *  there is no hashing of this list.
+ */
+typedef struct Limbo Limbo;
+struct Limbo
+{
+	Limbo	*next;		/* next call in the same Tcppriv.lht slot */
+
+	uchar	laddr[IPaddrlen];
+	uchar	raddr[IPaddrlen];
+	ushort	lport;
+	ushort	rport;
+	ulong	irs;		/* initial received sequence */
+	ulong	iss;		/* initial sent sequence */
+	ushort	mss;		/* mss from the other end */
+	ushort	rcvscale;	/* how much to scale rcvd windows */
+	ushort	sndscale;	/* how much to scale sent windows */
+	ulong	lastsend;	/* last time we sent a synack */
+	uchar	version;	/* v4 or v6 */
+	uchar	rexmits;	/* number of retransmissions */
+};
+
+int	tcp_irtt = DEF_RTT;	/* Initial guess at round trip time (ms) */
+ushort	tcp_mss = DEF_MSS;	/* Maximum segment size to be sent */
+
+enum {
+	/* MIB stats */
+	MaxConn,
+	ActiveOpens,
+	PassiveOpens,
+	EstabResets,
+	CurrEstab,
+	InSegs,
+	OutSegs,
+	RetransSegs,
+	RetransTimeouts,
+	InErrs,
+	OutRsts,
+
+	/* non-MIB stats */
+	CsumErrs,
+	HlenErrs,
+	LenErrs,
+	OutOfOrder,
+
+	Nstats		/* number of counters; sizes Tcppriv.stats */
+};
+
+static char *statnames[] =	/* printable name for each index of the stats enum */
+{
+[MaxConn]	"MaxConn",
+[ActiveOpens]	"ActiveOpens",
+[PassiveOpens]	"PassiveOpens",
+[EstabResets]	"EstabResets",
+[CurrEstab]	"CurrEstab",
+[InSegs]	"InSegs",
+[OutSegs]	"OutSegs",
+[RetransSegs]	"RetransSegs",
+[RetransTimeouts]	"RetransTimeouts",
+[InErrs]	"InErrs",
+[OutRsts]	"OutRsts",
+[CsumErrs]	"CsumErrs",
+[HlenErrs]	"HlenErrs",
+[LenErrs]	"LenErrs",
+[OutOfOrder]	"OutOfOrder",
+};
+
+typedef struct Tcppriv Tcppriv;
+struct Tcppriv
+{
+	/* List of active timers */
+	QLock 	tl;		/* held around every change to the timers list */
+	Tcptimer *timers;
+
+	/* hash table for matching conversations */
+	Ipht	ht;
+
+	/* calls in limbo waiting for an ACK to our SYN ACK */
+	int	nlimbo;
+	Limbo	*lht[NLHT];
+
+	/* for keeping track of tcpackproc */
+	QLock	apl;
+	int	ackprocstarted;	/* set by tcpstart once the kproc has been created */
+
+	ulong	stats[Nstats];	/* counters indexed by the stats enum */
+};
+
+/*
+ *  Setting tcpporthogdefense to non-zero enables Dong Lin's
+ *  solution to hijacked systems staking out ports as a form
+ *  of DoS attack.
+ *
+ *  To avoid stateless Conv hogs, we pick a sequence number at random.  If
+ *  that number gets acked by the other end, we shut down the connection.
+ *  Look for tcpporthogdefense in the code.
+ */
+int tcpporthogdefense = 0;
+
+int	addreseq(Tcpctl*, Tcppriv*, Tcp*, Block*, ushort);
+void	getreseq(Tcpctl*, Tcp*, Block**, ushort*);
+void	localclose(Conv*, char*);
+void	procsyn(Conv*, Tcp*);
+void	tcpiput(Proto*, Ipifc*, Block*);
+void	tcpoutput(Conv*);
+int	tcptrim(Tcpctl*, Tcp*, Block**, ushort*);
+void	tcpstart(Conv*, int);
+void	tcptimeout(void*);
+void	tcpsndsyn(Conv*, Tcpctl*);
+void	tcprcvwin(Conv*);
+void	tcpacktimer(void*);
+void	tcpkeepalive(void*);
+void	tcpsetkacounter(Tcpctl*);
+void	tcprxmit(Conv*);
+void	tcpsettimer(Tcpctl*);
+void	tcpsynackrtt(Conv*);
+void	tcpsetscale(Conv*, Tcpctl*, ushort, ushort);
+/* handling of calls held in limbo (see struct Limbo) */
+static void limborexmit(Proto*);
+static void limbo(Conv*, uchar*, uchar*, Tcp*, int);
+
+void
+tcpsetstate(Conv *s, uchar newstate)
+{
+	Tcpctl *cb;
+	Tcppriv *priv;
+	uchar prev;
+
+	/*
+	 *  move s to newstate: keep the CurrEstab statistic in step,
+	 *  shut queues down where the new state calls for it, and
+	 *  complete a pending connect.  nothing happens if the state
+	 *  is unchanged.
+	 */
+
+	priv = s->p->priv;
+	cb = (Tcpctl*)s->ptcl;
+	prev = cb->state;
+	if(prev == newstate)
+		return;
+
+	/* CurrEstab tracks how many conversations are Established */
+	if(prev == Established)
+		priv->stats[CurrEstab]--;
+	if(newstate == Established)
+		priv->stats[CurrEstab]++;
+
+	if(newstate == Closed){
+		/* closed for good: shut every queue */
+		qclose(s->rq);
+		qclose(s->wq);
+		qclose(s->eq);
+	} else if(newstate == Close_wait){
+		/* Remote closes: readers get a hangup */
+		qhangup(s->rq, nil);
+	}
+
+	cb->state = newstate;
+
+	/* leaving Syn_sent for anything but Closed: report the connect, no error */
+	if(prev == Syn_sent && newstate != Closed)
+		Fsconnected(s, nil);
+}
+
+static char*
+tcpconnect(Conv *c, char **argv, int argc)
+{
+	char *err;
+
+	/* standard argument handling first; start the active open only if it passed */
+	err = Fsstdconnect(c, argv, argc);
+	if(err == nil)
+		tcpstart(c, TCP_CONNECT);
+
+	return err;
+}
+
+static int
+tcpstate(Conv *c, char *state, int n)
+{
+	Tcpctl *s;
+
+	s = (Tcpctl*)(c->ptcl);
+	/* one status line; note swin is printed beside rcv.scale and rwin beside snd.scale */
+	return snprint(state, n,
+		"%s qin %d qout %d srtt %d mdev %d cwin %lud swin %lud>>%d rwin %lud>>%d timer.start %d timer.count %d rerecv %d katimer.start %d katimer.count %d\n",
+		tcpstates[s->state],
+		c->rq ? qlen(c->rq) : 0,
+		c->wq ? qlen(c->wq) : 0,
+		s->srtt, s->mdev,
+		s->cwind, s->snd.wnd, s->rcv.scale, s->rcv.wnd, s->snd.scale,
+		s->timer.start, s->timer.count, s->rerecv,
+		s->katimer.start, s->katimer.count);
+}
+
+static int
+tcpinuse(Conv *c)
+{
+	/* in use in every tcp state except Closed */
+	if(((Tcpctl*)(c->ptcl))->state == Closed)
+		return 0;
+	return 1;
+}
+
+static char*
+tcpannounce(Conv *c, char **argv, int argc)
+{
+	char *err;
+
+	err = Fsstdannounce(c, argv, argc);
+	if(err == nil){
+		/* start listening, then report the announce as done */
+		tcpstart(c, TCP_LISTEN);
+		Fsconnected(c, nil);
+	}
+	return err;
+}
+
+/*
+ *  tcpclose is always called with the q locked
+ */
+static void
+tcpclose(Conv *c)
+{
+	Tcpctl *tcb = (Tcpctl*)c->ptcl;
+	uchar next;
+
+	/* hang up all three queues and throw away unread input */
+	qhangup(c->rq, nil);
+	qhangup(c->wq, nil);
+	qhangup(c->eq, nil);
+	qflush(c->rq);
+
+	switch(tcb->state) {
+	case Listen:
+		/* reset any incoming calls to this listener */
+		Fsconnected(c, "Hangup");
+		/* fall through */
+	case Closed:
+	case Syn_sent:
+		/* nothing to wind down: close on the spot */
+		localclose(c, nil);
+		return;
+	case Syn_received:
+	case Established:
+		next = Finwait1;
+		break;
+	case Close_wait:
+		next = Last_ack;
+		break;
+	default:
+		/* the remaining states need nothing from us here */
+		return;
+	}
+
+	/* one more flag in the sequence space, then let output run */
+	tcb->flgcnt++;
+	tcb->snd.nxt++;
+	tcpsetstate(c, next);
+	tcpoutput(c);
+}
+
+void
+tcpkick(void *x)
+{
+	Conv *cv;
+	Tcpctl *cb;
+	int st;
+
+	/* kick routine given to qopen for the write queue (see tcpcreate) */
+	cv = x;
+	cb = (Tcpctl*)cv->ptcl;
+
+	/* the handler is set up before qlock so that an error raised */
+	/* below releases the lock on its way out */
+	if(waserror()){
+		qunlock(cv);
+		nexterror();
+	}
+	qlock(cv);
+
+	st = cb->state;
+	if(st == Syn_sent || st == Syn_received
+	|| st == Established || st == Close_wait){
+		/* Push data */
+		tcprcvwin(cv);
+		tcpoutput(cv);
+	} else {
+		/* any other state: close the conversation with "Hangup" */
+		localclose(cv, "Hangup");
+	}
+
+	qunlock(cv);
+	poperror();
+}
+
+void
+tcprcvwin(Conv *s)				/* Call with tcb locked */
+{
+	Tcpctl *cb = (Tcpctl*)s->ptcl;
+	int room;
+
+	/* the receive window is whatever the read queue has not used up */
+	room = cb->window - qlen(s->rq);
+	if(room <= 0){
+		room = 0;
+		cb->rcv.blocked = 1;	/* window is shut */
+	}
+	cb->rcv.wnd = room;
+}
+
+void
+tcpacktimer(void *v)
+{
+	Tcpctl *tcb;
+	Conv *s;
+
+	s = v;
+	tcb = (Tcpctl*)s->ptcl;
+	/* error handler first, so an error raised below releases the qlock */
+	if(waserror()){
+		qunlock(s);
+		nexterror();
+	}
+	qlock(s);
+	if(tcb->state != Closed){
+		tcb->flags |= FORCE;	/* ask tcpoutput for a segment even with nothing queued - confirm there */
+		tcprcvwin(s);
+		tcpoutput(s);
+	}
+	qunlock(s);
+	poperror();
+}
+
+static void
+tcpcreate(Conv *c)
+{
+	c->rq = qopen(QMAX, Qcoalesce, tcpacktimer, c);	/* read queue; tcpacktimer is its kick routine */
+	c->wq = qopen((3*QMAX)/2, Qkick, tcpkick, c);		/* write queue; tcpkick is its kick routine */
+}
+
+static void
+timerstate(Tcppriv *priv, Tcptimer *t, int newstate)	/* callers in this file hold priv->tl */
+{
+	if(newstate != TcptimerON){
+		if(t->state == TcptimerON){
+			// unchain t from the doubly linked priv->timers list
+			if(priv->timers == t){
+				priv->timers = t->next;
+				if(t->prev != nil)	/* the list head must have no predecessor */
+					panic("timerstate1");
+			}
+			if(t->next)
+				t->next->prev = t->prev;
+			if(t->prev)
+				t->prev->next = t->next;
+			t->next = t->prev = nil;
+		}
+	} else {
+		if(t->state != TcptimerON){
+			// chain t at the head of priv->timers
+			if(t->prev != nil || t->next != nil)	/* an idle timer must be unlinked */
+				panic("timerstate2");
+			t->prev = nil;
+			t->next = priv->timers;
+			if(t->next)
+				t->next->prev = t;
+			priv->timers = t;
+		}
+	}
+	t->state = newstate;
+}
+
+void
+tcpackproc(void *a)
+{
+	Tcptimer *t, *tp, *timeo;
+	Proto *tcp;
+	Tcppriv *priv;
+	int loop;
+
+	tcp = a;
+	priv = tcp->priv;
+	/* timer process: once per MSPTICK, tick every active timer and run the expired ones */
+	for(;;) {
+		tsleep(&up->sleep, return0, 0, MSPTICK);
+
+		qlock(&priv->tl);
+		timeo = nil;
+		loop = 0;
+		for(t = priv->timers; t != nil; t = tp) {
+			if(loop++ > 10000)	/* guard against a corrupted (looping) list */
+				panic("tcpackproc1");
+			tp = t->next;		/* saved first: timerstate unlinks t */
+ 			if(t->state == TcptimerON) {
+				t->count--;
+				if(t->count == 0) {
+					timerstate(priv, t, TcptimerDONE);
+					t->readynext = timeo;
+					timeo = t;
+				}
+			}
+		}
+		qunlock(&priv->tl);
+		/* callbacks run after tl is released */
+		loop = 0;
+		for(t = timeo; t != nil; t = t->readynext) {
+			if(loop++ > 10000)
+				panic("tcpackproc2");
+			if(t->state == TcptimerDONE && t->func != nil && !waserror()){
+				(*t->func)(t->arg);
+				poperror();
+			}
+		}
+
+		limborexmit(tcp);
+	}
+}
+
+void
+tcpgo(Tcppriv *priv, Tcptimer *t)
+{
+	/* (re)arm t for t->start ticks; a nil timer or one with start 0 is left alone */
+	if(t != nil && t->start != 0){
+		qlock(&priv->tl);
+		t->count = t->start;
+		timerstate(priv, t, TcptimerON);
+		qunlock(&priv->tl);
+	}
+}
+
+void
+tcphalt(Tcppriv *priv, Tcptimer *t)
+{
+	/* switch t off (unlinking it if it was running); nil is tolerated */
+	if(t != nil){
+		qlock(&priv->tl);
+		timerstate(priv, t, TcptimerOFF);
+		qunlock(&priv->tl);
+	}
+}
+
+int
+backoff(int n)		/* returns 2^n, with n clamped to the range 0..30 */
+{
+	return 1 << (n < 0 ? 0 : (n > 30 ? 30 : n));	/* a shift count outside that range is undefined in C */
+}
+
+void
+localclose(Conv *s, char *reason)	/* called with tcb locked */
+{
+	Tcpctl *tcb;
+	Reseq *rp,*rp1;
+	Tcppriv *tpriv;
+
+	tpriv = s->p->priv;
+	tcb = (Tcpctl*)s->ptcl;
+	/* take s out of the conversation hash table first */
+	iphtrem(&tpriv->ht, s);
+	/* stop all four timers of this conversation */
+	tcphalt(tpriv, &tcb->timer);
+	tcphalt(tpriv, &tcb->rtt_timer);
+	tcphalt(tpriv, &tcb->acktimer);
+	tcphalt(tpriv, &tcb->katimer);
+
+	/* Flush reassembly queue; nothing more can arrive */
+	for(rp = tcb->reseq; rp != nil; rp = rp1) {
+		rp1 = rp->next;
+		freeblist(rp->bp);
+		free(rp);
+	}
+	tcb->reseq = nil;
+	/* a connect still in progress is completed with reason; a listener is woken */
+	if(tcb->state == Syn_sent)
+		Fsconnected(s, reason);
+	if(s->state == Announced)
+		wakeup(&s->listenr);
+
+	qhangup(s->rq, reason);
+	qhangup(s->wq, reason);
+
+	tcpsetstate(s, Closed);
+}
+
+/* mtu (- TCP + IP hdr len) of 1st hop */
+int
+tcpmtu(Proto *tcp, uchar *addr, int version, int *scale)
+{
+	Ipifc *ifc;
+	int hdrs, dflt, shift;
+
+	/* header overhead and fallback size; anything but V6 is treated as V4 */
+	if(version == V6){
+		hdrs = TCP6_PKT + TCP6_HDRSIZE;
+		dflt = DEF_MSS6;
+	} else {
+		hdrs = TCP4_PKT + TCP4_HDRSIZE;
+		dflt = DEF_MSS;
+	}
+
+	/* no interface for addr: default size, window scale option with shift 0 */
+	ifc = findipifc(tcp->f, addr, 0);
+	if(ifc == nil){
+		*scale = HaveWS | 0;
+		return dflt;
+	}
+
+	/* faster interfaces get a larger window scale */
+	shift = 0;
+	if(ifc->mbps > 100)
+		shift = 3;
+	else if(ifc->mbps > 10)
+		shift = 1;
+	*scale = HaveWS | shift;
+
+	return ifc->maxtu - ifc->m->hsize - hdrs;
+}
+
+void
+inittcpctl(Conv *s, int mode)
+{
+	Tcpctl *tcb;
+	Tcp4hdr* h4;
+	Tcp6hdr* h6;
+	int mss;
+
+	tcb = (Tcpctl*)s->ptcl;
+	/* start from an all-zero control block; only non-zero fields are set below */
+	memset(tcb, 0, sizeof(Tcpctl));
+
+	tcb->ssthresh = 65535;
+	tcb->srtt = tcp_irtt<<LOGAGAIN;
+	tcb->mdev = 0;
+
+	/* setup timers; start values are in ticks of MSPTICK ms */
+	tcb->timer.start = tcp_irtt / MSPTICK;
+	tcb->timer.func = tcptimeout;
+	tcb->timer.arg = s;
+	tcb->rtt_timer.start = MAX_TIME;	/* no func is set for this timer */
+	tcb->acktimer.start = TCP_ACK / MSPTICK;
+	tcb->acktimer.func = tcpacktimer;
+	tcb->acktimer.arg = s;
+	tcb->katimer.start = DEF_KAT / MSPTICK;
+	tcb->katimer.func = tcpkeepalive;
+	tcb->katimer.arg = s;
+
+	mss = DEF_MSS;
+
+	/* create a prototype(pseudo) header; skipped for TCP_LISTEN */
+	if(mode != TCP_LISTEN){
+		if(ipcmp(s->laddr, IPnoaddr) == 0)
+			findlocalip(s->p->f, s->laddr, s->raddr);
+
+		switch(s->ipversion){
+		case V4:
+			h4 = &tcb->protohdr.tcp4hdr;
+			memset(h4, 0, sizeof(*h4));
+			h4->proto = IP_TCPPROTO;
+			hnputs(h4->tcpsport, s->lport);
+			hnputs(h4->tcpdport, s->rport);
+			v6tov4(h4->tcpsrc, s->laddr);
+			v6tov4(h4->tcpdst, s->raddr);
+			break;
+		case V6:
+			h6 = &tcb->protohdr.tcp6hdr;
+			memset(h6, 0, sizeof(*h6));
+			h6->proto = IP_TCPPROTO;
+			hnputs(h6->tcpsport, s->lport);
+			hnputs(h6->tcpdport, s->rport);
+			ipmove(h6->tcpsrc, s->laddr);
+			ipmove(h6->tcpdst, s->raddr);
+			mss = DEF_MSS6;
+			break;
+		default:
+			panic("inittcpctl: version %d", s->ipversion);
+		}
+	}
+	/* the congestion window starts at one segment */
+	tcb->mss = tcb->cwind = mss;
+
+	/* default is no window scaling */
+	tcb->window = QMAX;
+	tcb->rcv.wnd = QMAX;
+	tcb->rcv.scale = 0;
+	tcb->snd.scale = 0;
+	qsetlimit(s->rq, QMAX);
+}
+
+/*
+ *  start s as a listener (TCP_LISTEN) or an active open (TCP_CONNECT); called with s qlocked
+ */
+void
+tcpstart(Conv *s, int mode)
+{
+	Tcpctl *tcb;
+	Tcppriv *tpriv;
+	char kpname[KNAMELEN];
+
+	tpriv = s->p->priv;
+	/* first use starts the timer kproc; the flag is rechecked under apl so only one is made */
+	if(tpriv->ackprocstarted == 0){
+		qlock(&tpriv->apl);
+		if(tpriv->ackprocstarted == 0){
+			snprint(kpname, sizeof(kpname), "#I%dtcpack", s->p->f->dev);	/* bounded by the buffer */
+			kproc(kpname, tcpackproc, s->p, 0);
+			tpriv->ackprocstarted = 1;
+		}
+		qunlock(&tpriv->apl);
+	}
+
+	tcb = (Tcpctl*)s->ptcl;
+
+	inittcpctl(s, mode);
+	/* enter s in the conversation hash table */
+	iphtadd(&tpriv->ht, s);
+	switch(mode) {
+	case TCP_LISTEN:
+		tpriv->stats[PassiveOpens]++;
+		tcb->flags |= CLONE;
+		tcpsetstate(s, Listen);
+		break;
+
+	case TCP_CONNECT:
+		tpriv->stats[ActiveOpens]++;
+		tcb->flags |= ACTIVE;
+		tcpsndsyn(s, tcb);	/* picks the initial sequence number and sets FORCE */
+		tcpsetstate(s, Syn_sent);
+		tcpoutput(s);
+		break;
+	}
+}
+
+static char*
+tcpflag(ushort flag)
+{
+	static char buf[128];
+	static struct { int bit; char *name; } tab[] = {
+		{ URG, " URG" }, { ACK, " ACK" }, { PSH, " PSH" },
+		{ RST, " RST" }, { SYN, " SYN" }, { FIN, " FIN" }
+	};
+	int i;
+
+	/*
+	 *  header length, then the name of each flag bit set in flag;
+	 *  the result lives in a static buffer reused by the next call
+	 */
+	sprint(buf, "%d", flag>>10);
+	for(i = 0; i < sizeof(tab)/sizeof(tab[0]); i++)
+		if(flag & tab[i].bit)
+			strcat(buf, tab[i].name);
+
+	return buf;
+}
+
+Block *
+htontcp6(Tcp *tcph, Block *data, Tcp6hdr *ph, Tcpctl *tcb)
+{
+	int dlen;
+	Tcp6hdr *h;
+	ushort csum;
+	ushort hdrlen, optpad = 0;
+	uchar *opt;
+	/* put a v6 ip+tcp header described by tcph and ph in front of data; nil on failure */
+	hdrlen = TCP6_HDRSIZE;
+	if(tcph->flags & SYN){
+		if(tcph->mss)
+			hdrlen += MSS_LENGTH;
+		if(tcph->ws)
+			hdrlen += WS_LENGTH;
+		optpad = hdrlen & 3;	/* options are padded to a multiple of 4 bytes */
+		if(optpad)
+			optpad = 4 - optpad;
+		hdrlen += optpad;
+	}
+
+	if(data) {
+		dlen = blocklen(data);
+		data = padblock(data, hdrlen + TCP6_PKT);
+		if(data == nil)
+			return nil;
+	}
+	else {
+		dlen = 0;
+		data = allocb(hdrlen + TCP6_PKT + 64);	/* the 64 pad is to meet mintu's */
+		if(data == nil)
+			return nil;
+		data->wp += hdrlen + TCP6_PKT;
+	}
+
+	/* copy in pseudo ip header plus port numbers */
+	h = (Tcp6hdr *)(data->rp);
+	memmove(h, ph, TCP6_TCBPHDRSZ);
+
+	/* compose pseudo tcp header, do cksum calculation */
+	hnputl(h->vcf, hdrlen + dlen);
+	h->ploadlen[0] = h->ploadlen[1] = h->proto = 0;
+	h->ttl = ph->proto;
+
+	/* copy in variable bits */
+	hnputl(h->tcpseq, tcph->seq);
+	hnputl(h->tcpack, tcph->ack);
+	hnputs(h->tcpflag, (hdrlen<<10) | tcph->flags);
+	hnputs(h->tcpwin, tcph->wnd>>(tcb != nil ? tcb->snd.scale : 0));
+	hnputs(h->tcpurg, tcph->urg);
+	/* options go out on SYN segments only */
+	if(tcph->flags & SYN){
+		opt = h->tcpopt;
+		if(tcph->mss != 0){
+			*opt++ = MSSOPT;
+			*opt++ = MSS_LENGTH;
+			hnputs(opt, tcph->mss);
+			opt += 2;
+		}
+		if(tcph->ws != 0){
+			*opt++ = WSOPT;
+			*opt++ = WS_LENGTH;
+			*opt++ = tcph->ws;
+		}
+		while(optpad-- > 0)
+			*opt++ = NOOPOPT;
+	}
+
+	if(tcb != nil && tcb->nochecksum){
+		h->tcpcksum[0] = h->tcpcksum[1] = 0;
+	} else {
+		csum = ptclcsum(data, TCP6_IPLEN, hdrlen+dlen+TCP6_PHDRSIZE);
+		hnputs(h->tcpcksum, csum);
+	}
+
+	/* move from pseudo header back to normal ip header */
+	memset(h->vcf, 0, 4);
+	h->vcf[0] = IP_VER6;
+	hnputs(h->ploadlen, hdrlen+dlen);
+	h->proto = ph->proto;
+
+	return data;
+}
+
+Block *
+htontcp4(Tcp *tcph, Block *data, Tcp4hdr *ph, Tcpctl *tcb)
+{
+	int dlen;
+	Tcp4hdr *h;
+	ushort csum;
+	ushort hdrlen, optpad = 0;
+	uchar *opt;
+	/* put a v4 pseudo+tcp header described by tcph and ph in front of data; nil on failure */
+	hdrlen = TCP4_HDRSIZE;
+	if(tcph->flags & SYN){
+		if(tcph->mss)
+			hdrlen += MSS_LENGTH;
+		if(tcph->ws)
+			hdrlen += WS_LENGTH;
+		optpad = hdrlen & 3;	/* options are padded to a multiple of 4 bytes */
+		if(optpad)
+			optpad = 4 - optpad;
+		hdrlen += optpad;
+	}
+
+	if(data) {
+		dlen = blocklen(data);
+		data = padblock(data, hdrlen + TCP4_PKT);
+		if(data == nil)
+			return nil;
+	}
+	else {
+		dlen = 0;
+		data = allocb(hdrlen + TCP4_PKT + 64);	/* the 64 pad is to meet mintu's */
+		if(data == nil)
+			return nil;
+		data->wp += hdrlen + TCP4_PKT;
+	}
+
+	/* copy in pseudo ip header plus port numbers */
+	h = (Tcp4hdr *)(data->rp);
+	memmove(h, ph, TCP4_TCBPHDRSZ);
+
+	/* copy in variable bits */
+	hnputs(h->tcplen, hdrlen + dlen);
+	hnputl(h->tcpseq, tcph->seq);
+	hnputl(h->tcpack, tcph->ack);
+	hnputs(h->tcpflag, (hdrlen<<10) | tcph->flags);
+	hnputs(h->tcpwin, tcph->wnd>>(tcb != nil ? tcb->snd.scale : 0));
+	hnputs(h->tcpurg, tcph->urg);
+	/* options go out on SYN segments only */
+	if(tcph->flags & SYN){
+		opt = h->tcpopt;
+		if(tcph->mss != 0){
+			*opt++ = MSSOPT;
+			*opt++ = MSS_LENGTH;
+			hnputs(opt, tcph->mss);
+			opt += 2;
+		}
+		if(tcph->ws != 0){
+			*opt++ = WSOPT;
+			*opt++ = WS_LENGTH;
+			*opt++ = tcph->ws;
+		}
+		while(optpad-- > 0)
+			*opt++ = NOOPOPT;
+	}
+	/* checksum runs from the pseudo header on, unless the conversation disabled it */
+	if(tcb != nil && tcb->nochecksum){
+		h->tcpcksum[0] = h->tcpcksum[1] = 0;
+	} else {
+		csum = ptclcsum(data, TCP4_IPLEN, hdrlen+dlen+TCP4_PHDRSIZE);
+		hnputs(h->tcpcksum, csum);
+	}
+
+	return data;
+}
+
+int
+ntohtcp6(Tcp *tcph, Block **bpp)
+{
+	Tcp6hdr *h;
+	uchar *optr;
+	ushort hdrlen, optlen;
+	int n;
+
+	/* unpack the tcp header at the front of *bpp into tcph; returns its length or -1 */
+	*bpp = pullupblock(*bpp, TCP6_PKT+TCP6_HDRSIZE);
+	if(*bpp == nil)
+		return -1;
+
+	h = (Tcp6hdr *)((*bpp)->rp);
+	tcph->source = nhgets(h->tcpsport);
+	tcph->dest = nhgets(h->tcpdport);
+	tcph->seq = nhgetl(h->tcpseq);
+	tcph->ack = nhgetl(h->tcpack);
+	hdrlen = (h->tcpflag[0]>>2) & ~3;	/* data offset field, in bytes */
+	if(hdrlen < TCP6_HDRSIZE) {
+		freeblist(*bpp);
+		return -1;
+	}
+	tcph->flags = h->tcpflag[1];
+	tcph->wnd = nhgets(h->tcpwin);
+	tcph->urg = nhgets(h->tcpurg);
+	tcph->mss = 0;
+	tcph->ws = 0;
+	tcph->len = nhgets(h->ploadlen) - hdrlen;
+
+	*bpp = pullupblock(*bpp, hdrlen+TCP6_PKT);
+	if(*bpp == nil)
+		return -1;
+	h = (Tcp6hdr *)((*bpp)->rp);	/* pullupblock may have moved the header */
+
+	optr = h->tcpopt;
+	n = hdrlen - TCP6_HDRSIZE;
+	while(n > 0 && *optr != EOLOPT) {
+		if(*optr == NOOPOPT) {
+			n--;
+			optr++;
+			continue;
+		}
+		optlen = n > 1 ? optr[1] : 0;	/* a lone last byte has no length byte to read */
+		if(optlen < 2 || optlen > n)
+			break;
+		switch(*optr) {
+		case MSSOPT:
+			if(optlen == MSS_LENGTH)
+				tcph->mss = nhgets(optr+2);
+			break;
+		case WSOPT:
+			if(optlen == WS_LENGTH && *(optr+2) <= 14)
+				tcph->ws = HaveWS | *(optr+2);
+			break;
+		}
+		n -= optlen;
+		optr += optlen;
+	}
+	return hdrlen;
+}
+
+int
+ntohtcp4(Tcp *tcph, Block **bpp)
+{
+	Tcp4hdr *h;
+	uchar *optr;
+	ushort hdrlen, optlen;
+	int n;
+
+	/* unpack the tcp header at the front of *bpp into tcph; returns its length or -1 */
+	*bpp = pullupblock(*bpp, TCP4_PKT+TCP4_HDRSIZE);
+	if(*bpp == nil)
+		return -1;
+
+	h = (Tcp4hdr *)((*bpp)->rp);
+	tcph->source = nhgets(h->tcpsport);
+	tcph->dest = nhgets(h->tcpdport);
+	tcph->seq = nhgetl(h->tcpseq);
+	tcph->ack = nhgetl(h->tcpack);
+
+	hdrlen = (h->tcpflag[0]>>2) & ~3;	/* data offset field, in bytes */
+	if(hdrlen < TCP4_HDRSIZE) {
+		freeblist(*bpp);
+		return -1;
+	}
+	tcph->flags = h->tcpflag[1];
+	tcph->wnd = nhgets(h->tcpwin);
+	tcph->urg = nhgets(h->tcpurg);
+	tcph->mss = 0;
+	tcph->ws = 0;
+	tcph->len = nhgets(h->length) - (hdrlen + TCP4_PKT);
+
+	*bpp = pullupblock(*bpp, hdrlen+TCP4_PKT);
+	if(*bpp == nil)
+		return -1;
+	h = (Tcp4hdr *)((*bpp)->rp);	/* pullupblock may have moved the header */
+
+	optr = h->tcpopt;
+	n = hdrlen - TCP4_HDRSIZE;
+	while(n > 0 && *optr != EOLOPT) {
+		if(*optr == NOOPOPT) {
+			n--;
+			optr++;
+			continue;
+		}
+		optlen = n > 1 ? optr[1] : 0;	/* a lone last byte has no length byte to read */
+		if(optlen < 2 || optlen > n)
+			break;
+		switch(*optr) {
+		case MSSOPT:
+			if(optlen == MSS_LENGTH)
+				tcph->mss = nhgets(optr+2);
+			break;
+		case WSOPT:
+			if(optlen == WS_LENGTH && *(optr+2) <= 14)
+				tcph->ws = HaveWS | *(optr+2);
+			break;
+		}
+		n -= optlen;
+		optr += optlen;
+	}
+	return hdrlen;
+}
+
+/*
+ *  For outgoing calls: pick a random initial send sequence number,
+ *  point every send-side sequence variable at it, and account for
+ *  the SYN so that the next output pass transmits it.
+ */
+void
+tcpsndsyn(Conv *s, Tcpctl *tcb)
+{
+	/* two 16 bit random halves make up the 32 bit iss */
+	tcb->iss = (nrand(1<<16)<<16)|nrand(1<<16);
+
+	/* nothing is in flight yet: all send pointers start at iss */
+	tcb->rttseq = tcb->iss;
+	tcb->snd.una = tcb->iss;
+	tcb->snd.wl2 = tcb->iss;
+	tcb->snd.nxt = tcb->rttseq;
+	tcb->snd.ptr = tcb->rttseq;
+
+	/* the SYN counts as one queued flag; force it out */
+	tcb->sndsyntime = NOW;
+	tcb->flags |= FORCE;
+	tcb->flgcnt++;
+
+	/* set desired mss and scale */
+	tcb->mss = tcpmtu(s->p, s->laddr, s->ipversion, &tcb->scale);
+}
+
+/*
+ *  Answer segment seg (which arrived from source for dest) with a
+ *  reset.  seg is rewritten in place to describe the reply, so the
+ *  caller's copy is clobbered.  length is the amount of data the
+ *  offending segment carried; reason is only logged.  Segments that
+ *  are themselves resets are never answered.
+ */
+void
+sndrst(Proto *tcp, uchar *source, uchar *dest, ushort length, Tcp *seg, uchar version, char *reason)
+{
+	Tcp4hdr ph4;
+	Tcp6hdr ph6;
+	Tcppriv *tpriv;
+	Block *pkt;
+	uchar rflags;
+
+	netlog(tcp->f, Logtcp, "sndrst: %s", reason);
+
+	tpriv = tcp->priv;
+
+	if(seg->flags & RST)
+		return;
+
+	/* make pseudo header; the reply goes back the way the segment came */
+	if(version == V4){
+		memset(&ph4, 0, sizeof(ph4));
+		ph4.vihl = IP_VER4;
+		v6tov4(ph4.tcpsrc, dest);
+		v6tov4(ph4.tcpdst, source);
+		ph4.proto = IP_TCPPROTO;
+		hnputs(ph4.tcplen, TCP4_HDRSIZE);
+		hnputs(ph4.tcpsport, seg->dest);
+		hnputs(ph4.tcpdport, seg->source);
+	} else if(version == V6){
+		memset(&ph6, 0, sizeof(ph6));
+		ph6.vcf[0] = IP_VER6;
+		ipmove(ph6.tcpsrc, dest);
+		ipmove(ph6.tcpdst, source);
+		ph6.proto = IP_TCPPROTO;
+		hnputs(ph6.ploadlen, TCP6_HDRSIZE);
+		hnputs(ph6.tcpsport, seg->dest);
+		hnputs(ph6.tcpdport, seg->source);
+	} else
+		panic("sndrst: version %d", version);
+
+	tpriv->stats[OutRsts]++;
+
+	/* convince the other end that this reset is in band */
+	rflags = RST;
+	if((seg->flags & ACK) == 0) {
+		/* no ack to echo: acknowledge everything the segment occupied */
+		rflags |= ACK;
+		seg->ack = seg->seq;
+		seg->seq = 0;
+		seg->ack += length;
+		if(seg->flags & SYN)
+			seg->ack++;
+		if(seg->flags & FIN)
+			seg->ack++;
+	} else {
+		/* use the sequence number the peer said it expects */
+		seg->seq = seg->ack;
+		seg->ack = 0;
+	}
+	seg->flags = rflags;
+	seg->ws = 0;
+	seg->mss = 0;
+	seg->urg = 0;
+	seg->wnd = 0;
+
+	if(version == V4){
+		pkt = htontcp4(seg, nil, &ph4, nil);
+		if(pkt == nil)
+			return;
+		ipoput4(tcp->f, pkt, 0, MAXTTL, DFLTTOS, nil);
+	} else if(version == V6){
+		pkt = htontcp6(seg, nil, &ph6, nil);
+		if(pkt == nil)
+			return;
+		ipoput6(tcp->f, pkt, 0, MAXTTL, DFLTTOS, nil);
+	} else
+		panic("sndrst2: version %d", version);
+}
+
+/*
+ *  send a reset to the remote side and close the conversation
+ *  called with s qlocked
+ *
+ *  The reset is best effort: it is only attempted when a remote
+ *  address is set, and an error raised while sending it is caught
+ *  and ignored so that localclose always runs.  Returns nil on
+ *  success or the result of commonerror() if closing raised.
+ */
+char*
+tcphangup(Conv *s)
+{
+	Tcp seg;
+	Tcpctl *tcb;
+	Block *hbp;
+
+	tcb = (Tcpctl*)s->ptcl;
+	if(waserror())
+		return commonerror();
+	if(ipcmp(s->raddr, IPnoaddr) != 0) {
+		if(!waserror()){
+			memset(&seg, 0, sizeof seg);
+			seg.flags = RST | ACK;
+			seg.ack = tcb->rcv.nxt;
+			tcb->rcv.una = 0;
+			seg.seq = tcb->snd.ptr;
+			seg.wnd = 0;
+			seg.urg = 0;
+			seg.mss = 0;
+			seg.ws = 0;
+			switch(s->ipversion) {
+			case V4:
+				tcb->protohdr.tcp4hdr.vihl = IP_VER4;
+				hbp = htontcp4(&seg, nil, &tcb->protohdr.tcp4hdr, tcb);
+				/* as in sndrst/sndsynack: no header block, nothing to send */
+				if(hbp != nil)
+					ipoput4(s->p->f, hbp, 0, s->ttl, s->tos, s);
+				break;
+			case V6:
+				tcb->protohdr.tcp6hdr.vcf[0] = IP_VER6;
+				hbp = htontcp6(&seg, nil, &tcb->protohdr.tcp6hdr, tcb);
+				if(hbp != nil)
+					ipoput6(s->p->f, hbp, 0, s->ttl, s->tos, s);
+				break;
+			default:
+				panic("tcphangup: version %d", s->ipversion);
+			}
+			poperror();
+		}
+	}
+	localclose(s, nil);
+	poperror();
+	return nil;
+}
+
+/*
+ *  (re)send a SYN ACK for the call lp held in limbo.
+ *
+ *  Records the send scale chosen in lp->sndscale and the time of
+ *  transmission in lp->lastsend.  Returns 0 once the packet has
+ *  been handed to ipoput, -1 if no header block could be built.
+ */
+int
+sndsynack(Proto *tcp, Limbo *lp)
+{
+	Block *hbp;
+	Tcp4hdr ph4;
+	Tcp6hdr ph6;
+	Tcp seg;
+	int scale;
+
+	/* make pseudo header */
+	switch(lp->version) {
+	case V4:
+		memset(&ph4, 0, sizeof(ph4));
+		ph4.vihl = IP_VER4;
+		v6tov4(ph4.tcpsrc, lp->laddr);
+		v6tov4(ph4.tcpdst, lp->raddr);
+		ph4.proto = IP_TCPPROTO;
+		hnputs(ph4.tcplen, TCP4_HDRSIZE);
+		hnputs(ph4.tcpsport, lp->lport);
+		hnputs(ph4.tcpdport, lp->rport);
+		break;
+	case V6:
+		memset(&ph6, 0, sizeof(ph6));
+		ph6.vcf[0] = IP_VER6;
+		ipmove(ph6.tcpsrc, lp->laddr);
+		ipmove(ph6.tcpdst, lp->raddr);
+		ph6.proto = IP_TCPPROTO;
+		hnputs(ph6.ploadlen, TCP6_HDRSIZE);
+		hnputs(ph6.tcpsport, lp->lport);
+		hnputs(ph6.tcpdport, lp->rport);
+		break;
+	default:
+		panic("sndsynack: version %d", lp->version);
+	}
+
+	/* start from a clean segment so no field is passed on uninitialized */
+	memset(&seg, 0, sizeof seg);
+	seg.seq = lp->iss;
+	seg.ack = lp->irs+1;
+	seg.flags = SYN|ACK;
+	seg.urg = 0;
+	seg.mss = tcpmtu(tcp, lp->laddr, lp->version, &scale);
+	seg.wnd = QMAX;
+
+	/* if the other side set scale, we should too */
+	if(lp->rcvscale){
+		seg.ws = scale;
+		lp->sndscale = scale;
+	} else {
+		seg.ws = 0;
+		lp->sndscale = 0;
+	}
+
+	switch(lp->version) {
+	case V4:
+		hbp = htontcp4(&seg, nil, &ph4, nil);
+		if(hbp == nil)
+			return -1;
+		ipoput4(tcp->f, hbp, 0, MAXTTL, DFLTTOS, nil);
+		break;
+	case V6:
+		hbp = htontcp6(&seg, nil, &ph6, nil);
+		if(hbp == nil)
+			return -1;
+		ipoput6(tcp->f, hbp, 0, MAXTTL, DFLTTOS, nil);
+		break;
+	default:
+		panic("sndsynack2: version %d", lp->version);
+	}
+	lp->lastsend = NOW;
+	return 0;
+}
+
+/* limbo hash bucket: low two address bytes plus the port, masked to the table size */
+#define hashipa(a, p) ( ( (a)[IPaddrlen-2] + (a)[IPaddrlen-1] + (p) )&LHTMASK )
+
+/*
+ *  put a call into limbo and respond with a SYN ACK
+ *
+ *  A repeated SYN for a call already in limbo only refreshes its
+ *  initial receive sequence number.  When the table is full
+ *  (Maxlimbo) the oldest entry of the same hash bucket is recycled
+ *  instead of allocating.  If the SYN ACK cannot be sent the entry
+ *  is dropped again.
+ *
+ *  called with proto locked
+ */
+static void
+limbo(Conv *s, uchar *source, uchar *dest, Tcp *seg, int version)
+{
+	Limbo *lp, **l;
+	Tcppriv *tpriv;
+	int h;
+
+	tpriv = s->p->priv;
+	h = hashipa(source, seg->source);
+
+	/* on exit l addresses either the matching entry or the bucket's tail link */
+	for(l = &tpriv->lht[h]; *l != nil; l = &lp->next){
+		lp = *l;
+		if(lp->lport != seg->dest || lp->rport != seg->source || lp->version != version)
+			continue;
+		if(ipcmp(lp->raddr, source) != 0)
+			continue;
+		if(ipcmp(lp->laddr, dest) != 0)
+			continue;
+
+		/* each new SYN restarts the retransmits */
+		lp->irs = seg->seq;
+		break;
+	}
+	lp = *l;
+	if(lp == nil){
+		if(tpriv->nlimbo >= Maxlimbo && tpriv->lht[h]){
+			/* table full: reuse the entry at the head of this bucket */
+			lp = tpriv->lht[h];
+			tpriv->lht[h] = lp->next;
+			/*
+			 *  l is the tail link.  If the head was the only
+			 *  entry, that link is lp's own next field; linking
+			 *  through it would make lp point at itself and
+			 *  vanish from the (now empty) bucket.
+			 */
+			if(l == &lp->next)
+				l = &tpriv->lht[h];
+			lp->next = nil;
+			/* it stands for a new call: restart its retransmit count */
+			lp->rexmits = 0;
+		} else {
+			lp = malloc(sizeof(*lp));
+			if(lp == nil)
+				return;
+			tpriv->nlimbo++;
+		}
+		*l = lp;
+		lp->version = version;
+		ipmove(lp->laddr, dest);
+		ipmove(lp->raddr, source);
+		lp->lport = seg->dest;
+		lp->rport = seg->source;
+		lp->mss = seg->mss;
+		lp->rcvscale = seg->ws;
+		lp->irs = seg->seq;
+		lp->iss = (nrand(1<<16)<<16)|nrand(1<<16);
+	}
+
+	if(sndsynack(s->p, lp) < 0){
+		*l = lp->next;
+		tpriv->nlimbo--;
+		free(lp);
+	}
+}
+
+/*
+ *  resend SYN ACK's once every SYNACK_RXTIMER ms.
+ *
+ *  Walks every limbo bucket.  An entry whose retransmit interval
+ *  has elapsed is either dropped (more than 5 retransmits, or the
+ *  SYN ACK could not be sent) or sent another SYN ACK.  Does
+ *  nothing if the proto lock cannot be taken without blocking.
+ */
+static void
+limborexmit(Proto *tcp)
+{
+	Tcppriv *tpriv;
+	Limbo **l, *lp;
+	int h;
+	int seen;
+	ulong now;
+
+	tpriv = tcp->priv;
+
+	if(!canqlock(tcp))
+		return;
+	/* seen counts surviving entries passed, so the scan can stop early */
+	seen = 0;
+	now = NOW;
+	for(h = 0; h < NLHT && seen < tpriv->nlimbo; h++){
+		for(l = &tpriv->lht[h]; *l != nil && seen < tpriv->nlimbo; ){
+			lp = *l;
+
+			/* not due yet: step over it */
+			if(now - lp->lastsend < (lp->rexmits+1)*SYNACK_RXTIMER){
+				seen++;
+				l = &lp->next;
+				continue;
+			}
+
+			/* give up once it has been retried too often */
+			if(++(lp->rexmits) > 5){
+				tpriv->nlimbo--;
+				*l = lp->next;
+				free(lp);
+				continue;
+			}
+
+			/* if we're being attacked, don't bother resending SYN ACK's */
+			if(tpriv->nlimbo > 100){
+				seen++;
+				l = &lp->next;
+				continue;
+			}
+
+			if(sndsynack(tcp, lp) < 0){
+				tpriv->nlimbo--;
+				*l = lp->next;
+				free(lp);
+				continue;
+			}
+
+			seen++;
+			l = &lp->next;
+		}
+	}
+	qunlock(tcp);
+}
+
+/*
+ *  A reset arrived for a listener: find the matching call in
+ *  limbo and, if the reset's sequence number directly follows the
+ *  SYN we recorded, discard the call.
+ *
+ *  called with proto locked
+ */
+static void
+limborst(Conv *s, Tcp *segp, uchar *src, uchar *dst, uchar version)
+{
+	Tcppriv *tpriv;
+	Limbo **link, *ent;
+
+	tpriv = s->p->priv;
+
+	link = &tpriv->lht[hashipa(src, segp->source)];
+	while((ent = *link) != nil){
+		if(ent->lport == segp->dest && ent->rport == segp->source
+		&& ent->version == version
+		&& ipcmp(ent->laddr, dst) == 0
+		&& ipcmp(ent->raddr, src) == 0){
+			/* RST can only follow the SYN */
+			if(segp->seq == ent->irs+1){
+				tpriv->nlimbo--;
+				*link = ent->next;
+				free(ent);
+			}
+			/* only the first matching call is considered */
+			return;
+		}
+		link = &ent->next;
+	}
+}
+
+/*
+ *  come here when we finally get an ACK to our SYN-ACK.
+ *  lookup call in limbo.  if found, create a new conversation
+ *  cloned from the listener s, seed it from the limbo entry,
+ *  put it in state Established and add it to the conversation
+ *  hash table.
+ *
+ *  Returns the new conversation, or nil if the segment is not a
+ *  plain ACK, matches no call in limbo, carries the wrong sequence
+ *  numbers, or no conversation could be created.
+ *
+ *  called with proto locked
+ */
+static Conv*
+tcpincoming(Conv *s, Tcp *segp, uchar *src, uchar *dst, uchar version)
+{
+	Conv *new;
+	Tcpctl *tcb;
+	Tcppriv *tpriv;
+	Tcp4hdr *h4;
+	Tcp6hdr *h6;
+	Limbo *lp, **l;
+	int h;
+
+	/* unless it's just an ack, it can't be someone coming out of limbo */
+	if((segp->flags & SYN) || (segp->flags & ACK) == 0)
+		return nil;
+
+	tpriv = s->p->priv;
+
+	/* find a call in limbo */
+	h = hashipa(src, segp->source);
+	for(l = &tpriv->lht[h]; (lp = *l) != nil; l = &lp->next){
+		netlog(s->p->f, Logtcp, "tcpincoming s %I,%ux/%I,%ux d %I,%ux/%I,%ux v %d/%d",
+			src, segp->source, lp->raddr, lp->rport,
+			dst, segp->dest, lp->laddr, lp->lport,
+			version, lp->version
+		);
+
+		if(lp->lport != segp->dest || lp->rport != segp->source || lp->version != version)
+			continue;
+		if(ipcmp(lp->laddr, dst) != 0)
+			continue;
+		if(ipcmp(lp->raddr, src) != 0)
+			continue;
+
+		/* we're assuming no data with the initial SYN */
+		if(segp->seq != lp->irs+1 || segp->ack != lp->iss+1){
+			netlog(s->p->f, Logtcp, "tcpincoming s %lux/%lux a %lux %lux",
+				segp->seq, lp->irs+1, segp->ack, lp->iss+1);
+			/* wrong numbers: leave the call in limbo and report no match */
+			lp = nil;
+		} else {
+			/* take the call out of limbo; lp is now ours to free */
+			tpriv->nlimbo--;
+			*l = lp->next;
+		}
+		break;
+	}
+	if(lp == nil)
+		return nil;
+
+	new = Fsnewcall(s, src, segp->source, dst, segp->dest, version);
+	if(new == nil){
+		/* the entry is already unlinked and uncounted; don't leak it */
+		free(lp);
+		return nil;
+	}
+
+	/* start from a copy of the listener's control block */
+	memmove(new->ptcl, s->ptcl, sizeof(Tcpctl));
+	tcb = (Tcpctl*)new->ptcl;
+	tcb->flags &= ~CLONE;
+	/* the copied timers still refer to the listener; rebind and stop them */
+	tcb->timer.arg = new;
+	tcb->timer.state = TcptimerOFF;
+	tcb->acktimer.arg = new;
+	tcb->acktimer.state = TcptimerOFF;
+	tcb->katimer.arg = new;
+	tcb->katimer.state = TcptimerOFF;
+	tcb->rtt_timer.arg = new;
+	tcb->rtt_timer.state = TcptimerOFF;
+
+	tcb->irs = lp->irs;
+	tcb->rcv.nxt = tcb->irs+1;
+	tcb->rcv.urg = tcb->rcv.nxt;
+
+	/* our SYN has been acked, so everything starts one past iss */
+	tcb->iss = lp->iss;
+	tcb->rttseq = tcb->iss;
+	tcb->snd.wl2 = tcb->iss;
+	tcb->snd.una = tcb->iss+1;
+	tcb->snd.ptr = tcb->iss+1;
+	tcb->snd.nxt = tcb->iss+1;
+	tcb->flgcnt = 0;
+	tcb->flags |= SYNACK;
+
+	/* our sending max segment size cannot be bigger than what he asked for */
+	if(lp->mss != 0 && lp->mss < tcb->mss)
+		tcb->mss = lp->mss;
+
+	/* window scaling */
+	tcpsetscale(new, tcb, lp->rcvscale, lp->sndscale);
+
+	/* the congestion window always starts out as a single segment */
+	tcb->snd.wnd = segp->wnd;
+	tcb->cwind = tcb->mss;
+
+	/* set initial round trip time */
+	tcb->sndsyntime = lp->lastsend+lp->rexmits*SYNACK_RXTIMER;
+	tcpsynackrtt(new);
+
+	free(lp);
+
+	/* set up proto header */
+	switch(version){
+	case V4:
+		h4 = &tcb->protohdr.tcp4hdr;
+		memset(h4, 0, sizeof(*h4));
+		h4->proto = IP_TCPPROTO;
+		hnputs(h4->tcpsport, new->lport);
+		hnputs(h4->tcpdport, new->rport);
+		v6tov4(h4->tcpsrc, dst);
+		v6tov4(h4->tcpdst, src);
+		break;
+	case V6:
+		h6 = &tcb->protohdr.tcp6hdr;
+		memset(h6, 0, sizeof(*h6));
+		h6->proto = IP_TCPPROTO;
+		hnputs(h6->tcpsport, new->lport);
+		hnputs(h6->tcpdport, new->rport);
+		ipmove(h6->tcpsrc, dst);
+		ipmove(h6->tcpdst, src);
+		break;
+	default:
+		panic("tcpincoming: version %d", new->ipversion);
+	}
+
+	tcpsetstate(new, Established);
+
+	iphtadd(&tpriv->ht, new);
+
+	return new;
+}
+
+/*
+ *  Report whether x lies in the closed sequence interval
+ *  [low, high], where the interval may wrap past zero.
+ */
+int
+seq_within(ulong x, ulong low, ulong high)
+{
+	/* wrapped interval: x is inside if it sits on either side of the wrap */
+	if(low > high)
+		return x >= low || x <= high;
+	return low <= x && x <= high;
+}
+
+/* x < y in sequence space: the difference, viewed as an int, is negative */
+int
+seq_lt(ulong x, ulong y)
+{
+	int d;
+
+	d = (int)(x-y);
+	return d < 0;
+}
+
+/* x <= y in sequence space: the difference, viewed as an int, is not positive */
+int
+seq_le(ulong x, ulong y)
+{
+	int d;
+
+	d = (int)(x-y);
+	return d <= 0;
+}
+
+/* x > y in sequence space: the difference, viewed as an int, is positive */
+int
+seq_gt(ulong x, ulong y)
+{
+	int d;
+
+	d = (int)(x-y);
+	return d > 0;
+}
+
+/* x >= y in sequence space: the difference, viewed as an int, is not negative */
+int
+seq_ge(ulong x, ulong y)
+{
+	int d;
+
+	d = (int)(x-y);
+	return d >= 0;
+}
+
+/*
+ *  Seed the round trip estimators from the time that passed
+ *  between sending the first SYN (tcb->sndsyntime) and now, when
+ *  its ack is being processed, then stop the round trip timer.
+ */
+void
+tcpsynackrtt(Conv *s)
+{
+	Tcppriv *tpriv;
+	Tcpctl *tcb;
+	int elapsed;
+
+	tpriv = s->p->priv;
+	tcb = (Tcpctl*)s->ptcl;
+
+	/* both estimators are kept scaled by their gain shift */
+	elapsed = NOW - tcb->sndsyntime;
+	tcb->mdev = elapsed<<LOGDGAIN;
+	tcb->srtt = elapsed<<LOGAGAIN;
+
+	/* halt round trip timer */
+	tcphalt(tpriv, &tcb->rtt_timer);
+}
+
+/*
+ *  Process the acknowledgement carried by seg for conversation s:
+ *  count duplicate acks (triggering a fast retransmit at
+ *  TCPREXMTTHRESH), update the send window, grow the congestion
+ *  window, refresh the round trip estimate, discard acked data
+ *  from the write queue and restart or stop the retransmit timer.
+ */
+void
+update(Conv *s, Tcp *seg)
+{
+	int rtt, delta;
+	Tcpctl *tcb;
+	ulong acked;
+	ulong expand;
+	Tcppriv *tpriv;
+
+	tpriv = s->p->priv;
+	tcb = (Tcpctl*)s->ptcl;
+
+	/* ack is beyond anything we have sent: force an output and ignore it */
+	if(seq_gt(seg->ack, tcb->snd.nxt)) {
+		tcb->flags |= FORCE;
+		return;
+	}
+
+	/* added by Dong Lin for fast retransmission */
+	if(seg->ack == tcb->snd.una
+	&& tcb->snd.una != tcb->snd.nxt
+	&& seg->len == 0
+	&& seg->wnd == tcb->snd.wnd) {
+
+		/* this is a pure ack w/o window update */
+		netlog(s->p->f, Logtcprxmt, "dupack %lud ack %lud sndwnd %d advwin %d\n",
+			tcb->snd.dupacks, seg->ack, tcb->snd.wnd, seg->wnd);
+
+		if(++tcb->snd.dupacks == TCPREXMTTHRESH) {
+			/*
+			 *  tahoe tcp rxt the packet, half sshthresh,
+			 *  and set cwnd to one packet
+			 */
+			tcb->snd.recovery = 1;
+			tcb->snd.rxt = tcb->snd.nxt;
+			netlog(s->p->f, Logtcprxmt, "fast rxt %lud, nxt %lud\n", tcb->snd.una, tcb->snd.nxt);
+			tcprxmit(s);
+		} else {
+			/* do reno tcp here. */
+		}
+	}
+
+	/*
+	 *  update window: take it from a newer ack, or from the same
+	 *  ack if it advertises a larger window
+	 */
+	if(seq_gt(seg->ack, tcb->snd.wl2)
+	|| (tcb->snd.wl2 == seg->ack && seg->wnd > tcb->snd.wnd)){
+		tcb->snd.wnd = seg->wnd;
+		tcb->snd.wl2 = seg->ack;
+	}
+
+	/* nothing new is acknowledged; the rest only applies to fresh acks */
+	if(!seq_gt(seg->ack, tcb->snd.una)){
+		/*
+		 *  don't let us hangup if sending into a closed window and
+		 *  we're still getting acks
+		 */
+		if((tcb->flags&RETRAN) && tcb->snd.wnd == 0){
+			tcb->backedoff = MAXBACKMS/4;
+		}
+		return;
+	}
+
+	/*
+	 *  any positive ack turns off fast rxt,
+	 *  (should we do new-reno on partial acks?)
+	 */
+	if(!tcb->snd.recovery || seq_ge(seg->ack, tcb->snd.rxt)) {
+		tcb->snd.dupacks = 0;
+		tcb->snd.recovery = 0;
+	} else
+		netlog(s->p->f, Logtcp, "rxt next %lud, cwin %ud\n", seg->ack, tcb->cwind);
+
+	/* Compute the new send window size */
+	acked = seg->ack - tcb->snd.una;
+
+	/*
+	 *  avoid slow start and timers for SYN acks; the SYN takes a
+	 *  sequence number and a flag count but no byte of queued data
+	 */
+	if((tcb->flags & SYNACK) == 0) {
+		tcb->flags |= SYNACK;
+		acked--;
+		tcb->flgcnt--;
+		goto done;
+	}
+
+	/* slow start as long as we're not recovering from lost packets */
+	if(tcb->cwind < tcb->snd.wnd && !tcb->snd.recovery) {
+		if(tcb->cwind < tcb->ssthresh) {
+			/* below ssthresh: grow by what was acked, at most one mss */
+			expand = tcb->mss;
+			if(acked < expand)
+				expand = acked;
+		}
+		else
+			expand = ((int)tcb->mss * tcb->mss) / tcb->cwind;
+
+		/* clamp on arithmetic wrap and at the advertised window */
+		if(tcb->cwind + expand < tcb->cwind)
+			expand = tcb->snd.wnd - tcb->cwind;
+		if(tcb->cwind + expand > tcb->snd.wnd)
+			expand = tcb->snd.wnd - tcb->cwind;
+		tcb->cwind += expand;
+	}
+
+	/* Adjust the timers according to the round trip time */
+	if(tcb->rtt_timer.state == TcptimerON && seq_ge(seg->ack, tcb->rttseq)) {
+		tcphalt(tpriv, &tcb->rtt_timer);
+		/* only sample when the timed data was not retransmitted */
+		if((tcb->flags&RETRAN) == 0) {
+			tcb->backoff = 0;
+			tcb->backedoff = 0;
+			rtt = tcb->rtt_timer.start - tcb->rtt_timer.count;
+			if(rtt == 0)
+				rtt = 1;	/* otherwise all close systems will rexmit in 0 time */
+			rtt *= MSPTICK;
+			if(tcb->srtt == 0) {
+				/* first sample seeds both estimators */
+				tcb->srtt = rtt << LOGAGAIN;
+				tcb->mdev = rtt << LOGDGAIN;
+			} else {
+				delta = rtt - (tcb->srtt>>LOGAGAIN);
+				tcb->srtt += delta;
+				if(tcb->srtt <= 0)
+					tcb->srtt = 1;
+
+				delta = abs(delta) - (tcb->mdev>>LOGDGAIN);
+				tcb->mdev += delta;
+				if(tcb->mdev <= 0)
+					tcb->mdev = 1;
+			}
+			tcpsettimer(tcb);
+		}
+	}
+
+done:
+	/*
+	 *  fewer bytes in the write queue than were acked means the ack
+	 *  also covered a flag (presumably our FIN -- confirm)
+	 */
+	if(qdiscard(s->wq, acked) < acked)
+		tcb->flgcnt--;
+
+	tcb->snd.una = seg->ack;
+	if(seq_gt(seg->ack, tcb->snd.urg))
+		tcb->snd.urg = seg->ack;
+
+	/* keep the retransmit timer running only while data is outstanding */
+	if(tcb->snd.una != tcb->snd.nxt)
+		tcpgo(tpriv, &tcb->timer);
+	else
+		tcphalt(tpriv, &tcb->timer);
+
+	if(seq_lt(tcb->snd.ptr, tcb->snd.una))
+		tcb->snd.ptr = tcb->snd.una;
+
+	tcb->flags &= ~RETRAN;
+	tcb->backoff = 0;
+	tcb->backedoff = 0;
+}
+
+/*
+ *  Input routine for TCP segments, IPv4 or IPv6.
+ *
+ *  Verifies the checksum, unpacks the header into seg, trims the
+ *  block to the payload, finds the conversation (going through
+ *  the limbo table for listeners), and then runs the input state
+ *  machine with the conversation locked.  Segments that match no
+ *  conversation are answered with a reset.  bp is consumed on
+ *  every path.
+ */
+void
+tcpiput(Proto *tcp, Ipifc*, Block *bp)
+{
+	Tcp seg;
+	Tcp4hdr *h4;
+	Tcp6hdr *h6;
+	int hdrlen;
+	Tcpctl *tcb;
+	ushort length;
+	uchar source[IPaddrlen], dest[IPaddrlen];
+	Conv *s;
+	Fs *f;
+	Tcppriv *tpriv;
+	uchar version;
+
+	f = tcp->f;
+	tpriv = tcp->priv;
+
+	tpriv->stats[InSegs]++;
+
+	/* both views of the same bytes; the version nibble picks one */
+	h4 = (Tcp4hdr*)(bp->rp);
+	h6 = (Tcp6hdr*)(bp->rp);
+
+	if((h4->vihl&0xF0)==IP_VER4) {
+		version = V4;
+		length = nhgets(h4->length);
+		v4tov6(dest, h4->tcpdst);
+		v4tov6(source, h4->tcpsrc);
+
+		/* turn the ip header into the pseudo header for the checksum */
+		h4->Unused = 0;
+		hnputs(h4->tcplen, length-TCP4_PKT);
+		/* skipped when the block is flagged Btcpck or the checksum field is zero */
+		if(!(bp->flag & Btcpck) && (h4->tcpcksum[0] || h4->tcpcksum[1]) &&
+			ptclcsum(bp, TCP4_IPLEN, length-TCP4_IPLEN)) {
+			tpriv->stats[CsumErrs]++;
+			tpriv->stats[InErrs]++;
+			netlog(f, Logtcp, "bad tcp proto cksum\n");
+			freeblist(bp);
+			return;
+		}
+
+		/* on failure ntohtcp4 has disposed of bp; do not free it here */
+		hdrlen = ntohtcp4(&seg, &bp);
+		if(hdrlen < 0){
+			tpriv->stats[HlenErrs]++;
+			tpriv->stats[InErrs]++;
+			netlog(f, Logtcp, "bad tcp hdr len\n");
+			return;
+		}
+
+		/* trim the packet to the size claimed by the datagram */
+		length -= hdrlen+TCP4_PKT;
+		bp = trimblock(bp, hdrlen+TCP4_PKT, length);
+		if(bp == nil){
+			tpriv->stats[LenErrs]++;
+			tpriv->stats[InErrs]++;
+			netlog(f, Logtcp, "tcp len < 0 after trim\n");
+			return;
+		}
+	}
+	else {
+		int ttl = h6->ttl;
+		int proto = h6->proto;
+
+		version = V6;
+		length = nhgets(h6->ploadlen);
+		ipmove(dest, h6->tcpdst);
+		ipmove(source, h6->tcpsrc);
+
+		/*
+		 *  temporarily rewrite the ip header fields into pseudo
+		 *  header form for the checksum; they are restored below
+		 */
+		h6->ploadlen[0] = h6->ploadlen[1] = h6->proto = 0;
+		h6->ttl = proto;
+		hnputl(h6->vcf, length);
+		if((h6->tcpcksum[0] || h6->tcpcksum[1]) &&
+			ptclcsum(bp, TCP6_IPLEN, length+TCP6_PHDRSIZE)) {
+			tpriv->stats[CsumErrs]++;
+			tpriv->stats[InErrs]++;
+			netlog(f, Logtcp, "bad tcp proto cksum\n");
+			freeblist(bp);
+			return;
+		}
+		h6->ttl = ttl;
+		h6->proto = proto;
+		hnputs(h6->ploadlen, length);
+
+		/* on failure ntohtcp6 has disposed of bp; do not free it here */
+		hdrlen = ntohtcp6(&seg, &bp);
+		if(hdrlen < 0){
+			tpriv->stats[HlenErrs]++;
+			tpriv->stats[InErrs]++;
+			netlog(f, Logtcp, "bad tcp hdr len\n");
+			return;
+		}
+
+		/* trim the packet to the size claimed by the datagram */
+		length -= hdrlen;
+		bp = trimblock(bp, hdrlen+TCP6_PKT, length);
+		if(bp == nil){
+			tpriv->stats[LenErrs]++;
+			tpriv->stats[InErrs]++;
+			netlog(f, Logtcp, "tcp len < 0 after trim\n");
+			return;
+		}
+	}
+
+	/* lock protocol while searching for a conversation */
+	qlock(tcp);
+
+	/* Look for a matching conversation */
+	s = iphtlook(&tpriv->ht, source, seg.source, dest, seg.dest);
+	if(s == nil){
+		netlog(f, Logtcp, "iphtlook failed");
+reset:
+		/* also reached from the listener path below, with tcp still locked */
+		qunlock(tcp);
+		sndrst(tcp, source, dest, length, &seg, version, "no conversation");
+		freeblist(bp);
+		return;
+	}
+
+	/* if it's a listener, look for the right flags and get a new conv */
+	tcb = (Tcpctl*)s->ptcl;
+	if(tcb->state == Listen){
+		if(seg.flags & RST){
+			limborst(s, &seg, source, dest, version);
+			qunlock(tcp);
+			freeblist(bp);
+			return;
+		}
+
+		/* if this is a new SYN, put the call into limbo */
+		if((seg.flags & SYN) && (seg.flags & ACK) == 0){
+			limbo(s, source, dest, &seg, version);
+			qunlock(tcp);
+			freeblist(bp);
+			return;
+		}
+
+		/*
+		 *  if there's a matching call in limbo, tcpincoming will
+		 *  return a new conversation for it (tcpincoming in this
+		 *  file leaves it in state Established)
+		 */
+		s = tcpincoming(s, &seg, source, dest, version);
+		if(s == nil)
+			goto reset;
+	}
+
+	/* The rest of the input state machine is run with the control block
+	 * locked and implements the state machine directly out of the RFC.
+	 * Out-of-band data is ignored - it was always a bad idea.
+	 */
+	tcb = (Tcpctl*)s->ptcl;
+	if(waserror()){
+		qunlock(s);
+		nexterror();
+	}
+	/* take the conversation lock before letting go of the proto lock */
+	qlock(s);
+	qunlock(tcp);
+
+	/* fix up window */
+	seg.wnd <<= tcb->rcv.scale;
+
+	/* every input packet in puts off the keep alive time out */
+	tcpsetkacounter(tcb);
+
+	switch(tcb->state) {
+	case Closed:
+		sndrst(tcp, source, dest, length, &seg, version, "sending to Closed");
+		goto raise;
+	case Syn_sent:
+		if(seg.flags & ACK) {
+			if(!seq_within(seg.ack, tcb->iss+1, tcb->snd.nxt)) {
+				sndrst(tcp, source, dest, length, &seg, version,
+					 "bad seq in Syn_sent");
+				goto raise;
+			}
+		}
+		if(seg.flags & RST) {
+			if(seg.flags & ACK)
+				localclose(s, Econrefused);
+			goto raise;
+		}
+
+		if(seg.flags & SYN) {
+			procsyn(s, &seg);
+			if(seg.flags & ACK){
+				update(s, &seg);
+				tcpsynackrtt(s);
+				tcpsetstate(s, Established);
+				tcpsetscale(s, tcb, seg.ws, tcb->scale);
+			}
+			else {
+				tcb->time = NOW;
+				tcpsetstate(s, Syn_received);	/* DLP - shouldn't this be a reset? */
+			}
+
+			/* data or a FIN came with the SYN: process it below */
+			if(length != 0 || (seg.flags & FIN))
+				break;
+
+			freeblist(bp);
+			goto output;
+		}
+		else
+			freeblist(bp);
+
+		qunlock(s);
+		poperror();
+		return;
+	case Syn_received:
+		/* doesn't matter if it's the correct ack, we're just trying to set timing */
+		if(seg.flags & ACK)
+			tcpsynackrtt(s);
+		break;
+	}
+
+	/*
+	 *  One DOS attack is to open connections to us and then forget about them,
+	 *  thereby tying up a conv at no long term cost to the attacker.
+	 *  This is an attempt to defeat these stateless DOS attacks.  See
+	 *  corresponding code in tcpsendka().
+	 */
+	if(tcb->state != Syn_received && (seg.flags & RST) == 0){
+		if(tcpporthogdefense
+		&& seq_within(seg.ack, tcb->snd.una-(1<<31), tcb->snd.una-(1<<29))){
+			print("stateless hog %I.%d->%I.%d f %ux %lux - %lux - %lux\n",
+				source, seg.source, dest, seg.dest, seg.flags,
+				tcb->snd.una-(1<<31), seg.ack, tcb->snd.una-(1<<29));
+			localclose(s, "stateless hog");
+		}
+	}
+
+	/* Cut the data to fit the receive window */
+	if(tcptrim(tcb, &seg, &bp, &length) == -1) {
+		netlog(f, Logtcp, "tcp len < 0, %lud %d\n", seg.seq, length);
+		/* nothing usable in the segment, but its ack still counts */
+		update(s, &seg);
+		if(qlen(s->wq)+tcb->flgcnt == 0 && tcb->state == Closing) {
+			tcphalt(tpriv, &tcb->rtt_timer);
+			tcphalt(tpriv, &tcb->acktimer);
+			tcphalt(tpriv, &tcb->katimer);
+			tcpsetstate(s, Time_wait);
+			tcb->timer.start = MSL2*(1000 / MSPTICK);
+			tcpgo(tpriv, &tcb->timer);
+		}
+		if(!(seg.flags & RST)) {
+			tcb->flags |= FORCE;
+			goto output;
+		}
+		qunlock(s);
+		poperror();
+		return;
+	}
+
+	/* Cannot accept so answer with a rst */
+	if(length && tcb->state == Closed) {
+		sndrst(tcp, source, dest, length, &seg, version, "sending to Closed");
+		goto raise;
+	}
+
+	/* The segment is beyond the current receive pointer so
+	 * queue the data in the resequence queue
+	 */
+	if(seg.seq != tcb->rcv.nxt)
+	if(length != 0 || (seg.flags & (SYN|FIN))) {
+		update(s, &seg);
+		if(addreseq(tcb, tpriv, &seg, bp, length) < 0)
+			print("reseq %I.%d -> %I.%d\n", s->raddr, s->rport, s->laddr, s->lport);
+		tcb->flags |= FORCE;
+		goto output;
+	}
+
+	/*
+	 *  keep looping till we've processed this packet plus any
+	 *  adjacent packets in the resequence queue
+	 */
+	for(;;) {
+		if(seg.flags & RST) {
+			if(tcb->state == Established) {
+				tpriv->stats[EstabResets]++;
+				if(tcb->rcv.nxt != seg.seq)
+					print("out of order RST rcvd: %I.%d -> %I.%d, rcv.nxt %lux seq %lux\n", s->raddr, s->rport, s->laddr, s->lport, tcb->rcv.nxt, seg.seq);
+			}
+			localclose(s, Econrefused);
+			goto raise;
+		}
+
+		/* past this point every segment must carry an ack */
+		if((seg.flags&ACK) == 0)
+			goto raise;
+
+		switch(tcb->state) {
+		case Syn_received:
+			if(!seq_within(seg.ack, tcb->snd.una+1, tcb->snd.nxt)){
+				sndrst(tcp, source, dest, length, &seg, version,
+					"bad seq in Syn_received");
+				goto raise;
+			}
+			update(s, &seg);
+			tcpsetstate(s, Established);
+			/* fall through */
+		case Established:
+		case Close_wait:
+			update(s, &seg);
+			break;
+		case Finwait1:
+			update(s, &seg);
+			/* everything we sent, FIN included, has been acked */
+			if(qlen(s->wq)+tcb->flgcnt == 0){
+				tcphalt(tpriv, &tcb->rtt_timer);
+				tcphalt(tpriv, &tcb->acktimer);
+				tcpsetkacounter(tcb);
+				tcb->time = NOW;
+				tcpsetstate(s, Finwait2);
+				tcb->katimer.start = MSL2 * (1000 / MSPTICK);
+				tcpgo(tpriv, &tcb->katimer);
+			}
+			break;
+		case Finwait2:
+			update(s, &seg);
+			break;
+		case Closing:
+			update(s, &seg);
+			if(qlen(s->wq)+tcb->flgcnt == 0) {
+				tcphalt(tpriv, &tcb->rtt_timer);
+				tcphalt(tpriv, &tcb->acktimer);
+				tcphalt(tpriv, &tcb->katimer);
+				tcpsetstate(s, Time_wait);
+				tcb->timer.start = MSL2*(1000 / MSPTICK);
+				tcpgo(tpriv, &tcb->timer);
+			}
+			break;
+		case Last_ack:
+			update(s, &seg);
+			if(qlen(s->wq)+tcb->flgcnt == 0) {
+				localclose(s, nil);
+				goto raise;
+			}
+			/* fall through */
+		case Time_wait:
+			tcb->flags |= FORCE;
+			if(tcb->timer.state != TcptimerON)
+				tcpgo(tpriv, &tcb->timer);
+		}
+
+		/* urgent data is pulled off the front of the block and dropped */
+		if((seg.flags&URG) && seg.urg) {
+			if(seq_gt(seg.urg + seg.seq, tcb->rcv.urg)) {
+				tcb->rcv.urg = seg.urg + seg.seq;
+				pullblock(&bp, seg.urg);
+			}
+		}
+		else
+		if(seq_gt(tcb->rcv.nxt, tcb->rcv.urg))
+			tcb->rcv.urg = tcb->rcv.nxt;
+
+		if(length == 0) {
+			if(bp != nil)
+				freeblist(bp);
+		}
+		else {
+			switch(tcb->state){
+			default:
+				/* Ignore segment text */
+				if(bp != nil)
+					freeblist(bp);
+				break;
+
+			case Syn_received:
+			case Established:
+			case Finwait1:
+				/* If we still have some data place on
+				 * receive queue
+				 */
+				if(bp) {
+					bp = packblock(bp);
+					if(bp == nil)
+						panic("tcp packblock");
+					qpassnolim(s->rq, bp);
+					bp = nil;
+
+					/*
+					 *  Force an ack every 2 data messages.  This is
+					 *  a hack for rob to make his home system run
+					 *  faster.
+					 *
+					 *  this also keeps the standard TCP congestion
+					 *  control working since it needs an ack every
+					 *  2 max segs worth.  This is not quite that,
+					 *  but under a real stream is equivalent since
+					 *  every packet has a max seg in it.
+					 */
+					if(++(tcb->rcv.una) >= 2)
+						tcb->flags |= FORCE;
+				}
+				tcb->rcv.nxt += length;
+
+				/*
+				 *  update our rcv window
+				 */
+				tcprcvwin(s);
+
+				/*
+				 *  turn on the acktimer if there's something
+				 *  to ack
+				 */
+				if(tcb->acktimer.state != TcptimerON)
+					tcpgo(tpriv, &tcb->acktimer);
+
+				break;
+			case Finwait2:
+				/* no process to read the data, send a reset */
+				if(bp != nil)
+					freeblist(bp);
+				sndrst(tcp, source, dest, length, &seg, version,
+					"send to Finwait2");
+				qunlock(s);
+				poperror();
+				return;
+			}
+		}
+
+		if(seg.flags & FIN) {
+			tcb->flags |= FORCE;
+
+			/* the FIN takes one sequence number in the states that accept it */
+			switch(tcb->state) {
+			case Syn_received:
+			case Established:
+				tcb->rcv.nxt++;
+				tcpsetstate(s, Close_wait);
+				break;
+			case Finwait1:
+				tcb->rcv.nxt++;
+				if(qlen(s->wq)+tcb->flgcnt == 0) {
+					tcphalt(tpriv, &tcb->rtt_timer);
+					tcphalt(tpriv, &tcb->acktimer);
+					tcphalt(tpriv, &tcb->katimer);
+					tcpsetstate(s, Time_wait);
+					tcb->timer.start = MSL2*(1000/MSPTICK);
+					tcpgo(tpriv, &tcb->timer);
+				}
+				else
+					tcpsetstate(s, Closing);
+				break;
+			case Finwait2:
+				tcb->rcv.nxt++;
+				tcphalt(tpriv, &tcb->rtt_timer);
+				tcphalt(tpriv, &tcb->acktimer);
+				tcphalt(tpriv, &tcb->katimer);
+				tcpsetstate(s, Time_wait);
+				tcb->timer.start = MSL2 * (1000/MSPTICK);
+				tcpgo(tpriv, &tcb->timer);
+				break;
+			case Close_wait:
+			case Closing:
+			case Last_ack:
+				break;
+			case Time_wait:
+				tcpgo(tpriv, &tcb->timer);
+				break;
+			}
+		}
+
+		/*
+		 *  get next adjacent segment from the resequence queue.
+		 *  dump/trim any overlapping segments
+		 */
+		for(;;) {
+			if(tcb->reseq == nil)
+				goto output;
+
+			/* the next queued segment still starts beyond rcv.nxt */
+			if(seq_ge(tcb->rcv.nxt, tcb->reseq->seg.seq) == 0)
+				goto output;
+
+			getreseq(tcb, &seg, &bp, &length);
+
+			if(tcptrim(tcb, &seg, &bp, &length) == 0)
+				break;
+		}
+	}
+output:
+	/* bp has been consumed on every path that gets here */
+	tcpoutput(s);
+	qunlock(s);
+	poperror();
+	return;
+raise:
+	qunlock(s);
+	poperror();
+	freeblist(bp);
+	tcpkick(s);
+}
+
+/*
+ *  Send whatever the connection state, the offered window and the
+ *  congestion window allow: at most 100 segments per call, each
+ *  carrying at most one mss of data from the write queue.
+ *
+ *  always enters and exits with the s locked.  We drop
+ *  the lock to ipoput the packet so some care has to be
+ *  taken by callers.
+ */
+void
+tcpoutput(Conv *s)
+{
+	Tcp seg;
+	int msgs;
+	Tcpctl *tcb;
+	Block *hbp, *bp;
+	int sndcnt, n;
+	ulong ssize, dsize, usable, sent;
+	Fs *f;
+	Tcppriv *tpriv;
+	uchar version;
+
+	f = s->p->f;
+	tpriv = s->p->priv;
+	version = s->ipversion;
+
+	for(msgs = 0; msgs < 100; msgs++) {
+		tcb = (Tcpctl*)s->ptcl;
+
+		/* states in which nothing is ever sent from here */
+		switch(tcb->state) {
+		case Listen:
+		case Closed:
+		case Finwait2:
+			return;
+		}
+
+		/* force an ack when a window has opened up */
+		if(tcb->rcv.blocked && tcb->rcv.wnd > 0){
+			tcb->rcv.blocked = 0;
+			tcb->flags |= FORCE;
+		}
+
+		/* sndcnt: queued bytes plus flags; sent: what is already in flight */
+		sndcnt = qlen(s->wq)+tcb->flgcnt;
+		sent = tcb->snd.ptr - tcb->snd.una;
+
+		/* Don't send anything else until our SYN has been acked */
+		if(tcb->snd.ptr != tcb->iss && (tcb->flags & SYNACK) == 0)
+			break;
+
+		/* Compute usable segment based on offered window and limit
+		 * window probes to one
+		 */
+		if(tcb->snd.wnd == 0){
+			if(sent != 0) {
+				if((tcb->flags&FORCE) == 0)
+					break;
+//				tcb->snd.ptr = tcb->snd.una;
+			}
+			usable = 1;
+		}
+		else {
+			usable = tcb->cwind;
+			if(tcb->snd.wnd < usable)
+				usable = tcb->snd.wnd;
+			usable -= sent;
+		}
+		/* ssize: sequence space this segment will use; dsize: data bytes in it */
+		ssize = sndcnt-sent;
+		if(ssize && usable < 2)
+			netlog(s->p->f, Logtcp, "throttled snd.wnd %lud cwind %lud\n",
+				tcb->snd.wnd, tcb->cwind);
+		if(usable < ssize)
+			ssize = usable;
+		if(tcb->mss < ssize)
+			ssize = tcb->mss;
+		dsize = ssize;
+		seg.urg = 0;
+
+		/* nothing to send and nobody asked for a bare ack */
+		if(ssize == 0)
+		if((tcb->flags&FORCE) == 0)
+			break;
+
+		tcb->flags &= ~FORCE;
+		tcprcvwin(s);
+
+		/* By default we will generate an ack */
+		tcphalt(tpriv, &tcb->acktimer);
+		tcb->rcv.una = 0;
+		seg.source = s->lport;
+		seg.dest = s->rport;
+		seg.flags = ACK;
+		seg.mss = 0;
+		seg.ws = 0;
+		switch(tcb->state){
+		case Syn_sent:
+			seg.flags = 0;
+			if(tcb->snd.ptr == tcb->iss){
+				/* the SYN uses a sequence number but carries no data byte */
+				seg.flags |= SYN;
+				dsize--;
+				seg.mss = tcb->mss;
+				seg.ws = tcb->scale;
+			}
+			break;
+		case Syn_received:
+			/*
+			 *  don't send any data with a SYN/ACK packet
+			 *  because Linux rejects the packet in its
+			 *  attempt to solve the SYN attack problem
+			 */
+			if(tcb->snd.ptr == tcb->iss){
+				seg.flags |= SYN;
+				dsize = 0;
+				ssize = 1;
+				seg.mss = tcb->mss;
+				seg.ws = tcb->scale;
+			}
+			break;
+		}
+		seg.seq = tcb->snd.ptr;
+		seg.ack = tcb->rcv.nxt;
+		seg.wnd = tcb->rcv.wnd;
+
+		/* Pull out data to send */
+		bp = nil;
+		if(dsize != 0) {
+			bp = qcopy(s->wq, dsize, sent);
+			/* the queue ran short: the missing unit of sequence space is the FIN */
+			if(BLEN(bp) != dsize) {
+				seg.flags |= FIN;
+				dsize--;
+			}
+		}
+
+		if(sent+dsize == sndcnt)
+			seg.flags |= PSH;
+
+		/* keep track of balance of resent data */
+		if(seq_lt(tcb->snd.ptr, tcb->snd.nxt)) {
+			n = tcb->snd.nxt - tcb->snd.ptr;
+			if(ssize < n)
+				n = ssize;
+			tcb->resent += n;
+			netlog(f, Logtcp, "rexmit: %I.%d -> %I.%d ptr %lux nxt %lux\n",
+				s->raddr, s->rport, s->laddr, s->lport, tcb->snd.ptr, tcb->snd.nxt);
+			tpriv->stats[RetransSegs]++;
+		}
+
+		tcb->snd.ptr += ssize;
+
+		/* Pull up the send pointer so we can accept acks
+		 * for this window
+		 */
+		if(seq_gt(tcb->snd.ptr,tcb->snd.nxt))
+			tcb->snd.nxt = tcb->snd.ptr;
+
+		/* Build header, link data and compute cksum */
+		switch(version){
+		case V4:
+			tcb->protohdr.tcp4hdr.vihl = IP_VER4;
+			hbp = htontcp4(&seg, bp, &tcb->protohdr.tcp4hdr, tcb);
+			if(hbp == nil) {
+				freeblist(bp);
+				return;
+			}
+			break;
+		case V6:
+			tcb->protohdr.tcp6hdr.vcf[0] = IP_VER6;
+			hbp = htontcp6(&seg, bp, &tcb->protohdr.tcp6hdr, tcb);
+			if(hbp == nil) {
+				freeblist(bp);
+				return;
+			}
+			break;
+		default:
+			hbp = nil;	/* to suppress a warning */
+			panic("tcpoutput: version %d", version);
+		}
+
+		/* Start the transmission timers if there is new data and we
+		 * expect acknowledges
+		 */
+		if(ssize != 0){
+			if(tcb->timer.state != TcptimerON)
+				tcpgo(tpriv, &tcb->timer);
+
+			/*  If round trip timer isn't running, start it.
+			 *  measure the longest packet only in case the
+			 *  transmission time dominates RTT
+			 */
+			if(tcb->rtt_timer.state != TcptimerON)
+			if(ssize == tcb->mss) {
+				tcpgo(tpriv, &tcb->rtt_timer);
+				tcb->rttseq = tcb->snd.ptr;
+			}
+		}
+
+		tpriv->stats[OutSegs]++;
+
+		/* put off the next keep alive */
+		tcpgo(tpriv, &tcb->katimer);
+
+		switch(version){
+		case V4:
+			if(ipoput4(f, hbp, 0, s->ttl, s->tos, s) < 0){
+				/* a negative return means no route */
+				localclose(s, "no route");
+			}
+			break;
+		case V6:
+			if(ipoput6(f, hbp, 0, s->ttl, s->tos, s) < 0){
+				/* a negative return means no route */
+				localclose(s, "no route");
+			}
+			break;
+		default:
+			panic("tcpoutput2: version %d", version);
+		}
+		/* periodically let go of the lock and yield during a long burst */
+		if((msgs%4) == 1){
+			qunlock(s);
+			sched();
+			qlock(s);
+		}
+	}
+}
+
+/*
+ *  the BSD convention (hack?) for keep alives: probe with a sequence number below snd.una.
+ */
+void
+tcpsendka(Conv *s)
+{
+	Tcp seg;
+	Tcpctl *tcb;
+	Block *hbp,*dbp;
+
+	tcb = (Tcpctl*)s->ptcl;
+
+	dbp = nil;
+	seg.urg = 0;
+	seg.source = s->lport;
+	seg.dest = s->rport;
+	seg.flags = ACK|PSH;
+	seg.mss = 0;
+	seg.ws = 0;
+	if(tcpporthogdefense)
+		seg.seq = tcb->snd.una-(1<<30)-nrand(1<<20);	/* far below snd.una, randomized */
+	else
+		seg.seq = tcb->snd.una-1;	/* one below the first unacknowledged byte */
+	seg.ack = tcb->rcv.nxt;
+	tcb->rcv.una = 0;
+	seg.wnd = tcb->rcv.wnd;
+	if(tcb->state == Finwait2){
+		seg.flags |= FIN;	/* in Finwait2 the probe carries FIN and no data block */
+	} else {
+		dbp = allocb(1);
+		dbp->wp++;	/* one byte of payload; nothing here initializes its value */
+	}
+
+	if(isv4(s->raddr)) {
+		/* Build header, link data and compute cksum */
+		tcb->protohdr.tcp4hdr.vihl = IP_VER4;
+		hbp = htontcp4(&seg, dbp, &tcb->protohdr.tcp4hdr, tcb);
+		if(hbp == nil) {
+			freeblist(dbp);	/* no header block: drop the payload and give up */
+			return;
+		}
+		ipoput4(s->p->f, hbp, 0, s->ttl, s->tos, s);	/* return value is ignored */
+	}
+	else {
+		/* Build header, link data and compute cksum */
+		tcb->protohdr.tcp6hdr.vcf[0] = IP_VER6;
+		hbp = htontcp6(&seg, dbp, &tcb->protohdr.tcp6hdr, tcb);
+		if(hbp == nil) {
+			freeblist(dbp);	/* no header block: drop the payload and give up */
+			return;
+		}
+		ipoput6(s->p->f, hbp, 0, s->ttl, s->tos, s);	/* return value is ignored */
+	}
+}
+
+/*
+ *  set connection to time out after 12 minutes of keepalive periods (at least 3 of them)
+ */
+void
+tcpsetkacounter(Tcpctl *tcb)
+{
+	tcb->kacounter = (12 * 60 * 1000) / (tcb->katimer.start*MSPTICK);	/* presumably start is in ticks of MSPTICK ms - confirm */
+	if(tcb->kacounter < 3)
+		tcb->kacounter = 3;	/* tcpkeepalive decrements this once per firing */
+}
+
+/*
+ *  keepalive timer function; v is the Conv.  if we've timed out, close the
+ *  connection; otherwise, send a keepalive and restart the timer
+ */
+void
+tcpkeepalive(void *v)
+{
+	Tcpctl *tcb;
+	Conv *s;
+
+	s = v;
+	tcb = (Tcpctl*)s->ptcl;
+	if(waserror()){
+		qunlock(s);	/* error raised below: release the conv and re-raise */
+		nexterror();
+	}
+	qlock(s);
+	if(tcb->state != Closed){
+		if(--(tcb->kacounter) <= 0) {	/* out of probes (see tcpsetkacounter) */
+			localclose(s, Etimedout);
+		} else {
+			tcpsendka(s);
+			tcpgo(s->p->priv, &tcb->katimer);
+		}
+	}
+	qunlock(s);
+	poperror();
+}
+
+/*
+ *  start keepalive timer; optional f[1] is the interval (presumably ms - confirm); nil on success
+ */
+char*
+tcpstartka(Conv *s, char **f, int n)
+{
+	Tcpctl *tcb;
+	int x;
+
+	tcb = (Tcpctl*)s->ptcl;
+	if(tcb->state != Established)
+		return "connection must be in Established state";
+	if(n > 1){
+		x = atoi(f[1]);
+		if(x >= MSPTICK)	/* smaller (or unparsable) values leave the interval unchanged */
+			tcb->katimer.start = x/MSPTICK;
+	}
+	tcpsetkacounter(tcb);	/* recompute the probe budget for the interval now in effect */
+	tcpgo(s->p->priv, &tcb->katimer);
+
+	return nil;
+}
+
+/* turn checksums on/off: an f[1] that atoi reads as 0 sets nochecksum,
+ * any other value clears it; a request without f[1] is rejected */
+char*
+tcpsetchecksum(Conv *s, char **f, int n)
+{
+	Tcpctl *tcb;
+
+	if(n < 2)	/* no argument word: do not hand atoi a missing f[1] */
+		return "checksum: missing argument";
+	tcb = (Tcpctl*)s->ptcl;
+	tcb->nochecksum = !atoi(f[1]);
+	return nil;
+}
+
+void
+tcprxmit(Conv *s)
+{
+	Tcpctl *tcb;
+
+	tcb = (Tcpctl*)s->ptcl;
+
+	tcb->flags |= RETRAN|FORCE;
+	tcb->snd.ptr = tcb->snd.una;	/* rewind the send pointer to the first unacked byte */
+
+	/*
+	 *  We should be halving the slow start threshold (down to one
+	 *  mss) but leaving it at mss seems to work well enough
+	 */
+ 	tcb->ssthresh = tcb->mss;
+
+	/*
+	 *  pull window down to a single packet
+	 */
+	tcb->cwind = tcb->mss;
+	tcpoutput(s);
+}
+
+void
+tcptimeout(void *arg)	/* retransmit timer function; arg is the Conv */
+{
+	Conv *s;
+	Tcpctl *tcb;
+	int maxback;
+	Tcppriv *tpriv;
+
+	s = (Conv*)arg;
+	tpriv = s->p->priv;
+	tcb = (Tcpctl*)s->ptcl;
+
+	if(waserror()){
+		qunlock(s);	/* error raised below: release the conv and re-raise */
+		nexterror();
+	}
+	qlock(s);
+	switch(tcb->state){
+	default:
+		tcb->backoff++;
+		if(tcb->state == Syn_sent)
+			maxback = MAXBACKMS/2;	/* connection attempts get half the budget */
+		else
+			maxback = MAXBACKMS;
+		tcb->backedoff += tcb->timer.start * MSPTICK;	/* total time already spent waiting */
+		if(tcb->backedoff >= maxback) {
+			localclose(s, Etimedout);
+			break;
+		}
+		netlog(s->p->f, Logtcprxmt, "timeout rexmit 0x%lux %d/%d\n", tcb->snd.una, tcb->timer.start, NOW);
+		tcpsettimer(tcb);	/* recompute timer.start from the new backoff */
+		tcprxmit(s);
+		tpriv->stats[RetransTimeouts]++;
+		tcb->snd.dupacks = 0;
+		break;
+	case Time_wait:
+		localclose(s, nil);	/* timer expiry in Time_wait closes without an error string */
+		break;
+	case Closed:
+		break;
+	}
+	qunlock(s);
+	poperror();
+}
+
+int
+inwindow(Tcpctl *tcb, int seq)	/* seq_within() test of seq against rcv.nxt .. rcv.nxt+rcv.wnd-1 */
+{
+	return seq_within(seq, tcb->rcv.nxt, tcb->rcv.nxt+tcb->rcv.wnd-1);
+}
+
+/*
+ *  set up state for a received SYN (or SYN ACK) packet
+ */
+void
+procsyn(Conv *s, Tcp *seg)
+{
+	Tcpctl *tcb;
+
+	tcb = (Tcpctl*)s->ptcl;
+	tcb->flags |= FORCE;
+
+	tcb->rcv.nxt = seg->seq + 1;	/* the SYN itself takes one sequence number */
+	tcb->rcv.urg = tcb->rcv.nxt;
+	tcb->irs = seg->seq;	/* remember the sequence number the SYN carried */
+
+	/* our sending max segment size cannot be bigger than what he asked for */
+	if(seg->mss != 0 && seg->mss < tcb->mss)	/* an mss of 0 leaves ours unchanged */
+		tcb->mss = seg->mss;
+
+	/* the congestion window always starts out as a single segment */
+	tcb->snd.wnd = seg->wnd;
+	tcb->cwind = tcb->mss;
+}
+
+int
+addreseq(Tcpctl *tcb, Tcppriv *tpriv, Tcp *seg, Block *bp, ushort length)	/* 0: queued (or dropped on malloc failure); -1: whole queue flushed */
+{
+	Reseq *rp, *rp1;
+	int i, rqlen, qmax;
+
+	rp = malloc(sizeof(Reseq));
+	if(rp == nil){
+		freeblist(bp);	/* bp always consumed by addreseq */
+		return 0;
+	}
+
+	rp->seg = *seg;
+	rp->bp = bp;
+	rp->length = length;
+
+	/* Place on reassembly list sorting by starting seq number */
+	rp1 = tcb->reseq;
+	if(rp1 == nil || seq_lt(seg->seq, rp1->seg.seq)) {
+		rp->next = rp1;	/* new head of the list; no size check on this path */
+		tcb->reseq = rp;
+		if(rp->next != nil)
+			tpriv->stats[OutOfOrder]++;
+		return 0;
+	}
+
+	rqlen = 0;
+	for(i = 0;; i++) {
+		rqlen += rp1->length;	/* sums the entries up to the insertion point only */
+		if(rp1->next == nil || seq_lt(seg->seq, rp1->next->seg.seq)) {
+			rp->next = rp1->next;
+			rp1->next = rp;
+			if(rp->next != nil)
+				tpriv->stats[OutOfOrder]++;
+			break;
+		}
+		rp1 = rp1->next;
+	}
+	qmax = QMAX<<tcb->rcv.scale;
+	if(rqlen > qmax){
+		print("resequence queue > window: %d > %d\n", rqlen, qmax);
+		i = 0;
+	  	for(rp1 = tcb->reseq; rp1 != nil; rp1 = rp1->next){
+	  		print("%#lux %#lux %#ux\n", rp1->seg.seq,
+	  			rp1->seg.ack, rp1->seg.flags);
+			if(i++ > 10){
+				print("...\n");
+				break;
+			}
+		}
+
+		// delete entire reassembly queue (including the entry just added); wait for retransmit.
+		// - should we be smarter and only delete the tail?
+		for(rp = tcb->reseq; rp != nil; rp = rp1){
+			rp1 = rp->next;
+			freeblist(rp->bp);
+			free(rp);
+		}
+		tcb->reseq = nil;
+
+	  	return -1;
+	}
+	return 0;
+}
+
+void
+getreseq(Tcpctl *tcb, Tcp *seg, Block **bp, ushort *length)	/* pop the head of tcb->reseq into the out parameters */
+{
+	Reseq *rp;
+
+	rp = tcb->reseq;
+	if(rp == nil)
+		return;	/* empty queue: *seg, *bp and *length are left untouched */
+
+	tcb->reseq = rp->next;
+
+	*seg = rp->seg;
+	*bp = rp->bp;	/* the block list is handed to the caller */
+	*length = rp->length;
+
+	free(rp);
+}
+
+int
+tcptrim(Tcpctl *tcb, Tcp *seg, Block **bp, ushort *length)	/* 0: segment kept (trimmed in place); -1: rejected, *bp freed */
+{
+	ushort len;
+	uchar accept;
+	int dupcnt, excess;
+
+	accept = 0;
+	len = *length;
+	if(seg->flags & SYN)
+		len++;	/* SYN and FIN each count as one sequence number */
+	if(seg->flags & FIN)
+		len++;
+
+	if(tcb->rcv.wnd == 0) {
+		if(len == 0 && seg->seq == tcb->rcv.nxt)
+			return 0;	/* closed window: only an empty segment at rcv.nxt is accepted */
+	}
+	else {
+		/* Some part of the segment should be in the window */
+		if(inwindow(tcb,seg->seq))
+			accept++;
+		else
+		if(len != 0) {
+			if(inwindow(tcb, seg->seq+len-1) ||
+			seq_within(tcb->rcv.nxt, seg->seq,seg->seq+len-1))
+				accept++;
+		}
+	}
+	if(!accept) {
+		freeblist(*bp);
+		return -1;
+	}
+	dupcnt = tcb->rcv.nxt - seg->seq;	/* leading bytes that precede rcv.nxt */
+	if(dupcnt > 0){
+		tcb->rerecv += dupcnt;
+		if(seg->flags & SYN){
+			seg->flags &= ~SYN;	/* the duplicate SYN uses up one of the duplicated sequence numbers */
+			seg->seq++;
+
+			if(seg->urg > 1)
+				seg->urg--;
+			else
+				seg->flags &= ~URG;
+			dupcnt--;
+		}
+		if(dupcnt > 0){
+			pullblock(bp, (ushort)dupcnt);	/* drop the duplicated data from the front */
+			seg->seq += dupcnt;
+			*length -= dupcnt;
+
+			if(seg->urg > dupcnt)
+				seg->urg -= dupcnt;
+			else {
+				seg->flags &= ~URG;
+				seg->urg = 0;
+			}
+		}
+	}
+	excess = seg->seq + *length - (tcb->rcv.nxt + tcb->rcv.wnd);	/* bytes past the right window edge */
+	if(excess > 0) {
+		tcb->rerecv += excess;
+		*length -= excess;
+		*bp = trimblock(*bp, 0, *length);
+		if(*bp == nil)
+			panic("presotto is a boofhead");
+		seg->flags &= ~FIN;	/* the tail was cut off, so the FIN is discarded with it */
+	}
+	return 0;
+}
+
+void
+tcpadvise(Proto *tcp, Block *bp, char *msg)	/* bp is always freed before returning */
+{
+	Tcp4hdr *h4;
+	Tcp6hdr *h6;
+	Tcpctl *tcb;
+	uchar source[IPaddrlen];
+	uchar dest[IPaddrlen];
+	ushort psource, pdest;
+	Conv *s, **p;
+
+	h4 = (Tcp4hdr*)(bp->rp);	/* the same bytes viewed as either header layout */
+	h6 = (Tcp6hdr*)(bp->rp);
+
+	if((h4->vihl&0xF0)==IP_VER4) {
+		v4tov6(dest, h4->tcpdst);
+		v4tov6(source, h4->tcpsrc);
+		psource = nhgets(h4->tcpsport);
+		pdest = nhgets(h4->tcpdport);
+	}
+	else {
+		ipmove(dest, h6->tcpdst);
+		ipmove(source, h6->tcpsrc);
+		psource = nhgets(h6->tcpsport);
+		pdest = nhgets(h6->tcpdport);
+	}
+
+	/* Look for a connection whose remote end is the header's destination and whose local end is its source */
+	qlock(tcp);
+	for(p = tcp->conv; *p; p++) {
+		s = *p;
+		tcb = (Tcpctl*)s->ptcl;
+		if(s->rport == pdest)
+		if(s->lport == psource)
+		if(tcb->state != Closed)
+		if(ipcmp(s->raddr, dest) == 0)
+		if(ipcmp(s->laddr, source) == 0){
+			qlock(s);
+			qunlock(tcp);
+			switch(tcb->state){
+			case Syn_sent:
+				localclose(s, msg);	/* only a connection still in Syn_sent is closed */
+				break;
+			}
+			qunlock(s);
+			freeblist(bp);
+			return;
+		}
+	}
+	qunlock(tcp);
+	freeblist(bp);
+}
+
+static char*
+tcpporthogdefensectl(char *val)	/* "on"/"off" set tcpporthogdefense to 1/0; returns nil on success */
+{
+	int on;
+
+	on = strcmp(val, "on") == 0;
+	if(!on && strcmp(val, "off") != 0)
+		return "unknown value for tcpporthogdefense";
+	tcpporthogdefense = on;
+	return nil;
+}
+
+/* called with c qlocked; f[0..n-1] are the words of the request; returns nil or an error string */
+char*
+tcpctl(Conv* c, char** f, int n)
+{
+	if(n == 1 && strcmp(f[0], "hangup") == 0)
+		return tcphangup(c);
+	if(n >= 1 && strcmp(f[0], "keepalive") == 0)
+		return tcpstartka(c, f, n);	/* the interval argument is optional */
+	if(n >= 1 && strcmp(f[0], "checksum") == 0)	/* handler reads f[1] */
+		return n >= 2 ? tcpsetchecksum(c, f, n) : "missing argument";
+	if(n >= 1 && strcmp(f[0], "tcpporthogdefense") == 0)	/* handler reads f[1] */
+		return n >= 2 ? tcpporthogdefensectl(f[1]) : "missing argument";
+	return "unknown control request";
+}
+
+int
+tcpstats(Proto *tcp, char *buf, int len)	/* format the counters into buf; returns p - buf after the last seprint */
+{
+	Tcppriv *priv;
+	char *p, *e;
+	int i;
+
+	priv = tcp->priv;
+	p = buf;
+	e = p+len;
+	for(i = 0; i < Nstats; i++)
+		p = seprint(p, e, "%s: %lud\n", statnames[i], priv->stats[i]);	/* one "name: value" line per counter */
+	return p - buf;
+}
+
+/*
+ *  garbage collect any stale conversations:
+ *	- still in Syn_received more than 5 seconds after tcb->time (could be the SYN attack)
+ *	- Finwait2 after 5 minutes
+ *
+ *  this is called whenever we run out of channels.  Both checks are
+ *  of questionable validity so we try to use them only when we're
+ *  up against the wall.  Returns natgc()'s result plus the number closed here.
+ */
+int
+tcpgc(Proto *tcp)
+{
+	Conv *c, **pp, **ep;
+	int n;
+	Tcpctl *tcb;
+
+
+	n = natgc(tcp->ipproto);
+	ep = &tcp->conv[tcp->nc];
+	for(pp = tcp->conv; pp < ep; pp++) {
+		c = *pp;
+		if(c == nil)
+			break;
+		if(!canqlock(c))
+			continue;	/* never block here: skip conversations that are busy */
+		tcb = (Tcpctl*)c->ptcl;
+		switch(tcb->state){
+		case Syn_received:
+			if(NOW - tcb->time > 5000){
+				localclose(c, "timed out");
+				n++;
+			}
+			break;
+		case Finwait2:
+			if(NOW - tcb->time > 5*60*1000){
+				localclose(c, "timed out");
+				n++;
+			}
+			break;
+		}
+		qunlock(c);
+	}
+	return n;
+}
+
+void
+tcpsettimer(Tcpctl *tcb)	/* recompute timer.start from backoff, mdev and srtt */
+{
+	int x;
+
+	/* round trip dependency */
+	x = backoff(tcb->backoff) *
+		(tcb->mdev + (tcb->srtt>>LOGAGAIN) + MSPTICK) / MSPTICK;
+
+	/* bounded twixt 1/2 and 64 seconds, expressed in units of MSPTICK */
+	if(x < 500/MSPTICK)
+		x = 500/MSPTICK;
+	else if(x > (64000/MSPTICK))
+		x = 64000/MSPTICK;
+	tcb->timer.start = x;
+}
+
+void
+tcpinit(Fs *fs)	/* build the tcp Proto and hand it to Fsproto */
+{
+	Proto *tcp;
+	Tcppriv *tpriv;
+
+	tcp = smalloc(sizeof(Proto));
+	tpriv = tcp->priv = smalloc(sizeof(Tcppriv));
+	tcp->name = "tcp";
+	tcp->connect = tcpconnect;
+	tcp->announce = tcpannounce;
+	tcp->ctl = tcpctl;
+	tcp->state = tcpstate;
+	tcp->create = tcpcreate;
+	tcp->close = tcpclose;
+	tcp->rcv = tcpiput;
+	tcp->advise = tcpadvise;
+	tcp->stats = tcpstats;
+	tcp->inuse = tcpinuse;
+	tcp->gc = tcpgc;
+	tcp->ipproto = IP_TCPPROTO;
+	tcp->nc = scalednconv();
+	tcp->ptclsize = sizeof(Tcpctl);	/* size of the per-conversation Tcpctl reached through Conv.ptcl */
+	tpriv->stats[MaxConn] = tcp->nc;
+
+	Fsproto(fs, tcp);
+}
+
+void
+tcpsetscale(Conv *s, Tcpctl *tcb, ushort rcvscale, ushort sndscale)
+{
+	/* a zero rcvscale turns scaling off in both directions */
+	if(rcvscale == 0){
+		tcb->rcv.scale = 0;
+		tcb->snd.scale = 0;
+		tcb->window = QMAX;
+	} else {
+		tcb->rcv.scale = rcvscale & 0xff;
+		tcb->snd.scale = sndscale & 0xff;
+		tcb->window = QMAX<<tcb->snd.scale;
+	}
+	qsetlimit(s->rq, tcb->window);
+}
--- /dev/null
+++ b/os/ip.original/udp.c
@@ -1,0 +1,656 @@
+#include	"u.h"
+#include	"../port/lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"../port/error.h"
+
+#include	"ip.h"
+#include	"ipv6.h"
+
+
+#define DPRINT if(0)print
+
+enum
+{
+	UDP_UDPHDR_SZ	= 8,	/* udp header: four 2-byte fields */
+
+	UDP4_PHDR_OFF = 8,	/* checksum starts at Udp4hdr.Unused */
+	UDP4_PHDR_SZ = 12,	/* Unused through udpdst (assuming 4-byte IPv4 addresses) */
+	UDP4_IPHDR_SZ = 20,	/* bytes of Udp4hdr before udpsport */
+	UDP6_IPHDR_SZ = 40,	/* bytes of Udp6hdr before udpsport (assuming IPaddrlen is 16) */
+	UDP6_PHDR_SZ = 40,	/* v6 pseudo header overlays the whole ip header */
+	UDP6_PHDR_OFF = 0,	/* checksum starts at the beginning of Udp6hdr */
+
+	IP_UDPPROTO	= 17,	/* value stored in the ip protocol / next header field */
+	UDP_USEAD7	= 52,	/* "headers" prefix: raddr, laddr, ifc addr, rport, lport */
+	UDP_USEAD6	= 36,	/* "oldheaders" prefix: raddr, laddr, rport, lport */
+
+	Udprxms		= 200,	/* NOTE(review): the last three are not referenced in the visible code */
+	Udptickms	= 100,
+	Udpmaxxmit	= 10,
+};
+
+typedef struct Udp4hdr Udp4hdr;
+struct Udp4hdr
+{
+	/* ip header; Unused through udpdst double as the checksum pseudo header */
+	uchar	vihl;		/* Version and header length */
+	uchar	tos;		/* Type of service */
+	uchar	length[2];	/* packet length */
+	uchar	id[2];		/* Identification */
+	uchar	frag[2];	/* Fragment information */
+	uchar	Unused;	/* zeroed while checksumming; udpiput saves and restores it as ottl */
+	uchar	udpproto;	/* Protocol */
+	uchar	udpplen[2];	/* Header plus data length */
+	uchar	udpsrc[IPv4addrlen];	/* Ip source */
+	uchar	udpdst[IPv4addrlen];	/* Ip destination */
+
+	/* udp header */
+	uchar	udpsport[2];	/* Source port */
+	uchar	udpdport[2];	/* Destination port */
+	uchar	udplen[2];	/* udp header plus data length (udpkick stores dlen+UDP_UDPHDR_SZ) */
+	uchar	udpcksum[2];	/* Checksum */
+};
+
+typedef struct Udp6hdr Udp6hdr;
+struct Udp6hdr {
+	uchar viclfl[4];	/* byte 0 gets IP_VER6; holds the udp length while checksumming */
+	uchar len[2];	/* set to the udp header plus data length on output */
+	uchar nextheader;	/* IP_UDPPROTO on output */
+	uchar hoplimit;	/* holds IP_UDPPROTO while checksumming */
+	uchar udpsrc[IPaddrlen];
+	uchar udpdst[IPaddrlen];
+
+	/* udp header */
+	uchar	udpsport[2];	/* Source port */
+	uchar	udpdport[2];	/* Destination port */
+	uchar	udplen[2];	/* udp header plus data length (udpkick stores dlen+UDP_UDPHDR_SZ) */
+	uchar	udpcksum[2];	/* Checksum */
+};
+
+/* MIB II counters */
+typedef struct Udpstats Udpstats;
+struct Udpstats
+{
+	ulong	udpInDatagrams;	/* bumped for every packet entering udpiput */
+	ulong	udpNoPorts;	/* packets for which no conversation was found */
+	ulong	udpInErrors;	/* packets dropped for a bad checksum */
+	ulong	udpOutDatagrams;	/* packets handed to ipoput4/ipoput6 by udpkick */
+};
+
+typedef struct Udppriv Udppriv;
+struct Udppriv
+{
+	Ipht		ht;	/* conversation lookup table (iphtadd/iphtrem/iphtlook) */
+
+	/* MIB counters */
+	Udpstats	ustats;
+
+	/* non-MIB stats */
+	ulong		csumerr;		/* checksum errors; NOTE(review): never updated in the visible code */
+	ulong		lenerr;			/* short packet */
+};
+
+void (*etherprofiler)(char *name, int qlen);	/* NOTE(review): not referenced in the visible code */
+void udpkick(void *x, Block *bp);	/* declared early because udpcreate installs it on the write queue */
+
+/*
+ *  protocol specific part of Conv (reached through c->ptcl)
+ */
+typedef struct Udpcb Udpcb;
+struct Udpcb
+{
+	QLock;
+	uchar	headers;	/* 0, 6 ("oldheaders") or 7 ("headers"); set by udpctl, cleared by udpclose */
+};
+
+static char*
+udpconnect(Conv *c, char **argv, int argc)	/* nil on success, else Fsstdconnect's error string */
+{
+	char *e;
+	Udppriv *upriv;
+
+	upriv = c->p->priv;
+	e = Fsstdconnect(c, argv, argc);
+	Fsconnected(c, e);	/* called with e whether or not the connect failed */
+	if(e != nil)
+		return e;
+
+	iphtadd(&upriv->ht, c);	/* enter c into the table udpiput searches with iphtlook */
+	return nil;
+}
+
+
+static int
+udpstate(Conv *c, char *state, int n)	/* format the status line into state; returns snprint's result */
+{
+	return snprint(state, n, "%s qin %d qout %d",
+		c->inuse ? "Open" : "Closed",
+		c->rq ? qlen(c->rq) : 0,	/* a missing queue is reported as length 0 */
+		c->wq ? qlen(c->wq) : 0
+	);
+}
+
+static char*
+udpannounce(Conv *c, char** argv, int argc)	/* nil on success, else Fsstdannounce's error string */
+{
+	char *e;
+	Udppriv *upriv;
+
+	upriv = c->p->priv;
+	e = Fsstdannounce(c, argv, argc);
+	if(e != nil)
+		return e;
+	Fsconnected(c, nil);	/* unlike udpconnect, only reached on success */
+	iphtadd(&upriv->ht, c);	/* enter c into the table udpiput searches with iphtlook */
+
+	return nil;
+}
+
+static void
+udpcreate(Conv *c)
+{
+	c->rq = qopen(64*1024, Qmsg, 0, 0);	/* read queue: 64K limit, opened with Qmsg */
+	c->wq = qbypass(udpkick, c);	/* write side goes to udpkick with c as its argument */
+}
+
+static void
+udpclose(Conv *c)	/* reset c to an unused state */
+{
+	Udpcb *ucb;
+	Udppriv *upriv;
+
+	upriv = c->p->priv;
+	iphtrem(&upriv->ht, c);	/* undo the iphtadd done at connect/announce/new call */
+
+	c->state = 0;
+	qclose(c->rq);
+	qclose(c->wq);
+	qclose(c->eq);
+	ipmove(c->laddr, IPnoaddr);
+	ipmove(c->raddr, IPnoaddr);
+	c->lport = 0;
+	c->rport = 0;
+
+	ucb = (Udpcb*)c->ptcl;
+	ucb->headers = 0;	/* back to the default mode without an address prefix */
+
+	qunlock(c);	/* NOTE(review): c is not locked in this function; presumably the caller holds it - confirm */
+}
+
+void
+udpkick(void *x, Block *bp)	/* output path: x is the Conv, bp the user data (plus address prefix in headers mode) */
+{
+	Conv *c = x;
+	Udp4hdr *uh4;
+	Udp6hdr *uh6;
+	ushort rport;
+	uchar laddr[IPaddrlen], raddr[IPaddrlen];
+	Udpcb *ucb;
+	int dlen, ptcllen;
+	Udppriv *upriv;
+	Fs *f;
+	int version;
+	Conv *rc;
+
+	upriv = c->p->priv;
+	f = c->p->f;
+
+	netlog(c->p->f, Logudp, "udp: kick\n");
+	if(bp == nil)
+		return;
+
+	ucb = (Udpcb*)c->ptcl;
+	switch(ucb->headers) {
+	case 7:
+		/* get user specified addresses */
+		bp = pullupblock(bp, UDP_USEAD7);
+		if(bp == nil)
+			return;
+		ipmove(raddr, bp->rp);
+		bp->rp += IPaddrlen;
+		ipmove(laddr, bp->rp);
+		bp->rp += IPaddrlen;
+		/* pick interface closest to dest */
+		if(ipforme(f, laddr) != Runi)
+			findlocalip(f, laddr, raddr);
+		bp->rp += IPaddrlen;		/* Ignore ifc address */
+		rport = nhgets(bp->rp);
+		bp->rp += 2+2;			/* Ignore local port */
+		break;
+	case 6:
+		/* get user specified addresses */
+		bp = pullupblock(bp, UDP_USEAD6);
+		if(bp == nil)
+			return;
+		ipmove(raddr, bp->rp);
+		bp->rp += IPaddrlen;
+		ipmove(laddr, bp->rp);
+		bp->rp += IPaddrlen;
+		/* pick interface closest to dest */
+		if(ipforme(f, laddr) != Runi)
+			findlocalip(f, laddr, raddr);
+		rport = nhgets(bp->rp);
+		bp->rp += 2+2;			/* Ignore local port */
+		break;
+	default:
+		rport = 0;	/* unused in this mode: addresses and ports come from c below */
+		break;
+	}
+
+	if(ucb->headers) {	/* choose the ip version from the addresses in use */
+		if(memcmp(laddr, v4prefix, IPv4off) == 0 ||
+		    ipcmp(laddr, IPnoaddr) == 0)
+			version = V4;
+		else
+			version = V6;
+	} else {
+		if( (memcmp(c->raddr, v4prefix, IPv4off) == 0 &&
+			memcmp(c->laddr, v4prefix, IPv4off) == 0)
+			|| ipcmp(c->raddr, IPnoaddr) == 0)
+			version = V4;
+		else
+			version = V6;
+	}
+
+	dlen = blocklen(bp);	/* payload length, after the address prefix has been skipped */
+
+	/* fill in pseudo header and compute checksum */
+	switch(version){
+	case V4:
+		bp = padblock(bp, UDP4_IPHDR_SZ+UDP_UDPHDR_SZ);
+		if(bp == nil)
+			return;
+
+		uh4 = (Udp4hdr *)(bp->rp);
+		ptcllen = dlen + UDP_UDPHDR_SZ;
+		uh4->Unused = 0;
+		uh4->udpproto = IP_UDPPROTO;
+		uh4->frag[0] = 0;
+		uh4->frag[1] = 0;
+		hnputs(uh4->udpplen, ptcllen);
+		if(ucb->headers) {
+			v6tov4(uh4->udpdst, raddr);
+			hnputs(uh4->udpdport, rport);
+			v6tov4(uh4->udpsrc, laddr);
+			rc = nil;	/* headers mode: no Conv is passed to ipoput4 */
+		} else {
+			v6tov4(uh4->udpdst, c->raddr);
+			hnputs(uh4->udpdport, c->rport);
+			if(ipcmp(c->laddr, IPnoaddr) == 0)
+				findlocalip(f, c->laddr, c->raddr);	/* local address not set yet: fill it in */
+			v6tov4(uh4->udpsrc, c->laddr);
+			rc = c;
+		}
+		hnputs(uh4->udpsport, c->lport);
+		hnputs(uh4->udplen, ptcllen);
+		uh4->udpcksum[0] = 0;
+		uh4->udpcksum[1] = 0;
+		hnputs(uh4->udpcksum, 
+		       ptclcsum(bp, UDP4_PHDR_OFF, dlen+UDP_UDPHDR_SZ+UDP4_PHDR_SZ));
+		uh4->vihl = IP_VER4;
+		ipoput4(f, bp, 0, c->ttl, c->tos, rc);
+		break;
+
+	case V6:
+		bp = padblock(bp, UDP6_IPHDR_SZ+UDP_UDPHDR_SZ);
+		if(bp == nil)
+			return;
+
+		// using the v6 ip header to create pseudo header 
+		// first then reset it to the normal ip header
+		uh6 = (Udp6hdr *)(bp->rp);
+		memset(uh6, 0, 8);
+		ptcllen = dlen + UDP_UDPHDR_SZ;
+		hnputl(uh6->viclfl, ptcllen);	/* pseudo header: length in the first four bytes */
+		uh6->hoplimit = IP_UDPPROTO;	/* pseudo header: protocol in the hop limit byte */
+		if(ucb->headers) {
+			ipmove(uh6->udpdst, raddr);
+			hnputs(uh6->udpdport, rport);
+			ipmove(uh6->udpsrc, laddr);
+			rc = nil;	/* headers mode: no Conv is passed to ipoput6 */
+		} else {
+			ipmove(uh6->udpdst, c->raddr);
+			hnputs(uh6->udpdport, c->rport);
+			if(ipcmp(c->laddr, IPnoaddr) == 0)
+				findlocalip(f, c->laddr, c->raddr);	/* local address not set yet: fill it in */
+			ipmove(uh6->udpsrc, c->laddr);
+			rc = c;
+		}
+		hnputs(uh6->udpsport, c->lport);
+		hnputs(uh6->udplen, ptcllen);
+		uh6->udpcksum[0] = 0;
+		uh6->udpcksum[1] = 0;
+		hnputs(uh6->udpcksum, 
+		       ptclcsum(bp, UDP6_PHDR_OFF, dlen+UDP_UDPHDR_SZ+UDP6_PHDR_SZ));
+		memset(uh6, 0, 8);	/* wipe the pseudo header fields, then write the real ones */
+		uh6->viclfl[0] = IP_VER6;
+		hnputs(uh6->len, ptcllen);
+		uh6->nextheader = IP_UDPPROTO;
+		ipoput6(f, bp, 0, c->ttl, c->tos, rc);
+		break;
+
+	default:
+		panic("udpkick: version %d", version);
+	}
+	upriv->ustats.udpOutDatagrams++;
+}
+
+void
+udpiput(Proto *udp, Ipifc *ifc, Block *bp)	/* input path: bp starts at the ip header of a received packet */
+{
+	int len;
+	Udp4hdr *uh4;
+	Udp6hdr *uh6;
+	Conv *c;
+	Udpcb *ucb;
+	uchar raddr[IPaddrlen], laddr[IPaddrlen];
+	ushort rport, lport;
+	Udppriv *upriv;
+	Fs *f;
+	int version;
+	int ottl, oviclfl, olen;
+	uchar *p;
+
+	upriv = udp->priv;
+	f = udp->f;
+	upriv->ustats.udpInDatagrams++;
+
+	uh4 = (Udp4hdr*)(bp->rp);
+	version = ((uh4->vihl&0xF0)==IP_VER6) ? V6 : V4;	/* anything that is not v6 is handled as v4 */
+
+	/*
+	 * Put back pseudo header for checksum 
+	 * (remember old values for icmpnoconv())
+	 */
+	switch(version) {
+	case V4:
+		ottl = uh4->Unused;
+		uh4->Unused = 0;
+		len = nhgets(uh4->udplen);
+		olen = nhgets(uh4->udpplen);
+		hnputs(uh4->udpplen, len);
+
+		v4tov6(raddr, uh4->udpsrc);
+		v4tov6(laddr, uh4->udpdst);
+		lport = nhgets(uh4->udpdport);
+		rport = nhgets(uh4->udpsport);
+
+		if(nhgets(uh4->udpcksum)) {	/* a zero checksum field skips verification */
+			if(ptclcsum(bp, UDP4_PHDR_OFF, len+UDP4_PHDR_SZ)) {
+				upriv->ustats.udpInErrors++;
+				netlog(f, Logudp, "udp: checksum error %I\n", raddr);
+				DPRINT("udp: checksum error %I\n", raddr);
+				freeblist(bp);
+				return;
+			}
+		}
+		uh4->Unused = ottl;	/* restore the bytes overwritten for the pseudo header */
+		hnputs(uh4->udpplen, olen);
+		break;
+	case V6:
+		uh6 = (Udp6hdr*)(bp->rp);
+		len = nhgets(uh6->udplen);
+		oviclfl = nhgetl(uh6->viclfl);
+		olen = nhgets(uh6->len);
+		ottl = uh6->hoplimit;
+		ipmove(raddr, uh6->udpsrc);
+		ipmove(laddr, uh6->udpdst);
+		lport = nhgets(uh6->udpdport);
+		rport = nhgets(uh6->udpsport);
+		memset(uh6, 0, 8);
+		hnputl(uh6->viclfl, len);
+		uh6->hoplimit = IP_UDPPROTO;
+		if(ptclcsum(bp, UDP6_PHDR_OFF, len+UDP6_PHDR_SZ)) {	/* always verified for v6 */
+			upriv->ustats.udpInErrors++;
+			netlog(f, Logudp, "udp: checksum error %I\n", raddr);
+			DPRINT("udp: checksum error %I\n", raddr);
+			freeblist(bp);
+			return;
+		}
+		hnputl(uh6->viclfl, oviclfl);	/* restore the bytes overwritten for the pseudo header */
+		hnputs(uh6->len, olen);
+		uh6->nextheader = IP_UDPPROTO;
+		uh6->hoplimit = ottl;
+		break;
+	default:
+		panic("udpiput: version %d", version);
+		return;	/* to avoid a warning */
+	}
+
+	qlock(udp);
+
+	c = iphtlook(&upriv->ht, raddr, rport, laddr, lport);
+	if(c == nil){
+		/* no conversation found */
+		upriv->ustats.udpNoPorts++;
+		qunlock(udp);
+		netlog(f, Logudp, "udp: no conv %I!%d -> %I!%d\n", raddr, rport,
+		       laddr, lport);
+
+		switch(version){
+		case V4:
+			icmpnoconv(f, bp);
+			break;
+		case V6:
+			icmphostunr(f, ifc, bp, icmp6_port_unreach, 0);
+			break;
+		default:
+			panic("udpiput2: version %d", version);
+		}
+
+		freeblist(bp);
+		return;
+	}
+	ucb = (Udpcb*)c->ptcl;
+
+	if(c->state == Announced){
+		if(ucb->headers == 0){
+			/* create a new conversation */
+			if(ipforme(f, laddr) != Runi) {	/* not a unicast address of ours: use the interface's address */
+				switch(version){
+				case V4:
+					v4tov6(laddr, ifc->lifc->local);
+					break;
+				case V6:
+					ipmove(laddr, ifc->lifc->local);
+					break;
+				default:
+					panic("udpiput3: version %d", version);
+				}
+			}
+			c = Fsnewcall(c, raddr, rport, laddr, lport, version);
+			if(c == nil){
+				qunlock(udp);
+				freeblist(bp);
+				return;
+			}
+			iphtadd(&upriv->ht, c);
+			ucb = (Udpcb*)c->ptcl;	/* from here on work with the new conversation */
+		}
+	}
+
+	qlock(c);
+	qunlock(udp);
+
+	/*
+	 * Trim the packet down to data size
+	 */
+	len -= UDP_UDPHDR_SZ;
+	switch(version){
+	case V4:
+		bp = trimblock(bp, UDP4_IPHDR_SZ+UDP_UDPHDR_SZ, len);
+		break;
+	case V6:
+		bp = trimblock(bp, UDP6_IPHDR_SZ+UDP_UDPHDR_SZ, len);
+		break;
+	default:
+		bp = nil;
+		panic("udpiput4: version %d", version);
+	}
+	if(bp == nil){	/* trimblock gave nothing back: counted as a length error */
+		qunlock(c);
+		netlog(f, Logudp, "udp: len err %I.%d -> %I.%d\n", raddr, rport,
+		       laddr, lport);
+		upriv->lenerr++;
+		return;
+	}
+
+	netlog(f, Logudpmsg, "udp: %I.%d -> %I.%d l %d\n", raddr, rport,
+	       laddr, lport, len);
+
+	switch(ucb->headers){
+	case 7:
+		/* pass the src address: raddr, laddr, ifc addr, rport, lport */
+		bp = padblock(bp, UDP_USEAD7);
+		p = bp->rp;
+		ipmove(p, raddr); p += IPaddrlen;
+		ipmove(p, laddr); p += IPaddrlen;
+		ipmove(p, ifc->lifc->local); p += IPaddrlen;
+		hnputs(p, rport); p += 2;
+		hnputs(p, lport);
+		break;
+	case 6:
+		/* pass the src address: raddr, laddr (or ifc addr), rport, lport */
+		bp = padblock(bp, UDP_USEAD6);
+		p = bp->rp;
+		ipmove(p, raddr); p += IPaddrlen;
+		ipmove(p, ipforme(f, laddr)==Runi ? laddr : ifc->lifc->local); p += IPaddrlen;
+		hnputs(p, rport); p += 2;
+		hnputs(p, lport);
+		break;
+	}
+
+	if(bp->next)
+		bp = concatblock(bp);	/* more than one block in the chain: concatenate before queueing */
+
+	if(qfull(c->rq)){
+		qunlock(c);
+		netlog(f, Logudp, "udp: qfull %I.%d -> %I.%d\n", raddr, rport,
+		       laddr, lport);
+		freeblist(bp);	/* read queue is full: the datagram is dropped */
+		return;
+	}
+
+	qpass(c->rq, bp);
+	qunlock(c);
+
+}
+
+char*
+udpctl(Conv *c, char **f, int n)	/* nil on success, else an error string */
+{
+	Udpcb *ucb;
+
+	ucb = (Udpcb*)c->ptcl;
+	if(n == 1){	/* both requests take no arguments */
+		if(strcmp(f[0], "oldheaders") == 0){
+			ucb->headers = 6;	/* UDP_USEAD6 prefix on every datagram */
+			return nil;
+		} else if(strcmp(f[0], "headers") == 0){
+			ucb->headers = 7;	/* UDP_USEAD7 prefix on every datagram */
+			return nil;
+		}
+	}
+	return "unknown control request";
+}
+
+void
+udpadvise(Proto *udp, Block *bp, char *msg)	/* bp is always freed before returning */
+{
+	Udp4hdr *h4;
+	Udp6hdr *h6;
+	uchar source[IPaddrlen], dest[IPaddrlen];
+	ushort psource, pdest;
+	Conv *s, **p;
+	int version;
+
+	h4 = (Udp4hdr*)(bp->rp);
+	version = ((h4->vihl&0xF0)==IP_VER6) ? V6 : V4;	/* anything that is not v6 is handled as v4 */
+
+	switch(version) {
+	case V4:
+		v4tov6(dest, h4->udpdst);
+		v4tov6(source, h4->udpsrc);
+		psource = nhgets(h4->udpsport);
+		pdest = nhgets(h4->udpdport);
+		break;
+	case V6:
+		h6 = (Udp6hdr*)(bp->rp);
+		ipmove(dest, h6->udpdst);
+		ipmove(source, h6->udpsrc);
+		psource = nhgets(h6->udpsport);
+		pdest = nhgets(h6->udpdport);
+		break;
+	default:
+		panic("udpadvise: version %d", version);
+		return;  /* to avoid a warning */
+	}
+
+	/* Look for a connection whose remote end is the header's destination and whose local end is its source */
+	qlock(udp);
+	for(p = udp->conv; *p; p++) {
+		s = *p;
+		if(s->rport == pdest)
+		if(s->lport == psource)
+		if(ipcmp(s->raddr, dest) == 0)
+		if(ipcmp(s->laddr, source) == 0){
+			if(s->ignoreadvice)
+				break;	/* this conversation opted out: stop searching and do nothing */
+			qlock(s);
+			qunlock(udp);
+			qhangup(s->rq, msg);	/* hang up both queues with msg */
+			qhangup(s->wq, msg);
+			qunlock(s);
+			freeblist(bp);
+			return;
+		}
+	}
+	qunlock(udp);
+	freeblist(bp);
+}
+
+int
+udpstats(Proto *udp, char *buf, int len)	/* format the four MIB counters into buf; returns snprint's result */
+{
+	Udppriv *upriv;
+
+	upriv = udp->priv;
+	return snprint(buf, len, "InDatagrams: %lud\nNoPorts: %lud\nInErrors: %lud\nOutDatagrams: %lud\n",
+		upriv->ustats.udpInDatagrams,
+		upriv->ustats.udpNoPorts,
+		upriv->ustats.udpInErrors,
+		upriv->ustats.udpOutDatagrams);	/* csumerr and lenerr are not reported */
+}
+
+int
+udpgc(Proto *udp)	/* installed as udp->gc by udpinit */
+{
+	return natgc(udp->ipproto);	/* udp adds no collection of its own; the result is natgc's */
+}
+
+void
+udpinit(Fs *fs)	/* build the udp Proto and hand it to Fsproto */
+{
+	Proto *udp;
+
+	udp = smalloc(sizeof(Proto));
+	udp->priv = smalloc(sizeof(Udppriv));
+	udp->name = "udp";
+	udp->connect = udpconnect;
+	udp->announce = udpannounce;
+	udp->ctl = udpctl;
+	udp->state = udpstate;
+	udp->create = udpcreate;
+	udp->close = udpclose;
+	udp->rcv = udpiput;
+	udp->advise = udpadvise;
+	udp->stats = udpstats;
+	udp->gc = udpgc;
+	udp->ipproto = IP_UDPPROTO;
+	udp->nc = Nchans;
+	udp->ptclsize = sizeof(Udpcb);	/* size of the per-conversation Udpcb reached through Conv.ptcl */
+
+	Fsproto(fs, udp);
+}
--- a/os/ip/arp.c
+++ b/os/ip/arp.c
@@ -47,7 +47,8 @@
 
 #define haship(s) ((s)[IPaddrlen-1]%NHASH)
 
-extern int 	ReTransTimer = RETRANS_TIMER;
+int 	ReTransTimer = RETRANS_TIMER;
+
 static void 	rxmitproc(void *v);
 
 void
@@ -57,145 +58,121 @@
 	f->arp->f = f;
 	f->arp->rxmt = nil;
 	f->arp->dropf = f->arp->dropl = nil;
-	kproc("rxmitproc", rxmitproc, f->arp, 0);
+	kproc("rxmitproc", rxmitproc, f->arp);
 }
 
-/*
- *  create a new arp entry for an ip address.
- */
-static Arpent*
-newarp6(Arp *arp, uchar *ip, Ipifc *ifc, int addrxt)
+static void
+freeblistchain(Block *bp)
 {
-	uint t;
-	Block *next, *xp;
-	Arpent *a, *e, *f, **l;
-	Medium *m = ifc->m;
-	int empty;
+	Block *next;
 
-	/* find oldest entry */
-	e = &arp->cache[NCACHE];
-	a = arp->cache;
-	t = a->utime;
-	for(f = a; f < e; f++){
-		if(f->utime < t){
-			t = f->utime;
-			a = f;
-		}
+	while(bp != nil){
+		next = bp->list;
+		freeblist(bp);
+		bp = next;
 	}
+}
 
-	/* dump waiting packets */
-	xp = a->hold;
-	a->hold = nil;
+/* take out of re-transmit chain */
+static Arpent**
+rxmtunchain(Arp *arp, Arpent *a)
+{
+	Arpent **l;
 
-	if(isv4(a->ip)){
-		while(xp){
-			next = xp->list;
-			freeblist(xp);
-			xp = next;
+	for(l = &arp->rxmt; *l != nil; l = &((*l)->nextrxt)){
+		if(*l == a){
+			*l = a->nextrxt;
+			break;
 		}
 	}
-	else {	// queue icmp unreachable for rxmitproc later on, w/o arp lock
-		if(xp){
-			if(arp->dropl == nil) 
-				arp->dropf = xp;
-			else
-				arp->dropl->list = xp;
+	a->nextrxt = nil;
+	return l;
+}
 
-			for(next = xp->list; next; next = next->list)
-				xp = next;
-			arp->dropl = xp;
-			wakeup(&arp->rxmtq);
-		}
-	}
+static void
+cleanarpent(Arp *arp, Arpent *a)
+{
+	Arpent **l;
+	Block *bp;
 
 	/* take out of current chain */
-	l = &arp->hash[haship(a->ip)];
-	for(f = *l; f; f = f->hash){
-		if(f == a){
+	for(l = &arp->hash[haship(a->ip)]; *l != nil; l = &((*l)->hash)){
+		if(*l == a){
 			*l = a->hash;
 			break;
 		}
-		l = &f->hash;
 	}
+	a->hash = nil;
 
-	/* insert into new chain */
-	l = &arp->hash[haship(ip)];
-	a->hash = *l;
-	*l = a;
+	/* dump waiting packets */
+	bp = a->hold;
+	a->hold = nil;
+	if(isv4(a->ip))
+		freeblistchain(bp);
+	else {
+		rxmtunchain(arp, a);
 
-	memmove(a->ip, ip, sizeof(a->ip));
-	a->utime = NOW;
-	a->ctime = 0;
-	a->type = m;
+		/* queue icmp unreachable for rxmitproc later on, w/o arp lock */
+		if(bp != nil){
+			if(arp->dropf == nil)
+				arp->dropf = bp;
+			else
+				arp->dropl->list = bp;
+			arp->dropl = a->last;
 
-	a->rtime = NOW + ReTransTimer;
-	a->rxtsrem = MAX_MULTICAST_SOLICIT;
-	a->ifc = ifc;
-	a->ifcid = ifc->ifcid;
-
-	/* put to the end of re-transmit chain; addrxt is 0 when isv4(a->ip) */
-	if(!ipismulticast(a->ip) && addrxt){
-		l = &arp->rxmt;
-		empty = (*l==nil);
-
-		for(f = *l; f; f = f->nextrxt){
-			if(f == a){
-				*l = a->nextrxt;
-				break;
-			}
-			l = &f->nextrxt;
+			if(bp == arp->dropf)
+				wakeup(&arp->rxmtq);
 		}
-		for(f = *l; f; f = f->nextrxt){
-			l = &f->nextrxt;
-		}
-		*l = a;
-		if(empty) 
-			wakeup(&arp->rxmtq);
 	}
+	a->last = nil;
 
-	a->nextrxt = nil;
+	a->ifc = nil;
+	a->ifcid = 0;
 
-	return a;
-}
+	a->state = 0;
+	a->rxtsrem = 0;
 
-/* called with arp qlocked */
+	a->utime = 0;
+	a->ctime = 0;
 
-void
-cleanarpent(Arp *arp, Arpent *a)
+	memset(a->ip, 0, sizeof(a->ip));
+	memset(a->mac, 0, sizeof(a->mac));
+}
+
+/*
+ *  create a new arp entry for an ip address on ifc.
+ */
+static Arpent*
+newarpent(Arp *arp, uchar *ip, Ipifc *ifc)
 {
-	Arpent *f, **l;
+	Arpent *a, *e, *f, **l;
+	ulong t;
 
-	a->utime = 0;
-	a->ctime = 0;
-	a->type = 0;
-	a->state = 0;
-	
-	/* take out of current chain */
-	l = &arp->hash[haship(a->ip)];
-	for(f = *l; f; f = f->hash){
-		if(f == a){
-			*l = a->hash;
-			break;
+	/* find oldest entry */
+	e = &arp->cache[NCACHE];
+	a = arp->cache;
+	t = a->utime;
+	for(f = a; f < e; f++){
+		if(f->utime < t){
+			t = f->utime;
+			a = f;
 		}
-		l = &f->hash;
 	}
+	cleanarpent(arp, a);
 
-	/* take out of re-transmit chain */
-	l = &arp->rxmt;
-	for(f = *l; f; f = f->nextrxt){
-		if(f == a){
-			*l = a->nextrxt;
-			break;
-		}
-		l = &f->nextrxt;
-	}
-	a->nextrxt = nil;
-	a->hash = nil;
-	a->hold = nil;
-	a->last = nil;
-	a->ifc = nil;
+	ipmove(a->ip, ip);
+	a->ifc = ifc;
+	a->ifcid = ifc->ifcid;
+
+	/* insert into new chain */
+	l = &arp->hash[haship(ip)];
+	a->hash = *l;
+	*l = a;
+
+	return a;
 }
 
+
 /*
  *  fill in the media address if we have it.  Otherwise return an
  *  Arpent that represents the state of the address resolution FSM
@@ -207,7 +184,6 @@
 {
 	int hash;
 	Arpent *a;
-	Medium *type = ifc->m;
 	uchar v6ip[IPaddrlen];
 
 	if(version == V4){
@@ -217,30 +193,28 @@
 
 	qlock(arp);
 	hash = haship(ip);
-	for(a = arp->hash[hash]; a; a = a->hash){
-		if(memcmp(ip, a->ip, sizeof(a->ip)) == 0)
-		if(type == a->type)
+	for(a = arp->hash[hash]; a != nil; a = a->hash){
+		if(a->ifc == ifc && a->ifcid == ifc->ifcid && ipcmp(ip, a->ip) == 0)
 			break;
 	}
-
 	if(a == nil){
-		a = newarp6(arp, ip, ifc, (version != V4));
+		a = newarpent(arp, ip, ifc);
 		a->state = AWAIT;
 	}
 	a->utime = NOW;
 	if(a->state == AWAIT){
 		if(bp != nil){
-			if(a->hold)
-				a->last->list = bp;
-			else
+			bp->list = nil; 
+			if(a->hold == nil)
 				a->hold = bp;
+			else
+				a->last->list = bp;
 			a->last = bp;
-			bp->list = nil; 
 		}
 		return a;		/* return with arp qlocked */
 	}
 
-	memmove(mac, a->mac, a->type->maclen);
+	memmove(mac, a->mac, ifc->m->maclen);
 
 	/* remove old entries */
 	if(NOW - a->ctime > 15*60*1000)
@@ -269,132 +243,82 @@
 arpresolve(Arp *arp, Arpent *a, Medium *type, uchar *mac)
 {
 	Block *bp;
-	Arpent *f, **l;
 
-	if(!isv4(a->ip)){
-		l = &arp->rxmt;
-		for(f = *l; f; f = f->nextrxt){
-			if(f == a){
-				*l = a->nextrxt;
-				break;
-			}
-			l = &f->nextrxt;
-		}
-	}
-
 	memmove(a->mac, mac, type->maclen);
-	a->type = type;
+	if(a->state == AWAIT && !isv4(a->ip)){
+		rxmtunchain(arp, a);
+		a->rxtsrem = 0;
+	}
 	a->state = AOK;
-	a->utime = NOW;
+	a->ctime = a->utime = NOW;
 	bp = a->hold;
-	a->hold = nil;
+	a->hold = a->last = nil;
 	qunlock(arp);
 
 	return bp;
 }
 
-void
-arpenter(Fs *fs, int version, uchar *ip, uchar *mac, int n, int refresh)
+int
+arpenter(Fs *fs, int version, uchar *ip, uchar *mac, int n, uchar *ia, Ipifc *ifc, int refresh)
 {
-	Arp *arp;
-	Route *r;
-	Arpent *a, *f, **l;
-	Ipifc *ifc;
-	Medium *type;
-	Block *bp, *next;
 	uchar v6ip[IPaddrlen];
+	Block *bp, *next;
+	Arpent *a;
+	Route *r;
+	Arp *arp;
 
-	arp = fs->arp;
+	if(ifc->m == nil || ifc->m->maclen != n || ifc->m->maclen == 0)
+		return -1;
 
-	if(n != 6){
-//		print("arp: len = %d\n", n);
-		return;
-	}
-
 	switch(version){
 	case V4:
-		r = v4lookup(fs, ip, nil);
+		r = v4lookup(fs, ip, ia, nil);
 		v4tov6(v6ip, ip);
 		ip = v6ip;
 		break;
 	case V6:
-		r = v6lookup(fs, ip, nil);
+		r = v6lookup(fs, ip, ia, nil);
 		break;
 	default:
 		panic("arpenter: version %d", version);
-		return;	/* to supress warnings */
+		return -1;	/* to supress warnings */
 	}
 
-	if(r == nil){
-//		print("arp: no route for entry\n");
-		return;
-	}
+	if(r == nil || r->ifc != ifc || (r->type & (Rbcast|Rmulti)) != 0)
+		return -1;
 
-	ifc = r->ifc;
-	type = ifc->m;
-
+	arp = fs->arp;
 	qlock(arp);
-	for(a = arp->hash[haship(ip)]; a; a = a->hash){
-		if(a->type != type || (a->state != AWAIT && a->state != AOK))
+	for(a = arp->hash[haship(ip)]; a != nil; a = a->hash){
+		if(a->ifc != ifc || a->ifcid != ifc->ifcid)
 			continue;
-
 		if(ipcmp(a->ip, ip) == 0){
-			a->state = AOK;
-			memmove(a->mac, mac, type->maclen);
-
-			if(version == V6){
-				/* take out of re-transmit chain */
-				l = &arp->rxmt;
-				for(f = *l; f; f = f->nextrxt){
-					if(f == a){
-						*l = a->nextrxt;
-						break;
-					}
-					l = &f->nextrxt;
-				}
-			}
-
-			a->ifc = ifc;
-			a->ifcid = ifc->ifcid;
-			bp = a->hold;
-			a->hold = nil;
 			if(version == V4)
 				ip += IPv4off;
-			a->utime = NOW;
-			a->ctime = a->utime;
-			qunlock(arp);
-
-			while(bp){
+			bp = arpresolve(arp, a, ifc->m, mac);	/* unlocks arp */
+			for(; bp != nil; bp = next){
 				next = bp->list;
-				if(ifc != nil){
-					if(waserror()){
-						runlock(ifc);
-						nexterror();
-					}
-					rlock(ifc);
-					if(ifc->m != nil)
-						ifc->m->bwrite(ifc, bp, version, ip);
-					else
-						freeb(bp);
-					runlock(ifc);
-					poperror();
-				} else
-					freeb(bp);
-				bp = next;
+				bp->list = nil;
+				if(waserror()){
+					freeblistchain(next);
+					break;
+				}
+				ipifcoput(ifc, bp, version, ip);
+				poperror();
 			}
-			return;
+			return 1;
 		}
 	}
 
 	if(refresh == 0){
-		a = newarp6(arp, ip, ifc, 0);
+		a = newarpent(arp, ip, ifc);
 		a->state = AOK;
-		a->type = type;
-		a->ctime = NOW;
-		memmove(a->mac, mac, type->maclen);
+		a->ctime = a->utime = NOW;
+		memmove(a->mac, mac, n);
 	}
-
 	qunlock(arp);
+
+	return refresh == 0;
 }
 
 int
@@ -401,13 +325,12 @@
 arpwrite(Fs *fs, char *s, int len)
 {
 	int n;
-	Route *r;
 	Arp *arp;
-	Block *bp;
-	Arpent *a, *fl, **l;
+	Arpent *a, *x;
 	Medium *m;
-	char *f[4], buf[256];
-	uchar ip[IPaddrlen], mac[MAClen];
+	Ipifc *ifc;
+	char *f[5], buf[256];
+	uchar ip[IPaddrlen], ia[IPaddrlen], mac[MAClen];
 
 	arp = fs->arp;
 
@@ -420,7 +343,7 @@
 	if(len > 0 && buf[len-1] == '\n')
 		buf[len-1] = 0;
 
-	n = getfields(buf, f, 4, 1, " ");
+	n = getfields(buf, f, nelem(f), 1, " ");
 	if(strcmp(f[0], "flush") == 0){
 		qlock(arp);
 		for(a = arp->cache; a < &arp->cache[NCACHE]; a++){
@@ -427,19 +350,20 @@
 			memset(a->ip, 0, sizeof(a->ip));
 			memset(a->mac, 0, sizeof(a->mac));
 			a->hash = nil;
+			a->nextrxt = nil;
+			a->ifc = nil;
+			a->ifcid = 0;
 			a->state = 0;
+			a->rxtsrem = 0;
+			a->ctime = 0;
 			a->utime = 0;
-			while(a->hold != nil){
-				bp = a->hold->list;
-				freeblist(a->hold);
-				a->hold = bp;
-			}
+			freeblistchain(a->hold);
+			a->hold = a->last = nil;
 		}
 		memset(arp->hash, 0, sizeof(arp->hash));
-// clear all pkts on these lists (rxmt, dropf/l)
+		freeblistchain(arp->dropf);
+		arp->dropf = arp->dropl = nil;
 		arp->rxmt = nil;
-		arp->dropf = nil;
-		arp->dropl = nil;
 		qunlock(arp);
 	} else if(strcmp(f[0], "add") == 0){
 		switch(n){
@@ -446,64 +370,53 @@
 		default:
 			error(Ebadarg);
 		case 3:
-			parseip(ip, f[1]);
-			if(isv4(ip))
-				r = v4lookup(fs, ip+IPv4off, nil);
-			else
-				r = v6lookup(fs, ip, nil);
-			if(r == nil)
-				error("Destination unreachable");
-			m = r->ifc->m;
-			n = parsemac(mac, f[2], m->maclen);
+			if(parseip(ip, f[1]) == -1)
+				error(Ebadip);
+			if((n = parsemac(mac, f[2], sizeof(mac))) <= 0)
+				error(Ebadarp);
+			findlocalip(fs, ia, ip);
 			break;
 		case 4:
 			m = ipfindmedium(f[1]);
-			if(m == nil)
+			if(m == nil || m->maclen == 0)
 				error(Ebadarp);
-			parseip(ip, f[2]);
-			n = parsemac(mac, f[3], m->maclen);
+			if(parseip(ip, f[2]) == -1)
+				error(Ebadip);
+			if((n = parsemac(mac, f[3], sizeof(mac))) != m->maclen)
+				error(Ebadarp);
+			findlocalip(fs, ia, ip);
 			break;
+		case 5:
+			m = ipfindmedium(f[1]);
+			if(m == nil || m->maclen == 0)
+				error(Ebadarp);
+			if(parseip(ip, f[2]) == -1)
+				error(Ebadip);
+			if((n = parsemac(mac, f[3], sizeof(mac))) != m->maclen)
+				error(Ebadarp);
+			if(parseip(ia, f[4]) == -1)
+				error(Ebadip);
+			break;
 		}
-
-		if(m->ares == nil)
-			error(Ebadarp);
-
-		m->ares(fs, V6, ip, mac, n, 0);
+		if((ifc = findipifc(fs, ia, ia, Runi)) == nil)
+			error("no interface");
+		rlock(ifc);
+		if(!ipv6local(ifc, ia, 0, ip) || arpenter(fs, V6, ip, mac, n, ia, ifc, 0) < 0){
+			runlock(ifc);
+			error("destination unreachable");
+		}
+		runlock(ifc);
 	} else if(strcmp(f[0], "del") == 0){
-		if(n != 2)
+		if (n != 2)
 			error(Ebadarg);
-
-		parseip(ip, f[1]);
+		if (parseip(ip, f[1]) == -1)
+			error(Ebadip);
 		qlock(arp);
-
-		l = &arp->hash[haship(ip)];
-		for(a = *l; a; a = a->hash){
-			if(memcmp(ip, a->ip, sizeof(a->ip)) == 0){
-				*l = a->hash;
-				break;
-			}
-			l = &a->hash;
+		for(a = arp->hash[haship(ip)]; a != nil; a = x){
+			x = a->hash;
+			if(ipcmp(ip, a->ip) == 0)
+				cleanarpent(arp, a);
 		}
-	
-		if(a){
-			/* take out of re-transmit chain */
-			l = &arp->rxmt;
-			for(fl = *l; fl; fl = fl->nextrxt){
-				if(fl == a){
-					*l = a->nextrxt;
-					break;
-				}
-				l = &fl->nextrxt;
-			}
-
-			a->nextrxt = nil;
-			a->hash = nil;
-			a->hold = nil;
-			a->last = nil;
-			a->ifc = nil;
-			memset(a->ip, 0, sizeof(a->ip));
-			memset(a->mac, 0, sizeof(a->mac));
-		}
 		qunlock(arp);
 	} else
 		error(Ebadarp);
@@ -511,13 +424,6 @@
 	return len;
 }
 
-enum
-{
-	Alinelen=	90,
-};
-
-char *aformat = "%-6.6s %-8.8s %-40.40I %-32.32s\n";
-
 static void
 convmac(char *p, uchar *mac, int n)
 {
@@ -526,136 +432,136 @@
 }
 
 int
-arpread(Arp *arp, char *p, ulong offset, int len)
+arpread(Arp *arp, char *s, ulong offset, int len)
 {
+	char mac[2*MAClen+1], *state, *mname, *p;
+	uchar ip[IPaddrlen], ia[IPaddrlen];
+	Ipifc *ifc;
 	Arpent *a;
-	int n;
-	char mac[2*MAClen+1];
+	long n, o;
 
-	if(offset % Alinelen)
-		return 0;
-
-	offset = offset/Alinelen;
-	len = len/Alinelen;
-
-	n = 0;
+	p = s;
+	o = -offset;
 	for(a = arp->cache; len > 0 && a < &arp->cache[NCACHE]; a++){
-		if(a->state == 0)
+		if(a->state == 0 || (ifc = a->ifc) == nil)
 			continue;
-		if(offset > 0){
-			offset--;
+
+		rlock(ifc);
+		qlock(arp);
+		state = arpstate[a->state];
+		ipmove(ip, a->ip);
+		if(ifc->m == nil || a->ifcid != ifc->ifcid || !ipv6local(ifc, ia, 0, ip)){
+			qunlock(arp);
+			runlock(ifc);
 			continue;
 		}
-		len--;
-		qlock(arp);
-		convmac(mac, a->mac, a->type->maclen);
-		n += sprint(p+n, aformat, a->type->name, arpstate[a->state], a->ip, mac);
+		mname = ifc->m->name;
+		convmac(mac, a->mac, ifc->m->maclen);
 		qunlock(arp);
+		runlock(ifc);
+
+		n = snprint(up->genbuf, sizeof up->genbuf,
+			"%-6.6s %-4.4s %-40.40I %-16.16s %I\n",
+			mname, state, ip, mac, ia);
+		o += n;
+		if(o <= 0)
+			continue;
+		if(n > len)
+			break;
+		memmove(p, up->genbuf, n);
+		len -= n;
+		p += n;
 	}
 
-	return n;
+	return p - s;
 }
 
-extern int
-rxmitsols(Arp *arp)
+void
+ndpsendsol(Fs *f, Ipifc *ifc, Arpent *a)
 {
-	uint sflag;
-	Block *next, *xp;
-	Arpent *a, *b, **l;
-	Fs *f;
-	uchar ipsrc[IPaddrlen];
-	Ipifc *ifc = nil;
-	long nrxt;
+	uchar targ[IPaddrlen], src[IPaddrlen];
+	Arpent **l;
 
-	qlock(arp);
-	f = arp->f;
+	a->ctime = NOW;
+	if(a->rxtsrem == 0)
+		a->rxtsrem = MAX_MULTICAST_SOLICIT;
+	else
+		a->rxtsrem--;
 
-	a = arp->rxmt;
-	if(a==nil){
-		nrxt = 0;
-		goto dodrops; 		//return nrxt;
-	}
-	nrxt = a->rtime - NOW;
-	if(nrxt > 3*ReTransTimer/4) 
-		goto dodrops; 		//return nrxt;
+	/* put on end of re-transmit chain */
+	for(l = rxmtunchain(f->arp, a); *l != nil; l = &(*l)->nextrxt)
+		;
+	*l = a;
 
-	for(; a; a = a->nextrxt){
-		ifc = a->ifc;
-		assert(ifc != nil);
-		if((a->rxtsrem <= 0) || !(canrlock(ifc)) || (a->ifcid != ifc->ifcid)){
-			xp = a->hold;
-			a->hold = nil;
+	if(l == &f->arp->rxmt)
+		wakeup(&f->arp->rxmtq);
 
-			if(xp){
-				if(arp->dropl == nil) 
-					arp->dropf = xp;
-				else
-					arp->dropl->list = xp;
-			}
+	/* try to use source address of original packet */
+	ipmove(targ, a->ip);
+	if(a->last != nil){
+		ipmove(src, ((Ip6hdr*)a->last->rp)->src);
+		arprelease(f->arp, a);
 
-			cleanarpent(arp, a);
-		}
-		else
-			break;
+		if(iplocalonifc(ifc, src) != nil || ipproxyifc(f, ifc, src))
+			goto send;
+	} else {
+		arprelease(f->arp, a);
 	}
-	if(a == nil)
-		goto dodrops;
+	if(!ipv6local(ifc, src, 0, targ))
+		return;
+send:
+	if(!waserror()){
+		icmpns(f, src, SRC_UNI, targ, TARG_MULTI, ifc->mac);
+		poperror();
+	}
+}
 
+static void
+rxmitsols(Arp *arp)
+{
+	Block *next, *bp;
+	Arpent *a;
+	Ipifc *ifc;
+	Route *r;
 
-	qunlock(arp);	/* for icmpns */
-	if((sflag = ipv6anylocal(ifc, ipsrc)) != SRC_UNSPEC) 
-		icmpns(f, ipsrc, sflag, a->ip, TARG_MULTI, ifc->mac); 
-
-	runlock(ifc);
-	qlock(arp);	
-
-	/* put to the end of re-transmit chain */
-	l = &arp->rxmt;
-	for(b = *l; b; b = b->nextrxt){
-		if(b == a){
-			*l = a->nextrxt;
-			break;
+	qlock(arp);
+	while((a = arp->rxmt) != nil && NOW - a->ctime > 3*ReTransTimer/4){
+		if(a->rxtsrem > 0 && (ifc = a->ifc) != nil && canrlock(ifc)){
+			if(a->ifcid == ifc->ifcid){
+				ndpsendsol(arp->f, ifc, a);	/* unlocks arp */
+				runlock(ifc);
+				qlock(arp);
+				continue;
+			}
+			runlock(ifc);
 		}
-		l = &b->nextrxt;
+		cleanarpent(arp, a);
 	}
-	for(b = *l; b; b = b->nextrxt){
-		l = &b->nextrxt;
-	}
-	*l = a;
-	a->rxtsrem--;
-	a->nextrxt = nil;
-	a->rtime = NOW + ReTransTimer;
-
-	a = arp->rxmt;
-	if(a==nil)
-		nrxt = 0;
-	else 
-		nrxt = a->rtime - NOW;
-
-dodrops:
-	xp = arp->dropf;
-	arp->dropf = nil;
-	arp->dropl = nil;
+	bp = arp->dropf;
+	arp->dropf = arp->dropl = nil;
 	qunlock(arp);
 
-	for(; xp; xp = next){
-		next = xp->list;
-		icmphostunr(f, ifc, xp, icmp6_adr_unreach, 1);
+	for(; bp != nil; bp = next){
+		next = bp->list;
+		bp->list = nil;
+		r = v6lookup(arp->f, ((Ip6hdr*)bp->rp)->src, ((Ip6hdr*)bp->rp)->dst, nil);
+		if(r != nil && (ifc = r->ifc) != nil && canrlock(ifc)){
+			if(!waserror()){
+				icmphostunr6(arp->f, ifc, bp, Icmp6_adr_unreach, (r->type & Runi) != 0);
+				poperror();
+			}
+			runlock(ifc);
+		}
+		freeblist(bp);
 	}
-
-	return nrxt;
-
 }
 
 static int
 rxready(void *v)
 {
-	Arp *arp = (Arp *) v;
-	int x;
+	Arp *arp = (Arp *)v;
 
-	x = ((arp->rxmt != nil) || (arp->dropf != nil));
-
-	return x;
+	return arp->rxmt != nil || arp->dropf != nil;
 }
 
 static void
@@ -662,20 +568,15 @@
 rxmitproc(void *v)
 {
 	Arp *arp = v;
-	long wakeupat;
 
 	arp->rxmitp = up;
-	//print("arp rxmitproc started\n");
 	if(waserror()){
-		arp->rxmitp = 0;
+		arp->rxmitp = nil;
 		pexit("hangup", 1);
 	}
 	for(;;){
-		wakeupat = rxmitsols(arp);
-		if(wakeupat == 0) 
-			sleep(&arp->rxmtq, rxready, v); 
-		else if(wakeupat > ReTransTimer/4) 
-			tsleep(&arp->rxmtq, return0, 0, wakeupat); 
+		sleep(&arp->rxmtq, rxready, v);
+		rxmitsols(arp);
+		tsleep(&arp->rxmtq, return0, nil, ReTransTimer/4);
 	}
 }
-
--- /dev/null
+++ b/os/ip/chandial.c
@@ -1,0 +1,153 @@
+#include	"u.h"
+#include	"../port/lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"../port/error.h"
+#include	"../ip/ip.h"
+
+typedef struct DS DS;
+static Chan*	call(char*, char*, DS*);
+static void	_dial_string_parse(char*, DS*);
+
+enum
+{
+	Maxstring=	128,	/* size of DS.buf, the private copy of the dial string */
+};
+
+/*
+ *  a parsed dial string together with the optional
+ *  arguments the caller handed to chandial
+ */
+struct DS
+{
+	char	buf[Maxstring];			/* dial string */
+	char	*netdir;			/* network directory, nil if the string named none */
+	char	*proto;				/* protocol name */
+	char	*rem;				/* remote address: the text after '!' */
+	char	*local;				/* other args */
+	char	*dir;				/* if non-nil, receives the conversation directory */
+	Chan	**ctlp;				/* if non-nil, receives the clone channel */
+};
+
+/*
+ *  dial dest and return a channel open on the conversation's
+ *  data file.
+ *  the dialstring is of the form '[/net/]proto!dest'
+ *
+ *  local, if non-nil, is appended to the connect request.
+ *  dir, if non-nil, receives the path of the conversation
+ *  directory; at most Maxpath bytes are written to it.
+ *  ctlp, if non-nil, receives the channel opened on the clone
+ *  file; otherwise that channel is closed before returning.
+ *  _dial_string_parse may raise Ebadarg, and errors raised
+ *  while dialing propagate to the caller.
+ */
+Chan*
+chandial(char *dest, char *local, char *dir, Chan **ctlp)
+{
+	DS ds;
+	char clone[Maxpath];
+
+	ds.local = local;
+	ds.dir = dir;
+	ds.ctlp = ctlp;
+
+	_dial_string_parse(dest, &ds);
+	if(ds.netdir == nil)
+		ds.netdir = "/net";
+
+	/* no connection server, don't translate */
+	snprint(clone, sizeof(clone), "%s/%s/clone", ds.netdir, ds.proto);
+	return call(clone, ds.rem, &ds);
+}
+
+/*
+ *  open the clone file, ask the new conversation to connect to
+ *  dest and open its data file.  clone is truncated in place to
+ *  the protocol directory.
+ */
+static Chan*
+call(char *clone, char *dest, DS *ds)
+{
+	int n;
+	Chan *dchan, *cchan;
+	char name[Maxpath], data[Maxpath], *p;
+
+	cchan = namec(clone, Aopen, ORDWR, 0);
+
+	/* get directory name */
+	if(waserror()){
+		/* don't leak the clone channel if anything below fails */
+		cclose(cchan);
+		nexterror();
+	}
+	n = devtab[cchan->type]->read(cchan, name, sizeof(name)-1, 0);
+	name[n] = 0;
+	for(p = name; *p == ' '; p++)
+		;
+	/* normalize to a bare number; strtoul is done with p before name is rewritten */
+	snprint(name, sizeof(name), "%lud", strtoul(p, 0, 0));
+	/* chandial builds clone as "netdir/proto/clone", so a '/' is present */
+	p = strrchr(clone, '/');
+	*p = 0;
+	if(ds->dir)
+		snprint(ds->dir, Maxpath, "%s/%s", clone, name);
+	snprint(data, sizeof(data), "%s/%s/data", clone, name);
+
+	/* connect */
+	if(ds->local)
+		snprint(name, sizeof(name), "connect %s %s", dest, ds->local);
+	else
+		snprint(name, sizeof(name), "connect %s", dest);
+	devtab[cchan->type]->write(cchan, name, strlen(name), 0);
+
+	/* open data connection */
+	dchan = namec(data, Aopen, ORDWR, 0);
+	if(ds->ctlp)
+		*ds->ctlp = cchan;
+	else
+		cclose(cchan);
+	poperror();
+	return dchan;
+}
+
+/*
+ *  parse a dial string into ds->netdir, ds->proto and ds->rem.
+ *  str is copied (truncated to Maxstring-1 bytes) into ds->buf
+ *  and split there.  without a '!' the whole string is the
+ *  remote address and the protocol is "net".  a network
+ *  directory is only recognized when the string starts with
+ *  '/' or '#'; Ebadarg is raised when no later '/' separates
+ *  that directory from the protocol.
+ */
+static void
+_dial_string_parse(char *str, DS *ds)
+{
+	char *p, *p2;
+
+	strncpy(ds->buf, str, Maxstring);
+	ds->buf[Maxstring-1] = 0;
+
+	p = strchr(ds->buf, '!');
+	if(p == 0) {
+		ds->netdir = 0;
+		ds->proto = "net";
+		ds->rem = ds->buf;
+	} else {
+		if(*ds->buf != '/' && *ds->buf != '#'){
+			ds->netdir = nil;
+			ds->proto = ds->buf;
+		} else {
+			/* back up from the '!' to the '/' that ends the directory */
+			for(p2 = p; *p2 != '/' && p2 != ds->buf; p2--)
+				;
+			if(p2 == ds->buf)
+				error(Ebadarg);
+			*p2++ = 0;
+			ds->netdir = ds->buf;
+			ds->proto = p2;
+		}
+		*p = 0;
+		ds->rem = p + 1;
+	}
+}
--- a/os/ip/devip.c
+++ b/os/ip/devip.c
@@ -14,7 +14,6 @@
 	Qbootp,
 	Qndb,
 	Qiproute,
-	Qiprouter,
 	Qipselftab,
 	Qlog,
 
@@ -43,11 +42,11 @@
 	Maskproto=	(1<<Logproto)-1,
 	Shiftproto=	Logtype + Logconv,
 
-	Nfs=		32,
+	Nfs=		128,
 };
-#define TYPE(x) 	( ((u32)(x).path) & Masktype )
-#define CONV(x) 	( (((u32)(x).path) >> Shiftconv) & Maskconv )
-#define PROTO(x) 	( (((u32)(x).path) >> Shiftproto) & Maskproto )
+#define TYPE(x) 	( ((ulong)(x).path) & Masktype )
+#define CONV(x) 	( (((ulong)(x).path) >> Shiftconv) & Maskconv )
+#define PROTO(x) 	( (((ulong)(x).path) >> Shiftproto) & Maskproto )
 #define QID(p, c, y) 	( ((p)<<(Shiftproto)) | ((c)<<Shiftconv) | (y) )
 
 static char network[] = "network";
@@ -58,8 +57,7 @@
 
 extern	void nullmediumlink(void);
 extern	void pktmediumlink(void);
-static	long ndbwrite(Fs*, char*, ulong, int);
-extern void    closeconv(Conv*);
+	long ndbwrite(Fs *f, char *a, ulong off, int n);
 
 static int
 ip3gen(Chan *c, int i, Dir *dp)
@@ -121,7 +119,7 @@
 		mkqid(&q, QID(PROTO(c->qid), 0, Qstats), 0, QTFILE);
 		devdir(c, q, "stats", 0, network, 0444, dp);
 		return 1;
-	}	
+	}
 	return -1;
 }
 
@@ -144,11 +142,10 @@
 		return -1;
 	case Qarp:
 		p = "arp";
+		prot = 0664;
 		break;
 	case Qbootp:
 		p = "bootp";
-		if(bootp == nil)
-			return 0;
 		break;
 	case Qndb:
 		p = "ndb";
@@ -157,14 +154,12 @@
 		break;
 	case Qiproute:
 		p = "iproute";
+		prot = 0664;
 		break;
 	case Qipselftab:
 		p = "ipselftab";
 		prot = 0444;
 		break;
-	case Qiprouter:
-		p = "iprouter";
-		break;
 	case Qlog:
 		p = "log";
 		break;
@@ -188,7 +183,7 @@
 	case Qtopdir:
 		if(s == DEVDOTDOT){
 			mkqid(&q, QID(0, 0, Qtopdir), 0, QTDIR);
-			sprint(up->genbuf, "#I%ud", c->dev);
+			snprint(up->genbuf, sizeof up->genbuf, "#I%lud", c->dev);
 			devdir(c, q, up->genbuf, 0, network, 0555, dp);
 			return 1;
 		}
@@ -206,19 +201,18 @@
 	case Qndb:
 	case Qlog:
 	case Qiproute:
-	case Qiprouter:
 	case Qipselftab:
 		return ip1gen(c, TYPE(c->qid), dp);
 	case Qprotodir:
 		if(s == DEVDOTDOT){
 			mkqid(&q, QID(0, 0, Qtopdir), 0, QTDIR);
-			sprint(up->genbuf, "#I%ud", c->dev);
+			snprint(up->genbuf, sizeof up->genbuf, "#I%lud", c->dev);
 			devdir(c, q, up->genbuf, 0, network, 0555, dp);
 			return 1;
 		}
 		if(s < f->p[PROTO(c->qid)]->ac) {
 			cv = f->p[PROTO(c->qid)]->conv[s];
-			sprint(up->genbuf, "%d", s);
+			snprint(up->genbuf, sizeof up->genbuf, "%d", s);
 			mkqid(&q, QID(PROTO(c->qid), s, Qconvdir), 0, QTDIR);
 			devdir(c, q, up->genbuf, 0, cv->owner, 0555, dp);
 			return 1;
@@ -262,45 +256,14 @@
 	fmtinstall('M', eipfmt);
 }
 
-static Fs*
-ipgetfs(int dev)
-{
-	extern void (*ipprotoinit[])(Fs*);
-	Fs *f;
-	int i;
-
-	if(dev >= Nfs)
-		return nil;
-
-	qlock(&fslock);
-	if(ipfs[dev] == nil){
-		f = smalloc(sizeof(Fs));
-		ip_init(f);
-		arpinit(f);
-		netloginit(f);
-		for(i = 0; ipprotoinit[i]; i++)
-			ipprotoinit[i](f);
-		f->dev = dev;
-		ipfs[dev] = f;
-	}
-	qunlock(&fslock);
-
-	return ipfs[dev];
-}
-
 IPaux*
 newipaux(char *owner, char *tag)
 {
 	IPaux *a;
-	int n;
 
 	a = smalloc(sizeof(*a));
 	kstrdup(&a->owner, owner);
-	memset(a->tag, ' ', sizeof(a->tag));
-	n = strlen(tag);
-	if(n > sizeof(a->tag))
-		n = sizeof(a->tag);
-	memmove(a->tag, tag, n);
+	strncpy(a->tag, tag, sizeof(a->tag));
 	return a;
 }
 
@@ -310,13 +273,29 @@
 ipattach(char* spec)
 {
 	Chan *c;
-	int dev;
+	ulong dev;
 
-	dev = atoi(spec);
+	dev = strtoul(spec, nil, 10);
 	if(dev >= Nfs)
-		error("bad specification");
+		error(Enodev);
 
-	ipgetfs(dev);
+	qlock(&fslock);
+	if(ipfs[dev] == nil){
+		extern void (*ipprotoinit[])(Fs*);
+		Fs *f;
+		int i;
+
+		f = smalloc(sizeof(Fs));
+		ip_init(f);
+		arpinit(f);
+		netloginit(f);
+		for(i = 0; ipprotoinit[i]; i++)
+			ipprotoinit[i](f);
+		f->dev = dev;
+		ipfs[dev] = f;
+	}
+	qunlock(&fslock);
+
 	c = devattach('I', spec);
 	mkqid(&c->qid, QID(0, 0, Qtopdir), 0, QTDIR);
 	c->dev = dev;
@@ -327,7 +306,7 @@
 }
 
 static Walkqid*
-ipwalk(Chan* c, Chan *nc, char **name, s32 nname)
+ipwalk(Chan* c, Chan *nc, char **name, int nname)
 {
 	IPaux *a = c->aux;
 	Walkqid* w;
@@ -338,8 +317,9 @@
 	return w;
 }
 
-static s32
-ipstat(Chan* c, uchar* db, s32 n)
+
+static int
+ipstat(Chan* c, uchar* db, int n)
 {
 	return devstat(c, db, n, nil, 0, ipgen);
 }
@@ -360,7 +340,7 @@
 };
 
 static Chan*
-ipopen(Chan* c, u32 omode)
+ipopen(Chan* c, int omode)
 {
 	Conv *cv, *nc;
 	Proto *p;
@@ -375,7 +355,7 @@
 	default:
 		break;
 	case Qndb:
-		if(omode & (OWRITE|OTRUNC) && !iseve())
+		if((omode & (OWRITE|OTRUNC)) != 0 && !iseve())
 			error(Eperm);
 		if((omode & (OWRITE|OTRUNC)) == (OWRITE|OTRUNC))
 			f->ndb[0] = 0;
@@ -383,10 +363,10 @@
 	case Qlog:
 		netlogopen(f);
 		break;
-	case Qiprouter:
-		iprouteropen(f);
-		break;
 	case Qiproute:
+	case Qarp:
+		if(omode != OREAD && !iseve())
+			error(Eperm);
 		break;
 	case Qtopdir:
 	case Qprotodir:
@@ -412,13 +392,8 @@
 	case Qclone:
 		p = f->p[PROTO(c->qid)];
 		qlock(p);
-		if(waserror()){
-			qunlock(p);
-			nexterror();
-		}
 		cv = Fsprotoclone(p, ATTACHER(c));
 		qunlock(p);
-		poperror();
 		if(cv == nil) {
 			error(Enodev);
 			break;
@@ -437,15 +412,12 @@
 			qunlock(p);
 			nexterror();
 		}
-		if((perm & (cv->perm>>6)) != perm) {
-			if(strcmp(ATTACHER(c), cv->owner) != 0)
-				error(Eperm);
-		 	if((perm & cv->perm) != perm)
-				error(Eperm); 
+		if(strcmp(ATTACHER(c), cv->owner) == 0)
+			perm <<= 6;
+		if((perm & cv->perm) != perm && !iseve())
+			error(Eperm);
 
-		}
-		cv->inuse++;
-		if(cv->inuse == 1){
+		if(++cv->inuse == 1){
 			kstrdup(&cv->owner, ATTACHER(c));
 			cv->perm = 0660;
 		}
@@ -455,24 +427,26 @@
 		break;
 	case Qlisten:
 		cv = f->p[PROTO(c->qid)]->conv[CONV(c->qid)];
-		if((perm & (cv->perm>>6)) != perm) {
-			if(strcmp(ATTACHER(c), cv->owner) != 0)
-				error(Eperm);
-		 	if((perm & cv->perm) != perm)
-				error(Eperm); 
-
+		qlock(cv);
+		if(waserror()){
+			qunlock(cv);
+			nexterror();
 		}
+		if(strcmp(ATTACHER(c), cv->owner) == 0)
+			perm <<= 6;
+		if((perm & cv->perm) != perm && !iseve())
+			error(Eperm);
 
 		if(cv->state != Announced)
 			error("not announced");
 
+		cv->inuse++;
+		qunlock(cv);
+		poperror();
 		if(waserror()){
 			closeconv(cv);
 			nexterror();
 		}
-		qlock(cv);
-		cv->inuse++;
-		qunlock(cv);
 
 		nc = nil;
 		while(nc == nil) {
@@ -494,7 +468,6 @@
 			if(nc != nil){
 				cv->incall = nc->next;
 				mkqid(&c->qid, QID(PROTO(c->qid), nc->x, Qctl), 0, QTFILE);
-				kstrdup(&cv->owner, ATTACHER(c));
 			}
 			qunlock(cv);
 
@@ -511,13 +484,25 @@
 	return c;
 }
 
-static s32
-ipwstat(Chan *c, uchar *dp, s32 n)
+static Chan*
+ipcreate(Chan*, char*, int, ulong)
 {
-	Dir *d;
+	error(Eperm);
+	return 0;
+}
+
+static void
+ipremove(Chan*)
+{
+	error(Eperm);
+}
+
+static int
+ipwstat(Chan *c, uchar *dp, int n)
+{
+	Dir *dir;
 	Conv *cv;
 	Fs *f;
-	Proto *p;
 
 	f = ipfs[c->dev];
 	switch(TYPE(c->qid)) {
@@ -529,28 +514,40 @@
 		break;
 	}
 
-	d = smalloc(sizeof(*d)+n);
+	dir = smalloc(sizeof(Dir)+n);
 	if(waserror()){
-		free(d);
+		free(dir);
 		nexterror();
 	}
-	n = convM2D(dp, n, d, (char*)&d[1]);
+	n = convM2D(dp, n, &dir[0], (char*)&dir[1]);
 	if(n == 0)
 		error(Eshortstat);
-	p = f->p[PROTO(c->qid)];
-	cv = p->conv[CONV(c->qid)];
-	if(!iseve() && strcmp(ATTACHER(c), cv->owner) != 0)
+
+	cv = f->p[PROTO(c->qid)]->conv[CONV(c->qid)];
+	qlock(cv);
+	if(waserror()){
+		qunlock(cv);
+		nexterror();
+	}
+	if(strcmp(ATTACHER(c), cv->owner) != 0 && !iseve())
 		error(Eperm);
-	if(!emptystr(d->uid))
-		kstrdup(&cv->owner, d->uid);
-	if(d->mode != ~0UL)
-		cv->perm = d->mode & 0777;
+	if(!emptystr(dir->uid)){
+		if(strcmp(dir->uid, commonuser()) != 0 && !iseve())
+			error(Eperm);
+		kstrdup(&cv->owner, dir->uid);
+	}
+	if(dir->mode != ~0UL)
+		cv->perm = dir->mode & 0666;
+	qunlock(cv);
 	poperror();
-	free(d);
+
+	free(dir);
+	poperror();
+
 	return n;
 }
 
-extern void
+void
 closeconv(Conv *cv)
 {
 	Conv *nc;
@@ -564,7 +561,7 @@
 	}
 
 	/* close all incoming calls since no listen will ever happen */
-	for(nc = cv->incall; nc; nc = cv->incall){
+	for(nc = cv->incall; nc != nil; nc = cv->incall){
 		cv->incall = nc->next;
 		closeconv(nc);
 	}
@@ -576,9 +573,9 @@
 	while((mp = cv->multi) != nil)
 		ipifcremmulti(cv, mp->ma, mp->ia);
 
-	cv->r = nil;
-	cv->rgen = 0;
-	cv->p->close(cv);
+	if(cv->p->close != nil)
+		(*cv->p->close)(cv);
+
 	cv->state = Idle;
 	qunlock(cv);
 }
@@ -596,10 +593,6 @@
 		if(c->flag & COPEN)
 			netlogclose(f);
 		break;
-	case Qiprouter:
-		if(c->flag & COPEN)
-			iprouterclose(f);
-		break;
 	case Qdata:
 	case Qctl:
 	case Qerr:
@@ -620,13 +613,13 @@
 	Statelen=	32*1024,
 };
 
-static s32
-ipread(Chan *ch, void *a, s32 n, s64 off)
+static long
+ipread(Chan *ch, void *a, long n, vlong off)
 {
 	Conv *c;
 	Proto *x;
 	char *buf, *p;
-	s32 rv;
+	long rv;
 	Fs *f;
 	ulong offset = off;
 
@@ -648,21 +641,22 @@
 		return readstr(offset, a, n, f->ndb);
 	case Qiproute:
 		return routeread(f, a, offset, n);
-	case Qiprouter:
-		return iprouterread(f, a, n);
 	case Qipselftab:
 		return ipselftabread(f, a, offset, n);
 	case Qlog:
 		return netlogread(f, a, offset, n);
 	case Qctl:
-		sprint(up->genbuf, "%ud", CONV(ch->qid));
-		return readstr(offset, p, n, up->genbuf);
+		buf = smalloc(16);
+		snprint(buf, 16, "%lud", CONV(ch->qid));
+		rv = readstr(offset, p, n, buf);
+		free(buf);
+		return rv;
 	case Qremote:
 		buf = smalloc(Statelen);
 		x = f->p[PROTO(ch->qid)];
 		c = x->conv[CONV(ch->qid)];
 		if(x->remote == nil) {
-			sprint(buf, "%I!%d\n", c->raddr, c->rport);
+			snprint(buf, Statelen, "%I!%d\n", c->raddr, c->rport);
 		} else {
 			(*x->remote)(c, buf, Statelen-2);
 		}
@@ -674,7 +668,7 @@
 		x = f->p[PROTO(ch->qid)];
 		c = x->conv[CONV(ch->qid)];
 		if(x->local == nil) {
-			sprint(buf, "%I!%d\n", c->laddr, c->lport);
+			snprint(buf, Statelen, "%I!%d\n", c->laddr, c->lport);
 		} else {
 			(*x->local)(c, buf, Statelen-2);
 		}
@@ -711,7 +705,7 @@
 }
 
 static Block*
-ipbread(Chan* ch, s32 n, u32 offset)
+ipbread(Chan* ch, long n, ulong offset)
 {
 	Conv *c;
 	Proto *x;
@@ -740,7 +734,7 @@
 /*
  *  set a local port making sure the quad of raddr,rport,laddr,lport is unique
  */
-static char*
+char*
 setluniqueport(Conv* c, int lport)
 {
 	Proto *p;
@@ -771,51 +765,63 @@
 }
 
 /*
+ * is lport in use by anyone?
+ */
+static int
+lportinuse(Proto *p, ushort lport)
+{
+	Conv **cp;
+	/* scan stops after p->nc slots or at the first nil slot, whichever is first */
+	for(cp = p->conv; cp < p->conv + p->nc && *cp != nil; cp++)
+		if((*cp)->lport == lport)
+			return 1;
+	return 0;
+}
+
+/*
  *  pick a local port and set it
  */
-extern void
+char *
 setlport(Conv* c)
 {
 	Proto *p;
-	ushort *pp;
-	int x, found;
+	int i, port;
 
 	p = c->p;
-	if(c->restricted)
-		pp = &p->nextrport;
-	else
-		pp = &p->nextport;
 	qlock(p);
-	for(;;(*pp)++){
+	if(c->restricted){
+		/* Restricted ports cycle between 600 and 1024. */
+		for(i=0; i<1024-600; i++){
+			if(p->nextrport >= 1024 || p->nextrport < 600)
+				p->nextrport = 600;
+			port = p->nextrport++;
+			if(!lportinuse(p, port))
+				goto chosen;
+		}
+	}else{
 		/*
-		 * Fsproto initialises p->nextport to 0 and the restricted
-		 * ports (p->nextrport) to 600.
-		 * Restricted ports must lie between 600 and 1024.
-		 * For the initial condition or if the unrestricted port number
-		 * has wrapped round, select a random port between 5000 and 1<<15
-		 * to start at.
+		 * Unrestricted ports are chosen randomly
+		 * between 2^15 and 2^16.  There are at most
+		 * 4*Nchan = 4096 ports in use at any given time,
+		 * so even in the worst case, a random probe has a
+		 * 1 - 4096/2^15 = 87% chance of success.
+		 * If 64 successive probes fail, there is a bug somewhere
+		 * (or a once in 10^58 event has happened, but that's
+		 * less likely than a venti collision).
 		 */
-		if(c->restricted){
-			if(*pp >= 1024)
-				*pp = 600;
+		for(i=0; i<64; i++){
+			port = (1<<15) + nrand(1<<15);
+			if(!lportinuse(p, port))
+				goto chosen;
 		}
-		else while(*pp < 5000)
-			*pp = nrand(1<<15);
-
-		found = 0;
-		for(x = 0; x < p->nc; x++){
-			if(p->conv[x] == nil)
-				break;
-			if(p->conv[x]->lport == *pp){
-				found = 1;
-				break;
-			}
-		}
-		if(found == 0)
-			break;
 	}
-	c->lport = (*pp)++;
 	qunlock(p);
+	return "no ports available";
+
+chosen:
+	c->lport = port;
+	qunlock(p);
+	return nil;
 }
 
 /*
@@ -822,7 +828,7 @@
  *  set a local address and port from a string of the form
  *	[address!]port[!r]
  */
-static char*
+char*
 setladdrport(Conv* c, char* str, int announcing)
 {
 	char *p;
@@ -830,8 +836,6 @@
 	ushort lport;
 	uchar addr[IPaddrlen];
 
-	rv = nil;
-
 	/*
 	 *  ignore restricted part if it exists.  it's
 	 *  meaningless on local ports.
@@ -854,8 +858,9 @@
 		if(strcmp(str, "*") == 0)
 			ipmove(c->laddr, IPnoaddr);
 		else {
-			parseip(addr, str);
-			if(ipforme(c->p->f, addr))
+			if(parseip(addr, str) == -1)
+				return Ebadip;
+			if(ipforme(c->p->f, addr) != 0 || ipismulticast(addr))
 				ipmove(c->laddr, addr);
 			else
 				return "not a local IP address";
@@ -869,9 +874,13 @@
 		return setluniqueport(c, 0);
 	}
 
-	lport = atoi(p);
+	str = p;
+	lport = strtol(str, &p, 10);
+	if(p <= str || strchr("!", *p) == nil)
+		return "bad numeric port";
+
 	if(lport <= 0)
-		setlport(c);
+		rv = setlport(c);
 	else
 		rv = setluniqueport(c, lport);
 	return rv;
@@ -886,13 +895,17 @@
 	if(p == nil)
 		return "malformed address";
 	*p++ = 0;
-	parseip(c->raddr, str);
-	c->rport = atoi(p);
-	p = strchr(p, '!');
-	if(p){
-		if(strstr(p, "!r") != nil)
-			c->restricted = 1;
-	}
+	if(parseip(c->raddr, str) == -1)
+		return Ebadip;
+
+	str = p;
+	c->rport = strtol(str, &p, 10);
+	if(p <= str || strchr("!", *p) == nil)
+		return "bad numeric port";
+
+	if(strstr(p, "!r") != nil)
+		c->restricted = 1;
+
 	return nil;
 }
 
@@ -912,7 +925,9 @@
 		if(p != nil)
 			return p;
 		setladdr(c);
-		setlport(c);
+		p = setlport(c);
+		if (p != nil)
+			return p;
 		break;
 	case 3:
 		p = setraddrport(c, argv[1]);
@@ -923,12 +938,7 @@
 			return p;
 	}
 
-	if((memcmp(c->raddr, v4prefix, IPv4off) == 0 &&
-		memcmp(c->laddr, v4prefix, IPv4off) == 0)
-		|| ipcmp(c->raddr, IPnoaddr) == 0)
-		c->ipversion = V4;
-	else
-		c->ipversion = V6;
+	c->ipversion = convipvers(c);
 
 	return nil;
 }
@@ -978,10 +988,11 @@
 	c->rport = 0;
 	switch(argc){
 	default:
-		return "bad args to announce";
+		break;
 	case 2:
 		return setladdrport(c, argv[1], 1);
 	}
+	return "bad args to announce";
 }
 
 /*
@@ -1028,10 +1039,11 @@
 {
 	switch(argc){
 	default:
-		return "bad args to bind";
+		break;
 	case 2:
 		return setladdrport(c, argv[1], 0);
 	}
+	return "bad args to bind";
 }
 
 static void
@@ -1042,7 +1054,7 @@
 	if(x->bind == nil)
 		p = Fsstdbind(c, cb->f, cb->nf);
 	else
-		p = x->bind(c, cb->f, cb->nf);
+		p = (*x->bind)(c, cb->f, cb->nf);
 	if(p != nil)
 		error(p);
 }
@@ -1065,8 +1077,8 @@
 		c->ttl = atoi(cb->f[1]);
 }
 
-static s32
-ipwrite(Chan* ch, void *v, s32 n, s64 off)
+static long
+ipwrite(Chan* ch, void *v, long n, vlong off)
 {
 	Conv *c;
 	Proto *x;
@@ -1075,6 +1087,7 @@
 	uchar ia[IPaddrlen], ma[IPaddrlen];
 	Fs *f;
 	char *a;
+	ulong offset = off;
 
 	a = v;
 	f = ipfs[ch->dev];
@@ -1099,7 +1112,8 @@
 		netlogctl(f, a, n);
 		return n;
 	case Qndb:
-		return ndbwrite(f, a, off, n);
+		return ndbwrite(f, a, offset, n);
+		break;
 	case Qctl:
 		x = f->p[PROTO(ch->qid)];
 		c = x->conv[CONV(ch->qid)];
@@ -1131,13 +1145,15 @@
 			if(cb->nf == 2){
 				if(!ipismulticast(c->raddr))
 					error("addmulti for a non multicast address");
-				parseip(ia, cb->f[1]);
+				if (parseip(ia, cb->f[1]) == -1)
+					error(Ebadip);
 				ipifcaddmulti(c, c->raddr, ia);
 			} else {
-				parseip(ma, cb->f[2]);
+				if (parseip(ia, cb->f[1]) == -1 ||
+				    parseip(ma, cb->f[2]) == -1)
+					error(Ebadip);
 				if(!ipismulticast(ma))
 					error("addmulti for a non multicast address");
-				parseip(ia, cb->f[1]);
 				ipifcaddmulti(c, ma, ia);
 			}
 		} else if(strcmp(cb->f[0], "remmulti") == 0){
@@ -1145,10 +1161,11 @@
 				error("remmulti needs interface address");
 			if(!ipismulticast(c->raddr))
 				error("remmulti for a non multicast address");
-			parseip(ia, cb->f[1]);
+			if (parseip(ia, cb->f[1]) == -1)
+				error(Ebadip);
 			ipifcremmulti(c, c->raddr, ia);
 		} else if(x->ctl != nil) {
-			p = x->ctl(c, cb->f, cb->nf);
+			p = (*x->ctl)(c, cb->f, cb->nf);
 			if(p != nil)
 				error(p);
 		} else
@@ -1160,13 +1177,12 @@
 	return n;
 }
 
-static s32
-ipbwrite(Chan* ch, Block* bp, u32 offset)
+static long
+ipbwrite(Chan* ch, Block* bp, ulong offset)
 {
 	Conv *c;
 	Proto *x;
 	Fs *f;
-	int n;
 
 	switch(TYPE(ch->qid)){
 	case Qdata:
@@ -1177,11 +1193,7 @@
 		if(c->wq == nil)
 			error(Eperm);
 
-		if(bp->next)
-			bp = concatblock(bp);
-		n = BLEN(bp);
-		qbwrite(c->wq, bp);
-		return n;
+		return qbwrite(c->wq, bp);
 	default:
 		return devbwrite(ch, bp, offset);
 	}
@@ -1198,13 +1210,13 @@
 	ipwalk,
 	ipstat,
 	ipopen,
-	devcreate,
+	ipcreate,
 	ipclose,
 	ipread,
 	ipbread,
 	ipwrite,
 	ipbwrite,
-	devremove,
+	ipremove,
 	ipwstat,
 };
 
@@ -1224,12 +1236,15 @@
 
 	p->qid.type = QTDIR;
 	p->qid.path = QID(f->np, 0, Qprotodir);
+	if(p->nc > Maskconv+1){
+		print("Fsproto: %s nc %d > %d\n", p->name, p->nc, Maskconv+1);
+		p->nc = Maskconv+1;
+	}
 	p->conv = malloc(sizeof(Conv*)*(p->nc+1));
 	if(p->conv == nil)
 		panic("Fsproto");
 
 	p->x = f->np;
-	p->nextport = 0;
 	p->nextrport = 600;
 	f->p[f->np++] = p;
 
@@ -1262,21 +1277,33 @@
 		if(c == nil){
 			c = malloc(sizeof(Conv));
 			if(c == nil)
-				error(Enomem);
-			qlock(c);
+				return nil;
+			if(waserror()){
+				qfree(c->rq);
+				qfree(c->wq);
+				qfree(c->eq);
+				qfree(c->sq);
+				free(c->ptcl);
+				free(c);
+				return nil;
+			}
 			c->p = p;
 			c->x = pp - p->conv;
 			if(p->ptclsize != 0){
 				c->ptcl = malloc(p->ptclsize);
-				if(c->ptcl == nil) {
-					free(c);
+				if(c->ptcl == nil)
 					error(Enomem);
-				}
 			}
-			*pp = c;
-			p->ac++;
 			c->eq = qopen(1024, Qmsg, 0, 0);
+			if(c->eq == nil)
+				error(Enomem);
 			(*p->create)(c);
+			if(c->rq == nil || c->wq == nil)
+				error(Enomem);
+			poperror();
+			qlock(c);
+			*pp = c;
+			p->ac++;
 			break;
 		}
 		if(canqlock(c)){
@@ -1291,8 +1318,11 @@
 		}
 	}
 	if(pp >= ep) {
-		if(p->gc != nil && (*p->gc)(p))
-			goto retry;
+		if(p->gc != nil){
+			print("Fsprotoclone: garbage collecting %s Convs\n", p->name);
+			if((*p->gc)(p))
+				goto retry;
+		}
 		return nil;
 	}
 
@@ -1307,8 +1337,9 @@
 	c->lport = 0;
 	c->rport = 0;
 	c->restricted = 0;
+	c->ignoreadvice = 0;
 	c->ttl = MAXTTL;
-	c->tos = DFLTTOS;
+	c->tos = 0;
 	qreopen(c->rq);
 	qreopen(c->wq);
 	qreopen(c->eq);
@@ -1321,7 +1352,7 @@
 Fsconnected(Conv* c, char* msg)
 {
 	if(msg != nil && *msg != '\0')
-		kstrcpy(c->cerr, msg, sizeof(c->cerr));
+		strncpy(c->cerr, msg, ERRMAX-1);
 
 	switch(c->state){
 
@@ -1368,12 +1399,19 @@
 	for(l = &c->incall; *l; l = &(*l)->next)
 		i++;
 	if(i >= Maxincall) {
+		static int beenhere;
+
 		qunlock(c);
+		if (!beenhere) {
+			beenhere = 1;
+			print("Fsnewcall: incall queue full (%d) on port %d\n",
+				i, c->lport);
+		}
 		return nil;
 	}
 
 	/* find a free conversation */
-	nc = Fsprotoclone(c->p, network);
+	nc = Fsprotoclone(c->p, c->owner);
 	if(nc == nil) {
 		qunlock(c);
 		return nil;
@@ -1394,12 +1432,12 @@
 	return nc;
 }
 
-static long
+long
 ndbwrite(Fs *f, char *a, ulong off, int n)
 {
 	if(off > strlen(f->ndb))
 		error(Eio);
-	if(off+n >= sizeof(f->ndb)-1)
+	if(off+n >= sizeof(f->ndb))
 		error(Eio);
 	memmove(f->ndb+off, a, n);
 	f->ndb[off+n] = 0;
@@ -1411,7 +1449,7 @@
 ulong
 scalednconv(void)
 {
-	if(conf.npage*BY2PG >= 128*MB)
+	if(cpuserver && conf.npage*BY2PG >= 128*MB)
 		return Nchans*4;
 	return Nchans;
 }
--- a/os/ip/esp.c
+++ b/os/ip/esp.c
@@ -1,3 +1,11 @@
+/*
+ * Encapsulating Security Payload for IPsec for IPv4, rfc1827.
+ * extended to IPv6.
+ * rfc2104 defines hmac computation.
+ *	currently only implements tunnel mode.
+ * TODO: verify aes algorithms (aesctrcipher is a verbatim copy of aescbccipher, i.e. CBC chaining, not counter mode);
+ *	transport mode (host-to-host)
+ */
 #include	"u.h"
 #include	"../port/lib.h"
 #include	"mem.h"
@@ -6,47 +14,79 @@
 #include	"../port/error.h"
 
 #include	"ip.h"
+#include	"ipv6.h"
+#include	<libsec.h>
 
-#include	"libsec.h"
+#define BITS2BYTES(bi) (((bi) + BI2BY - 1) / BI2BY)
+#define BYTES2BITS(by)  ((by) * BI2BY)
 
+typedef struct Algorithm Algorithm;
+typedef struct Esp4hdr Esp4hdr;
+typedef struct Esp6hdr Esp6hdr;
+typedef struct Espcb Espcb;
 typedef struct Esphdr Esphdr;
+typedef struct Esppriv Esppriv;
 typedef struct Esptail Esptail;
 typedef struct Userhdr Userhdr;
-typedef struct Esppriv Esppriv;
-typedef struct Espcb Espcb;
-typedef struct Algorithm Algorithm;
-typedef struct Esprc4 Esprc4;
 
-#define DPRINT if(0)print
+enum {
+	Encrypt,
+	Decrypt,
 
-enum
-{
-	IP_ESPPROTO	= 50,
-	EsphdrSize	= 28,	// includes IP header
-	IphdrSize	= 20,	// options have been striped
-	EsptailSize	= 2,	// does not include pad or auth data
-	UserhdrSize	= 4,	// user visable header size - if enabled
+	IP_ESPPROTO	= 50,	/* IP v4 and v6 protocol number */
+	Esp4hdrlen	= IP4HDR + 8,
+	Esp6hdrlen	= IP6HDR + 8,
+
+	Esptaillen	= 2,	/* does not include pad or auth data */
+	Userhdrlen	= 4,	/* user-visible header size - if enabled */
+
+	Desblk	 = BITS2BYTES(64),
+	Des3keysz = BITS2BYTES(192),
+
+	Aesblk	 = BITS2BYTES(128),
+	Aeskeysz = BITS2BYTES(128),
 };
 
 struct Esphdr
 {
-	/* ip header */
+	uchar	espspi[4];	/* Security parameter index */
+	uchar	espseq[4];	/* Sequence number */
+	uchar	payload[];
+};
+
+/*
+ * tunnel-mode (network-to-network, etc.) layout is:
+ * new IP hdrs | ESP hdr |
+ *	 enc { orig IP hdrs | TCP/UDP hdr | user data | ESP trailer } | ESP ICV
+ *
+ * transport-mode (host-to-host) layout would be:
+ *	orig IP hdrs | ESP hdr |
+ *			enc { TCP/UDP hdr | user data | ESP trailer } | ESP ICV
+ */
+struct Esp4hdr
+{
+	/* ipv4 header */
 	uchar	vihl;		/* Version and header length */
 	uchar	tos;		/* Type of service */
 	uchar	length[2];	/* packet length */
 	uchar	id[2];		/* Identification */
 	uchar	frag[2];	/* Fragment information */
-	uchar	Unused;	
+	uchar	Unused;
 	uchar	espproto;	/* Protocol */
 	uchar	espplen[2];	/* Header plus data length */
 	uchar	espsrc[4];	/* Ip source */
 	uchar	espdst[4];	/* Ip destination */
 
-	/* esp header */
-	uchar	espspi[4];	/* Security parameter index */
-	uchar	espseq[4];	/* Sequence number */
+	Esphdr;
 };
 
+/* tunnel-mode layout */
+struct Esp6hdr
+{
+	IPV6HDR;
+	Esphdr;
+};
+
 struct Esptail
 {
 	uchar	pad;
@@ -53,16 +93,28 @@
 	uchar	nexthdr;
 };
 
+/* IP-version-dependent data */
+typedef struct Versdep Versdep;
+struct Versdep
+{
+	ulong	version;
+	ulong	iphdrlen;
+	ulong	hdrlen;		/* iphdrlen + esp hdr len */
+	ulong	spi;
+	uchar	laddr[IPaddrlen];
+	uchar	raddr[IPaddrlen];
+};
+
 /* header as seen by the user */
 struct Userhdr
 {
-	uchar	nexthdr;	// next protocol
+	uchar	nexthdr;	/* next protocol */
 	uchar	unused[3];
 };
 
 struct Esppriv
 {
-	ulong	in;
+	uvlong	in;
 	ulong	inerrors;
 };
 
@@ -72,77 +124,68 @@
 struct Espcb
 {
 	int	incoming;
-	int	header;		// user user level header
+	int	header;		/* user-level header */
 	ulong	spi;
-	ulong	seq;		// last seq sent
-	ulong	window;		// for replay attacks
+	ulong	seq;		/* last seq sent */
+	ulong	window;		/* for replay attacks */
+
 	char	*espalg;
-	void	*espstate;	// other state for esp
-	int	espivlen;	// in bytes
+	void	*espstate;	/* other state for esp */
+	int	espivlen;	/* in bytes */
 	int	espblklen;
 	int	(*cipher)(Espcb*, uchar *buf, int len);
+
 	char	*ahalg;
-	void	*ahstate;	// other state for esp
-	int	ahlen;		// auth data length in bytes
+	void	*ahstate;	/* other state for esp */
+	int	ahlen;		/* auth data length in bytes */
 	int	ahblklen;
 	int	(*auth)(Espcb*, uchar *buf, int len, uchar *hash);
+	DigestState *ds;
 };
 
 struct Algorithm
 {
 	char 	*name;
-	int	keylen;		// in bits
-	void	(*init)(Espcb*, char* name, uchar *key, int keylen);
+	int	keylen;		/* in bits */
+	void	(*init)(Espcb*, char* name, uchar *key, unsigned keylen);
 };
 
-
-enum {
-	RC4forward	= 10*1024*1024,	// maximum skip forward
-	RC4back = 100*1024,		// maximum look back
-};
-
-struct Esprc4
-{
-	ulong cseq;	// current byte sequence number
-	RC4state current;
-
-	int ovalid;	// old is valid
-	ulong lgseq; // last good sequence
-	ulong oseq;	// old byte sequence number
-	RC4state old;
-};
-
 static	Conv* convlookup(Proto *esp, ulong spi);
 static	char *setalg(Espcb *ecb, char **f, int n, Algorithm *alg);
-static	void nullespinit(Espcb*, char*, uchar *key, int keylen);
-static	void nullahinit(Espcb*, char*, uchar *key, int keylen);
-static	void shaahinit(Espcb*, char*, uchar *key, int keylen);
-static	void md5ahinit(Espcb*, char*, uchar *key, int keylen);
-static	void desespinit(Espcb *ecb, char *name, uchar *k, int n);
-static	void rc4espinit(Espcb *ecb, char *name, uchar *k, int n);
 static	void espkick(void *x);
 
+static	void nullespinit(Espcb*, char*, uchar *key, unsigned keylen);
+static	void des3espinit(Espcb*, char*, uchar *key, unsigned keylen);
+static	void aescbcespinit(Espcb*, char*, uchar *key, unsigned keylen);
+static	void aesctrespinit(Espcb*, char*, uchar *key, unsigned keylen);
+static	void desespinit(Espcb *ecb, char *name, uchar *k, unsigned n);
+
+static	void nullahinit(Espcb*, char*, uchar *key, unsigned keylen);
+static	void shaahinit(Espcb*, char*, uchar *key, unsigned keylen);
+static	void md5ahinit(Espcb*, char*, uchar *key, unsigned keylen);
+
 static Algorithm espalg[] =
 {
-	"null",			0,	nullespinit,
-	"des_56_cbc",		64,	desespinit,
-	"rc4_128",		128,	rc4espinit,
-	nil,			0,	nil,
+	"null",		0,	nullespinit,
+	"des3_cbc",	192,	des3espinit,	/* new rfc2451, des-ede3 */
+	"aes_128_cbc",	128,	aescbcespinit,	/* new rfc3602 */
+	"aes_ctr",	128,	aesctrespinit,	/* new rfc3686 */
+	"des_56_cbc",	64,	desespinit,	/* rfc2405, deprecated */
+	nil,		0,	nil,
 };
 
 static Algorithm ahalg[] =
 {
-	"null",			0,	nullahinit,
-	"hmac_sha1_96",		128,	shaahinit,
-	"hmac_md5_96",		128,	md5ahinit,
-	nil,			0,	nil,
+	"null",		0,	nullahinit,
+	"hmac_sha1_96",	128,	shaahinit,	/* rfc2404 */
+	"hmac_md5_96",	128,	md5ahinit,	/* rfc2403 */
+	nil,		0,	nil,
 };
 
 static char*
 espconnect(Conv *c, char **argv, int argc)
 {
-	char *p, *pp;
-	char *e = nil;
+	char *p, *pp, *e = nil;
 	ulong spi;
 	Espcb *ecb = (Espcb*)c->ptcl;
 
@@ -157,7 +200,10 @@
 			break;
 		}
 		*p++ = 0;
-		parseip(c->raddr, argv[1]);
+		if (parseip(c->raddr, argv[1]) == -1) {
+			e = Ebadip;
+			break;
+		}
 		findlocalip(c->p->f, c->laddr, c->raddr);
 		ecb->incoming = 0;
 		ecb->seq = 0;
@@ -215,26 +261,86 @@
 	ipmove(c->raddr, IPnoaddr);
 
 	ecb = (Espcb*)c->ptcl;
-	free(ecb->espstate);
-	free(ecb->ahstate);
+	secfree(ecb->espstate);
+	secfree(ecb->ahstate);
 	memset(ecb, 0, sizeof(Espcb));
 }
 
+static int
+pktipvers(Fs *f, Block **bpp)
+{
+	if (*bpp == nil || BLEN(*bpp) == 0) {
+		/* get enough to identify the IP version */
+		*bpp = pullupblock(*bpp, IP4HDR);
+		if(*bpp == nil) {
+			netlog(f, Logesp, "esp: short packet\n");
+			return 0;
+		}
+	}
+	return (((Esp4hdr*)(*bpp)->rp)->vihl & 0xf0) == IP_VER4? V4: V6;
+}
+
 static void
+getverslens(int version, Versdep *vp)
+{
+	vp->version = version;
+	switch(vp->version) {
+	case V4:
+		vp->iphdrlen = IP4HDR;
+		vp->hdrlen   = Esp4hdrlen;
+		break;
+	case V6:
+		vp->iphdrlen = IP6HDR;
+		vp->hdrlen   = Esp6hdrlen;
+		break;
+	default:
+		panic("esp: getverslens version %d wrong", version);
+	}
+}
+
+static void
+getpktspiaddrs(uchar *pkt, Versdep *vp)
+{
+	Esp4hdr *eh4;
+	Esp6hdr *eh6;
+
+	switch(vp->version) {
+	case V4:
+		eh4 = (Esp4hdr*)pkt;
+		v4tov6(vp->raddr, eh4->espsrc);
+		v4tov6(vp->laddr, eh4->espdst);
+		vp->spi = nhgetl(eh4->espspi);
+		break;
+	case V6:
+		eh6 = (Esp6hdr*)pkt;
+		ipmove(vp->raddr, eh6->src);
+		ipmove(vp->laddr, eh6->dst);
+		vp->spi = nhgetl(eh6->espspi);
+		break;
+	default:
+		panic("esp: getpktspiaddrs vp->version %ld wrong", vp->version);
+	}
+}
+
+/*
+ * encapsulate next IP packet on x's write queue in IP/ESP packet
+ * and initiate output of the result.
+ */
+static void
 espkick(void *x)
 {
+	int nexthdr, payload, pad, align;
+	uchar *auth;
+	Block *bp;
 	Conv *c = x;
-	Esphdr *eh;
+	Esp4hdr *eh4;
+	Esp6hdr *eh6;
+	Espcb *ecb;
 	Esptail *et;
 	Userhdr *uh;
-	Espcb *ecb;
-	Block *bp;
-	int nexthdr;
-	int payload;
-	int pad;
-	int align;
-	uchar *auth;
+	Versdep vers;
 
+	getverslens(convipvers(c), &vers);
 	bp = qget(c->wq);
 	if(bp == nil)
 		return;
@@ -244,7 +350,7 @@
 
 	if(ecb->header) {
 		/* make sure the message has a User header */
-		bp = pullupblock(bp, UserhdrSize);
+		bp = pullupblock(bp, Userhdrlen);
 		if(bp == nil) {
 			qunlock(c);
 			return;
@@ -251,15 +357,16 @@
 		}
 		uh = (Userhdr*)bp->rp;
 		nexthdr = uh->nexthdr;
-		bp->rp += UserhdrSize;
+		bp->rp += Userhdrlen;
 	} else {
-		nexthdr = 0;  // what should this be?
+		nexthdr = 0;	/* what should this be? */
 	}
 
 	payload = BLEN(bp) + ecb->espivlen;
 
 	/* Make space to fit ip header */
-	bp = padblock(bp, EsphdrSize + ecb->espivlen);
+	bp = padblock(bp, vers.hdrlen + ecb->espivlen);
+	getpktspiaddrs(bp->rp, &vers);
 
 	align = 4;
 	if(ecb->espblklen > align)
@@ -266,7 +373,7 @@
 		align = ecb->espblklen;
 	if(align % ecb->ahblklen != 0)
 		panic("espkick: ahblklen is important after all");
-	pad = (align-1) - (payload + EsptailSize-1)%align;
+	pad = (align-1) - (payload + Esptaillen-1)%align;
 
 	/*
 	 * Make space for tail
@@ -273,70 +380,88 @@
 	 * this is done by calling padblock with a negative size
 	 * Padblock does not change bp->wp!
 	 */
-	bp = padblock(bp, -(pad+EsptailSize+ecb->ahlen));
-	bp->wp += pad+EsptailSize+ecb->ahlen;
+	bp = padblock(bp, -(pad+Esptaillen+ecb->ahlen));
+	bp->wp += pad+Esptaillen+ecb->ahlen;
 
-	eh = (Esphdr *)(bp->rp);
-	et = (Esptail*)(bp->rp + EsphdrSize + payload + pad);
+	et = (Esptail*)(bp->rp + vers.hdrlen + payload + pad);
 
-	// fill in tail
+	/* fill in tail */
 	et->pad = pad;
 	et->nexthdr = nexthdr;
 
-	ecb->cipher(ecb, bp->rp+EsphdrSize, payload+pad+EsptailSize);
-	auth = bp->rp + EsphdrSize + payload + pad + EsptailSize;
+	/* encrypt the payload */
+	ecb->cipher(ecb, bp->rp + vers.hdrlen, payload + pad + Esptaillen);
+	auth = bp->rp + vers.hdrlen + payload + pad + Esptaillen;
 
-	// fill in head
-	eh->vihl = IP_VER4;
-	hnputl(eh->espspi, ecb->spi);
-	hnputl(eh->espseq, ++ecb->seq);
-	v6tov4(eh->espsrc, c->laddr);
-	v6tov4(eh->espdst, c->raddr);
-	eh->espproto = IP_ESPPROTO;
-	eh->frag[0] = 0;
-	eh->frag[1] = 0;
+	/* fill in head; construct a new IP header and an ESP header */
+	if (vers.version == V4) {
+		eh4 = (Esp4hdr *)bp->rp;
+		eh4->vihl = IP_VER4;
+		v6tov4(eh4->espsrc, c->laddr);
+		v6tov4(eh4->espdst, c->raddr);
+		eh4->espproto = IP_ESPPROTO;
+		eh4->frag[0] = 0;
+		eh4->frag[1] = 0;
 
-	ecb->auth(ecb, bp->rp+IphdrSize, (EsphdrSize-IphdrSize)+payload+pad+EsptailSize, auth);
+		hnputl(eh4->espspi, ecb->spi);
+		hnputl(eh4->espseq, ++ecb->seq);
+	} else {
+		eh6 = (Esp6hdr *)bp->rp;
+		eh6->vcf[0] = IP_VER6;
+		ipmove(eh6->src, c->laddr);
+		ipmove(eh6->dst, c->raddr);
+		eh6->proto = IP_ESPPROTO;
 
+		hnputl(eh6->espspi, ecb->spi);
+		hnputl(eh6->espseq, ++ecb->seq);
+	}
+
+	/* compute secure hash */
+	ecb->auth(ecb, bp->rp + vers.iphdrlen, (vers.hdrlen - vers.iphdrlen) +
+		payload + pad + Esptaillen, auth);
+
 	qunlock(c);
-	//print("esp: pass down: %uld\n", BLEN(bp));
-	ipoput4(c->p->f, bp, 0, c->ttl, c->tos, c);
+	/* print("esp: pass down: %uld\n", BLEN(bp)); */
+	if (vers.version == V4)
+		ipoput4(c->p->f, bp, 0, c->ttl, c->tos, c);
+	else
+		ipoput6(c->p->f, bp, 0, c->ttl, c->tos, c);
 }
 
+/*
+ * decapsulate IP packet from IP/ESP packet in bp and
+ * pass the result up the spi's Conv's read queue.
+ */
 void
 espiput(Proto *esp, Ipifc*, Block *bp)
 {
-	Esphdr *eh;
-	Esptail *et;
-	Userhdr *uh;
+	int payload, nexthdr;
+	uchar *auth, *espspi;
 	Conv *c;
 	Espcb *ecb;
-	uchar raddr[IPaddrlen], laddr[IPaddrlen];
+	Esptail *et;
 	Fs *f;
-	uchar *auth;
-	ulong spi;
-	int payload, nexthdr;
+	Userhdr *uh;
+	Versdep vers;
 
 	f = esp->f;
 
-	bp = pullupblock(bp, EsphdrSize+EsptailSize);
+	getverslens(pktipvers(f, &bp), &vers);
+
+	bp = pullupblock(bp, vers.hdrlen + Esptaillen);
 	if(bp == nil) {
 		netlog(f, Logesp, "esp: short packet\n");
 		return;
 	}
+	getpktspiaddrs(bp->rp, &vers);
 
-	eh = (Esphdr*)(bp->rp);
-	spi = nhgetl(eh->espspi);
-	v4tov6(raddr, eh->espsrc);
-	v4tov6(laddr, eh->espdst);
-
 	qlock(esp);
 	/* Look for a conversation structure for this port */
-	c = convlookup(esp, spi);
+	c = convlookup(esp, vers.spi);
 	if(c == nil) {
 		qunlock(esp);
-		netlog(f, Logesp, "esp: no conv %I -> %I!%d\n", raddr,
-			laddr, spi);
+		netlog(f, Logesp, "esp: no conv %I -> %I!%lud\n", vers.raddr,
+			vers.laddr, vers.spi);
 		icmpnoconv(f, bp);
 		freeblist(bp);
 		return;
@@ -346,76 +471,83 @@
 	qunlock(esp);
 
 	ecb = c->ptcl;
-	// too hard to do decryption/authentication on block lists
-	if(bp->next)
+	/* too hard to do decryption/authentication on block lists */
+	if(bp->next != nil)
 		bp = concatblock(bp);
 
-	if(BLEN(bp) < EsphdrSize + ecb->espivlen + EsptailSize + ecb->ahlen) {
+	if(BLEN(bp) < vers.hdrlen + ecb->espivlen + Esptaillen + ecb->ahlen) {
 		qunlock(c);
-		netlog(f, Logesp, "esp: short block %I -> %I!%d\n", raddr,
-			laddr, spi);
+		netlog(f, Logesp, "esp: short block %I -> %I!%lud\n", vers.raddr,
+			vers.laddr, vers.spi);
 		freeb(bp);
 		return;
 	}
 
-	eh = (Esphdr*)(bp->rp);
 	auth = bp->wp - ecb->ahlen;
-	if(!ecb->auth(ecb, eh->espspi, auth-eh->espspi, auth)) {
+	espspi = vers.version == V4?	((Esp4hdr*)bp->rp)->espspi:
+					((Esp6hdr*)bp->rp)->espspi;
+
+	/* compute secure hash and authenticate */
+	if(!ecb->auth(ecb, espspi, auth - espspi, auth)) {
 		qunlock(c);
-print("esp: bad auth %I -> %I!%ld\n", raddr, laddr, spi);
-		netlog(f, Logesp, "esp: bad auth %I -> %I!%d\n", raddr,
-			laddr, spi);
+print("esp: bad auth %I -> %I!%ld\n", vers.raddr, vers.laddr, vers.spi);
+		netlog(f, Logesp, "esp: bad auth %I -> %I!%lud\n", vers.raddr,
+			vers.laddr, vers.spi);
 		freeb(bp);
 		return;
 	}
 
-	payload = BLEN(bp)-EsphdrSize-ecb->ahlen;
-	if(payload<=0 || payload%4 != 0 || payload%ecb->espblklen!=0) {
+	payload = BLEN(bp) - vers.hdrlen - ecb->ahlen;
+	if(payload <= 0 || payload % 4 != 0 || payload % ecb->espblklen != 0) {
 		qunlock(c);
-		netlog(f, Logesp, "esp: bad length %I -> %I!%d payload=%d BLEN=%d\n", raddr,
-			laddr, spi, payload, BLEN(bp));
+		netlog(f, Logesp, "esp: bad length %I -> %I!%lud payload=%d BLEN=%zd\n",
+			vers.raddr, vers.laddr, vers.spi, payload, BLEN(bp));
 		freeb(bp);
 		return;
 	}
-	if(!ecb->cipher(ecb, bp->rp+EsphdrSize, payload)) {
+
+	/* decrypt payload */
+	if(!ecb->cipher(ecb, bp->rp + vers.hdrlen, payload)) {
 		qunlock(c);
-print("esp: cipher failed %I -> %I!%ld: %r\n", raddr, laddr, spi);
-		netlog(f, Logesp, "esp: cipher failed %I -> %I!%d: %r\n", raddr,
-			laddr, spi);
+print("esp: cipher failed %I -> %I!%ld: %s\n", vers.raddr, vers.laddr, vers.spi, up->errstr);
+		netlog(f, Logesp, "esp: cipher failed %I -> %I!%lud: %s\n",
+			vers.raddr, vers.laddr, vers.spi, up->errstr);
 		freeb(bp);
 		return;
 	}
 
-	payload -= EsptailSize;
-	et = (Esptail*)(bp->rp + EsphdrSize + payload);
+	payload -= Esptaillen;
+	et = (Esptail*)(bp->rp + vers.hdrlen + payload);
 	payload -= et->pad + ecb->espivlen;
 	nexthdr = et->nexthdr;
 	if(payload <= 0) {
 		qunlock(c);
-		netlog(f, Logesp, "esp: short packet after decrypt %I -> %I!%d\n", raddr,
-			laddr, spi);
+		netlog(f, Logesp, "esp: short packet after decrypt %I -> %I!%lud\n",
+			vers.raddr, vers.laddr, vers.spi);
 		freeb(bp);
 		return;
 	}
 
-	// trim packet
-	bp->rp += EsphdrSize + ecb->espivlen;
+	/* trim packet */
+	bp->rp += vers.hdrlen + ecb->espivlen; /* toss original IP & ESP hdrs */
 	bp->wp = bp->rp + payload;
 	if(ecb->header) {
-		// assume UserhdrSize < EsphdrSize
-		bp->rp -= UserhdrSize;
+		/* assume Userhdrlen < Esp4hdrlen < Esp6hdrlen */
+		bp->rp -= Userhdrlen;
 		uh = (Userhdr*)bp->rp;
-		memset(uh, 0, UserhdrSize);
+		memset(uh, 0, Userhdrlen);
 		uh->nexthdr = nexthdr;
 	}
 
+	/* ingress filtering here? */
+
 	if(qfull(c->rq)){
-		netlog(f, Logesp, "esp: qfull %I -> %I.%uld\n", raddr,
-			laddr, spi);
+		netlog(f, Logesp, "esp: qfull %I -> %I.%uld\n", vers.raddr,
+			vers.laddr, vers.spi);
 		freeblist(bp);
 	}else {
-//print("esp: pass up: %uld\n", BLEN(bp));
-		qpass(c->rq, bp);
+//		print("esp: pass up: %uld\n", BLEN(bp));
+		qpass(c->rq, bp);	/* pass packet up the read queue */
 	}
 
 	qunlock(c);
@@ -440,19 +572,19 @@
 	return e;
 }
 
+/* called from icmp(v6) for unreachable hosts, time exceeded, etc. */
 void
 espadvise(Proto *esp, Block *bp, char *msg)
 {
-	Esphdr *h;
 	Conv *c;
-	ulong spi;
+	Versdep vers;
 
-	h = (Esphdr*)(bp->rp);
+	getverslens(pktipvers(esp->f, &bp), &vers);
+	getpktspiaddrs(bp->rp, &vers);
 
-	spi = nhgets(h->espspi);
 	qlock(esp);
-	c = convlookup(esp, spi);
-	if(c != nil) {
+	c = convlookup(esp, vers.spi);
+	if(c != nil && !c->ignoreadvice) {
 		qhangup(c->rq, msg);
 		qhangup(c->wq, msg);
 	}
@@ -466,7 +598,7 @@
 	Esppriv *upriv;
 
 	upriv = esp->priv;
-	return snprint(buf, len, "%lud %lud\n",
+	return snprint(buf, len, "%llud %lud\n",
 		upriv->in,
 		upriv->inerrors);
 }
@@ -520,10 +652,10 @@
 setalg(Espcb *ecb, char **f, int n, Algorithm *alg)
 {
 	uchar *key;
-	int i, nbyte, nchar;
-	int c;
+	int c, nbyte, nchar;
+	uint i;
 
-	if(n < 2)
+	if(n < 2 || n > 3)
 		return "bad format";
 	for(; alg->name; alg++)
 		if(strcmp(f[1], alg->name) == 0)
@@ -531,10 +663,14 @@
 	if(alg->name == nil)
 		return "unknown algorithm";
 
-	if(n != 3)
-		return "bad format";
 	nbyte = (alg->keylen + 7) >> 3;
-	nchar = strlen(f[2]);
+	if (n == 2)
+		nchar = 0;
+	else
+		nchar = strlen(f[2]);
+	if(nchar != 2 * nbyte)			/* TODO: maybe < is ok */
+		return "key not required length";
+	/* convert hex digits from ascii, in place */
 	for(i=0; i<nchar; i++) {
 		c = f[2][i];
 		if(c >= '0' && c <= '9')
@@ -544,21 +680,27 @@
 		else if(c >= 'A' && c <= 'F')
 			f[2][i] -= 'A'-10;
 		else
-			return "bad character in key";
+			return "non-hex character in key";
 	}
-	key = smalloc(nbyte);
-	for(i=0; i<nchar && i*2<nbyte; i++) {
+	/* collapse hex digits into complete bytes in reverse order in key */
+	key = secalloc(nbyte);
+	for(i = 0; i < nchar && i/2 < nbyte; i++) {
 		c = f[2][nchar-i-1];
 		if(i&1)
 			c <<= 4;
-		key[i>>1] |= c;
+		key[i/2] |= c;
 	}
-
+	memset(f[2], 0, nchar);
 	alg->init(ecb, alg->name, key, alg->keylen);
-	free(key);
+	secfree(key);
 	return nil;
 }
 
+
+/*
+ * null encryption
+ */
+
 static int
 nullcipher(Espcb*, uchar*, int)
 {
@@ -566,7 +708,7 @@
 }
 
 static void
-nullespinit(Espcb *ecb, char *name, uchar*, int)
+nullespinit(Espcb *ecb, char *name, uchar*, unsigned)
 {
 	ecb->espalg = name;
 	ecb->espblklen = 1;
@@ -581,7 +723,7 @@
 }
 
 static void
-nullahinit(Espcb *ecb, char *name, uchar*, int)
+nullahinit(Espcb *ecb, char *name, uchar*, unsigned)
 {
 	ecb->ahalg = name;
 	ecb->ahblklen = 1;
@@ -589,26 +731,28 @@
 	ecb->auth = nullauth;
 }
 
-void
+
+/*
+ * sha1
+ */
+
+static void
 seanq_hmac_sha1(uchar hash[SHA1dlen], uchar *t, long tlen, uchar *key, long klen)
 {
-	uchar ipad[65], opad[65];
 	int i;
+	uchar ipad[Hmacblksz+1], opad[Hmacblksz+1], innerhash[SHA1dlen];
 	DigestState *digest;
-	uchar innerhash[SHA1dlen];
 
-	for(i=0; i<64; i++){
-		ipad[i] = 0x36;
-		opad[i] = 0x5c;
-	}
-	ipad[64] = opad[64] = 0;
-	for(i=0; i<klen; i++){
+	memset(ipad, 0x36, Hmacblksz);
+	memset(opad, 0x5c, Hmacblksz);
+	ipad[Hmacblksz] = opad[Hmacblksz] = 0;
+	for(i = 0; i < klen; i++){
 		ipad[i] ^= key[i];
 		opad[i] ^= key[i];
 	}
-	digest = sha1(ipad, 64, nil, nil);
+	digest = sha1(ipad, Hmacblksz, nil, nil);
 	sha1(t, tlen, innerhash, digest);
-	digest = sha1(opad, 64, nil, nil);
+	digest = sha1(opad, Hmacblksz, nil, nil);
 	sha1(innerhash, SHA1dlen, hash, digest);
 }
 
@@ -615,11 +759,11 @@
 static int
 shaauth(Espcb *ecb, uchar *t, int tlen, uchar *auth)
 {
-	uchar hash[SHA1dlen];
 	int r;
+	uchar hash[SHA1dlen];
 
 	memset(hash, 0, SHA1dlen);
-	seanq_hmac_sha1(hash, t, tlen, (uchar*)ecb->ahstate, 16);
+	seanq_hmac_sha1(hash, t, tlen, (uchar*)ecb->ahstate, BITS2BYTES(128));
 	r = memcmp(auth, hash, ecb->ahlen) == 0;
 	memmove(auth, hash, ecb->ahlen);
 	return r;
@@ -626,40 +770,162 @@
 }
 
 static void
-shaahinit(Espcb *ecb, char *name, uchar *key, int klen)
+shaahinit(Espcb *ecb, char *name, uchar *key, unsigned klen)
 {
 	if(klen != 128)
 		panic("shaahinit: bad keylen");
-	klen >>= 8;	// convert to bytes
+	klen /= BI2BY;
 
 	ecb->ahalg = name;
 	ecb->ahblklen = 1;
-	ecb->ahlen = 12;
+	ecb->ahlen = BITS2BYTES(96);
 	ecb->auth = shaauth;
-	ecb->ahstate = smalloc(klen);
+	ecb->ahstate = secalloc(klen);
 	memmove(ecb->ahstate, key, klen);
 }
 
-void
+
+/*
+ * aes
+ */
+static int
+aescbccipher(Espcb *ecb, uchar *p, int n)	/* 128-bit blocks */
+{
+	uchar tmp[AESbsize], q[AESbsize];
+	uchar *pp, *tp, *ip, *eip, *ep;
+	AESstate *ds = ecb->espstate;
+
+	ep = p + n;
+	if(ecb->incoming) {
+		memmove(ds->ivec, p, AESbsize);
+		p += AESbsize;
+		while(p < ep){
+			memmove(tmp, p, AESbsize);
+			aes_decrypt(ds->dkey, ds->rounds, p, q);
+			memmove(p, q, AESbsize);
+			tp = tmp;
+			ip = ds->ivec;
+			for(eip = ip + AESbsize; ip < eip; ){
+				*p++ ^= *ip;
+				*ip++ = *tp++;
+			}
+		}
+	} else {
+		memmove(p, ds->ivec, AESbsize);
+		for(p += AESbsize; p < ep; p += AESbsize){
+			pp = p;
+			ip = ds->ivec;
+			for(eip = ip + AESbsize; ip < eip; )
+				*pp++ ^= *ip++;
+			aes_encrypt(ds->ekey, ds->rounds, p, q);
+			memmove(ds->ivec, q, AESbsize);
+			memmove(p, q, AESbsize);
+		}
+	}
+	return 1;
+}
+
+static void
+aescbcespinit(Espcb *ecb, char *name, uchar *k, unsigned n)
+{
+	uchar key[Aeskeysz], ivec[Aeskeysz];
+
+	n = BITS2BYTES(n);
+	if(n > Aeskeysz)
+		n = Aeskeysz;
+	memset(key, 0, sizeof(key));
+	memmove(key, k, n);
+	prng(ivec, Aeskeysz);
+	ecb->espalg = name;
+	ecb->espblklen = Aesblk;
+	ecb->espivlen = Aesblk;
+	ecb->cipher = aescbccipher;
+	ecb->espstate = secalloc(sizeof(AESstate));
+	setupAESstate(ecb->espstate, key, n /* keybytes */, ivec);
+	memset(ivec, 0, sizeof(ivec));
+	memset(key, 0, sizeof(key));
+}
+
+static int
+aesctrcipher(Espcb *ecb, uchar *p, int n)	/* 128-bit blocks */
+{
+	uchar tmp[AESbsize], q[AESbsize];
+	uchar *pp, *tp, *ip, *eip, *ep;
+	AESstate *ds = ecb->espstate;
+
+	ep = p + n;
+	if(ecb->incoming) {
+		memmove(ds->ivec, p, AESbsize);
+		p += AESbsize;
+		while(p < ep){
+			memmove(tmp, p, AESbsize);
+			aes_decrypt(ds->dkey, ds->rounds, p, q);
+			memmove(p, q, AESbsize);
+			tp = tmp;
+			ip = ds->ivec;
+			for(eip = ip + AESbsize; ip < eip; ){
+				*p++ ^= *ip;
+				*ip++ = *tp++;
+			}
+		}
+	} else {
+		memmove(p, ds->ivec, AESbsize);
+		for(p += AESbsize; p < ep; p += AESbsize){
+			pp = p;
+			ip = ds->ivec;
+			for(eip = ip + AESbsize; ip < eip; )
+				*pp++ ^= *ip++;
+			aes_encrypt(ds->ekey, ds->rounds, p, q);
+			memmove(ds->ivec, q, AESbsize);
+			memmove(p, q, AESbsize);
+		}
+	}
+	return 1;
+}
+
+static void
+aesctrespinit(Espcb *ecb, char *name, uchar *k, unsigned n)
+{
+	uchar key[Aesblk], ivec[Aesblk];
+
+	n = BITS2BYTES(n);
+	if(n > Aeskeysz)
+		n = Aeskeysz;
+	memset(key, 0, sizeof(key));
+	memmove(key, k, n);
+	prng(ivec, Aesblk);
+	ecb->espalg = name;
+	ecb->espblklen = Aesblk;
+	ecb->espivlen = Aesblk;
+	ecb->cipher = aesctrcipher;
+	ecb->espstate = secalloc(sizeof(AESstate));
+	setupAESstate(ecb->espstate, key, n /* keybytes */, ivec);
+	memset(ivec, 0, sizeof(ivec));
+	memset(key, 0, sizeof(key));
+}
+
+
+/*
+ * md5
+ */
+
+static void
 seanq_hmac_md5(uchar hash[MD5dlen], uchar *t, long tlen, uchar *key, long klen)
 {
-	uchar ipad[65], opad[65];
 	int i;
+	uchar ipad[Hmacblksz+1], opad[Hmacblksz+1], innerhash[MD5dlen];
 	DigestState *digest;
-	uchar innerhash[MD5dlen];
 
-	for(i=0; i<64; i++){
-		ipad[i] = 0x36;
-		opad[i] = 0x5c;
-	}
-	ipad[64] = opad[64] = 0;
-	for(i=0; i<klen; i++){
+	memset(ipad, 0x36, Hmacblksz);
+	memset(opad, 0x5c, Hmacblksz);
+	ipad[Hmacblksz] = opad[Hmacblksz] = 0;
+	for(i = 0; i < klen; i++){
 		ipad[i] ^= key[i];
 		opad[i] ^= key[i];
 	}
-	digest = md5(ipad, 64, nil, nil);
+	digest = md5(ipad, Hmacblksz, nil, nil);
 	md5(t, tlen, innerhash, digest);
-	digest = md5(opad, 64, nil, nil);
+	digest = md5(opad, Hmacblksz, nil, nil);
 	md5(innerhash, MD5dlen, hash, digest);
 }
 
@@ -670,7 +936,7 @@
 	int r;
 
 	memset(hash, 0, MD5dlen);
-	seanq_hmac_md5(hash, t, tlen, (uchar*)ecb->ahstate, 16);
+	seanq_hmac_md5(hash, t, tlen, (uchar*)ecb->ahstate, BITS2BYTES(128));
 	r = memcmp(auth, hash, ecb->ahlen) == 0;
 	memmove(auth, hash, ecb->ahlen);
 	return r;
@@ -677,168 +943,102 @@
 }
 
 static void
-md5ahinit(Espcb *ecb, char *name, uchar *key, int klen)
+md5ahinit(Espcb *ecb, char *name, uchar *key, unsigned klen)
 {
 	if(klen != 128)
 		panic("md5ahinit: bad keylen");
-	klen >>= 3;	// convert to bytes
-
-
+	klen = BITS2BYTES(klen);
 	ecb->ahalg = name;
 	ecb->ahblklen = 1;
-	ecb->ahlen = 12;
+	ecb->ahlen = BITS2BYTES(96);
 	ecb->auth = md5auth;
-	ecb->ahstate = smalloc(klen);
+	ecb->ahstate = secalloc(klen);
 	memmove(ecb->ahstate, key, klen);
 }
 
+
+/*
+ * des, single and triple
+ */
+
 static int
 descipher(Espcb *ecb, uchar *p, int n)
 {
-	uchar tmp[8];
-	uchar *pp, *tp, *ip, *eip, *ep;
 	DESstate *ds = ecb->espstate;
 
-	ep = p + n;
 	if(ecb->incoming) {
-		memmove(ds->ivec, p, 8);
-		p += 8;
-		while(p < ep){
-			memmove(tmp, p, 8);
-			block_cipher(ds->expanded, p, 1);
-			tp = tmp;
-			ip = ds->ivec;
-			for(eip = ip+8; ip < eip; ){
-				*p++ ^= *ip;
-				*ip++ = *tp++;
-			}
-		}
+		memmove(ds->ivec, p, Desblk);
+		desCBCdecrypt(p + Desblk, n - Desblk, ds);
 	} else {
-		memmove(p, ds->ivec, 8);
-		for(p += 8; p < ep; p += 8){
-			pp = p;
-			ip = ds->ivec;
-			for(eip = ip+8; ip < eip; )
-				*pp++ ^= *ip++;
-			block_cipher(ds->expanded, p, 0);
-			memmove(ds->ivec, p, 8);
-		}
+		memmove(p, ds->ivec, Desblk);
+		desCBCencrypt(p + Desblk, n - Desblk, ds);
 	}
 	return 1;
 }
-	
+
+static int
+des3cipher(Espcb *ecb, uchar *p, int n)
+{
+	DES3state *ds = ecb->espstate;
+
+	if(ecb->incoming) {
+		memmove(ds->ivec, p, Desblk);
+		des3CBCdecrypt(p + Desblk, n - Desblk, ds);
+	} else {
+		memmove(p, ds->ivec, Desblk);
+		des3CBCencrypt(p + Desblk, n - Desblk, ds);
+	}
+	return 1;
+}
+
 static void
-desespinit(Espcb *ecb, char *name, uchar *k, int n)
+desespinit(Espcb *ecb, char *name, uchar *k, unsigned n)
 {
-	uchar key[8];
-	uchar ivec[8];
-	int i;
-	
-	// bits to bytes
-	n = (n+7)>>3;
-	if(n > 8)
-		n = 8;
+	uchar key[Desblk], ivec[Desblk];
+
+	n = BITS2BYTES(n);
+	if(n > Desblk)
+		n = Desblk;
 	memset(key, 0, sizeof(key));
 	memmove(key, k, n);
-	for(i=0; i<8; i++)
-		ivec[i] = nrand(256);
+	prng(ivec, Desblk);
 	ecb->espalg = name;
-	ecb->espblklen = 8;
-	ecb->espivlen = 8;
+	ecb->espblklen = Desblk;
+	ecb->espivlen = Desblk;
+
 	ecb->cipher = descipher;
-	ecb->espstate = smalloc(sizeof(DESstate));
+	ecb->espstate = secalloc(sizeof(DESstate));
 	setupDESstate(ecb->espstate, key, ivec);
+	memset(ivec, 0, sizeof(ivec));
+	memset(key, 0, sizeof(key));
 }
 
-static int
-rc4cipher(Espcb *ecb, uchar *p, int n)
+static void
+des3espinit(Espcb *ecb, char *name, uchar *k, unsigned n)
 {
-	Esprc4 *esprc4;
-	RC4state tmpstate;
-	ulong seq;
-	long d, dd;
+	uchar key[3][Desblk], ivec[Desblk];
 
-	if(n < 4)
-		return 0;
+	n = BITS2BYTES(n);
+	if(n > Des3keysz)
+		n = Des3keysz;
+	memset(key, 0, sizeof(key));
+	memmove(key, k, n);
+	prng(ivec, Desblk);
+	ecb->espalg = name;
+	ecb->espblklen = Desblk;
+	ecb->espivlen = Desblk;
 
-	esprc4 = ecb->espstate;
-	if(ecb->incoming) {
-		seq = nhgetl(p);
-		p += 4;
-		n -= 4;
-		d = seq-esprc4->cseq;
-		if(d == 0) {
-			rc4(&esprc4->current, p, n);
-			esprc4->cseq += n;
-			if(esprc4->ovalid) {
-				dd = esprc4->cseq - esprc4->lgseq;
-				if(dd > RC4back)
-					esprc4->ovalid = 0;
-			}
-		} else if(d > 0) {
-print("missing packet: %uld %ld\n", seq, d);
-			// this link is hosed
-			if(d > RC4forward) {
-				strcpy(up->errstr, "rc4cipher: skipped too much");
-				return 0;
-			}
-			esprc4->lgseq = seq;
-			if(!esprc4->ovalid) {
-				esprc4->ovalid = 1;
-				esprc4->oseq = esprc4->cseq;
-				memmove(&esprc4->old, &esprc4->current, sizeof(RC4state));
-			}
-			rc4skip(&esprc4->current, d);
-			rc4(&esprc4->current, p, n);
-			esprc4->cseq = seq+n;
-		} else {
-print("reordered packet: %uld %ld\n", seq, d);
-			dd = seq - esprc4->oseq;
-			if(!esprc4->ovalid || -d > RC4back || dd < 0) {
-				strcpy(up->errstr, "rc4cipher: too far back");
-				return 0;
-			}
-			memmove(&tmpstate, &esprc4->old, sizeof(RC4state));
-			rc4skip(&tmpstate, dd);
-			rc4(&tmpstate, p, n);
-			return 1;
-		}
-
-		// move old state up
-		if(esprc4->ovalid) {
-			dd = esprc4->cseq - RC4back - esprc4->oseq;
-			if(dd > 0) {
-				rc4skip(&esprc4->old, dd);
-				esprc4->oseq += dd;
-			}
-		}
-	} else {
-		hnputl(p, esprc4->cseq);
-		p += 4;
-		n -= 4;
-		rc4(&esprc4->current, p, n);
-		esprc4->cseq += n;
-	}
-	return 1;
+	ecb->cipher = des3cipher;
+	ecb->espstate = secalloc(sizeof(DES3state));
+	setupDES3state(ecb->espstate, key, ivec);
+	memset(ivec, 0, sizeof(ivec));
+	memset(key, 0, sizeof(key));
 }
 
-static void
-rc4espinit(Espcb *ecb, char *name, uchar *k, int n)
-{	
-	Esprc4 *esprc4;
 
-	// bits to bytes
-	n = (n+7)>>3;
-	esprc4 = smalloc(sizeof(Esprc4));
-	memset(esprc4, 0, sizeof(Esprc4));
-	setupRC4state(&esprc4->current, k, n);
-	ecb->espalg = name;
-	ecb->espblklen = 4;
-	ecb->espivlen = 4;
-	ecb->cipher = rc4cipher;
-	ecb->espstate = esprc4;
-}
-	
+/*
+ * interfacing to devip
+ */
 void
 espinit(Fs *fs)
 {
--- a/os/ip/ethermedium.c
+++ b/os/ip/ethermedium.c
@@ -5,9 +5,9 @@
 #include "fns.h"
 #include "../port/error.h"
 
+#include "../port/netif.h"
 #include "ip.h"
 #include "ipv6.h"
-#include "kernel.h"
 
 typedef struct Etherhdr Etherhdr;
 struct Etherhdr
@@ -18,10 +18,10 @@
 };
 
 static uchar ipbroadcast[IPaddrlen] = {
-	0xff,0xff,0xff,0xff,  
-	0xff,0xff,0xff,0xff,  
-	0xff,0xff,0xff,0xff,  
 	0xff,0xff,0xff,0xff,
+	0xff,0xff,0xff,0xff,
+	0xff,0xff,0xff,0xff,
+	0xff,0xff,0xff,0xff,
 };
 
 static uchar etherbroadcast[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
@@ -33,12 +33,12 @@
 static void	etherbwrite(Ipifc *ifc, Block *bp, int version, uchar *ip);
 static void	etheraddmulti(Ipifc *ifc, uchar *a, uchar *ia);
 static void	etherremmulti(Ipifc *ifc, uchar *a, uchar *ia);
+static void	etherareg(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *ip);
 static Block*	multicastarp(Fs *f, Arpent *a, Medium*, uchar *mac);
 static void	sendarp(Ipifc *ifc, Arpent *a);
-static void	sendgarp(Ipifc *ifc, uchar*);
+static void	sendndp(Ipifc *ifc, Arpent *a);
 static int	multicastea(uchar *ea, uchar *ip);
 static void	recvarpproc(void*);
-static void	resolveaddr6(Ipifc *ifc, Arpent *a);
 static void	etherpref2addr(uchar *pref, uchar *ea);
 
 Medium ethermedium =
@@ -53,8 +53,7 @@
 .bwrite=	etherbwrite,
 .addmulti=	etheraddmulti,
 .remmulti=	etherremmulti,
-.ares=		arpenter,
-.areg=		sendgarp,
+.areg=		etherareg,
 .pref2addr=	etherpref2addr,
 };
 
@@ -70,8 +69,7 @@
 .bwrite=	etherbwrite,
 .addmulti=	etheraddmulti,
 .remmulti=	etherremmulti,
-.ares=		arpenter,
-.areg=		sendgarp,
+.areg=		etherareg,
 .pref2addr=	etherpref2addr,
 };
 
@@ -94,9 +92,6 @@
  */
 enum
 {
-	ETARP		= 0x0806,
-	ETIP4		= 0x0800,
-	ETIP6		= 0x86DD,
 	ARPREQUEST	= 1,
 	ARPREPLY	= 2,
 };
@@ -127,128 +122,92 @@
 static void
 etherbind(Ipifc *ifc, int argc, char **argv)
 {
-	Chan *mchan4, *cchan4, *achan, *mchan6, *cchan6;
-	char addr[Maxpath];	//char addr[2*KNAMELEN];
-	char dir[Maxpath];	//char dir[2*KNAMELEN];
-	char *buf;
-	int fd, cfd, n;
-	char *ptr;
+	char addr[Maxpath], dir[Maxpath];
 	Etherrock *er;
+	Chan *c;
+	int n;
 
 	if(argc < 2)
 		error(Ebadarg);
 
-	mchan4 = cchan4 = achan = mchan6 = cchan6 = nil;
-	buf = nil;
+	/*
+	 *  get mac address
+	 */
+	snprint(addr, sizeof(addr), "%s/addr", argv[2]);
+	c = namec(addr, Aopen, OREAD, 0);
 	if(waserror()){
-		if(mchan4 != nil)
-			cclose(mchan4);
-		if(cchan4 != nil)
-			cclose(cchan4);
-		if(achan != nil)
-			cclose(achan);
-		if(mchan6 != nil)
-			cclose(mchan6);
-		if(cchan6 != nil)
-			cclose(cchan6);
-		if(buf != nil)
-			free(buf);
-		nexterror(); 
+		cclose(c);
+		nexterror();
 	}
+	n = devtab[c->type]->read(c, addr, sizeof(addr)-1, 0);
+	if(n < 0)
+		error(Eio);
+	addr[n] = 0;
+	if(parsemac(ifc->mac, addr, sizeof(ifc->mac)) != 6)
+		error("could not find mac address");
+	cclose(c);
+	poperror();
 
+	er = smalloc(sizeof(*er));
+	er->read4p = er->read6p = er->arpp = (void*)-1;
+	er->mchan4 = er->cchan4 = er->mchan6 = er->cchan6 = er->achan = nil;
+	er->f = ifc->conv->p->f;
+
+	if(waserror()){
+		if(er->mchan4 != nil)
+			cclose(er->mchan4);
+		if(er->cchan4 != nil)
+			cclose(er->cchan4);
+		if(er->mchan6 != nil)
+			cclose(er->mchan6);
+		if(er->cchan6 != nil)
+			cclose(er->cchan6);
+		if(er->achan != nil)
+			cclose(er->achan);
+		free(er);
+		nexterror();
+	}
+
 	/*
-	 *  open ip converstation
+	 *  open ipv4 conversation
 	 *
 	 *  the dial will fail if the type is already open on
 	 *  this device.
 	 */
-	snprint(addr, sizeof(addr), "%s!0x800", argv[2]);
-	fd = kdial(addr, nil, dir, &cfd);
-	if(fd < 0)
-		errorf("dial 0x800 failed: %s", up->env->errstr);
-	mchan4 = commonfdtochan(fd, ORDWR, 0, 1);
-	cchan4 = commonfdtochan(cfd, ORDWR, 0, 1);
-	kclose(fd);
-	kclose(cfd);
+	snprint(addr, sizeof(addr), "%s!0x800", argv[2]);	/* ETIP4 */
+	er->mchan4 = chandial(addr, nil, dir, &er->cchan4);
 
 	/*
 	 *  make it non-blocking
 	 */
-	devtab[cchan4->type]->write(cchan4, nbmsg, strlen(nbmsg), 0);
+	devtab[er->cchan4->type]->write(er->cchan4, nbmsg, strlen(nbmsg), 0);
 
 	/*
-	 *  get mac address and speed
-	 */
-	snprint(addr, sizeof(addr), "%s/stats", dir);
-	fd = kopen(addr, OREAD);
-	if(fd < 0)
-		errorf("can't open ether stats: %s", up->env->errstr);
-
-	buf = smalloc(512);
-	n = kread(fd, buf, 511);
-	kclose(fd);
-	if(n <= 0)
-		error(Eio);
-	buf[n] = 0;
-
-	ptr = strstr(buf, "addr: ");
-	if(!ptr)
-		error(Eio);
-	ptr += 6;
-	parsemac(ifc->mac, ptr, 6);
-
-	ptr = strstr(buf, "mbps: ");
-	if(ptr){
-		ptr += 6;
-		ifc->mbps = atoi(ptr);
-	} else
-		ifc->mbps = 100;
-
-	/*
- 	 *  open arp conversation
-	 */
-	snprint(addr, sizeof(addr), "%s!0x806", argv[2]);
-	fd = kdial(addr, nil, nil, nil);
-	if(fd < 0)
-		errorf("dial 0x806 failed: %s", up->env->errstr);
-	achan = commonfdtochan(fd, ORDWR, 0, 1);
-	kclose(fd);
-
-	/*
-	 *  open ip conversation
+	 *  open ipv6 conversation
 	 *
 	 *  the dial will fail if the type is already open on
 	 *  this device.
 	 */
-	snprint(addr, sizeof(addr), "%s!0x86DD", argv[2]);
-	fd = kdial(addr, nil, dir, &cfd);
-	if(fd < 0)
-		errorf("dial 0x86DD failed: %s", up->env->errstr);
-	mchan6 = commonfdtochan(fd, ORDWR, 0, 1);
-	cchan6 = commonfdtochan(cfd, ORDWR, 0, 1);
-	kclose(fd);
-	kclose(cfd);
+	snprint(addr, sizeof(addr), "%s!0x86DD", argv[2]);	/* ETIP6 */
+	er->mchan6 = chandial(addr, nil, dir, &er->cchan6);
 
 	/*
 	 *  make it non-blocking
 	 */
-	devtab[cchan6->type]->write(cchan6, nbmsg, strlen(nbmsg), 0);
+	devtab[er->cchan6->type]->write(er->cchan6, nbmsg, strlen(nbmsg), 0);
 
-	er = smalloc(sizeof(*er));
-	er->mchan4 = mchan4;
-	er->cchan4 = cchan4;
-	er->achan = achan;
-	er->mchan6 = mchan6;
-	er->cchan6 = cchan6;
-	er->f = ifc->conv->p->f;
-	ifc->arg = er;
-
-	free(buf);
+	/*
+ 	 *  open arp conversation
+	 */
+	snprint(addr, sizeof(addr), "%s!0x806", argv[2]);	/* ETARP */
+	er->achan = chandial(addr, nil, nil, nil);
 	poperror();
 
-	kproc("etherread4", etherread4, ifc, 0);
-	kproc("recvarpproc", recvarpproc, ifc, 0);
-	kproc("etherread6", etherread6, ifc, 0);
+	ifc->arg = er;
+
+	kproc("etherread4", etherread4, ifc);
+	kproc("etherread6", etherread6, ifc);
+	kproc("recvarpproc", recvarpproc, ifc);
 }
 
 /*
@@ -259,21 +218,35 @@
 {
 	Etherrock *er = ifc->arg;
 
-	if(er->read4p)
+	while(waserror())
+		;
+
+	/* wait for readers to start */
+	while(er->arpp == (void*)-1 || er->read4p == (void*)-1 || er->read6p == (void*)-1)
+		tsleep(&up->sleep, return0, 0, 300);
+
+	if(er->read4p != nil)
 		postnote(er->read4p, 1, "unbind", 0);
-	if(er->read6p)
+	if(er->read6p != nil)
 		postnote(er->read6p, 1, "unbind", 0);
-	if(er->arpp)
+	if(er->arpp != nil)
 		postnote(er->arpp, 1, "unbind", 0);
 
+	poperror();
+
+	wunlock(ifc);
+	while(waserror())
+		;
+
 	/* wait for readers to die */
-	while(er->arpp != 0 || er->read4p != 0 || er->read6p != 0)
+	while(er->arpp != nil || er->read4p != nil || er->read6p != nil)
 		tsleep(&up->sleep, return0, 0, 300);
 
+	poperror();
+	wlock(ifc);
+
 	if(er->mchan4 != nil)
 		cclose(er->mchan4);
-	if(er->achan != nil)
-		cclose(er->achan);
 	if(er->cchan4 != nil)
 		cclose(er->cchan4);
 	if(er->mchan6 != nil)
@@ -280,6 +253,8 @@
 		cclose(er->mchan6);
 	if(er->cchan6 != nil)
 		cclose(er->cchan6);
+	if(er->achan != nil)
+		cclose(er->achan);
 
 	free(er);
 }
@@ -297,16 +272,16 @@
 
 	/* get mac address of destination */
 	a = arpget(er->f->arp, bp, version, ifc, ip, mac);
-	if(a){
+	if(a != nil){
 		/* check for broadcast or multicast */
 		bp = multicastarp(er->f, a, ifc->m, mac);
-		if(bp==nil){
+		if(bp == nil){
 			switch(version){
 			case V4:
 				sendarp(ifc, a);
 				break;
-			case V6: 
-				resolveaddr6(ifc, a);
+			case V6:
+				sendndp(ifc, a);
 				break;
 			default:
 				panic("etherbwrite: version %d", version);
@@ -317,8 +292,6 @@
 
 	/* make it a single block with space for the ether header */
 	bp = padblock(bp, ifc->m->hsize);
-	if(bp->next)
-		bp = concatblock(bp);
 	if(BLEN(bp) < ifc->mintu)
 		bp = adjustblock(bp, ifc->mintu);
 	eh = (Etherhdr*)bp->rp;
@@ -358,29 +331,28 @@
 	ifc = a;
 	er = ifc->arg;
 	er->read4p = up;	/* hide identity under a rock for unbind */
-	if(waserror()){
-		er->read4p = 0;
-		pexit("hangup", 1);
-	}
+	if(!waserror())
 	for(;;){
 		bp = devtab[er->mchan4->type]->bread(er->mchan4, ifc->maxtu, 0);
-		if(!canrlock(ifc)){
-			freeb(bp);
-			continue;
-		}
+		if(bp == nil)
+			break;
+		rlock(ifc);
 		if(waserror()){
 			runlock(ifc);
 			nexterror();
 		}
 		ifc->in++;
-		bp->rp += ifc->m->hsize;
-		if(ifc->lifc == nil)
+		if(ifc->lifc == nil || BLEN(bp) <= ifc->m->hsize)
 			freeb(bp);
-		else
+		else {
+			bp->rp += ifc->m->hsize;
 			ipiput4(er->f, ifc, bp);
+		}
 		runlock(ifc);
 		poperror();
 	}
+	er->read4p = nil;
+	pexit("hangup", 1);
 }
 
 
@@ -397,29 +369,28 @@
 	ifc = a;
 	er = ifc->arg;
 	er->read6p = up;	/* hide identity under a rock for unbind */
-	if(waserror()){
-		er->read6p = 0;
-		pexit("hangup", 1);
-	}
+	if(!waserror())
 	for(;;){
 		bp = devtab[er->mchan6->type]->bread(er->mchan6, ifc->maxtu, 0);
-		if(!canrlock(ifc)){
-			freeb(bp);
-			continue;
-		}
+		if(bp == nil)
+			break;
+		rlock(ifc);
 		if(waserror()){
 			runlock(ifc);
 			nexterror();
 		}
 		ifc->in++;
-		bp->rp += ifc->m->hsize;
-		if(ifc->lifc == nil)
+		if(ifc->lifc == nil || BLEN(bp) <= ifc->m->hsize)
 			freeb(bp);
-		else
+		else {
+			bp->rp += ifc->m->hsize;
 			ipiput6(er->f, ifc, bp);
+		}
 		runlock(ifc);
 		poperror();
 	}
+	er->read6p = nil;
+	pexit("hangup", 1);
 }
 
 static void
@@ -477,6 +448,7 @@
 	Block *bp;
 	Etherarp *e;
 	Etherrock *er = ifc->arg;
+	uchar targ[IPv4addrlen], src[IPv4addrlen];
 
 	/* don't do anything if it's been less than a second since the last */
 	if(NOW - a->ctime < 1000){
@@ -484,6 +456,9 @@
 		return;
 	}
 
+	/* try to keep it around for a second more */
+	a->ctime = NOW;
+
 	/* remove all but the last message */
 	while((bp = a->hold) != nil){
 		if(bp == a->last)
@@ -492,18 +467,20 @@
 		freeblist(bp);
 	}
 
-	/* try to keep it around for a second more */
-	a->ctime = NOW;
+	memmove(targ, a->ip+IPv4off, IPv4addrlen);
 	arprelease(er->f->arp, a);
 
+	if(!ipv4local(ifc, src, 0, targ))
+		return;
+
 	n = sizeof(Etherarp);
-	if(n < a->type->mintu)
-		n = a->type->mintu;
+	if(n < ifc->m->mintu)
+		n = ifc->m->mintu;
 	bp = allocb(n);
 	memset(bp->rp, 0, n);
 	e = (Etherarp*)bp->rp;
-	memmove(e->tpa, a->ip+IPv4off, sizeof(e->tpa));
-	ipv4local(ifc, e->spa);
+	memmove(e->tpa, targ, sizeof(e->tpa));
+	memmove(e->spa, src, sizeof(e->spa));
 	memmove(e->sha, ifc->mac, sizeof(e->sha));
 	memset(e->d, 0xff, sizeof(e->d));		/* ethernet broadcast */
 	memmove(e->s, ifc->mac, sizeof(e->s));
@@ -516,18 +493,14 @@
 	hnputs(e->op, ARPREQUEST);
 	bp->wp += n;
 
-	n = devtab[er->achan->type]->bwrite(er->achan, bp, 0);
-	if(n < 0)
-		print("arp: send: %r\n");
+	devtab[er->achan->type]->bwrite(er->achan, bp, 0);
 }
 
 static void
-resolveaddr6(Ipifc *ifc, Arpent *a)
+sendndp(Ipifc *ifc, Arpent *a)
 {
-	int sflag;
 	Block *bp;
 	Etherrock *er = ifc->arg;
-	uchar ipsrc[IPaddrlen];
 
 	/* don't do anything if it's been less than a second since the last */
 	if(NOW - a->ctime < ReTransTimer){
@@ -543,19 +516,7 @@
 		freeblist(bp);
 	}
 
-	/* try to keep it around for a second more */
-	a->ctime = NOW;
-	a->rtime = NOW + ReTransTimer;
-	if(a->rxtsrem <= 0) {
-		arprelease(er->f->arp, a);
-		return;
-	}
-
-	a->rxtsrem--;
-	arprelease(er->f->arp, a);
-
-	if(sflag = ipv6anylocal(ifc, ipsrc)) 
-		icmpns(er->f, ipsrc, sflag, a->ip, TARG_MULTI, ifc->mac);
+	ndpsendsol(er->f, ifc, a);	/* unlocks arp */
 }
 
 /*
@@ -569,10 +530,6 @@
 	Etherarp *e;
 	Etherrock *er = ifc->arg;
 
-	/* don't arp for our initial non address */
-	if(ipcmp(ip, IPnoaddr) == 0)
-		return;
-
 	n = sizeof(Etherarp);
 	if(n < ifc->m->mintu)
 		n = ifc->m->mintu;
@@ -593,15 +550,13 @@
 	hnputs(e->op, ARPREQUEST);
 	bp->wp += n;
 
-	n = devtab[er->achan->type]->bwrite(er->achan, bp, 0);
-	if(n < 0)
-		print("garp: send: %r\n");
+	devtab[er->achan->type]->bwrite(er->achan, bp, 0);
 }
 
 static void
 recvarp(Ipifc *ifc)
 {
-	int n;
+	int n, forme;
 	Block *ebp, *rbp;
 	Etherarp *e, *r;
 	uchar ip[IPaddrlen];
@@ -609,11 +564,11 @@
 	Etherrock *er = ifc->arg;
 
 	ebp = devtab[er->achan->type]->bread(er->achan, ifc->maxtu, 0);
-	if(ebp == nil) {
-		print("arp: rcv: %r\n");
+	if(ebp == nil)
 		return;
-	}
 
+	rlock(ifc);
+
 	e = (Etherarp*)ebp->rp;
 	switch(nhgets(e->op)) {
 	default:
@@ -620,9 +575,13 @@
 		break;
 
 	case ARPREPLY:
+		/* make sure not to enter multi/broadcast address */
+		if(e->sha[0] & 1)
+			break;
+
 		/* check for machine using my ip address */
 		v4tov6(ip, e->spa);
-		if(iplocalonifc(ifc, ip) || ipproxyifc(er->f, ifc, ip)){
+		if(iplocalonifc(ifc, ip) != nil || ipproxyifc(er->f, ifc, ip)){
 			if(memcmp(e->sha, ifc->mac, sizeof(e->sha)) != 0){
 				print("arprep: 0x%E/0x%E also has ip addr %V\n",
 					e->s, e->sha, e->spa);
@@ -630,46 +589,47 @@
 			}
 		}
 
-		/* make sure we're not entering broadcast addresses */
-		if(ipcmp(ip, ipbroadcast) == 0 ||
-			!memcmp(e->sha, etherbroadcast, sizeof(e->sha))){
-			print("arprep: 0x%E/0x%E cannot register broadcast address %I\n",
-				e->s, e->sha, e->spa);
-			break;
-		}
-
-		arpenter(er->f, V4, e->spa, e->sha, sizeof(e->sha), 0);
+		/* refresh what we know about sender */
+		arpenter(er->f, V4, e->spa, e->sha, sizeof(e->sha), e->tpa, ifc, 1);
 		break;
 
 	case ARPREQUEST:
+		/* don't reply to multi/broadcast addresses */
+		if(e->sha[0] & 1)
+			break;
+
 		/* don't answer arps till we know who we are */
-		if(ifc->lifc == 0)
+		if(ifc->lifc == nil)
 			break;
 
 		/* check for machine using my ip or ether address */
 		v4tov6(ip, e->spa);
-		if(iplocalonifc(ifc, ip) || ipproxyifc(er->f, ifc, ip)){
+		if(iplocalonifc(ifc, ip) != nil || ipproxyifc(er->f, ifc, ip)){
 			if(memcmp(e->sha, ifc->mac, sizeof(e->sha)) != 0){
-				if (memcmp(eprinted, e->spa, sizeof(e->spa))){
+				if(memcmp(eprinted, e->spa, sizeof(e->spa)) != 0){
 					/* print only once */
-					print("arpreq: 0x%E also has ip addr %V\n", e->sha, e->spa);
+					print("arpreq: 0x%E also has ip addr %V\n",
+						e->sha, e->spa);
 					memmove(eprinted, e->spa, sizeof(e->spa));
 				}
+				break;
 			}
 		} else {
 			if(memcmp(e->sha, ifc->mac, sizeof(e->sha)) == 0){
-				print("arpreq: %V also has ether addr %E\n", e->spa, e->sha);
+				print("arpreq: %V also has ether addr %E\n",
+					e->spa, e->sha);
 				break;
 			}
 		}
 
-		/* refresh what we know about sender */
-		arpenter(er->f, V4, e->spa, e->sha, sizeof(e->sha), 1);
-
-		/* answer only requests for our address or systems we're proxying for */
+		/*
+		 * when request is for our address or systems we're proxying for,
+		 * enter the sender's address into the arp table and reply, otherwise
+		 * just refresh the sender's address.
+		 */
 		v4tov6(ip, e->tpa);
-		if(!iplocalonifc(ifc, ip))
-		if(!ipproxyifc(er->f, ifc, ip))
+		forme = iplocalonifc(ifc, ip) != nil || ipproxyifc(er->f, ifc, ip);
+		if(arpenter(er->f, V4, e->spa, e->sha, sizeof(e->sha), e->tpa, ifc, !forme) < 0 || !forme)
 			break;
 
 		n = sizeof(Etherarp);
@@ -692,10 +652,14 @@
 		memmove(r->s, ifc->mac, sizeof(r->s));
 		rbp->wp += n;
 
-		n = devtab[er->achan->type]->bwrite(er->achan, rbp, 0);
-		if(n < 0)
-			print("arp: write: %r\n");
+		runlock(ifc);
+		freeb(ebp);
+
+		devtab[er->achan->type]->bwrite(er->achan, rbp, 0);
+		return;
 	}
+
+	runlock(ifc);
 	freeb(ebp);
 }
 
@@ -707,7 +671,7 @@
 
 	er->arpp = up;
 	if(waserror()){
-		er->arpp = 0;
+		er->arpp = nil;
 		pexit("hangup", 1);
 	}
 	for(;;)
@@ -749,14 +713,9 @@
 multicastarp(Fs *f, Arpent *a, Medium *medium, uchar *mac)
 {
 	/* is it broadcast? */
-	switch(ipforme(f, a->ip)){
-	case Runi:
-		return nil;
-	case Rbcast:
-		memset(mac, 0xff, 6);
+	if(ipforme(f, a->ip) == Rbcast){
+		memset(mac, 0xff, medium->maclen);
 		return arpresolve(f->arp, a, medium, mac);
-	default:
-		break;
 	}
 
 	/* if multicast, fill in mac */
@@ -778,11 +737,11 @@
 }
 
 
-static void 
+static void
 etherpref2addr(uchar *pref, uchar *ea)
 {
-	pref[8]  = ea[0] | 0x2;
-	pref[9]  = ea[1];
+	pref[8] = ea[0] ^ 0x2;
+	pref[9] = ea[1];
 	pref[10] = ea[2];
 	pref[11] = 0xFF;
 	pref[12] = 0xFE;
@@ -789,4 +748,41 @@
 	pref[13] = ea[3];
 	pref[14] = ea[4];
 	pref[15] = ea[5];
+}
+
+static void
+etherareg(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *ip)	/* .areg hook of the ether media: announce ip on the wire */
+{
+	static char tdad[] = "dad6";	/* passed to addroute/remroute for the temporary route below */
+	uchar a[IPaddrlen];
+
+	if(ipcmp(ip, IPnoaddr) == 0 || ipcmp(ip, v4prefix) == 0)	/* no real address yet: nothing to announce */
+		return;
+
+	if(isv4(ip)){
+		sendgarp(ifc, ip);
+		return;
+	}
+
+	if((lifc->type&Rv4) != 0)	/* non-v4 ip but the lifc is flagged Rv4: nothing to do */
+		return;
+
+	if(!lifc->tentative){
+		icmpna(f, lifc->local, v6allnodesL, ip, ifc->mac, 1<<5);	/* NOTE(review): 1<<5 is presumably the override flag - confirm against icmpna */
+		return;
+	}
+
+	if(ipcmp(lifc->local, ip) != 0)	/* tentative lifc: only its own local address is probed */
+		return;
+
+	/* temporarily add route for duplicate address detection */
+	ipv62smcast(a, ip);
+	addroute(f, a, IPallbits, v6Unspecified, IPallbits, ip, Rmulti, ifc, tdad);
+	if(waserror()){
+		remroute(f, a, IPallbits, v6Unspecified, IPallbits, ip, Rmulti, ifc, tdad);	/* don't leave the route behind if icmpns raises an error */
+		nexterror();
+	}
+	icmpns(f, 0, SRC_UNSPEC, ip, TARG_MULTI, ifc->mac);
+	poperror();
+	remroute(f, a, IPallbits, v6Unspecified, IPallbits, ip, Rmulti, ifc, tdad);
+}
--- a/os/ip/gre.c
+++ b/os/ip/gre.c
@@ -1,3 +1,6 @@
+/*
+ * Generic Routing Encapsulation over IPv4, rfc1702
+ */
 #include "u.h"
 #include "../port/lib.h"
 #include "mem.h"
@@ -7,10 +10,7 @@
 
 #include "ip.h"
 
-#define DPRINT if(0)print
-
-enum
-{
+enum {
 	GRE_IPONLY	= 12,		/* size of ip header */
 	GRE_IPPLUSGRE	= 12,		/* minimum size of GRE header */
 	IP_GREPROTO	= 47,
@@ -18,10 +18,33 @@
 	GRErxms		= 200,
 	GREtickms	= 100,
 	GREmaxxmit	= 10,
+
+	K		= 1024,
+	GREqlen		= 256 * K,
+
+	GRE_cksum	= 0x8000,
+	GRE_routing	= 0x4000,
+	GRE_key		= 0x2000,
+	GRE_seq		= 0x1000,
+
+	Nring		= 1 << 10,	/* power of two, please */
+	Ringmask	= Nring - 1,
+
+	GREctlraw	= 0,
+	GREctlcooked,
+	GREctlretunnel,
+	GREctlreport,
+	GREctldlsuspend,
+	GREctlulsuspend,
+	GREctldlresume,
+	GREctlulresume,
+	GREctlforward,
+	GREctlulkey,
+	Ncmds,
 };
 
-typedef struct GREhdr
-{
+typedef struct GREhdr GREhdr;
+struct GREhdr{
 	/* ip header */
 	uchar	vihl;		/* Version and header length */
 	uchar	tos;		/* Type of service */
@@ -28,7 +51,7 @@
 	uchar	len[2];		/* packet length (including headers) */
 	uchar	id[2];		/* Identification */
 	uchar	frag[2];	/* Fragment information */
-	uchar	Unused;	
+	uchar	ttl;
 	uchar	proto;		/* Protocol */
 	uchar	cksum[2];	/* checksum */
 	uchar	src[4];		/* Ip source */
@@ -37,21 +60,115 @@
 	/* gre header */
 	uchar	flags[2];
 	uchar	eproto[2];	/* encapsulation protocol */
-} GREhdr;
+};
 
 typedef struct GREpriv GREpriv;
-struct GREpriv
-{
-	int		raw;			/* Raw GRE mode */
-
+struct GREpriv{
 	/* non-MIB stats */
-	ulong		csumerr;		/* checksum errors */
-	ulong		lenerr;			/* short packet */
+	uvlong	lenerr;			/* short packet */
 };
 
+typedef struct Bring	Bring;
+struct Bring{
+	Block	*ring[Nring];
+	long	produced;
+	long	consumed;
+};
+
+typedef struct GREconv	GREconv;
+struct GREconv{
+	int	raw;
+
+	/* Retunnelling information.  v4 only */
+	uchar	north[4];			/* HA */
+	uchar	south[4];			/* Base station */
+	uchar	hoa[4];				/* Home address */
+	uchar	coa[4];				/* Careof address */
+	ulong	seq;				/* Current sequence # */
+	int	dlsusp;				/* Downlink suspended? */
+	int	ulsusp;				/* Uplink suspended? */
+	ulong	ulkey;				/* GRE key */
+
+	QLock	lock;				/* Lock for rings */
+	Bring	dlpending;			/* Ring of pending packets */
+	Bring	dlbuffered;			/* Received while suspended */
+	Bring	ulbuffered;			/* Received while suspended */
+};
+
+typedef struct Metablock Metablock;
+struct Metablock{
+	uchar	*rp;
+	ulong	seq;
+};
+
+static char *grectlcooked(Conv *, int, char **);
+static char *grectldlresume(Conv *, int, char **);
+static char *grectldlsuspend(Conv *, int, char **);
+static char *grectlforward(Conv *, int, char **);
+static char *grectlraw(Conv *, int, char **);
+static char *grectlreport(Conv *, int, char **);
+static char *grectlretunnel(Conv *, int, char **);
+static char *grectlulkey(Conv *, int, char **);
+static char *grectlulresume(Conv *, int, char **);
+static char *grectlulsuspend(Conv *, int, char **);
+
+static struct{
+	char	*cmd;
+	int	argc;
+	char	*(*f)(Conv *, int, char **);
+} grectls[Ncmds] = {
+[GREctlraw]	=	{	"raw",		1,	grectlraw,	},
+[GREctlcooked]	=	{	"cooked",	1,	grectlcooked,	},
+[GREctlretunnel]=	{	"retunnel",	5,	grectlretunnel,	},
+[GREctlreport]	=	{	"report",	2,	grectlreport,	},
+[GREctldlsuspend]=	{	"dlsuspend",	1,	grectldlsuspend,},
+[GREctlulsuspend]=	{	"ulsuspend",	1,	grectlulsuspend,},
+[GREctldlresume]=	{	"dlresume",	1,	grectldlresume,	},
+[GREctlulresume]=	{	"ulresume",	1,	grectlulresume,	},
+[GREctlforward]	=	{	"forward",	2,	grectlforward,	},
+[GREctlulkey]	=	{	"ulkey",	2,	grectlulkey,	},
+};
+
+static uchar nulladdr[4];
+static char *sessend = "session end";
+
 static void grekick(void *x, Block *bp);
+static char *gresetup(Conv *, char *, char *, char *);
 
-static char*
+uvlong grepdin, grepdout, grebdin, grebdout;
+uvlong grepuin, grepuout, grebuin, grebuout;
+
+static Block *
+getring(Bring *r)	/* remove and return the oldest block in r, or nil when r is empty */
+{
+	Block *bp;
+
+	if(r->consumed == r->produced)	/* nothing queued */
+		return nil;
+
+	bp = r->ring[r->consumed & Ringmask];	/* counters run free; the mask selects the slot */
+	r->ring[r->consumed & Ringmask] = nil;
+	r->consumed++;
+	return bp;
+}
+
+static void
+addring(Bring *r, Block *bp)	/* append bp to r; if r is full, the block in the slot about to be reused is freed */
+{
+	Block *tbp;
+
+	if(r->produced - r->consumed > Ringmask){	/* Nring blocks already queued */
+		/* Full! */
+		tbp = r->ring[r->produced & Ringmask];	/* with exactly Nring queued this is also slot consumed & Ringmask, i.e. the oldest */
+		assert(tbp);
+		freeb(tbp);
+		r->consumed++;
+	}
+	r->ring[r->produced & Ringmask] = bp;
+	r->produced++;
+}
+
+static char *
 greconnect(Conv *c, char **argv, int argc)
 {
 	Proto *p;
@@ -91,7 +208,7 @@
 static void
 grecreate(Conv *c)
 {
-	c->rq = qopen(64*1024, Qmsg, 0, c);
+	c->rq = qopen(GREqlen, Qmsg, 0, c);
 	c->wq = qbypass(grekick, c);
 }
 
@@ -98,44 +215,88 @@
 static int
 grestate(Conv *c, char *state, int n)
 {
-	USED(c);
-	return snprint(state, n, "%s", "Datagram");
+	GREconv *grec;
+	char *ep, *p;
+
+	grec = c->ptcl;
+	p    = state;
+	ep   = p + n;
+	p    = seprint(p, ep, "%s%s%s%shoa %V north %V south %V seq %ulx "
+	 "pending %uld  %uld buffered dl %uld %uld ul %uld %uld ulkey %.8ulx\n",
+			c->inuse? "Open ": "Closed ",
+			grec->raw? "raw ": "",
+			grec->dlsusp? "DL suspended ": "",
+			grec->ulsusp? "UL suspended ": "",
+			grec->hoa, grec->north, grec->south, grec->seq,
+			grec->dlpending.consumed, grec->dlpending.produced,
+			grec->dlbuffered.consumed, grec->dlbuffered.produced,
+			grec->ulbuffered.consumed, grec->ulbuffered.produced,
+			grec->ulkey);
+	return p - state;
 }
 
 static char*
 greannounce(Conv*, char**, int)
 {
-	return "pktifc does not support announce";
+	return "gre does not support announce";
 }
 
 static void
 greclose(Conv *c)
 {
-	qclose(c->rq);
-	qclose(c->wq);
-	qclose(c->eq);
+	GREconv *grec;
+	Block *bp;
+
+	grec = c->ptcl;
+
+	/* Make sure we don't forward any more packets */
+	memset(grec->hoa, 0, sizeof grec->hoa);
+	memset(grec->north, 0, sizeof grec->north);
+	memset(grec->south, 0, sizeof grec->south);
+
+	qlock(&grec->lock);
+	while((bp = getring(&grec->dlpending)) != nil)
+		freeb(bp);
+
+	while((bp = getring(&grec->dlbuffered)) != nil)
+		freeb(bp);
+
+	while((bp = getring(&grec->ulbuffered)) != nil)
+		freeb(bp);
+
+	grec->dlpending.produced = grec->dlpending.consumed = 0;
+	grec->dlbuffered.produced = grec->dlbuffered.consumed = 0;
+	grec->ulbuffered.produced = grec->ulbuffered.consumed = 0;
+	qunlock(&grec->lock);
+
+	grec->raw = 0;
+	grec->seq = 0;
+	grec->dlsusp = grec->ulsusp = 1;
+
+	qhangup(c->rq, sessend);
+	qhangup(c->wq, sessend);
+	qhangup(c->eq, sessend);
 	ipmove(c->laddr, IPnoaddr);
 	ipmove(c->raddr, IPnoaddr);
-	c->lport = 0;
-	c->rport = 0;
+	c->lport = c->rport = 0;
 }
 
-int drop;
-
 static void
 grekick(void *x, Block *bp)
 {
-	Conv *c = x;
-	GREhdr *ghp;
+	Conv *c;
+	GREconv *grec;
+	GREhdr *gre;
 	uchar laddr[IPaddrlen], raddr[IPaddrlen];
 
 	if(bp == nil)
 		return;
 
+	c    = x;
+	grec = c->ptcl;
+
 	/* Make space to fit ip header (gre header already there) */
 	bp = padblock(bp, GRE_IPONLY);
-	if(bp == nil)
-		return;
 
 	/* make sure the message has a GRE header */
 	bp = pullupblock(bp, GRE_IPONLY+GRE_IPPLUSGRE);
@@ -142,90 +303,344 @@
 	if(bp == nil)
 		return;
 
-	ghp = (GREhdr *)(bp->rp);
-	ghp->vihl = IP_VER4;
+	gre = (GREhdr *)bp->rp;
+	gre->vihl = IP_VER4;
 
-	if(!((GREpriv*)c->p->priv)->raw){
-		v4tov6(raddr, ghp->dst);
+	if(grec->raw == 0){
+		v4tov6(raddr, gre->dst);
 		if(ipcmp(raddr, v4prefix) == 0)
-			memmove(ghp->dst, c->raddr + IPv4off, IPv4addrlen);
-		v4tov6(laddr, ghp->src);
+			memmove(gre->dst, c->raddr + IPv4off, IPv4addrlen);
+		v4tov6(laddr, gre->src);
 		if(ipcmp(laddr, v4prefix) == 0){
 			if(ipcmp(c->laddr, IPnoaddr) == 0)
-				findlocalip(c->p->f, c->laddr, raddr); /* pick interface closest to dest */
-			memmove(ghp->src, c->laddr + IPv4off, IPv4addrlen);
+				/* pick interface closest to dest */
+				findlocalip(c->p->f, c->laddr, raddr);
+			memmove(gre->src, c->laddr + IPv4off, sizeof gre->src);
 		}
-		hnputs(ghp->eproto, c->rport);
+		hnputs(gre->eproto, c->rport);
 	}
 
-	ghp->proto = IP_GREPROTO;
-	ghp->frag[0] = 0;
-	ghp->frag[1] = 0;
+	gre->proto = IP_GREPROTO;
+	gre->frag[0] = gre->frag[1] = 0;
 
+	grepdout++;
+	grebdout += BLEN(bp);
 	ipoput4(c->p->f, bp, 0, c->ttl, c->tos, nil);
 }
 
 static void
-greiput(Proto *gre, Ipifc*, Block *bp)
+gredownlink(Conv *c, Block *bp)	/* retunnel a downlink packet towards grec->south; bp is always consumed */
 {
-	int len;
-	GREhdr *ghp;
-	Conv *c, **p;
-	ushort eproto;
+	Metablock *m;
+	GREconv *grec;
+	GREhdr *gre;
+	int hdrlen, suspended, extra;
+	ushort flags;
+	ulong seq;
+
+	gre = (GREhdr *)bp->rp;
+	if(gre->ttl == 1){
+		freeb(bp);
+		return;
+	}
+
+	/*
+	 * We've received a packet with a GRE header and we need to
+	 * re-adjust the packet header to strip all unwanted parts
+	 * but leave room for only a sequence number.
+	 */
+	grec   = c->ptcl;
+	flags  = nhgets(gre->flags);
+	hdrlen = 0;
+	if(flags & GRE_cksum)
+		hdrlen += 2;
+	if(flags & GRE_routing){
+		print("%V routing info present.  Discarding packet\n", gre->src);
+		freeb(bp);
+		return;
+	}
+	if(flags & (GRE_cksum|GRE_routing))
+		hdrlen += 2;			/* Offset field */
+	if(flags & GRE_key)
+		hdrlen += 4;
+	if(flags & GRE_seq)
+		hdrlen += 4;
+
+	/*
+	 * The outgoing packet only has the sequence number set.  Make room
+	 * for the sequence number.
+	 */
+	if(hdrlen != sizeof(ulong)){
+		extra = hdrlen - sizeof(ulong);
+		if(extra < 0 && bp->rp - bp->base < -extra){
+			print("gredownlink: cannot add sequence number\n");
+			freeb(bp);
+			return;
+		}
+		memmove(bp->rp + extra, bp->rp, sizeof(GREhdr));
+		bp->rp += extra;
+		assert(BLEN(bp) >= sizeof(GREhdr) + sizeof(ulong));
+		gre = (GREhdr *)bp->rp;
+	}
+	seq = grec->seq++;
+	hnputs(gre->flags, GRE_seq);
+	hnputl(bp->rp + sizeof(GREhdr), seq);
+
+	/*
+	 * Keep rp and seq at the base.  ipoput4 consumes rp for
+	 * refragmentation.
+	 */
+	assert(bp->rp - bp->base >= sizeof(Metablock));
+	m = (Metablock *)bp->base;
+	m->rp  = bp->rp;
+	m->seq = seq;
+
+	/*
+	 * Here we make a decision what we're doing with the packet.  We're
+	 * doing this w/o holding a lock which means that later on in the
+	 * process we may discover we've done the wrong thing.  I don't want
+	 * to call ipoput with the lock held.
+	 */
+restart:
+	suspended = grec->dlsusp;
+	if(suspended){
+		if(!canqlock(&grec->lock)){
+			/*
+			 * just give up.  too bad, we lose a packet.  this
+			 * is just too hard and my brain already hurts.
+			 */
+			freeb(bp);
+			return;
+		}
+
+		if(!grec->dlsusp){
+			/*
+			 * suspend race.  We thought we were suspended, but
+			 * we really weren't.
+			 */
+			qunlock(&grec->lock);
+			goto restart;
+		}
+
+		/* Hold the packet while the downlink is suspended */
+		addring(&grec->dlbuffered, bp);
+		qunlock(&grec->lock);
+		return;
+	}
+
+	/*
+	 * When we get here, we're not suspended.  Proceed to send the
+	 * packet.
+	 */
+	memmove(gre->src, grec->coa, sizeof gre->src);
+	memmove(gre->dst, grec->south, sizeof gre->dst);
+
+	ipoput4(c->p->f, copyblock(bp, BLEN(bp)), 0, gre->ttl - 1, gre->tos, nil);
+	grepdout++;
+	grebdout += BLEN(bp);
+
+	/*
+	 * Now make sure we didn't do the wrong thing.
+	 */
+	if(!canqlock(&grec->lock)){
+		freeb(bp);		/* The packet just goes away */
+		return;
+	}
+
+	/* We did the right thing */
+	addring(&grec->dlpending, bp);
+	qunlock(&grec->lock);
+}
+
+/*
+ * Retunnel an uplink packet: rewrite the outer addresses to coa -> north
+ * and add the uplink key if one is set.  bp is always consumed here.
+ */
+static void
+greuplink(Conv *c, Block *bp)
+{
+	GREconv *grec;
+	GREhdr *gre;
+	ushort flags;
+
+	gre = (GREhdr *)bp->rp;
+	if(gre->ttl == 1){
+		freeb(bp);		/* ttl expired: drop it, as gredownlink does */
+		return;
+	}
+
+	grec = c->ptcl;
+	memmove(gre->src, grec->coa, sizeof gre->src);
+	memmove(gre->dst, grec->north, sizeof gre->dst);
+
+	/* Add a key, if needed. */
+	if(grec->ulkey){
+		flags = nhgets(gre->flags);
+		if(flags & (GRE_cksum|GRE_routing)){
+			print("%V routing info present.  Discarding packet\n", gre->src);
+			freeb(bp);
+			return;
+		}
+
+		if((flags & GRE_key) == 0){
+			/* Make room for the key */
+			if(bp->rp - bp->base < sizeof(ulong)){
+				print("%V can't add key\n", gre->src);
+				freeb(bp);
+				return;
+			}
+			bp->rp -= 4;
+			memmove(bp->rp, bp->rp + 4, sizeof(GREhdr));
+			gre = (GREhdr *)bp->rp;
+			hnputs(gre->flags, flags | GRE_key);
+		}
+
+		/* Add the key */
+		hnputl(bp->rp + sizeof(GREhdr), grec->ulkey);
+	}
+
+	if(!canqlock(&grec->lock)){
+		freeb(bp);
+		return;
+	}
+	if(grec->ulsusp)
+		addring(&grec->ulbuffered, bp);
+	else{
+		grepuout++;		/* count first: bp belongs to ipoput4 once it is called */
+		grebuout += BLEN(bp);
+		ipoput4(c->p->f, bp, 0, gre->ttl - 1, gre->tos, nil);
+	}
+	qunlock(&grec->lock);
+}
+
+static void
+greiput(Proto *proto, Ipifc *, Block *bp)
+{
+	int len, hdrlen;
+	ushort eproto, flags;
 	uchar raddr[IPaddrlen];
+	Conv *c, **p;
+	GREconv *grec;
+	GREhdr *gre;
 	GREpriv *gpriv;
+	Ip4hdr *ip;
 
-	gpriv = gre->priv;
-	ghp = (GREhdr*)(bp->rp);
+	/*
+	 * We don't want to deal with block lists.  Ever.  The problem is
+	 * that when the block is forwarded, devether.c puts the block into
+	 * a queue that also uses ->next.  Just do not use ->next here!
+	 */
+	if(bp->next != nil)
+		bp = pullupblock(bp, blocklen(bp));
 
-	v4tov6(raddr, ghp->src);
-	eproto = nhgets(ghp->eproto);
-	qlock(gre);
+	gre = (GREhdr *)bp->rp;
+	if(BLEN(bp) < sizeof(GREhdr) || gre->proto != IP_GREPROTO){
+		freeb(bp);
+		return;
+	}
 
-	/* Look for a conversation structure for this port and address */
-	c = nil;
-	for(p = gre->conv; *p; p++) {
+	v4tov6(raddr, gre->src);
+	eproto = nhgets(gre->eproto);
+	flags  = nhgets(gre->flags);
+	hdrlen = sizeof(GREhdr);
+
+	if(flags & GRE_cksum)
+		hdrlen += 2;
+	if(flags & GRE_routing){
+		print("%I routing info present.  Discarding packet\n", raddr);
+		freeb(bp);
+		return;
+	}
+	if(flags & (GRE_cksum|GRE_routing))
+		hdrlen += 2;			/* Offset field */
+	if(flags & GRE_key)
+		hdrlen += 4;
+	if(flags & GRE_seq)
+		hdrlen += 4;
+
+	qlock(proto);
+
+	if(eproto != 0x880B && BLEN(bp) - hdrlen >= sizeof(Ip4hdr)){
+		ip = (Ip4hdr *)(bp->rp + hdrlen);
+
+		/*
+		 * Look for a conversation structure for this port and address, or
+		 * match the retunnel part, or match on the raw flag.
+		 */
+		for(p = proto->conv; *p; p++) {
+			c = *p;
+
+			if(c->inuse == 0)
+				continue;
+
+			/*
+			 * Do not stop this session - blocking here
+			 * implies that etherread is blocked.
+			 */
+			grec = c->ptcl;
+			if(memcmp(ip->dst, grec->hoa, sizeof ip->dst) == 0){
+				grepdin++;
+				grebdin += BLEN(bp);
+				gredownlink(c, bp);
+				qunlock(proto);
+				return;
+			}
+
+			if(memcmp(ip->src, grec->hoa, sizeof ip->src) == 0){
+				grepuin++;
+				grebuin += BLEN(bp);
+				greuplink(c, bp);
+				qunlock(proto);
+				return;
+			}
+		}
+	}
+
+
+	/*
+	 * when we get here, none of the forwarding tunnels matched.  now
+	 * try to match on raw and conversational sessions.
+	 */
+	for(c = nil, p = proto->conv; *p; p++) {
 		c = *p;
+
 		if(c->inuse == 0)
 			continue;
-		if(c->rport == eproto && 
-			(gpriv->raw || ipcmp(c->raddr, raddr) == 0))
+
+		/*
+		 * Do not stop this session - blocking here
+		 * implies that etherread is blocked.
+		 */
+		grec = c->ptcl;
+		if(c->rport == eproto &&
+		    (grec->raw || ipcmp(c->raddr, raddr) == 0))
 			break;
 	}
 
-	if(*p == nil) {
-		qunlock(gre);
-		freeblist(bp);
+	qunlock(proto);
+
+	if(*p == nil){
+		freeb(bp);
 		return;
 	}
 
-	qunlock(gre);
-
 	/*
 	 * Trim the packet down to data size
 	 */
-	len = nhgets(ghp->len) - GRE_IPONLY;
+	len = nhgets(gre->len) - GRE_IPONLY;
 	if(len < GRE_IPPLUSGRE){
-		freeblist(bp);
+		freeb(bp);
 		return;
 	}
+
 	bp = trimblock(bp, GRE_IPONLY, len);
 	if(bp == nil){
+		gpriv = proto->priv;
 		gpriv->lenerr++;
 		return;
 	}
 
-	/*
-	 *  Can't delimit packet so pull it all into one block.
-	 */
-	if(qlen(c->rq) > 64*1024)
-		freeblist(bp);
-	else{
-		bp = concatblock(bp);
-		if(bp == 0)
-			panic("greiput");
-		qpass(c->rq, bp);
-	}
+	qpass(c->rq, bp);
 }
 
 int
@@ -234,29 +649,258 @@
 	GREpriv *gpriv;
 
 	gpriv = gre->priv;
+	return snprint(buf, len,
+		"gre: %llud %llud %llud %llud %llud %llud %llud %llud, lenerrs %llud\n",
+		grepdin, grepdout, grepuin, grepuout,
+		grebdin, grebdout, grebuin, grebuout, gpriv->lenerr);
+}
 
-	return snprint(buf, len, "gre: len %lud\n", gpriv->lenerr);
+/* ctl handler: flag the conversation raw, so greiput's fallback match no longer compares raddr */
+static char *
+grectlraw(Conv *c, int, char **)
+{
+	GREconv *gc = c->ptcl;
+
+	gc->raw = 1;
+	return nil;
 }
 
-char*
-grectl(Conv *c, char **f, int n)
+/* ctl handler: clear the raw flag, so greiput's fallback match again requires raddr to match */
+static char *
+grectlcooked(Conv *c, int, char **)
 {
-	GREpriv *gpriv;
+	GREconv *gc = c->ptcl;
 
-	gpriv = c->p->priv;
-	if(n == 1){
-		if(strcmp(f[0], "raw") == 0){
-			gpriv->raw = 1;
-			return nil;
-		}
-		else if(strcmp(f[0], "cooked") == 0){
-			gpriv->raw = 0;
-			return nil;
-		}
+	gc->raw = 0;
+	return nil;
+}
+
+/* ctl handler: set up a retunnel from argv[1..4] = hoa, north, south, coa; refused once hoa is non-zero */
+static char *
+grectlretunnel(Conv *c, int, char **argv)
+{
+	GREconv *gc = c->ptcl;
+	uchar ip[4];
+
+	if(memcmp(gc->hoa, nulladdr, sizeof gc->hoa) != 0)
+		return "tunnel already set up";
+
+	v4parseip(ip, argv[1]);
+	if(memcmp(ip, nulladdr, sizeof ip) == 0)
+		return "bad hoa";
+	memmove(gc->hoa, ip, sizeof gc->hoa);
+	v4parseip(ip, argv[2]);
+	memmove(gc->north, ip, sizeof gc->north);
+	v4parseip(ip, argv[3]);
+	memmove(gc->south, ip, sizeof gc->south);
+	v4parseip(ip, argv[4]);
+	memmove(gc->coa, ip, sizeof gc->coa);
+	gc->ulsusp = 1;	/* a new tunnel starts with ulsusp set ... */
+	gc->dlsusp = 0;	/* ... and dlsusp clear */
+
+	return nil;
+}
+
+/*
+ * ctl handler: free the blocks at the head of dlpending whose Metablock seq precedes argv[1].
+ */
+static char *
+grectlreport(Conv *c, int, char **argv)
+{
+	GREconv *gc = c->ptcl;
+	ulong seq = strtoul(argv[1], nil, 0);
+	Bring *r = &gc->dlpending;
+	Metablock *m;
+	Block *bp;
+
+	qlock(&gc->lock);
+	while(r->produced - r->consumed > 0){
+		bp = r->ring[r->consumed & Ringmask];
+
+		/* the Metablock sits in the headroom between base and rp */
+		assert(bp && bp->rp - bp->base >= sizeof(Metablock));
+		m = (Metablock *)bp->base;
+		if((long)(seq - m->seq) <= 0)	/* signed difference, presumably to tolerate seq wraparound */
+			break;
+
+		r->ring[r->consumed & Ringmask] = nil;
+		r->consumed++;
+
+		freeb(bp);
 	}
-	return "unknown control request";
+	qunlock(&gc->lock);
+	return nil;
 }
 
+/* ctl handler: set dlsusp; returns an error string if it is already set */
+static char *
+grectldlsuspend(Conv *c, int, char **)
+{
+	GREconv *gc = c->ptcl;
+
+	if(gc->dlsusp != 0)
+		return "already suspended";
+
+	gc->dlsusp = 1;
+	return nil;
+}
+
+/* ctl handler: set ulsusp; returns an error string if it is already set */
+static char *
+grectlulsuspend(Conv *c, int, char **)
+{
+	GREconv *gc = c->ptcl;
+
+	if(gc->ulsusp != 0)
+		return "already suspended";
+
+	gc->ulsusp = 1;
+	return nil;
+}
+
+/* ctl handler: send a copy of every block queued on dlbuffered, then clear dlsusp */
+static char *
+grectldlresume(Conv *c, int, char **)
+{
+	GREconv *gc = c->ptcl;
+	GREhdr *gh;
+	Block *bp;
+
+	qlock(&gc->lock);
+	if(gc->dlsusp == 0){
+		qunlock(&gc->lock);
+		return "not suspended";
+	}
+
+	while((bp = getring(&gc->dlbuffered)) != nil){
+		gh = (GREhdr *)bp->rp;
+		qunlock(&gc->lock);	/* the lock is not held across ipoput4 */
+
+		ipoput4(c->p->f, copyblock(bp, BLEN(bp)), 0, gh->ttl - 1, gh->tos, nil);
+
+		qlock(&gc->lock);
+		/* the original stays on dlpending; grectlreport frees blocks from that ring */
+		addring(&gc->dlpending, bp);
+	}
+	gc->dlsusp = 0;
+	qunlock(&gc->lock);
+	return nil;
+}
+
+/* ctl handler: send every block queued on ulbuffered, then clear ulsusp */
+static char *
+grectlulresume(Conv *c, int, char **)
+{
+	GREconv *gc = c->ptcl;
+	GREhdr *gh;
+	Block *bp;
+
+	/* unlike grectldlresume: no "not suspended" check, and the block itself is sent, not a copy */
+	qlock(&gc->lock);
+	while((bp = getring(&gc->ulbuffered)) != nil){
+		gh = (GREhdr *)bp->rp;
+
+		qunlock(&gc->lock);
+		ipoput4(c->p->f, bp, 0, gh->ttl - 1, gh->tos, nil);
+		qlock(&gc->lock);
+	}
+	gc->ulsusp = 0;
+	qunlock(&gc->lock);
+	return nil;
+}
+
+/*
+ * ctl handler: retarget the tunnel at argv[1] (stored as south and north), then send the
+ * dlpending, dlbuffered and ulbuffered rings there as coa -> south; fails unless dlsusp is set.
+ */
+static char *
+grectlforward(Conv *c, int, char **argv)
+{
+	Block *bp;
+	GREconv *grec = c->ptcl;
+	GREhdr *gre;
+	Metablock *m;
+
+	v4parseip(grec->south, argv[1]);	/* NOTE(review): overwritten even if we fail below */
+	memmove(grec->north, grec->south, sizeof grec->north);
+
+	qlock(&grec->lock);
+	if(!grec->dlsusp){
+		qunlock(&grec->lock);
+		return "not suspended";
+	}
+	grec->dlsusp = 0;
+	grec->ulsusp = 0;
+
+	while((bp = getring(&grec->dlpending)) != nil){
+		/* restore the read pointer recorded in the Metablock at the block's base */
+		assert(bp->rp - bp->base >= sizeof(Metablock));
+		m = (Metablock *)bp->base;
+		assert(m->rp >= bp->base && m->rp < bp->lim);
+		bp->rp = m->rp;
+
+		gre = (GREhdr *)bp->rp;
+		memmove(gre->src, grec->coa, sizeof gre->src);
+		memmove(gre->dst, grec->south, sizeof gre->dst);
+
+		qunlock(&grec->lock);	/* the lock is dropped around each ipoput4 */
+		ipoput4(c->p->f, bp, 0, gre->ttl - 1, gre->tos, nil);
+		qlock(&grec->lock);
+	}
+
+	while((bp = getring(&grec->dlbuffered)) != nil){
+		gre = (GREhdr *)bp->rp;
+		memmove(gre->src, grec->coa, sizeof gre->src);
+		memmove(gre->dst, grec->south, sizeof gre->dst);
+
+		qunlock(&grec->lock);
+		ipoput4(c->p->f, bp, 0, gre->ttl - 1, gre->tos, nil);
+		qlock(&grec->lock);
+	}
+
+	while((bp = getring(&grec->ulbuffered)) != nil){
+		gre = (GREhdr *)bp->rp;
+		memmove(gre->src, grec->coa, sizeof gre->src);
+		memmove(gre->dst, grec->south, sizeof gre->dst);
+
+		qunlock(&grec->lock);
+		ipoput4(c->p->f, bp, 0, gre->ttl - 1, gre->tos, nil);
+		qlock(&grec->lock);
+	}
+	qunlock(&grec->lock);
+	return nil;
+}
+
+/* ctl handler: store argv[1], parsed by strtoul with base 0, as the conversation's ulkey */
+static char *
+grectlulkey(Conv *c, int, char **argv)
+{
+	GREconv *gc = c->ptcl;
+
+	gc->ulkey = strtoul(argv[1], nil, 0);
+	return nil;
+}
+
+/* Dispatch a ctl request: f[0] picks the first matching grectls entry, whose handler gets (c, n, f). */
+char *
+grectl(Conv *c, char **f, int n)
+{
+	int i;
+
+	if(n < 1)
+		return "too few arguments";
+
+	for(i = 0; i < Ncmds; i++){
+		if(strcmp(f[0], grectls[i].cmd) != 0)
+			continue;
+		/* an argc of 0 accepts any argument count; otherwise n must equal argc */
+		if(grectls[i].argc != 0 && grectls[i].argc != n)
+			return "incorrect number of arguments";
+		return grectls[i].f(c, n, f);
+	}
+	return "no such command";
+}
+
 void
 greinit(Fs *fs)
 {
@@ -276,7 +920,7 @@
 	gre->stats = grestats;
 	gre->ipproto = IP_GREPROTO;
 	gre->nc = 64;
-	gre->ptclsize = 0;
+	gre->ptclsize = sizeof(GREconv);
 
 	Fsproto(fs, gre);
 }
--- a/os/ip/icmp.c
+++ b/os/ip/icmp.c
@@ -44,11 +44,6 @@
 	Maxtype		= 18,
 };
 
-enum
-{
-	MinAdvise	= 24,	/* minimum needed for us to advise another protocol */ 
-};
-
 char *icmpnames[Maxtype+1] =
 {
 [EchoReply]		"EchoReply",
@@ -70,6 +65,8 @@
 	IP_ICMPPROTO	= 1,
 	ICMP_IPSIZE	= 20,
 	ICMP_HDRSIZE	= 8,
+
+	MinAdvise	= ICMP_IPSIZE+4,	/* minimum needed for us to advise another protocol */ 
 };
 
 enum
@@ -113,7 +110,7 @@
 	c->wq = qbypass(icmpkick, c);
 }
 
-extern char*
+char*
 icmpconnect(Conv *c, char **argv, int argc)
 {
 	char *e;
@@ -126,11 +123,11 @@
 	return nil;
 }
 
-extern int
+int
 icmpstate(Conv *c, char *state, int n)
 {
 	USED(c);
-	return snprint(state, n, "%s qin %d qout %d",
+	return snprint(state, n, "%s qin %d qout %d\n",
 		"Datagram",
 		c->rq ? qlen(c->rq) : 0,
 		c->wq ? qlen(c->wq) : 0
@@ -137,7 +134,7 @@
 	);
 }
 
-extern char*
+char*
 icmpannounce(Conv *c, char **argv, int argc)
 {
 	char *e;
@@ -150,7 +147,7 @@
 	return nil;
 }
 
-extern void
+void
 icmpclose(Conv *c)
 {
 	qclose(c->rq);
@@ -169,8 +166,7 @@
 
 	if(bp == nil)
 		return;
-
-	if(blocklen(bp) < ICMP_IPSIZE + ICMP_HDRSIZE){
+	if(BLEN(bp) < ICMP_IPSIZE + ICMP_HDRSIZE){
 		freeblist(bp);
 		return;
 	}
@@ -190,21 +186,50 @@
 	ipoput4(c->p->f, bp, 0, c->ttl, c->tos, nil);
 }
 
-extern void
-icmpttlexceeded(Fs *f, uchar *ia, Block *bp)
+/* may we send an ICMP message back to ip4?  never to multicast; else ipforme must give 0 or exactly Runi */
+static int
+ip4reply(Fs *f, uchar ip4[4])
+{
+	uchar addr[IPaddrlen];
+	int r;
+
+	v4tov6(addr, ip4);
+	if(ipismulticast(addr))
+		return 0;
+	return (r = ipforme(f, addr)) == 0 || r == Runi;
+}
+
+/* true only when ip4 is not multicast and ipforme answers exactly Runi for it
+ * (presumably: ip4 is one of this host's own unicast addresses) */
+static int
+ip4me(Fs *f, uchar ip4[4])
+{
+	uchar addr[IPaddrlen];
+
+	v4tov6(addr, ip4);
+	return !ipismulticast(addr) && ipforme(f, addr) == Runi;
+}
+
+void
+icmpttlexceeded(Fs *f, Ipifc *ifc, Block *bp)
+{
 	Block	*nbp;
 	Icmp	*p, *np;
+	uchar	ia[IPv4addrlen];
 
 	p = (Icmp *)bp->rp;
+	if(!ip4reply(f, p->src) || !ipv4local(ifc, ia, 0, p->src))
+		return;
 
-	netlog(f, Logicmp, "sending icmpttlexceeded -> %V\n", p->src);
+	netlog(f, Logicmp, "sending icmpttlexceeded %V -> src %V dst %V\n",
+		ia, p->src, p->dst);
+
 	nbp = allocb(ICMP_IPSIZE + ICMP_HDRSIZE + ICMP_IPSIZE + 8);
 	nbp->wp += ICMP_IPSIZE + ICMP_HDRSIZE + ICMP_IPSIZE + 8;
 	np = (Icmp *)nbp->rp;
 	np->vihl = IP_VER4;
+	memmove(np->src, ia, sizeof(np->src));
 	memmove(np->dst, p->src, sizeof(np->dst));
-	v6tov4(np->src, ia);
 	memmove(np->data, bp->rp, ICMP_IPSIZE + 8);
 	np->type = TimeExceed;
 	np->code = 0;
@@ -214,7 +239,6 @@
 	memset(np->cksum, 0, sizeof(np->cksum));
 	hnputs(np->cksum, ptclcsum(nbp, ICMP_IPSIZE, blocklen(nbp) - ICMP_IPSIZE));
 	ipoput4(f, nbp, 0, MAXTTL, DFLTTOS, nil);
-
 }
 
 static void
@@ -222,20 +246,10 @@
 {
 	Block	*nbp;
 	Icmp	*p, *np;
-	int	i;
-	uchar	addr[IPaddrlen];
 
 	p = (Icmp *)bp->rp;
-
-	/* only do this for unicast sources and destinations */
-	v4tov6(addr, p->dst);
-	i = ipforme(f, addr);
-	if((i&Runi) == 0)
+	if(!ip4me(f, p->dst) || !ip4reply(f, p->src))
 		return;
-	v4tov6(addr, p->src);
-	i = ipforme(f, addr);
-	if(i != 0 && (i&Runi) == 0)
-		return;
 
 	netlog(f, Logicmp, "sending icmpnoconv -> %V\n", p->src);
 	nbp = allocb(ICMP_IPSIZE + ICMP_HDRSIZE + ICMP_IPSIZE + 8);
@@ -255,13 +269,13 @@
 	ipoput4(f, nbp, 0, MAXTTL, DFLTTOS, nil);
 }
 
-extern void
+void
 icmpnoconv(Fs *f, Block *bp)
 {
 	icmpunreachable(f, bp, 3, 0);
 }
 
-extern void
+void
 icmpcantfrag(Fs *f, Block *bp, int mtu)
 {
 	icmpunreachable(f, bp, 4, mtu);
@@ -270,35 +284,34 @@
 static void
 goticmpkt(Proto *icmp, Block *bp)
 {
+	ushort	recid;
+	uchar	dst[IPaddrlen], src[IPaddrlen];
 	Conv	**c, *s;
 	Icmp	*p;
-	uchar	dst[IPaddrlen];
-	ushort	recid;
 
 	p = (Icmp *) bp->rp;
-	v4tov6(dst, p->src);
+	v4tov6(dst, p->dst);
+	v4tov6(src, p->src);
 	recid = nhgets(p->icmpid);
 
-	for(c = icmp->conv; *c; c++) {
-		s = *c;
+	for(c = icmp->conv; (s = *c) != nil; c++){
 		if(s->lport == recid)
-		if(ipcmp(s->raddr, dst) == 0){
-			bp = concatblock(bp);
-			if(bp != nil)
-				qpass(s->rq, bp);
-			return;
-		}
+		if(ipcmp(s->laddr, dst) == 0 || ipcmp(s->raddr, src) == 0)
+			qpass(s->rq, copyblock(bp, blocklen(bp)));
 	}
 	freeblist(bp);
 }
 
 static Block *
-mkechoreply(Block *bp)
+mkechoreply(Block *bp, Fs *f)
 {
 	Icmp	*q;
 	uchar	ip[4];
 
 	q = (Icmp *)bp->rp;
+	if(!ip4me(f, q->dst) || !ip4reply(f, q->src))
+		return nil;
+
 	q->vihl = IP_VER4;
 	memmove(ip, q->src, sizeof(q->dst));
 	memmove(q->src, q->dst, sizeof(q->src));
@@ -318,12 +331,22 @@
 [3]	"port unreachable",
 [4]	"fragmentation needed and DF set",
 [5]	"source route failed",
+[6]	"destination network unknown",
+[7]	"destination host unknown",
+[8]	"source host isolated",
+[9]	"network administratively prohibited",
+[10]	"host administratively prohibited",
+[11]	"network unreachable for tos",
+[12]	"host unreachable for tos",
+[13]	"communication administratively prohibited",
+[14]	"host precedence violation",
+[15]	"precedence cutoff in effect",
 };
 
 static void
 icmpiput(Proto *icmp, Ipifc*, Block *bp)
 {
-	int	n, iplen;
+	int	n;
 	Icmp	*p;
 	Block	*r;
 	Proto	*pr;
@@ -332,12 +355,10 @@
 	Icmppriv *ipriv;
 
 	ipriv = icmp->priv;
-	
 	ipriv->stats[InMsgs]++;
 
-	p = (Icmp *)bp->rp;
-	netlog(icmp->f, Logicmp, "icmpiput %d %d\n", p->type, p->code);
-	n = blocklen(bp);
+	bp = concatblock(bp);
+	n = BLEN(bp);
 	if(n < ICMP_IPSIZE+ICMP_HDRSIZE){
 		ipriv->stats[InErrors]++;
 		ipriv->stats[HlenErrs]++;
@@ -344,69 +365,57 @@
 		netlog(icmp->f, Logicmp, "icmp hlen %d\n", n);
 		goto raise;
 	}
-	iplen = nhgets(p->length);
-	if(iplen > n || (iplen % 1)){
-		ipriv->stats[LenErrs]++;
+	if(ptclcsum(bp, ICMP_IPSIZE, n - ICMP_IPSIZE)){
 		ipriv->stats[InErrors]++;
-		netlog(icmp->f, Logicmp, "icmp length %d\n", iplen);
-		goto raise;
-	}
-	if(ptclcsum(bp, ICMP_IPSIZE, iplen - ICMP_IPSIZE)){
-		ipriv->stats[InErrors]++;
 		ipriv->stats[CsumErrs]++;
 		netlog(icmp->f, Logicmp, "icmp checksum error\n");
 		goto raise;
 	}
+	p = (Icmp *)bp->rp;
+	netlog(icmp->f, Logicmp, "icmpiput %s (%d) %d\n",
+		(p->type < nelem(icmpnames)? icmpnames[p->type]: ""),
+		p->type, p->code);
 	if(p->type <= Maxtype)
 		ipriv->in[p->type]++;
 
 	switch(p->type) {
 	case EchoRequest:
-		if (iplen < n)
-			bp = trimblock(bp, 0, iplen);
-		r = mkechoreply(bp);
+		r = mkechoreply(bp, icmp->f);
+		if(r == nil)
+			goto raise;
 		ipriv->out[EchoReply]++;
 		ipoput4(icmp->f, r, 0, MAXTTL, DFLTTOS, nil);
 		break;
 	case Unreachable:
-		if(p->code > 5)
-			msg = unreachcode[1];
-		else
+		if(p->code >= nelem(unreachcode)) {
+			snprint(m2, sizeof m2, "unreachable %V -> %V code %d",
+				p->src, p->dst, p->code);
+			msg = m2;
+		} else
 			msg = unreachcode[p->code];
 
+	Advise:
 		bp->rp += ICMP_IPSIZE+ICMP_HDRSIZE;
-		if(blocklen(bp) < MinAdvise){
+		if(BLEN(bp) < MinAdvise){
 			ipriv->stats[LenErrs]++;
 			goto raise;
 		}
 		p = (Icmp *)bp->rp;
-		pr = Fsrcvpcolx(icmp->f, p->proto);
-		if(pr != nil && pr->advise != nil) {
-			(*pr->advise)(pr, bp, msg);
-			return;
+		if((nhgets(p->frag) & IP_FO) == 0){
+			pr = Fsrcvpcolx(icmp->f, p->proto);
+			if(pr != nil && pr->advise != nil) {
+				(*pr->advise)(pr, bp, msg);
+				return;
+			}
 		}
-
 		bp->rp -= ICMP_IPSIZE+ICMP_HDRSIZE;
 		goticmpkt(icmp, bp);
 		break;
 	case TimeExceed:
 		if(p->code == 0){
-			sprint(m2, "ttl exceeded at %V", p->src);
-
-			bp->rp += ICMP_IPSIZE+ICMP_HDRSIZE;
-			if(blocklen(bp) < MinAdvise){
-				ipriv->stats[LenErrs]++;
-				goto raise;
-			}
-			p = (Icmp *)bp->rp;
-			pr = Fsrcvpcolx(icmp->f, p->proto);
-			if(pr != nil && pr->advise != nil) {
-				(*pr->advise)(pr, bp, m2);
-				return;
-			}
-			bp->rp -= ICMP_IPSIZE+ICMP_HDRSIZE;
+			snprint(msg = m2, sizeof m2, "ttl exceeded at %V", p->src);
+			goto Advise;
 		}
-
 		goticmpkt(icmp, bp);
 		break;
 	default:
@@ -419,22 +428,25 @@
 	freeblist(bp);
 }
 
-void
+static void
 icmpadvise(Proto *icmp, Block *bp, char *msg)
 {
+	ushort	recid;
+	uchar	dst[IPaddrlen], src[IPaddrlen];
 	Conv	**c, *s;
 	Icmp	*p;
-	uchar	dst[IPaddrlen];
-	ushort	recid;
 
 	p = (Icmp *) bp->rp;
 	v4tov6(dst, p->dst);
+	v4tov6(src, p->src);
 	recid = nhgets(p->icmpid);
 
-	for(c = icmp->conv; *c; c++) {
-		s = *c;
+	for(c = icmp->conv; (s = *c) != nil; c++){
 		if(s->lport == recid)
+		if(ipcmp(s->laddr, src) == 0)
 		if(ipcmp(s->raddr, dst) == 0){
+			if(s->ignoreadvice)
+				break;
 			qhangup(s->rq, msg);
 			qhangup(s->wq, msg);
 			break;
@@ -443,7 +455,7 @@
 	freeblist(bp);
 }
 
-int
+static int
 icmpstats(Proto *icmp, char *buf, int len)
 {
 	Icmppriv *priv;
@@ -456,7 +468,7 @@
 	for(i = 0; i < Nstats; i++)
 		p = seprint(p, e, "%s: %lud\n", statnames[i], priv->stats[i]);
 	for(i = 0; i <= Maxtype; i++){
-		if(icmpnames[i])
+		if(icmpnames[i] != nil)
 			p = seprint(p, e, "%s: %lud %lud\n", icmpnames[i], priv->in[i], priv->out[i]);
 		else
 			p = seprint(p, e, "%d: %lud %lud\n", i, priv->in[i], priv->out[i]);
@@ -463,13 +475,7 @@
 	}
 	return p - buf;
 }
-
-int
-icmpgc(Proto *icmp)
-{
-	return natgc(icmp->ipproto);
-}
-
+	
 void
 icmpinit(Fs *fs)
 {
@@ -487,7 +493,7 @@
 	icmp->stats = icmpstats;
 	icmp->ctl = nil;
 	icmp->advise = icmpadvise;
-	icmp->gc = icmpgc;
+	icmp->gc = nil;
 	icmp->ipproto = IP_ICMPPROTO;
 	icmp->nc = 128;
 	icmp->ptclsize = 0;
--- a/os/ip/icmp6.c
+++ b/os/ip/icmp6.c
@@ -1,3 +1,6 @@
+/*
+ * Internet Control Message Protocol for IPv6
+ */
 #include "u.h"
 #include "../port/lib.h"
 #include "mem.h"
@@ -7,41 +10,36 @@
 #include "ip.h"
 #include "ipv6.h"
 
-typedef struct ICMPpkt ICMPpkt;
-typedef struct IPICMP IPICMP;
-typedef struct Ndpkt Ndpkt;
-typedef struct NdiscC NdiscC;
+enum
+{
+	InMsgs6,
+	InErrors6,
+	OutMsgs6,
+	CsumErrs6,
+	LenErrs6,
+	HlenErrs6,
+	HoplimErrs6,
+	IcmpCodeErrs6,
+	TargetErrs6,
+	OptlenErrs6,
+	AddrmxpErrs6,
+	RouterAddrErrs6,
 
-struct ICMPpkt {
-	uchar	type;
-	uchar	code;
-	uchar	cksum[2];
-	uchar	icmpid[2];
-	uchar	seq[2];
+	Nstats6,
 };
 
-struct IPICMP {
-	Ip6hdr;
-	ICMPpkt;
+enum {
+	ICMP_USEAD6	= 40,
 };
 
-struct NdiscC
-{
-	IPICMP;
-	uchar target[IPaddrlen];
+enum {
+	Oflag	= 1<<5,
+	Sflag	= 1<<6,
+	Rflag	= 1<<7,
 };
 
-struct Ndpkt
-{
-	NdiscC;
-	uchar otype;
-	uchar olen;	// length in units of 8 octets(incl type, code),
-				// 1 for IEEE 802 addresses
-	uchar lnaddr[6];	// link-layer address
-};
-
-enum {	
-	// ICMPv6 types
+enum {
+	/* ICMPv6 types */
 	EchoReply	= 0,
 	UnreachableV6	= 1,
 	PacketTooBigV6	= 2,
@@ -69,6 +67,65 @@
 	Maxtype6	= 137,
 };
 
+enum {
+	MinAdvise	= IP6HDR+4,	/* minimum needed for us to advise another protocol */ 
+};
+
+/* on-the-wire packet formats */
+typedef struct IPICMP IPICMP;
+typedef struct Ndpkt Ndpkt;
+typedef struct NdiscC NdiscC;
+
+/* fields shared by the wire structs below: IPV6HDR, then 8 bytes of ICMPv6 header; a macro, to avoid possible struct padding */
+#define ICMPHDR \
+	IPV6HDR; \
+	uchar	type; \
+	uchar	code; \
+	uchar	cksum[2]; \
+	uchar	icmpid[2]; \
+	uchar	seq[2]
+
+struct IPICMP {
+	ICMPHDR;
+	uchar	payload[];	/* whatever follows the fixed header; its offset is IPICMPSZ */
+};
+
+#define IPICMPSZ offsetof(IPICMP, payload[0])
+
+struct NdiscC {
+	ICMPHDR;
+	uchar	target[IPaddrlen];	/* same layout as Ndpkt up to here */
+	uchar	payload[];		/* offset is NDISCSZ: icmpns trims to it when it sends no link-layer option */
+};
+
+#define NDISCSZ offsetof(NdiscC, payload[0])
+
+struct Ndpkt {
+	ICMPHDR;
+	uchar	target[IPaddrlen];	/* filled from targ by icmpns and icmpna */
+	uchar	otype;		/* option type: SRC_LLADDR in icmpns, TARGET_LLADDR in icmpna */
+	uchar	olen;		/* length in units of 8 octets (incl. the type and length octets),
+				 * 1 for IEEE 802 addresses */
+	uchar	lnaddr[6];	/* link-layer address */
+	uchar	payload[];	/* offset is NDPKTSZ */
+};
+
+#define NDPKTSZ offsetof(Ndpkt, payload[0])
+
+typedef struct Icmppriv6
+{
+	ulong	stats[Nstats6];	/* counters indexed by the InMsgs6 ... RouterAddrErrs6 enum */
+
+	/* message counts, indexed by ICMPv6 type */
+	ulong	in[Maxtype6+1];
+	ulong	out[Maxtype6+1];
+} Icmppriv6;
+
+typedef struct Icmpcb6
+{
+	uchar	headers;	/* set to 6 by the "headers" ctl request (icmpctl6) */
+} Icmpcb6;
+
 char *icmpnames6[Maxtype6+1] =
 {
 [EchoReply]		"EchoReply",
@@ -95,24 +152,6 @@
 [RedirectV6]		"RedirectV6",
 };
 
-enum
-{
-	InMsgs6,
-	InErrors6,
-	OutMsgs6,
-	CsumErrs6,
-	LenErrs6,
-	HlenErrs6,
-	HoplimErrs6,
-	IcmpCodeErrs6,
-	TargetErrs6,
-	OptlenErrs6,
-	AddrmxpErrs6,
-	RouterAddrErrs6,
-
-	Nstats6,
-};
-
 static char *statnames6[Nstats6] =
 {
 [InMsgs6]	"InMsgs",
@@ -129,49 +168,18 @@
 [RouterAddrErrs6]	"RouterAddrErrs",
 };
 
-typedef struct Icmppriv6
-{
-	ulong	stats[Nstats6];
-
-	/* message counts */
-	ulong	in[Maxtype6+1];
-	ulong	out[Maxtype6+1];
-} Icmppriv6;
-
-typedef struct Icmpcb6 
-{
-	QLock;
-	uchar headers;
-} Icmpcb6;
-
 static char *unreachcode[] =
 {
-[icmp6_no_route]	"no route to destination",
-[icmp6_ad_prohib]	"comm with destination administratively prohibited",
-[icmp6_unassigned]	"icmp unreachable: unassigned error code (2)",
-[icmp6_adr_unreach]	"address unreachable",
-[icmp6_port_unreach]	"port unreachable",
-[icmp6_unkn_code]	"icmp unreachable: unknown code",
+[Icmp6_no_route]	"no route to destination",
+[Icmp6_ad_prohib]	"comm with destination administratively prohibited",
+[Icmp6_out_src_scope]	"beyond scope of source address",
+[Icmp6_adr_unreach]	"address unreachable",
+[Icmp6_port_unreach]	"port unreachable",
+[Icmp6_gress_src_fail]	"source address failed ingress/egress policy",
+[Icmp6_rej_route]	"reject route to destination",
+[Icmp6_unknown]		"icmp unreachable: unknown code",
 };
 
-enum {
-	ICMP_USEAD6	= 40,
-};
-
-enum {
-	Oflag	= 1<<5,
-	Sflag	= 1<<6,
-	Rflag	= 1<<7,
-};
-
-enum {
-	slladd	= 1,
-	tlladd	= 2,
-	prfinfo	= 3,
-	redhdr	= 4,
-	mtuopt	= 5,
-};
-
 static void icmpkick6(void *x, Block *bp);
 
 static void
@@ -185,13 +193,14 @@
 set_cksum(Block *bp)
 {
 	IPICMP *p = (IPICMP *)(bp->rp);
+	int n = blocklen(bp);
 
-	hnputl(p->vcf, 0);  // borrow IP header as pseudoheader
-	hnputs(p->ploadlen, blocklen(bp)-IPV6HDR_LEN);
+	hnputl(p->vcf, 0);  	/* borrow IP header as pseudoheader */
+	hnputs(p->ploadlen, n - IP6HDR);
 	p->proto = 0;
-	p->ttl = ICMPv6;	// ttl gets set later
+	p->ttl = ICMPv6;	/* ttl gets set later */
 	hnputs(p->cksum, 0);
-	hnputs(p->cksum, ptclcsum(bp, 0, blocklen(bp)));
+	hnputs(p->cksum, ptclcsum(bp, 0, n));
 	p->proto = ICMPv6;
 }
 
@@ -198,7 +207,8 @@
 static Block *
 newIPICMP(int packetlen)
 {
-	Block	*nbp;
+	Block *nbp;
+
 	nbp = allocb(packetlen);
 	nbp->wp += packetlen;
 	memset(nbp->rp, 0, packetlen);
@@ -205,20 +215,22 @@
 	return nbp;
 }
 
-void
+static void
 icmpadvise6(Proto *icmp, Block *bp, char *msg)
 {
-	Conv	**c, *s;
-	IPICMP	*p;
-	ushort	recid;
+	ushort recid;
+	Conv **c, *s;
+	IPICMP *p;
 
-	p = (IPICMP *) bp->rp;
+	p = (IPICMP *)bp->rp;
 	recid = nhgets(p->icmpid);
 
-	for(c = icmp->conv; *c; c++) {
-		s = *c;
+	for(c = icmp->conv; (s = *c) != nil; c++){
 		if(s->lport == recid)
+		if(ipcmp(s->laddr, p->src) == 0)
 		if(ipcmp(s->raddr, p->dst) == 0){
+			if(s->ignoreadvice)
+				break;
 			qhangup(s->rq, msg);
 			qhangup(s->wq, msg);
 			break;
@@ -230,9 +242,9 @@
 static void
 icmpkick6(void *x, Block *bp)
 {
+	uchar laddr[IPaddrlen], raddr[IPaddrlen];
 	Conv *c = x;
 	IPICMP *p;
-	uchar laddr[IPaddrlen], raddr[IPaddrlen];
 	Icmppriv6 *ipriv = c->p->priv;
 	Icmpcb6 *icb = (Icmpcb6*)c->ptcl;
 
@@ -249,10 +261,10 @@
 		bp->rp += IPaddrlen;
 		ipmove(raddr, bp->rp);
 		bp->rp += IPaddrlen;
-		bp = padblock(bp, sizeof(Ip6hdr));
+		bp = padblock(bp, IP6HDR);
 	}
 
-	if(blocklen(bp) < sizeof(IPICMP)){
+	if(BLEN(bp) < IPICMPSZ){
 		freeblist(bp);
 		return;
 	}
@@ -268,23 +280,20 @@
 
 	set_cksum(bp);
 	p->vcf[0] = 0x06 << 4;
-	if(p->type <= Maxtype6)	
+	if(p->type <= Maxtype6)
 		ipriv->out[p->type]++;
 	ipoput6(c->p->f, bp, 0, c->ttl, c->tos, nil);
 }
 
-char*
+static char*
 icmpctl6(Conv *c, char **argv, int argc)
 {
 	Icmpcb6 *icb;
 
 	icb = (Icmpcb6*) c->ptcl;
-
-	if(argc==1) {
-		if(strcmp(argv[0], "headers")==0) {
-			icb->headers = 6;
-			return nil;
-		}
+	if(argc==1 && strcmp(argv[0], "headers")==0) {
+		icb->headers = 6;
+		return nil;
 	}
 	return "unknown control request";
 }
@@ -292,41 +301,39 @@
 static void
 goticmpkt6(Proto *icmp, Block *bp, int muxkey)
 {
-	Conv	**c, *s;
-	IPICMP	*p = (IPICMP *)bp->rp;
-	ushort	recid; 
-	uchar 	*addr;
+	ushort recid;
+	uchar *addr;
+	Conv **c, *s;
+	IPICMP *p = (IPICMP *)bp->rp;
 
 	if(muxkey == 0) {
 		recid = nhgets(p->icmpid);
 		addr = p->src;
-	}
-	else {
+	} else {
 		recid = muxkey;
 		addr = p->dst;
 	}
-
-	for(c = icmp->conv; *c; c++){
-		s = *c;
-		if(s->lport == recid && ipcmp(s->raddr, addr) == 0){
-			bp = concatblock(bp);
-			if(bp != nil)
-				qpass(s->rq, bp);
-			return;
-		}
+	for(c = icmp->conv; (s = *c) != nil; c++){
+		if(s->lport == recid)
+		if(ipcmp(s->laddr, p->dst) == 0 || ipcmp(s->raddr, addr) == 0)
+			qpass(s->rq, copyblock(bp, blocklen(bp)));
 	}
-
 	freeblist(bp);
 }
 
 static Block *
-mkechoreply6(Block *bp)
+mkechoreply6(Block *bp, Ipifc *ifc)
 {
+	uchar addr[IPaddrlen];
 	IPICMP *p = (IPICMP *)(bp->rp);
-	uchar	addr[IPaddrlen];
 
+	if(isv6mcast(p->src))
+		return nil;
 	ipmove(addr, p->src);
-	ipmove(p->src, p->dst);
+	if(!isv6mcast(p->dst))
+		ipmove(p->src, p->dst);
+	else if (!ipv6local(ifc, p->src, 0, addr))
+		return nil;
 	ipmove(p->dst, addr);
 	p->type = EchoReplyV6;
 	set_cksum(bp);
@@ -335,49 +342,43 @@
 
 /*
  * sends out an ICMPv6 neighbor solicitation
- * 	suni == SRC_UNSPEC or SRC_UNI, 
+ * 	suni == SRC_UNSPEC or SRC_UNI,
  *	tuni == TARG_MULTI => multicast for address resolution,
  * 	and tuni == TARG_UNI => neighbor reachability.
  */
-
-extern void
+void
 icmpns(Fs *f, uchar* src, int suni, uchar* targ, int tuni, uchar* mac)
 {
-	Block	*nbp;
+	Block *nbp;
 	Ndpkt *np;
 	Proto *icmp = f->t2p[ICMPv6];
 	Icmppriv6 *ipriv = icmp->priv;
 
-
-	nbp = newIPICMP(sizeof(Ndpkt));
+	nbp = newIPICMP(NDPKTSZ);
 	np = (Ndpkt*) nbp->rp;
 
+	if(suni == SRC_UNSPEC)
+		ipmove(np->src, v6Unspecified);
+	else
+		ipmove(np->src, src);
 
-	if(suni == SRC_UNSPEC) 
-		memmove(np->src, v6Unspecified, IPaddrlen);
-	else 
-		memmove(np->src, src, IPaddrlen);
-
 	if(tuni == TARG_UNI)
-		memmove(np->dst, targ, IPaddrlen);
+		ipmove(np->dst, targ);
 	else
 		ipv62smcast(np->dst, targ);
 
 	np->type = NbrSolicit;
 	np->code = 0;
-	memmove(np->target, targ, IPaddrlen);
+	ipmove(np->target, targ);
 	if(suni != SRC_UNSPEC) {
-		np->otype = SRC_LLADDRESS;
-		np->olen = 1;	/* 1+1+6 = 8 = 1 8-octet */
+		np->otype = SRC_LLADDR;
+		np->olen = 1;		/* 1+1+6 = 8 = 1 8-octet */
 		memmove(np->lnaddr, mac, sizeof(np->lnaddr));
-	}
-	else {
-		int r = sizeof(Ndpkt)-sizeof(NdiscC);
-		nbp->wp -= r;
-	}
+	} else
+		nbp->wp -= NDPKTSZ - NDISCSZ;
 
 	set_cksum(nbp);
-	np = (Ndpkt*) nbp->rp;
+	np = (Ndpkt*)nbp->rp;
 	np->ttl = HOP_LIMIT;
 	np->vcf[0] = 0x06 << 4;
 	ipriv->out[NbrSolicit]++;
@@ -388,122 +389,101 @@
 /*
  * sends out an ICMPv6 neighbor advertisement. pktflags == RSO flags.
  */
-extern void
+void
 icmpna(Fs *f, uchar* src, uchar* dst, uchar* targ, uchar* mac, uchar flags)
 {
-	Block	*nbp;
+	Block *nbp;
 	Ndpkt *np;
 	Proto *icmp = f->t2p[ICMPv6];
 	Icmppriv6 *ipriv = icmp->priv;
 
-	nbp = newIPICMP(sizeof(Ndpkt));
-	np = (Ndpkt*) nbp->rp;
+	nbp = newIPICMP(NDPKTSZ);
+	np = (Ndpkt*)nbp->rp;
 
-	memmove(np->src, src, IPaddrlen);
-	memmove(np->dst, dst, IPaddrlen);
+	ipmove(np->src, src);
+	ipmove(np->dst, dst);
 
 	np->type = NbrAdvert;
 	np->code = 0;
 	np->icmpid[0] = flags;
-	memmove(np->target, targ, IPaddrlen);
+	ipmove(np->target, targ);
 
-	np->otype = TARGET_LLADDRESS;
-	np->olen = 1;	
+	np->otype = TARGET_LLADDR;
+	np->olen = 1;
 	memmove(np->lnaddr, mac, sizeof(np->lnaddr));
 
 	set_cksum(nbp);
-	np = (Ndpkt*) nbp->rp;
+	np = (Ndpkt*)nbp->rp;
 	np->ttl = HOP_LIMIT;
 	np->vcf[0] = 0x06 << 4;
 	ipriv->out[NbrAdvert]++;
-	netlog(f, Logicmp, "sending neighbor advertisement %I\n", src);
+	netlog(f, Logicmp, "sending neighbor advertisement %I\n", targ);
 	ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil);
 }
 
-extern void
-icmphostunr(Fs *f, Ipifc *ifc, Block *bp, int code, int free)
+void
+icmphostunr6(Fs *f, Ipifc *ifc, Block *bp, int code, int tome)
 {
+	int osz = BLEN(bp);
+	int sz = MIN(IPICMPSZ + osz, v6MINTU);
 	Block *nbp;
 	IPICMP *np;
-	Ip6hdr	*p;
-	int osz = BLEN(bp);
-	int sz = MIN(sizeof(IPICMP) + osz, v6MINTU);
-	Proto	*icmp = f->t2p[ICMPv6];
+	Ip6hdr *p;
+	Proto *icmp = f->t2p[ICMPv6];
 	Icmppriv6 *ipriv = icmp->priv;
+	uchar ia[IPaddrlen];
 
-	p = (Ip6hdr *) bp->rp;
+	p = (Ip6hdr *)bp->rp;
+	if(isv6mcast(p->dst) || isv6mcast(p->src) || !ipv6local(ifc, ia, 0, p->src))
+		return;
 
-	if(isv6mcast(p->src)) 
-		goto clean;
+	netlog(f, Logicmp, "send icmphostunr %I -> src %I dst %I\n",
+		ia, p->src, p->dst);
 
 	nbp = newIPICMP(sz);
-	np = (IPICMP *) nbp->rp;
-
-	rlock(ifc);
-	if(ipv6anylocal(ifc, np->src)) {
-		netlog(f, Logicmp, "send icmphostunr -> s%I d%I\n", p->src, p->dst);
-	}
-	else {
-		netlog(f, Logicmp, "icmphostunr fail -> s%I d%I\n", p->src, p->dst);
-		freeblist(nbp);
-		if(free) 
-			goto clean;
-		else
-			return;
-	}
-
-	memmove(np->dst, p->src, IPaddrlen);
+	np = (IPICMP *)nbp->rp;
+	ipmove(np->src, ia);
+	ipmove(np->dst, p->src);
 	np->type = UnreachableV6;
 	np->code = code;
-	memmove(nbp->rp + sizeof(IPICMP), bp->rp, sz - sizeof(IPICMP));
+	memmove(nbp->rp + IPICMPSZ, bp->rp, sz - IPICMPSZ);
 	set_cksum(nbp);
 	np->ttl = HOP_LIMIT;
 	np->vcf[0] = 0x06 << 4;
 	ipriv->out[UnreachableV6]++;
 
-	if(free)
+	if(tome)
 		ipiput6(f, ifc, nbp);
-	else {
+	else 
 		ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil);
-		return;
-	}
-
-clean:
-	runlock(ifc);
-	freeblist(bp);
 }
 
-extern void
+void
 icmpttlexceeded6(Fs *f, Ipifc *ifc, Block *bp)
 {
+	int osz = BLEN(bp);
+	int sz = MIN(IPICMPSZ + osz, v6MINTU);
 	Block *nbp;
 	IPICMP *np;
-	Ip6hdr	*p;
-	int osz = BLEN(bp);
-	int sz = MIN(sizeof(IPICMP) + osz, v6MINTU);
-	Proto	*icmp = f->t2p[ICMPv6];
+	Ip6hdr *p;
+	Proto *icmp = f->t2p[ICMPv6];
 	Icmppriv6 *ipriv = icmp->priv;
+	uchar ia[IPaddrlen];
 
-	p = (Ip6hdr *) bp->rp;
-
-	if(isv6mcast(p->src)) 
+	p = (Ip6hdr *)bp->rp;
+	if(isv6mcast(p->dst) || isv6mcast(p->src) || !ipv6local(ifc, ia, 0, p->src))
 		return;
 
+	netlog(f, Logicmp, "send icmpttlexceeded6 %I -> src %I dst %I\n",
+		ia, p->src, p->dst);
+
 	nbp = newIPICMP(sz);
 	np = (IPICMP *) nbp->rp;
-
-	if(ipv6anylocal(ifc, np->src)) {
-		netlog(f, Logicmp, "send icmpttlexceeded6 -> s%I d%I\n", p->src, p->dst);
-	}
-	else {
-		netlog(f, Logicmp, "icmpttlexceeded6 fail -> s%I d%I\n", p->src, p->dst);
-		return;
-	}
-
-	memmove(np->dst, p->src, IPaddrlen);
+	ipmove(np->src, ia);
+	ipmove(np->dst, p->src);
 	np->type = TimeExceedV6;
 	np->code = 0;
-	memmove(nbp->rp + sizeof(IPICMP), bp->rp, sz - sizeof(IPICMP));
+	memmove(nbp->rp + IPICMPSZ, bp->rp, sz - IPICMPSZ);
 	set_cksum(nbp);
 	np->ttl = HOP_LIMIT;
 	np->vcf[0] = 0x06 << 4;
@@ -511,38 +491,33 @@
 	ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil);
 }
 
-extern void
+void
 icmppkttoobig6(Fs *f, Ipifc *ifc, Block *bp)
 {
+	int osz = BLEN(bp);
+	int sz = MIN(IPICMPSZ + osz, v6MINTU);
 	Block *nbp;
 	IPICMP *np;
-	Ip6hdr	*p;
-	int osz = BLEN(bp);
-	int sz = MIN(sizeof(IPICMP) + osz, v6MINTU);
-	Proto	*icmp = f->t2p[ICMPv6];
+	Ip6hdr *p;
+	Proto *icmp = f->t2p[ICMPv6];
 	Icmppriv6 *ipriv = icmp->priv;
+	uchar ia[IPaddrlen];
 
-	p = (Ip6hdr *) bp->rp;
-
-	if(isv6mcast(p->src)) 
+	p = (Ip6hdr *)bp->rp;
+	if(isv6mcast(p->dst) || isv6mcast(p->src) || !ipv6local(ifc, ia, 0, p->src))
 		return;
 
-	nbp = newIPICMP(sz);
-	np = (IPICMP *) nbp->rp;
+	netlog(f, Logicmp, "send icmppkttoobig6 %I -> src %I dst %I\n",
+		ia, p->src, p->dst);
 
-	if(ipv6anylocal(ifc, np->src)) {
-		netlog(f, Logicmp, "send icmppkttoobig6 -> s%I d%I\n", p->src, p->dst);
-	}
-	else {
-		netlog(f, Logicmp, "icmppkttoobig6 fail -> s%I d%I\n", p->src, p->dst);
-		return;
-	}
-
-	memmove(np->dst, p->src, IPaddrlen);
+	nbp = newIPICMP(sz);
+	np = (IPICMP *)nbp->rp;
+	ipmove(np->src, ia);
+	ipmove(np->dst, p->src);
 	np->type = PacketTooBigV6;
 	np->code = 0;
 	hnputl(np->icmpid, ifc->maxtu - ifc->m->hsize);
-	memmove(nbp->rp + sizeof(IPICMP), bp->rp, sz - sizeof(IPICMP));
+	memmove(nbp->rp + IPICMPSZ, bp->rp, sz - IPICMPSZ);
 	set_cksum(nbp);
 	np->ttl = HOP_LIMIT;
 	np->vcf[0] = 0x06 << 4;
@@ -554,31 +529,23 @@
  * RFC 2461, pages 39-40, pages 57-58.
  */
 static int
-valid(Proto *icmp, Ipifc *ifc, Block *bp, Icmppriv6 *ipriv) {
-	int 	sz, osz, unsp, n, ttl, iplen;
-	int 	pktsz = BLEN(bp);
-	uchar	*packet = bp->rp;
-	IPICMP	*p = (IPICMP *) packet;
-	Ndpkt	*np;
+valid(Proto *icmp, Ipifc *, Block *bp, Icmppriv6 *ipriv)
+{
+	int sz, osz, unsp, ttl;
+	int pktsz = BLEN(bp);
+	uchar *packet = bp->rp;
+	IPICMP *p = (IPICMP *) packet;
+	Ndpkt *np;
 
-	USED(ifc);
-	n = blocklen(bp);
-	if(n < sizeof(IPICMP)) {
+	if(pktsz < IPICMPSZ) {
 		ipriv->stats[HlenErrs6]++;
-		netlog(icmp->f, Logicmp, "icmp hlen %d\n", n);
+		netlog(icmp->f, Logicmp, "icmp hlen %d\n", pktsz);
 		goto err;
 	}
 
-	iplen = nhgets(p->ploadlen);
-	if(iplen > n-IPV6HDR_LEN || (iplen % 1)) {
-		ipriv->stats[LenErrs6]++;
-		netlog(icmp->f, Logicmp, "icmp length %d\n", iplen);
-		goto err;
-	}
-
-	// Rather than construct explicit pseudoheader, overwrite IPv6 header
+	/* Rather than construct explicit pseudoheader, overwrite IPv6 header */
 	if(p->proto != ICMPv6) {
-		// This code assumes no extension headers!!!
+		/* This code assumes no extension headers!!! */
 		netlog(icmp->f, Logicmp, "icmp error: extension header\n");
 		goto err;
 	}
@@ -586,7 +553,7 @@
 	ttl = p->ttl;
 	p->ttl = p->proto;
 	p->proto = 0;
-	if(ptclcsum(bp, 0, iplen + IPV6HDR_LEN)) {
+	if(ptclcsum(bp, 0, pktsz)) {
 		ipriv->stats[CsumErrs6]++;
 		netlog(icmp->f, Logicmp, "icmp checksum error\n");
 		goto err;
@@ -595,19 +562,16 @@
 	p->ttl = ttl;
 
 	/* additional tests for some pkt types */
-	if( (p->type == NbrSolicit) ||
-		(p->type == NbrAdvert) ||
-		(p->type == RouterAdvert) ||
-		(p->type == RouterSolicit) ||
-		(p->type == RedirectV6) ) {
-
+	if (p->type == NbrSolicit   || p->type == NbrAdvert ||
+	    p->type == RouterAdvert || p->type == RouterSolicit ||
+	    p->type == RedirectV6) {
 		if(p->ttl != HOP_LIMIT) {
-			ipriv->stats[HoplimErrs6]++; 
-			goto err; 
+			ipriv->stats[HoplimErrs6]++;
+			goto err;
 		}
 		if(p->code != 0) {
-			ipriv->stats[IcmpCodeErrs6]++; 
-			goto err; 
+			ipriv->stats[IcmpCodeErrs6]++;
+			goto err;
 		}
 
 		switch (p->type) {
@@ -615,82 +579,78 @@
 		case NbrAdvert:
 			np = (Ndpkt*) p;
 			if(isv6mcast(np->target)) {
-				ipriv->stats[TargetErrs6]++; 
-				goto err; 
+				ipriv->stats[TargetErrs6]++;
+				goto err;
 			}
-			if(optexsts(np) && (np->olen == 0)) {
-				ipriv->stats[OptlenErrs6]++; 
-				goto err; 
+			if(optexsts(np) && np->olen == 0) {
+				ipriv->stats[OptlenErrs6]++;
+				goto err;
 			}
-		
-			if(p->type == NbrSolicit) {
-				if(ipcmp(np->src, v6Unspecified) == 0) { 
-					if(!issmcast(np->dst) || optexsts(np))  {
-						ipriv->stats[AddrmxpErrs6]++; 
-						goto err;
-					}
+
+			if (p->type == NbrSolicit &&
+			    ipcmp(np->src, v6Unspecified) == 0)
+				if(!issmcast(np->dst) || optexsts(np)) {
+					ipriv->stats[AddrmxpErrs6]++;
+					goto err;
 				}
-			}
-		
-			if(p->type == NbrAdvert) {
-				if((isv6mcast(np->dst))&&(nhgets(np->icmpid) & Sflag)){
-					ipriv->stats[AddrmxpErrs6]++; 
-					goto err; 
+
+			if(p->type == NbrAdvert)
+				if(isv6mcast(np->dst) &&
+				    (nhgets(np->icmpid) & Sflag)){
+					ipriv->stats[AddrmxpErrs6]++;
+					goto err;
 				}
-			}
 			break;
-	
+
 		case RouterAdvert:
-			if(pktsz - sizeof(Ip6hdr) < 16) {
-				ipriv->stats[HlenErrs6]++; 
-				goto err; 
+			if(pktsz - IP6HDR < 16) {
+				ipriv->stats[HlenErrs6]++;
+				goto err;
 			}
 			if(!islinklocal(p->src)) {
-				ipriv->stats[RouterAddrErrs6]++; 
-				goto err; 
+				ipriv->stats[RouterAddrErrs6]++;
+				goto err;
 			}
-			sz = sizeof(IPICMP) + 8;
-			while ((sz+1) < pktsz) {
-				osz = *(packet+sz+1);
+			sz = IPICMPSZ + 8;
+			while (sz+8 <= pktsz) {
+				osz = packet[sz+1];
 				if(osz <= 0) {
-					ipriv->stats[OptlenErrs6]++; 
-					goto err; 
-				}	
+					ipriv->stats[OptlenErrs6]++;
+					goto err;
+				}
 				sz += 8*osz;
 			}
 			break;
-	
+
 		case RouterSolicit:
-			if(pktsz - sizeof(Ip6hdr) < 8) {
-				ipriv->stats[HlenErrs6]++; 
-				goto err; 
+			if(pktsz - IP6HDR < 8) {
+				ipriv->stats[HlenErrs6]++;
+				goto err;
 			}
 			unsp = (ipcmp(p->src, v6Unspecified) == 0);
-			sz = sizeof(IPICMP) + 8;
-			while ((sz+1) < pktsz) {
-				osz = *(packet+sz+1);
-				if((osz <= 0) ||
-					(unsp && (*(packet+sz) == slladd)) ) {
-					ipriv->stats[OptlenErrs6]++; 
-					goto err; 
+			sz = IPICMPSZ + 8;
+			while (sz+8 <= pktsz) {
+				osz = packet[sz+1];
+				if(osz <= 0 ||
+				    (unsp && packet[sz] == SRC_LLADDR)) {
+					ipriv->stats[OptlenErrs6]++;
+					goto err;
 				}
 				sz += 8*osz;
 			}
 			break;
-	
+
 		case RedirectV6:
-			//to be filled in
+			/* to be filled in */
 			break;
-	
+
 		default:
 			goto err;
 		}
 	}
-
 	return 1;
-
 err:
-	ipriv->stats[InErrors6]++; 
+	ipriv->stats[InErrors6]++;
 	return 0;
 }
 
@@ -700,169 +660,162 @@
 	Iplifc *lifc;
 	int t;
 
-	rlock(ifc);
-	if(ipproxyifc(f, ifc, target)) {
-		runlock(ifc);
-		return t_uniproxy;
-	}
-
-	for(lifc = ifc->lifc; lifc; lifc = lifc->next) {
-		if(ipcmp(lifc->local, target) == 0) {
-			t = (lifc->tentative) ? t_unitent : t_unirany; 
-			runlock(ifc);
-			return t;
-		}
-	}
-
-	runlock(ifc);
-	return 0;
+	if((lifc = iplocalonifc(ifc, target)) != nil)
+		t = lifc->tentative? Tunitent: Tunirany;
+	else if(ipproxyifc(f, ifc, target))
+		t = Tuniproxy;
+	else
+		t = 0;
+	return t;
 }
 
 static void
-icmpiput6(Proto *icmp, Ipifc *ipifc, Block *bp)
+icmpiput6(Proto *icmp, Ipifc *ifc, Block *bp)
 {
-	uchar	*packet = bp->rp;
-	IPICMP	*p = (IPICMP *)packet;
-	Icmppriv6 *ipriv = icmp->priv;
-	Block	*r;
-	Proto	*pr;
-	char	*msg, m2[128];
-	Ndpkt* np;
+	char *msg, m2[128];
 	uchar pktflags;
-	uchar lsrc[IPaddrlen];
-	int refresh = 1;
+	uchar ia[IPaddrlen];
+	Block *r;
+	IPICMP *p;
+	Icmppriv6 *ipriv = icmp->priv;
 	Iplifc *lifc;
+	Ndpkt* np;
+	Proto *pr;
 
-	if(!valid(icmp, ipifc, bp, ipriv)) 
-		goto raise;
+	bp = concatblock(bp);
+	p = (IPICMP*)bp->rp;
 
-	if(p->type <= Maxtype6)
-		ipriv->in[p->type]++;
-	else
+	if(!valid(icmp, ifc, bp, ipriv) || p->type > Maxtype6)
 		goto raise;
 
+	ipriv->in[p->type]++;
+
 	switch(p->type) {
 	case EchoRequestV6:
-		r = mkechoreply6(bp);
+		r = mkechoreply6(bp, ifc);
+		if(r == nil)
+			goto raise;
 		ipriv->out[EchoReply]++;
 		ipoput6(icmp->f, r, 0, MAXTTL, DFLTTOS, nil);
 		break;
 
 	case UnreachableV6:
-		if(p->code > 4)
-			msg = unreachcode[icmp6_unkn_code];
+		if(p->code >= nelem(unreachcode))
+			msg = unreachcode[Icmp6_unknown];
 		else
 			msg = unreachcode[p->code];
-
-		bp->rp += sizeof(IPICMP);
-		if(blocklen(bp) < 8){
+	Advise:
+		bp->rp += IPICMPSZ;
+		if(BLEN(bp) < MinAdvise){
 			ipriv->stats[LenErrs6]++;
 			goto raise;
 		}
 		p = (IPICMP *)bp->rp;
-		pr = Fsrcvpcolx(icmp->f, p->proto);
-		if(pr != nil && pr->advise != nil) {
-			(*pr->advise)(pr, bp, msg);
-			return;
-		}
 
-		bp->rp -= sizeof(IPICMP);
-		goticmpkt6(icmp, bp, 0);
-		break;
-
-	case TimeExceedV6:
-		if(p->code == 0){
-			sprint(m2, "ttl exceeded at %I", p->src);
-
-			bp->rp += sizeof(IPICMP);
-			if(blocklen(bp) < 8){
-				ipriv->stats[LenErrs6]++;
-				goto raise;
+		/* get rid of fragment header if this is the first fragment */
+		if(p->proto == FH && BLEN(bp) >= MinAdvise+IP6FHDR && MinAdvise > IP6HDR){
+			Fraghdr6 *fh = (Fraghdr6*)(bp->rp + IP6HDR);
+			if((nhgets(fh->offsetRM) & ~7) == 0){	/* first fragment */
+				p->proto = fh->nexthdr;
+				/* copy down payload over fragment header */
+				bp->rp += IP6HDR;
+				bp->wp -= IP6FHDR;
+				memmove(bp->rp, bp->rp+IP6FHDR, BLEN(bp));
+				hnputs(p->ploadlen, BLEN(bp));
+				bp->rp -= IP6HDR;
 			}
-			p = (IPICMP *)bp->rp;
+		}
+		if(p->proto != FH){
 			pr = Fsrcvpcolx(icmp->f, p->proto);
 			if(pr != nil && pr->advise != nil) {
-				(*pr->advise)(pr, bp, m2);
+				(*pr->advise)(pr, bp, msg);
 				return;
 			}
-			bp->rp -= sizeof(IPICMP);
 		}
+		bp->rp -= IPICMPSZ;
+		goticmpkt6(icmp, bp, 0);
+		break;
 
+	case TimeExceedV6:
+		if(p->code == 0){
+			snprint(msg = m2, sizeof m2, "ttl exceeded at %I", p->src);
+			goto Advise;
+		}
+		if(p->code == 1){
+			snprint(msg = m2, sizeof m2, "frag time exceeded at %I", p->src);
+			goto Advise;
+		}
 		goticmpkt6(icmp, bp, 0);
 		break;
 
+	case PacketTooBigV6:
+		snprint(msg = m2, sizeof(m2), "packet too big for %lud mtu at %I",
+			(ulong)nhgetl(p->icmpid), p->src);
+		goto Advise;
+
 	case RouterAdvert:
 	case RouterSolicit:
-		/* using lsrc as a temp, munge hdr for goticmp6 
-		memmove(lsrc, p->src, IPaddrlen);
-		memmove(p->src, p->dst, IPaddrlen);
-		memmove(p->dst, lsrc, IPaddrlen); */
-
 		goticmpkt6(icmp, bp, p->type);
 		break;
 
 	case NbrSolicit:
-		np = (Ndpkt*) p;
+		np = (Ndpkt*)p;
 		pktflags = 0;
-		switch (targettype(icmp->f, ipifc, np->target)) {
-		case t_unirany:
+		if(ifc->sendra6)
+			pktflags |= Rflag;
+		switch (targettype(icmp->f, ifc, np->target)) {
+		case Tunirany:
 			pktflags |= Oflag;
 			/* fall through */
 
-		case t_uniproxy: 
-			if(ipcmp(np->src, v6Unspecified) != 0) {
-				arpenter(icmp->f, V6, np->src, np->lnaddr, 8*np->olen-2, 0);
+		case Tuniproxy:
+			if(ipv6local(ifc, ia, 0, np->src)) {
+				if(arpenter(icmp->f, V6, np->src, np->lnaddr, 8*np->olen-2, ia, ifc, 0) < 0)
+					break;
 				pktflags |= Sflag;
-			}
-			if(ipv6local(ipifc, lsrc)) {
-				icmpna(icmp->f, lsrc, 
-				   (ipcmp(np->src, v6Unspecified)==0)?v6allnodesL:np->src,
-				   np->target, ipifc->mac, pktflags); 
-			}
-			else
-				freeblist(bp);
+			} else
+				ipmove(ia, np->target);
+			icmpna(icmp->f, ia, (pktflags & Sflag)? np->src: v6allnodesL,
+				np->target, ifc->mac, pktflags);
 			break;
-
-		case t_unitent:
-			/* not clear what needs to be done. send up
-			 * an icmp mesg saying don't use this address? */
-
-		default:
-			freeblist(bp);
+		case Tunitent:
+			/*
+			 * not clear what needs to be done. send up
+			 * an icmp mesg saying don't use this address?
+			 */
+			break;
 		}
-
+		freeblist(bp);
 		break;
 
 	case NbrAdvert:
-		np = (Ndpkt*) p;
+		np = (Ndpkt*)p;
 
-		/* if the target address matches one of the local interface 
-		 * address and the local interface address has tentative bit set, 
-		 * then insert into ARP table. this is so the duplication address 
-		 * detection part of ipconfig can discover duplication through 
-		 * the arp table
+		/*
+		 * if the target address matches one of the local interface
+		 * addresses and the local interface address has tentative bit
+		 * set, insert into ARP table. this is so the duplicate address
+		 * detection part of ipconfig can discover duplication through
+		 * the arp table.
 		 */
-		lifc = iplocalonifc(ipifc, np->target);
-		if(lifc && lifc->tentative)
-			refresh = 0;
-		arpenter(icmp->f, V6, np->target, np->lnaddr, 8*np->olen-2, refresh);
+		lifc = iplocalonifc(ifc, np->target);
+		if(lifc != nil && lifc->tentative)
+			arpenter(icmp->f, V6, np->target, np->lnaddr, 8*np->olen-2, np->target, ifc, 0);
+		else if(ipv6local(ifc, ia, 0, np->target))
+			arpenter(icmp->f, V6, np->target, np->lnaddr, 8*np->olen-2, ia, ifc, 1);
 		freeblist(bp);
 		break;
 
-	case PacketTooBigV6:
-
 	default:
 		goticmpkt6(icmp, bp, 0);
 		break;
 	}
 	return;
-
 raise:
 	freeblist(bp);
-
 }
 
-int
+static int
 icmpstats6(Proto *icmp6, char *buf, int len)
 {
 	Icmppriv6 *priv;
@@ -874,23 +827,28 @@
 	e = p+len;
 	for(i = 0; i < Nstats6; i++)
 		p = seprint(p, e, "%s: %lud\n", statnames6[i], priv->stats[i]);
-	for(i = 0; i <= Maxtype6; i++){
+	for(i = 0; i <= Maxtype6; i++)
 		if(icmpnames6[i])
-			p = seprint(p, e, "%s: %lud %lud\n", icmpnames6[i], priv->in[i], priv->out[i]);
-/*		else
-			p = seprint(p, e, "%d: %lud %lud\n", i, priv->in[i], priv->out[i]);
-*/
-	}
+			p = seprint(p, e, "%s: %lud %lud\n", icmpnames6[i],
+				priv->in[i], priv->out[i]);
 	return p - buf;
 }
 
 
-// need to import from icmp.c
+/* import from icmp.c */
 extern int	icmpstate(Conv *c, char *state, int n);
 extern char*	icmpannounce(Conv *c, char **argv, int argc);
 extern char*	icmpconnect(Conv *c, char **argv, int argc);
 extern void	icmpclose(Conv *c);
 
+static void
+icmpclose6(Conv *c)
+{
+	Icmpcb6 *icb = (Icmpcb6*)c->ptcl;	/* this conversation's icmp6 control block */
+	icb->headers = 0;	/* reset the headers field before handing off */
+	icmpclose(c);	/* shared close routine declared extern above (from icmp.c) */
+}
+
 void
 icmp6init(Fs *fs)
 {
@@ -902,7 +860,7 @@
 	icmp6->announce = icmpannounce;
 	icmp6->state = icmpstate;
 	icmp6->create = icmpcreate6;
-	icmp6->close = icmpclose;
+	icmp6->close = icmpclose6;
 	icmp6->rcv = icmpiput6;
 	icmp6->stats = icmpstats6;
 	icmp6->ctl = icmpctl6;
@@ -914,4 +872,3 @@
 
 	Fsproto(fs, icmp6);
 }
-
--- a/os/ip/igmp.c
+++ b/os/ip/igmp.c
@@ -1,3 +1,7 @@
+/*
+ * igmp - internet group management protocol
+ * unfinished.
+ */
 #include "u.h"
 #include "../port/lib.h"
 #include "mem.h"
@@ -40,8 +44,12 @@
 	uchar	unused;
 	uchar	igmpcksum[2];		/* checksum of igmp portion */
 	uchar	group[IPaddrlen];	/* multicast group */
+
+	uchar	payload[];
 };
 
+#define IGMPPKTSZ offsetof(IGMPpkt, payload[0])
+
 /*
  *  lists for group reports
  */
@@ -49,7 +57,7 @@
 struct IGMPrep
 {
 	IGMPrep		*next;
-	Media		*m;
+	Medium		*m;
 	int		ticks;
 	Multicast	*multi;
 };
@@ -76,19 +84,17 @@
 } stats;
 
 void
-igmpsendreport(Media *m, uchar *addr)
+igmpsendreport(Medium *m, uchar *addr)
 {
 	IGMPpkt *p;
 	Block *bp;
 
 	bp = allocb(sizeof(IGMPpkt));
-	if(bp == nil)
-		return;
 	p = (IGMPpkt*)bp->wp;
 	p->vihl = IP_VER4;
-	bp->wp += sizeof(IGMPpkt);
-	memset(bp->rp, 0, sizeof(IGMPpkt));
-	hnputl(p->src, Mediagetaddr(m));
+	bp->wp += IGMPPKTSZ;
+	memset(bp->rp, 0, IGMPPKTSZ);
+	hnputl(p->src, Mediumgetaddr(m));
 	hnputl(p->dst, Ipallsys);
 	p->vertype = (1<<4) | IGMPreport;
 	p->proto = IP_IGMPPROTO;
@@ -166,7 +172,7 @@
 }
 
 void
-igmpiput(Media *m, Ipifc *, Block *bp)
+igmpiput(Medium *m, Ipifc *, Block *bp)
 {
 	int n;
 	IGMPpkt *ghp;
@@ -206,7 +212,7 @@
 		if(rp != nil)
 			break;	/* already reporting */
 
-		mp = Mediacopymulti(m);
+		mp = Mediumcopymulti(m);
 		if(mp == nil)
 			break;
 
@@ -285,7 +291,7 @@
 	igmp.ptclsize = 0;
 
 	igmpreportfn = igmpsendreport;
-	kproc("igmpproc", igmpproc, 0, 0);
+	kproc("igmpproc", igmpproc, 0);
 
 	Fsproto(fs, &igmp);
 }
--- a/os/ip/il.c
+++ b/os/ip/il.c
@@ -189,7 +189,7 @@
 {
 	Ipht	ht;
 
-	ulong	stats[Nstats];
+	uvlong	stats[Nstats];
 
 	ulong	csumerr;		/* checksum errors */
 	ulong	hlenerr;		/* header length error */
@@ -208,7 +208,7 @@
 
 
 void	ilrcvmsg(Conv*, Block*);
-void	ilsendctl(Conv*, Ilhdr*, int, ulong, ulong, int);
+int	ilsendctl(Conv*, Ilhdr*, int, ulong, ulong, int);
 void	ilackq(Ilcb*, Block*);
 void	ilprocess(Conv*, Ilhdr*, Block*);
 void	ilpullup(Conv*);
@@ -251,6 +251,9 @@
 	e = Fsstdconnect(c, argv, argc);
 	if(e != nil)
 		return e;
+	if(c->ipversion != V4)
+		return "only IP version 4 supported";
+		
 	return ilstart(c, IL_CONNECT, fast);
 }
 
@@ -260,7 +263,7 @@
 	Ilcb *ic;
 
 	ic = (Ilcb*)(c->ptcl);
-	return snprint(state, n, "%s qin %d qout %d del %5.5d Br %5.5d md %5.5d una %5.5lud rex %5.5d rxq %5.5d max %5.5d",
+	return snprint(state, n, "%s qin %d qout %d del %5.5d Br %5.5d md %5.5d una %5.5lud rex %5.5d rxq %5.5d max %5.5d\n",
 		ilstates[ic->state],
 		c->rq ? qlen(c->rq) : 0,
 		c->wq ? qlen(c->wq) : 0,
@@ -434,7 +437,7 @@
 	p = buf;
 	e = p+len;
 	for(i = 0; i < Nstats; i++)
-		p = seprint(p, e, "%s: %lud\n", statnames[i], priv->stats[i]);
+		p = seprint(p, e, "%s: %llud\n", statnames[i], priv->stats[i]);
 	return p - buf;
 }
 
@@ -548,6 +551,9 @@
 
 	ih = (Ilhdr *)bp->rp;
 	plen = blocklen(bp);
+	if(plen > 0 && (ih->vihl&0xF0)!=IP_VER4)
+		goto raise;	/* ignore non V4 packets */
+
 	if(plen < IL_IPSIZE+IL_HDRSIZE){
 		netlog(il->f, Logil, "il: hlenerr\n");
 		ipriv->stats[HlenErrs]++;
@@ -572,7 +578,7 @@
 		else
 			st = iltype[ih->iltype];
 		ipriv->stats[CsumErrs]++;
-		netlog(il->f, Logil, "il: cksum %ux %ux, pkt(%s id %lud ack %lud %I/%d->%d)\n",
+		netlog(il->f, Logil, "il: cksum %ux, pkt(%s id %ud ack %ud %I/%d->%d)\n",
 			csum, st, nhgetl(ih->ilid), nhgetl(ih->ilack), raddr, sp, dp);
 		goto raise;
 	}
@@ -595,7 +601,7 @@
 			else
 				st = iltype[ih->iltype];
 			ilreject(il->f, ih);		/* no channel and not sync */
-			netlog(il->f, Logil, "il: no channel, pkt(%s id %lud ack %lud %I/%ud->%ud)\n",
+			netlog(il->f, Logil, "il: no channel, pkt(%s id %ud ack %ud %I/%ud->%ud)\n",
 				st, nhgetl(ih->ilid), nhgetl(ih->ilack), raddr, sp, dp); 
 			goto raise;
 		}
@@ -829,7 +835,7 @@
 
 	c = ic->conv;
 	id = nhgetl(h->ilid);
-	netlog(c->p->f, Logil, "il: rexmit %d %ud: %d %d: %i %d/%d\n", id, ic->recvd,
+	netlog(c->p->f, Logil, "il: rexmit %lud %lud: %d %lud: %I %d/%d\n", id, ic->recvd,
 		ic->rexmit, ic->timeout,
 		c->raddr, c->lport, c->rport);
 
@@ -852,7 +858,7 @@
 	ic = (Ilcb*)s->ptcl;
 
 	USED(ic);
-	netlog(s->p->f, Logilmsg, "%11s rcv %d/%d snt %d/%d pkt(%s id %d ack %d %d->%d) ",
+	netlog(s->p->f, Logilmsg, "%11s rcv %lud/%lud snt %lud/%lud pkt(%s id %ud ack %ud %ud->%ud) ",
 		ilstates[ic->state],  ic->rstart, ic->recvd, ic->start, 
 		ic->next, iltype[h->iltype], nhgetl(h->ilid), 
 		nhgetl(h->ilack), nhgets(h->ilsrc), nhgets(h->ildst));
@@ -859,7 +865,7 @@
 
 	_ilprocess(s, h, bp);
 
-	netlog(s->p->f, Logilmsg, "%11s rcv %d snt %d\n", ilstates[ic->state], ic->recvd, ic->next);
+	netlog(s->p->f, Logilmsg, "%11s rcv %lud snt %lud\n", ilstates[ic->state], ic->recvd, ic->next);
 }
 
 void
@@ -917,17 +923,12 @@
 		bp->list = nil;
 		dlen = nhgets(oh->illen)-IL_HDRSIZE;
 		bp = trimblock(bp, IL_IPSIZE+IL_HDRSIZE, dlen);
+			
 		/*
 		 * Upper levels don't know about multiple-block
 		 * messages so copy all into one (yick).
 		 */
-		bp = concatblock(bp);
-		if(bp == 0)
-			panic("ilpullup");
-		bp = packblock(bp);
-		if(bp == 0)
-			panic("ilpullup2");
-		qpass(s->rq, bp);
+		qpass(s->rq, packblock(concatblock(bp)));
 	}
 	qunlock(&ic->outo);
 }
@@ -948,7 +949,7 @@
 	id = nhgetl(h->ilid);
 	/* Window checks */
 	if(id <= ic->recvd || id > ic->recvd+ic->window) {
-		netlog(s->p->f, Logil, "il: message outside window %ud <%ud-%ud>: %i %d/%d\n",
+		netlog(s->p->f, Logil, "il: message outside window %lud <%lud-%lud>: %I %d/%d\n",
 			id, ic->recvd, ic->recvd+ic->window, s->raddr, s->lport, s->rport);
 		freeblist(bp);
 		return;
@@ -983,7 +984,7 @@
 	qunlock(&ic->outo);
 }
 
-void
+int
 ilsendctl(Conv *ipc, Ilhdr *inih, int type, ulong id, ulong ack, int ilspec)
 {
 	Ilhdr *ih;
@@ -1034,7 +1035,7 @@
 		hnputs(ih->ilsum, ptclcsum(bp, IL_IPSIZE, IL_HDRSIZE));
 
 if(ipc==nil)
-	panic("ipc is nil caller is %.8lux", getcallerpc(&ipc));
+	panic("ipc is nil caller is %#p", getcallerpc(&ipc));
 if(ipc->p==nil)
 	panic("ipc->p is nil");
 
@@ -1042,7 +1043,7 @@
 		iltype[ih->iltype], nhgetl(ih->ilid), nhgetl(ih->ilack), 
 		nhgets(ih->ilsrc), nhgets(ih->ildst));
 
-	ipoput4(ipc->p->f, bp, 0, ttl, tos, ipc);
+	return ipoput4(ipc->p->f, bp, 0, ttl, tos, ipc);
 }
 
 void
@@ -1145,6 +1146,8 @@
 
 	il = x;
 
+	while(waserror())
+		;
 loop:
 	tsleep(&up->sleep, return0, 0, Iltickms);
 	for(s = il->conv; s && *s; s++) {
@@ -1248,7 +1251,7 @@
 		qlock(&ipriv->apl);
 		if(ipriv->ackprocstarted == 0){
 			sprint(kpname, "#I%dilack", c->p->f->dev);
-			kproc(kpname, ilackproc, c->p, 0);
+			kproc(kpname, ilackproc, c->p);
 			ipriv->ackprocstarted = 1;
 		}
 		qunlock(&ipriv->apl);
@@ -1280,7 +1283,8 @@
 	case IL_CONNECT:
 		ic->state = Ilsyncer;
 		iphtadd(&ipriv->ht, c);
-		ilsendctl(c, nil, Ilsync, ic->start, ic->recvd, 0);
+		if(ilsendctl(c, nil, Ilsync, ic->start, ic->recvd, 0) < 0)
+			ilhangup(c, "no route");
 		break;
 	}
 
@@ -1332,6 +1336,8 @@
 		if(s->lport == psource)
 		if(ipcmp(s->laddr, source) == 0)
 		if(ipcmp(s->raddr, dest) == 0){
+			if(s->ignoreadvice)
+				break;
 			qunlock(il);
 			ic = (Ilcb*)s->ptcl;
 			switch(ic->state){
@@ -1380,12 +1386,6 @@
 	}
 }
 
-int
-ilgc(Proto *il)
-{
-	return natgc(il->ipproto);
-}
-
 void
 ilinit(Fs *f)
 {
@@ -1406,7 +1406,7 @@
 	il->advise = iladvise;
 	il->stats = ilxstats;
 	il->inuse = ilinuse;
-	il->gc = ilgc;
+	il->gc = nil;
 	il->ipproto = IP_ILPROTO;
 	il->nc = scalednconv();
 	il->ptclsize = sizeof(Ilcb);
--- /dev/null
+++ b/os/ip/inferno.c
@@ -1,0 +1,28 @@
+#include	"u.h"
+#include	"../port/lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"../port/error.h"
+
+/*
+ *  some hacks for commonality twixt inferno and plan9
+ */
+
+char*
+commonuser(void)
+{
+	return up->user;	/* user string held in up */
+}
+
+char*
+commonerror(void)
+{
+	return up->errstr;	/* error string held in up */
+}
+
+int
+bootpread(char*, ulong, int)
+{
+	return	0;	/* placeholder: all three arguments are ignored */
+}
--- a/os/ip/ip.c
+++ b/os/ip/ip.c
@@ -7,94 +7,6 @@
 
 #include	"ip.h"
 
-typedef struct IP		IP;
-typedef struct Fragment4	Fragment4;
-typedef struct Fragment6	Fragment6;
-typedef struct Ipfrag		Ipfrag;
-
-enum
-{
-	IP4HDR		= 20,		/* sizeof(Ip4hdr) */
-	IP6HDR		= 40,		/* sizeof(Ip6hdr) */
-	IP_HLEN4	= 0x05,		/* Header length in words */
-	IP_DF		= 0x4000,	/* Don't fragment */
-	IP_MF		= 0x2000,	/* More fragments */
-	IP6FHDR		= 8, 		/* sizeof(Fraghdr6) */
-	IP_MAX		= 64*1024,	/* Maximum Internet packet size */
-};
-
-#define BLKIPVER(xp)	(((Ip4hdr*)((xp)->rp))->vihl&0xF0)
-
-/* MIB II counters */
-enum
-{
-	Forwarding,
-	DefaultTTL,
-	InReceives,
-	InHdrErrors,
-	InAddrErrors,
-	ForwDatagrams,
-	InUnknownProtos,
-	InDiscards,
-	InDelivers,
-	OutRequests,
-	OutDiscards,
-	OutNoRoutes,
-	ReasmTimeout,
-	ReasmReqds,
-	ReasmOKs,
-	ReasmFails,
-	FragOKs,
-	FragFails,
-	FragCreates,
-
-	Nstats,
-};
-
-struct Fragment4
-{
-	Block*	blist;
-	Fragment4*	next;
-	ulong 	src;
-	ulong 	dst;
-	ushort	id;
-	ulong 	age;
-};
-
-struct Fragment6
-{
-	Block*	blist;
-	Fragment6*	next;
-	uchar 	src[IPaddrlen];
-	uchar 	dst[IPaddrlen];
-	uint	id;
-	ulong 	age;
-};
-
-struct Ipfrag
-{
-	ushort	foff;
-	ushort	flen;
-};
-
-/* an instance of IP */
-struct IP
-{
-	ulong		stats[Nstats];
-
-	QLock		fraglock4;
-	Fragment4*	flisthead4;
-	Fragment4*	fragfree4;
-	Ref		id4;
-
-	QLock		fraglock6;
-	Fragment6*	flisthead6;
-	Fragment6*	fragfree6;
-	Ref		id6;
-
-	int		iprouting;	/* true if we route like a gateway */
-};
-
 static char *statnames[] =
 {
 [Forwarding]	"Forwarding",
@@ -118,45 +30,11 @@
 [FragCreates]	"FragCreates",
 };
 
-#define BLKIP(xp)	((Ip4hdr*)((xp)->rp))
-/*
- * This sleazy macro relies on the media header size being
- * larger than sizeof(Ipfrag). ipreassemble checks this is true
- */
-#define BKFG(xp)	((Ipfrag*)((xp)->base))
+static Block*		ip4reassemble(IP*, int, Block*);
+static void		ipfragfree4(IP*, Fragment4*);
+static Fragment4*	ipfragallo4(IP*);
 
-ushort		ipcsum(uchar*);
-Block*		ip4reassemble(IP*, int, Block*, Ip4hdr*);
-void		ipfragfree4(IP*, Fragment4*);
-Fragment4*	ipfragallo4(IP*);
-
-
-void
-ip_init_6(Fs *f)
-{
-	V6params *v6p;
-
-	v6p = smalloc(sizeof(V6params));
-	
-	v6p->rp.mflag		= 0;		// default not managed
-	v6p->rp.oflag		= 0;
-	v6p->rp.maxraint	= 600000;	// millisecs
-	v6p->rp.minraint	= 200000;
-	v6p->rp.linkmtu		= 0;		// no mtu sent
-	v6p->rp.reachtime	= 0;
-	v6p->rp.rxmitra		= 0;
-	v6p->rp.ttl		= MAXTTL;
-	v6p->rp.routerlt	= 3*(v6p->rp.maxraint);	
-
-	v6p->hp.rxmithost	= 1000;		// v6 RETRANS_TIMER
-
-	v6p->cdrouter 		= -1;
-
-	f->v6p			= v6p;
-
-}
-
-void
+static void
 initfrag(IP *ip, int size)
 {
 	Fragment4 *fq4, *eq4;
@@ -189,6 +67,7 @@
 	IP *ip;
 
 	ip = smalloc(sizeof(IP));
+	ip->stats[DefaultTTL] = MAXTTL;
 	initfrag(ip, 100);
 	f->ip = ip;
 
@@ -202,11 +81,11 @@
 	if(f->ip->iprouting==0)
 		f->ip->stats[Forwarding] = 2;
 	else
-		f->ip->stats[Forwarding] = 1;	
+		f->ip->stats[Forwarding] = 1;
 }
 
 int
-ipoput4(Fs *f, Block *bp, int gating, int ttl, int tos, Conv *c)
+ipoput4(Fs *f, Block *bp, int gating, int ttl, int tos, Routehint *rh)
 {
 	Ipifc *ifc;
 	uchar *gate;
@@ -213,66 +92,41 @@
 	ulong fragoff;
 	Block *xp, *nb;
 	Ip4hdr *eh, *feh;
-	int lid, len, seglen, chunk, dlen, blklen, offset, medialen;
-	Route *r, *sr;
+	int lid, len, seglen, chunk, hlen, dlen, blklen, offset, medialen;
+	Route *r;
 	IP *ip;
 	int rv = 0;
 
 	ip = f->ip;
-
-	/* Fill out the ip header */
-	eh = (Ip4hdr*)(bp->rp);
-
 	ip->stats[OutRequests]++;
 
-	/* Number of uchars in data and ip header to write */
+	/* Fill out the ip header */
+	eh = (Ip4hdr*)bp->rp;
+	assert(BLEN(bp) >= IP4HDR);
 	len = blocklen(bp);
-
-	if(gating){
-		chunk = nhgets(eh->length);
-		if(chunk > len){
-			ip->stats[OutDiscards]++;
-			netlog(f, Logip, "short gated packet\n");
-			goto free;
-		}
-		if(chunk < len)
-			len = chunk;
-	}
 	if(len >= IP_MAX){
 		ip->stats[OutDiscards]++;
-		netlog(f, Logip, "exceeded ip max size %V\n", eh->dst);
+		netlog(f, Logip, "%V -> %V: exceeded ip max size: %d\n", eh->src, eh->dst, len);
 		goto free;
 	}
 
-	r = v4lookup(f, eh->dst, c);
-	if(r == nil){
+	r = v4lookup(f, eh->dst, eh->src, rh);
+	if(r == nil || (ifc = r->ifc) == nil){
 		ip->stats[OutNoRoutes]++;
-		netlog(f, Logip, "no interface %V\n", eh->dst);
+		netlog(f, Logip, "%V -> %V: no interface\n", eh->src, eh->dst);
 		rv = -1;
 		goto free;
 	}
 
-	ifc = r->ifc;
-	if(r->type & (Rifc|Runi))
+	if(r->type & (Rifc|Runi|Rbcast|Rmulti))
 		gate = eh->dst;
 	else
-	if(r->type & (Rbcast|Rmulti)) {
-		gate = eh->dst;
-		sr = v4lookup(f, eh->src, nil);
-		if(sr != nil && (sr->type & Runi))
-			ifc = sr->ifc;
-	}
-	else
 		gate = r->v4.gate;
 
-	if(!gating)
-		eh->vihl = IP_VER4|IP_HLEN4;
-	eh->ttl = ttl;
-	if(!gating)
-		eh->tos = tos;
-
-	if(!canrlock(ifc))
+	if(!canrlock(ifc)){
+		ip->stats[OutDiscards]++;
 		goto free;
+	}
 	if(waserror()){
 		runlock(ifc);
 		nexterror();
@@ -280,17 +134,18 @@
 	if(ifc->m == nil)
 		goto raise;
 
-	/* Output NAT */
-	if(nato(bp, ifc, f) != 0)
-		goto raise;
+	if(!gating){
+		eh->vihl = IP_VER4|IP_HLEN4;
+		eh->tos = tos;
+	}
+	eh->ttl = ttl;
 
 	/* If we dont need to fragment just send it */
 	medialen = ifc->maxtu - ifc->m->hsize;
 	if(len <= medialen) {
-		if(!gating)
-			hnputs(eh->id, incref(&ip->id4));
 		hnputs(eh->length, len);
 		if(!gating){
+			hnputs(eh->id, incref(&ip->id4));
 			eh->frag[0] = 0;
 			eh->frag[1] = 0;
 		}
@@ -297,31 +152,31 @@
 		eh->cksum[0] = 0;
 		eh->cksum[1] = 0;
 		hnputs(eh->cksum, ipcsum(&eh->vihl));
-		ifc->m->bwrite(ifc, bp, V4, gate);
+
+		ipifcoput(ifc, bp, V4, gate);
 		runlock(ifc);
 		poperror();
 		return 0;
 	}
 
-if((eh->frag[0] & (IP_DF>>8)) && !gating) print("%V: DF set\n", eh->dst);
-
 	if(eh->frag[0] & (IP_DF>>8)){
 		ip->stats[FragFails]++;
 		ip->stats[OutDiscards]++;
 		icmpcantfrag(f, bp, medialen);
-		netlog(f, Logip, "%V: eh->frag[0] & (IP_DF>>8)\n", eh->dst);
+		netlog(f, Logip, "%V -> %V: can't fragment with DF flag set\n", eh->src, eh->dst);
 		goto raise;
 	}
 
-	seglen = (medialen - IP4HDR) & ~7;
+	hlen = (eh->vihl & 0xF)<<2;
+	seglen = (medialen - hlen) & ~7;
 	if(seglen < 8){
 		ip->stats[FragFails]++;
 		ip->stats[OutDiscards]++;
-		netlog(f, Logip, "%V seglen < 8\n", eh->dst);
+		netlog(f, Logip, "%V -> %V: can't fragment with seglen < 8\n", eh->src, eh->dst);
 		goto raise;
 	}
 
-	dlen = len - IP4HDR;
+	dlen = len - hlen;
 	xp = bp;
 	if(gating)
 		lid = nhgets(eh->id);
@@ -328,8 +183,8 @@
 	else
 		lid = incref(&ip->id4);
 
-	offset = IP4HDR;
-	while(xp != nil && offset && offset >= BLEN(xp)) {
+	offset = hlen;
+	while(offset && offset >= BLEN(xp)) {
 		offset -= BLEN(xp);
 		xp = xp->next;
 	}
@@ -341,30 +196,30 @@
 		fragoff = 0;
 	dlen += fragoff;
 	for(; fragoff < dlen; fragoff += seglen) {
-		nb = allocb(IP4HDR+seglen);
-		feh = (Ip4hdr*)(nb->rp);
+		nb = allocb(hlen+seglen);
+		feh = (Ip4hdr*)nb->rp;
 
-		memmove(nb->wp, eh, IP4HDR);
-		nb->wp += IP4HDR;
+		memmove(nb->wp, eh, hlen);
+		nb->wp += hlen;
 
 		if((fragoff + seglen) >= dlen) {
 			seglen = dlen - fragoff;
 			hnputs(feh->frag, fragoff>>3);
 		}
-		else	
+		else
 			hnputs(feh->frag, (fragoff>>3)|IP_MF);
 
-		hnputs(feh->length, seglen + IP4HDR);
+		hnputs(feh->length, seglen + hlen);
 		hnputs(feh->id, lid);
 
 		/* Copy up the data area */
 		chunk = seglen;
 		while(chunk) {
-			if(!xp) {
+			if(xp == nil) {
 				ip->stats[OutDiscards]++;
 				ip->stats[FragFails]++;
 				freeblist(nb);
-				netlog(f, Logip, "!xp: chunk %d\n", chunk);
+				netlog(f, Logip, "xp == nil: chunk %d\n", chunk);
 				goto raise;
 			}
 			blklen = chunk;
@@ -376,12 +231,13 @@
 			chunk -= blklen;
 			if(xp->rp == xp->wp)
 				xp = xp->next;
-		} 
+		}
 
 		feh->cksum[0] = 0;
 		feh->cksum[1] = 0;
 		hnputs(feh->cksum, ipcsum(&feh->vihl));
-		ifc->m->bwrite(ifc, nb, V4, gate);
+
+		ipifcoput(ifc, nb, V4, gate);
 		ip->stats[FragCreates]++;
 	}
 	ip->stats[FragOKs]++;
@@ -396,17 +252,14 @@
 void
 ipiput4(Fs *f, Ipifc *ifc, Block *bp)
 {
-	int hl;
-	int hop, tos, proto, olen;
+	int hl, len, hop, tos;
+	uchar v6dst[IPaddrlen];
+	ushort frag;
 	Ip4hdr *h;
 	Proto *p;
-	ushort frag;
-	int notforme;
-	uchar *dp, v6dst[IPaddrlen];
 	IP *ip;
-	Route *r;
 
-	if(BLKIPVER(bp) != IP_VER4) {
+	if((bp->rp[0]&0xF0) != IP_VER4) {
 		ipiput6(f, ifc, bp);
 		return;
 	}
@@ -430,58 +283,45 @@
 			return;
 	}
 
-	h = (Ip4hdr*)(bp->rp);
-
-	/* Input NAT */
-	nati(bp, ifc);
-
-	/* dump anything that whose header doesn't checksum */
+	h = (Ip4hdr*)bp->rp;
+	hl = (h->vihl & 0xF)<<2;
+	if(hl < IP4HDR || hl > BLEN(bp)) {
+		ip->stats[InHdrErrors]++;
+		netlog(f, Logip, "%V -> %V: bad ip header length: %d\n", h->src, h->dst, hl);
+		goto drop;
+	}
 	if((bp->flag & Bipck) == 0 && ipcsum(&h->vihl)) {
 		ip->stats[InHdrErrors]++;
-		netlog(f, Logip, "ip: checksum error %V\n", h->src);
-		freeblist(bp);
+		netlog(f, Logip, "%V -> %V: bad ip header checksum\n", h->src, h->dst);
+		goto drop;
+	}
+	len = nhgets(h->length);
+	if(len < hl || (bp = trimblock(bp, 0, len)) == nil){
+		ip->stats[InHdrErrors]++;
+		netlog(f, Logip, "%V -> %V: bogus packet length: %d\n", h->src, h->dst, len);
+		if(bp != nil)
+			goto drop;
 		return;
 	}
-	v4tov6(v6dst, h->dst);
-	notforme = ipforme(f, v6dst) == 0;
+	h = (Ip4hdr*)bp->rp;
 
-	/* Check header length and version */
-	if((h->vihl&0x0F) != IP_HLEN4) {
-		hl = (h->vihl&0xF)<<2;
-		if(hl < (IP_HLEN4<<2)) {
-			ip->stats[InHdrErrors]++;
-			netlog(f, Logip, "ip: %V bad hivl %ux\n", h->src, h->vihl);
-			freeblist(bp);
-			return;
-		}
-	  /* If this is not routed strip off the options */
-		if(notforme == 0) {
-			olen = nhgets(h->length);
-			dp = bp->rp + (hl - (IP_HLEN4<<2));
-			memmove(dp, h, IP_HLEN4<<2);
-			bp->rp = dp;
-			h = (Ip4hdr*)(bp->rp);
-			h->vihl = (IP_VER4|IP_HLEN4);
-			hnputs(h->length, olen-hl+(IP_HLEN4<<2));
-		}
-	}
-
 	/* route */
-	if(notforme) {
-		Conv conv;
+	v4tov6(v6dst, h->dst);
+	if(!ipforme(f, v6dst)) {
+		Route *r;
+		Routehint rh;
+		Ipifc *nifc;
 
-		if(!ip->iprouting){
-			freeb(bp);
-			return;
-		}
+		if(!ip->iprouting)
+			goto drop;
 
 		/* don't forward to source's network */
-		conv.r = nil;
-		r = v4lookup(f, h->dst, &conv);
-		if(r == nil || r->ifc == ifc){
+		rh.r = nil;
+		r = v4lookup(f, h->dst, h->src, &rh);
+		if(r == nil || (nifc = r->ifc) == nil
+		|| (nifc == ifc && !ifc->reflect)){
 			ip->stats[OutDiscards]++;
-			freeblist(bp);
-			return;
+			goto drop;
 		}
 
 		/* don't forward if packet has timed out */
@@ -488,23 +328,18 @@
 		hop = h->ttl;
 		if(hop < 1) {
 			ip->stats[InHdrErrors]++;
-			icmpttlexceeded(f, ifc->lifc->local, bp);
-			freeblist(bp);
-			return;
+			icmpttlexceeded(f, ifc, bp);
+			goto drop;
 		}
 
 		/* reassemble if the interface expects it */
-if(r->ifc == nil) panic("nil route rfc");
-		if(r->ifc->reassemble){
+		if(nifc->reassemble){
 			frag = nhgets(h->frag);
-			if(frag) {
-				h->tos = 0;
-				if(frag & IP_MF)
-					h->tos = 1;
-				bp = ip4reassemble(ip, frag, bp, h);
+			if(frag & (IP_MF|IP_FO)) {
+				bp = ip4reassemble(ip, frag, bp);
 				if(bp == nil)
 					return;
-				h = (Ip4hdr*)(bp->rp);
+				h = (Ip4hdr*)bp->rp;
 			}
 		}
 
@@ -511,27 +346,30 @@
 		ip->stats[ForwDatagrams]++;
 		tos = h->tos;
 		hop = h->ttl;
-		ipoput4(f, bp, 1, hop - 1, tos, &conv);
+		ipoput4(f, bp, 1, hop - 1, tos, &rh);
 		return;
 	}
 
+	/* If this is not routed strip off the options */
+	if(hl > IP4HDR) {
+		hl -= IP4HDR;
+		len -= hl;
+		bp->rp += hl;
+		memmove(bp->rp, h, IP4HDR);
+		h = (Ip4hdr*)bp->rp;
+		h->vihl = IP_VER4|IP_HLEN4;
+		hnputs(h->length, len);
+	}
+
 	frag = nhgets(h->frag);
-	if(frag) {
-		h->tos = 0;
-		if(frag & IP_MF)
-			h->tos = 1;
-		bp = ip4reassemble(ip, frag, bp, h);
+	if(frag & (IP_MF|IP_FO)) {
+		bp = ip4reassemble(ip, frag, bp);
 		if(bp == nil)
 			return;
-		h = (Ip4hdr*)(bp->rp);
+		h = (Ip4hdr*)bp->rp;
 	}
 
-	/* don't let any frag info go up the stack */
-	h->frag[0] = 0;
-	h->frag[1] = 0;
-
-	proto = h->proto;
-	p = Fsrcvpcol(f, proto);
+	p = Fsrcvpcol(f, h->proto);
 	if(p != nil && p->rcv != nil) {
 		ip->stats[InDelivers]++;
 		(*p->rcv)(p, ifc, bp);
@@ -539,6 +377,7 @@
 	}
 	ip->stats[InDiscards]++;
 	ip->stats[InUnknownProtos]++;
+drop:
 	freeblist(bp);
 }
 
@@ -550,45 +389,43 @@
 	int i;
 
 	ip = f->ip;
-	ip->stats[DefaultTTL] = MAXTTL;
-
 	p = buf;
 	e = p+len;
-	for(i = 0; i < Nstats; i++)
-		p = seprint(p, e, "%s: %lud\n", statnames[i], ip->stats[i]);
+	for(i = 0; i < Nipstats; i++)
+		p = seprint(p, e, "%s: %llud\n", statnames[i], ip->stats[i]);
 	return p - buf;
 }
 
-Block*
-ip4reassemble(IP *ip, int offset, Block *bp, Ip4hdr *ih)
+static Block*
+ip4reassemble(IP *ip, int offset, Block *bp)
 {
-	int fend;
+	int ovlap, fragsize, len;
+	ulong src, dst;
 	ushort id;
+	Block *bl, **l, *prev;
 	Fragment4 *f, *fnext;
-	ulong src, dst;
-	Block *bl, **l, *last, *prev;
-	int ovlap, len, fragsize, pktposn;
+	Ipfrag *fp, *fq;
+	Ip4hdr *ih;
 
+	/*
+	 *  block lists are too hard, concatblock into a single block
+	 */
+	bp = concatblock(bp);
+
+	ih = (Ip4hdr*)bp->rp;
 	src = nhgetl(ih->src);
 	dst = nhgetl(ih->dst);
 	id = nhgets(ih->id);
+	fragsize = BLEN(bp) - ((ih->vihl&0xF)<<2);
 
-	/*
-	 *  block lists are too hard, pullupblock into a single block
-	 */
-	if(bp->next){
-		bp = pullupblock(bp, blocklen(bp));
-		ih = (Ip4hdr*)(bp->rp);
-	}
-
 	qlock(&ip->fraglock4);
 
 	/*
 	 *  find a reassembly queue for this fragment
 	 */
-	for(f = ip->flisthead4; f; f = fnext){
+	for(f = ip->flisthead4; f != nil; f = fnext){
 		fnext = f->next;	/* because ipfragfree4 changes the list */
-		if(f->src == src && f->dst == dst && f->id == id)
+		if(f->id == id && f->src == src && f->dst == dst)
 			break;
 		if(f->age < NOW){
 			ip->stats[ReasmTimeout]++;
@@ -601,22 +438,23 @@
 	 *  and get rid of any fragments that might go
 	 *  with it.
 	 */
-	if(!ih->tos && (offset & ~(IP_MF|IP_DF)) == 0) {
+	if((offset & (IP_MF|IP_FO)) == 0) {
 		if(f != nil) {
-			ipfragfree4(ip, f);
 			ip->stats[ReasmFails]++;
+			ipfragfree4(ip, f);
 		}
 		qunlock(&ip->fraglock4);
 		return bp;
 	}
 
-	if(bp->base+sizeof(Ipfrag) >= bp->rp){
-		bp = padblock(bp, sizeof(Ipfrag));
-		bp->rp += sizeof(Ipfrag);
+	if(bp->base+IPFRAGSZ > bp->rp){
+		bp = padblock(bp, IPFRAGSZ);
+		bp->rp += IPFRAGSZ;
 	}
 
-	BKFG(bp)->foff = offset<<3;
-	BKFG(bp)->flen = nhgets(ih->length)-IP4HDR;
+	fp = (Ipfrag*)bp->base;
+	fp->foff = (offset & IP_FO)<<3;
+	fp->flen = fragsize;
 
 	/* First fragment allocates a reassembly queue */
 	if(f == nil) {
@@ -627,8 +465,9 @@
 
 		f->blist = bp;
 
-		qunlock(&ip->fraglock4);
 		ip->stats[ReasmReqds]++;
+		qunlock(&ip->fraglock4);
+
 		return nil;
 	}
 
@@ -638,7 +477,7 @@
 	prev = nil;
 	l = &f->blist;
 	bl = f->blist;
-	while(bl != nil && BKFG(bp)->foff > BKFG(bl)->foff) {
+	while(bl != nil && fp->foff > ((Ipfrag*)bl->base)->foff) {
 		prev = bl;
 		l = &bl->next;
 		bl = bl->next;
@@ -645,15 +484,16 @@
 	}
 
 	/* Check overlap of a previous fragment - trim away as necessary */
-	if(prev) {
-		ovlap = BKFG(prev)->foff + BKFG(prev)->flen - BKFG(bp)->foff;
+	if(prev != nil) {
+		fq = (Ipfrag*)prev->base;
+		ovlap = fq->foff + fq->flen - fp->foff;
 		if(ovlap > 0) {
-			if(ovlap >= BKFG(bp)->flen) {
-				freeblist(bp);
+			if(ovlap >= fp->flen) {
 				qunlock(&ip->fraglock4);
+				freeb(bp);
 				return nil;
 			}
-			BKFG(prev)->flen -= ovlap;
+			fq->flen -= ovlap;
 		}
 	}
 
@@ -662,26 +502,26 @@
 	*l = bp;
 
 	/* Check to see if succeeding segments overlap */
-	if(bp->next) {
+	if(bp->next != nil) {
 		l = &bp->next;
-		fend = BKFG(bp)->foff + BKFG(bp)->flen;
+		offset = fp->foff + fp->flen;
 		/* Take completely covered segments out */
-		while(*l) {
-			ovlap = fend - BKFG(*l)->foff;
+		while((bl = *l) != nil) {
+			fq = (Ipfrag*)bl->base;
+			ovlap = offset - fq->foff;
 			if(ovlap <= 0)
 				break;
-			if(ovlap < BKFG(*l)->flen) {
-				BKFG(*l)->flen -= ovlap;
-				BKFG(*l)->foff += ovlap;
-				/* move up ih hdrs */
-				memmove((*l)->rp + ovlap, (*l)->rp, IP4HDR);
-				(*l)->rp += ovlap;
+			if(ovlap < fq->flen) {
+				/* move up ip header */
+				memmove(bl->rp + ovlap, bl->rp, BLEN(bl) - fq->flen);
+				bl->rp += ovlap;
+				fq->flen -= ovlap;
+				fq->foff += ovlap;
 				break;
 			}
-			last = (*l)->next;
-			(*l)->next = nil;
-			freeblist(*l);
-			*l = last;
+			*l = bl->next;
+			bl->next = nil;
+			freeb(bl);
 		}
 	}
 
@@ -689,35 +529,50 @@
 	 *  look for a complete packet.  if we get to a fragment
 	 *  without IP_MF set, we're done.
 	 */
-	pktposn = 0;
-	for(bl = f->blist; bl; bl = bl->next) {
-		if(BKFG(bl)->foff != pktposn)
+	offset = 0;
+	for(bl = f->blist; bl != nil; bl = bl->next, offset += fp->flen) {
+		fp = (Ipfrag*)bl->base;
+		if(fp->foff != offset)
 			break;
-		if((BLKIP(bl)->frag[0]&(IP_MF>>8)) == 0) {
-			bl = f->blist;
-			len = nhgets(BLKIP(bl)->length);
-			bl->wp = bl->rp + len;
 
-			/* Pullup all the fragment headers and
-			 * return a complete packet
-			 */
-			for(bl = bl->next; bl; bl = bl->next) {
-				fragsize = BKFG(bl)->flen;
-				len += fragsize;
-				bl->rp += IP4HDR;
-				bl->wp = bl->rp + fragsize;
-			}
+		ih = (Ip4hdr*)bl->rp;
+		if(ih->frag[0]&(IP_MF>>8))
+			continue;
 
-			bl = f->blist;
-			f->blist = nil;
+		bl = f->blist;
+		len = BLEN(bl);
+
+		/*
+		 * Pullup all the fragment headers and
+		 * return a complete packet
+		 */
+		for(bl = bl->next; bl != nil && len < IP_MAX; bl = bl->next) {
+			fq = (Ipfrag*)bl->base;
+			fragsize = fq->flen;
+			bl->rp = bl->wp - fragsize;
+			len += fragsize;
+		}
+
+		if(len >= IP_MAX){
 			ipfragfree4(ip, f);
-			ih = BLKIP(bl);
-			hnputs(ih->length, len);
+			ip->stats[ReasmFails]++;
 			qunlock(&ip->fraglock4);
-			ip->stats[ReasmOKs]++;
-			return bl;		
+			return nil;
 		}
-		pktposn += BKFG(bl)->flen;
+
+		bl = f->blist;
+		f->blist = nil;
+		ipfragfree4(ip, f);
+
+		ih = (Ip4hdr*)bl->rp;
+		ih->frag[0] = 0;
+		ih->frag[1] = 0;
+		hnputs(ih->length, len);
+
+		ip->stats[ReasmOKs]++;
+		qunlock(&ip->fraglock4);
+
+		return bl;
 	}
 	qunlock(&ip->fraglock4);
 	return nil;
@@ -726,20 +581,20 @@
 /*
  * ipfragfree4 - Free a list of fragments - assume hold fraglock4
  */
-void
+static void
 ipfragfree4(IP *ip, Fragment4 *frag)
 {
 	Fragment4 *fl, **l;
 
-	if(frag->blist)
+	if(frag->blist != nil)
 		freeblist(frag->blist);
-
-	frag->src = 0;
-	frag->id = 0;
 	frag->blist = nil;
+	frag->id = 0;
+	frag->src = 0;
+	frag->dst = 0;
 
 	l = &ip->flisthead4;
-	for(fl = *l; fl; fl = fl->next) {
+	for(fl = *l; fl != nil; fl = fl->next) {
 		if(fl == frag) {
 			*l = frag->next;
 			break;
@@ -755,7 +610,7 @@
 /*
  * ipfragallo4 - allocate a reassembly queue - assume hold fraglock4
  */
-Fragment4 *
+static Fragment4*
 ipfragallo4(IP *ip)
 {
 	Fragment4 *f;
@@ -762,7 +617,7 @@
 
 	while(ip->fragfree4 == nil) {
 		/* free last entry on fraglist */
-		for(f = ip->flisthead4; f->next; f = f->next)
+		for(f = ip->flisthead4; f->next != nil; f = f->next)
 			;
 		ipfragfree4(ip, f);
 	}
--- a/os/ip/ip.h
+++ b/os/ip/ip.h
@@ -1,35 +1,33 @@
 typedef struct	Conv	Conv;
+typedef struct	Fragment4 Fragment4;
+typedef struct	Fragment6 Fragment6;
 typedef struct	Fs	Fs;
 typedef union	Hwaddr	Hwaddr;
 typedef struct	IP	IP;
 typedef struct	IPaux	IPaux;
+typedef struct	Ip4hdr	Ip4hdr;
+typedef struct	Ipfrag	Ipfrag;
 typedef struct	Ipself	Ipself;
 typedef struct	Ipselftab	Ipselftab;
 typedef struct	Iplink	Iplink;
 typedef struct	Iplifc	Iplifc;
 typedef struct	Ipmulti	Ipmulti;
-typedef struct	IProuter IProuter;
 typedef struct	Ipifc	Ipifc;
 typedef struct	Iphash	Iphash;
 typedef struct	Ipht	Ipht;
 typedef struct	Netlog	Netlog;
-typedef struct	Ifclog	Ifclog;
 typedef struct	Medium	Medium;
 typedef struct	Proto	Proto;
 typedef struct	Arpent	Arpent;
 typedef struct	Arp Arp;
 typedef struct	Route	Route;
+typedef struct	Routehint Routehint;
 
 typedef struct	Routerparams	Routerparams;
 typedef struct 	Hostparams	Hostparams;
-typedef struct 	V6router	V6router;
-typedef struct	V6params	V6params;
+typedef struct	v6params	v6params;
 
-typedef struct Ip4hdr     Ip4hdr;
-typedef struct Nat	Nat;
-
 #pragma incomplete Arp
-#pragma	incomplete Ifclog
 #pragma incomplete Ipself
 #pragma incomplete Ipselftab
 #pragma incomplete IP
@@ -39,10 +37,9 @@
 {
 	Addrlen=	64,
 	Maxproto=	20,
-	Nhash=		64,
-	Maxincall=	5,
-	Nchans=		16383,
-	MAClen=		16,		/* longest mac address */
+	Maxincall=	10,
+	Nchans=		1024,
+	MAClen=		8,		/* longest mac address */
 
 	MAXTTL=		255,
 	DFLTTOS=	0,
@@ -57,6 +54,12 @@
 	V6=		6,
 	IP_VER4= 	0x40,
 	IP_VER6=	0x60,
+	IP_HLEN4=	5,		/* v4: Header length in words */
+	IP_DF=		0x4000,		/* v4: Don't fragment */
+	IP_MF=		0x2000,		/* v4: More fragments */
+	IP_FO=		0x1fff,		/* v4: Fragment offset */
+	IP4HDR=		IP_HLEN4<<2,	/* sizeof(Ip4hdr) */
+	IP_MAX=		64*1024,	/* Max. Internet packet size, v4 & v6 */
 
 	/* 2^Lroot trees in the root table */
 	Lroot=		10,
@@ -73,6 +76,79 @@
 	Connected=	4,
 };
 
+/* MIB II counters */
+enum
+{
+	Forwarding,
+	DefaultTTL,
+	InReceives,
+	InHdrErrors,
+	InAddrErrors,
+	ForwDatagrams,
+	InUnknownProtos,
+	InDiscards,
+	InDelivers,
+	OutRequests,
+	OutDiscards,
+	OutNoRoutes,
+	ReasmTimeout,
+	ReasmReqds,
+	ReasmOKs,
+	ReasmFails,
+	FragOKs,
+	FragFails,
+	FragCreates,
+
+	Nipstats,
+};
+
+struct Fragment4
+{
+	Block*	blist;
+	Fragment4*	next;
+	ulong 	src;
+	ulong 	dst;
+	ushort	id;
+	ulong 	age;
+};
+
+struct Fragment6
+{
+	Block*	blist;
+	Fragment6*	next;
+	uchar 	src[IPaddrlen];
+	uchar 	dst[IPaddrlen];
+	uint	id;
+	ulong 	age;
+};
+
+struct Ipfrag
+{
+	ushort	foff;
+	ushort	flen;
+	uchar	payload[];
+};
+
+#define IPFRAGSZ offsetof(Ipfrag, payload[0])
+
+/* an instance of IP */
+struct IP
+{
+	uvlong		stats[Nipstats];
+
+	QLock		fraglock4;
+	Fragment4*	flisthead4;
+	Fragment4*	fragfree4;
+	Ref		id4;
+
+	QLock		fraglock6;
+	Fragment6*	flisthead6;
+	Fragment6*	fragfree6;
+	Ref		id6;
+
+	int		iprouting;	/* true if we route like a gateway */
+};
+
 /* on the wire packet header */
 struct Ip4hdr
 {
@@ -86,9 +162,14 @@
 	uchar	cksum[2];	/* Header checksum */
 	uchar	src[4];		/* IP source */
 	uchar	dst[4];		/* IP destination */
-	uchar	data[1];	/* start of data */
 };
 
+struct Routehint
+{
+	Route	*r;			/* last route used */
+	ulong	rgen;			/* routetable generation for *r */
+};
+
 /*
  *  one per conversation directory
  */
@@ -100,9 +181,9 @@
 	Proto*	p;
 
 	int	restricted;		/* remote port is restricted */
+	int	ignoreadvice;		/* don't terminate connection on icmp errors */
 	uint	ttl;			/* max time to live */
 	uint	tos;			/* type of service */
-	int	ignoreadvice;		/* don't terminate connection on icmp errors */
 
 	uchar	ipversion;
 	uchar	laddr[IPaddrlen];	/* local IP address */
@@ -139,8 +220,7 @@
 
 	void*	ptcl;			/* protocol specific stuff */
 
-	Route	*r;			/* last route used */
-	ulong	rgen;			/* routetable generation for *r */
+	Routehint;
 };
 
 struct Medium
@@ -161,18 +241,8 @@
 	/* process packets written to 'data' */
 	void	(*pktin)(Fs *f, Ipifc *ifc, Block *bp);
 
-	/* routes for router boards */
-	void	(*addroute)(Ipifc *ifc, int, uchar*, uchar*, uchar*, int);
-	void	(*remroute)(Ipifc *ifc, int, uchar*, uchar*);
-	void	(*flushroutes)(Ipifc *ifc);
-
-	/* for routing multicast groups */
-	void	(*joinmulti)(Ipifc *ifc, uchar *a, uchar *ia);
-	void	(*leavemulti)(Ipifc *ifc, uchar *a, uchar *ia);
-
 	/* address resolution */
-	void	(*ares)(Fs*, int, uchar*, uchar*, int, int);	/* resolve */
-	void	(*areg)(Ipifc*, uchar*);			/* register */
+	void	(*areg)(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *ip);
 
 	/* v6 address generation */
 	void	(*pref2addr)(uchar *pref, uchar *ea);
@@ -187,12 +257,13 @@
 	uchar	mask[IPaddrlen];
 	uchar	remote[IPaddrlen];
 	uchar	net[IPaddrlen];
+	uchar	type;		/* route type */
 	uchar	tentative;	/* =1 => v6 dup disc on, =0 => confirmed unique */
 	uchar	onlink;		/* =1 => onlink, =0 offlink. */
 	uchar	autoflag;	/* v6 autonomous flag */
-	long 	validlt;	/* v6 valid lifetime */
-	long 	preflt;		/* v6 preferred lifetime */
-	long	origint;	/* time when addr was added */
+	ulong 	validlt;	/* v6 valid lifetime */
+	ulong 	preflt;		/* v6 preferred lifetime */
+	ulong	origint;	/* time when addr was added */
 	Iplink	*link;		/* addresses linked to this lifc */
 	Iplifc	*next;
 };
@@ -203,25 +274,25 @@
 	Ipself	*self;
 	Iplifc	*lifc;
 	Iplink	*selflink;	/* next link for this local address */
-	Iplink	*lifclink;	/* next link for this ifc */
-	ulong	expire;
+	Iplink	*lifclink;	/* next link for this lifc */
 	Iplink	*next;		/* free list */
+	ulong	expire;
 	int	ref;
 };
 
-/* rfc 2461, pp.40--43. */
+/* rfc 2461, pp.40—43. */
 
 /* default values, one per stack */
 struct Routerparams {
-	int	mflag;
-	int	oflag;
-	int 	maxraint;
-	int	minraint;
-	int	linkmtu;
-	int	reachtime;
-	int	rxmitra;
-	int	ttl;
-	int	routerlt;	
+	int	mflag;		/* flag: managed address configuration */
+	int	oflag;		/* flag: other stateful configuration */
+	int 	maxraint;	/* max. router adv interval (ms) */
+	int	minraint;	/* min. router adv interval (ms) */
+	int	linkmtu;	/* mtu options */
+	int	reachtime;	/* reachable time */
+	int	rxmitra;	/* retransmit interval */
+	int	ttl;		/* cur hop count limit */
+	int	routerlt;	/* router lifetime */
 };
 
 struct Hostparams {
@@ -231,22 +302,18 @@
 struct Ipifc
 {
 	RWlock;
-	
+
 	Conv	*conv;		/* link to its conversation structure */
 	char	dev[64];	/* device we're attached to */
 	Medium	*m;		/* Media pointer */
 	int	maxtu;		/* Maximum transfer unit */
 	int	mintu;		/* Minumum tranfer unit */
-	int	mbps;		/* megabits per second */
 	void	*arg;		/* medium specific */
-	int	reassemble;	/* reassemble IP packets before forwarding */
 
-	/* these are used so that we can unbind on the fly */
-	Lock	idlock;
+	uchar	reflect;	/* allow forwarded packets to go out the same interface */
+	uchar	reassemble;	/* reassemble IP packets before forwarding to this interface */
+	
 	uchar	ifcid;		/* incremented each 'bind/unbind/add/remove' */
-	int	ref;		/* number of proc's using this ipifc */
-	Rendez	wait;		/* where unbinder waits for ref == 0 */
-	int	unbinding;
 
 	uchar	mac[MAClen];	/* MAC address */
 
@@ -255,10 +322,16 @@
 	ulong	in, out;	/* message statistics */
 	ulong	inerr, outerr;	/* ... */
 
-	uchar	sendra6;	/* == 1 => send router advs on this ifc	*/
-	uchar	recvra6;	/* == 1 => recv router advs on this ifc */
-	Routerparams rp;	/* router parameters as in RFC 2461, pp.40--43. 
+	uchar	sendra6;	/* flag: send router advs on this ifc */
+	uchar	recvra6;	/* flag: recv router advs on this ifc */
+	Routerparams rp;	/* router parameters as in RFC 2461, pp.40—43.
 					used only if node is router */
+
+	int	speed;		/* link speed in bits per second */
+	int	delay;		/* burst delay in ms */
+	int	burst;		/* burst delay in bytes */
+	int	load;		/* bytes in flight */
+	ulong	ticks;
 };
 
 /*
@@ -330,20 +403,11 @@
 	int		nc;		/* number of conversations */
 	int		ac;
 	Qid		qid;		/* qid for protocol directory */
-	ushort		nextport;
 	ushort		nextrport;
 
 	void		*priv;
 };
 
-/*
- *  Stream for sending packets to user level
- */
-struct IProuter {
-	QLock;
-	int	opens;
-	Queue	*q;
-};
 
 /*
  *  one per IP protocol stack
@@ -362,8 +426,7 @@
 	IP	*ip;
 	Ipselftab	*self;
 	Arp	*arp;
-	V6params	*v6p;
-	IProuter iprouter;
+	v6params	*v6p;
 
 	Route	*v4root[1<<Lroot];	/* v4 routing forest */
 	Route	*v6root[1<<Lroot];	/* v6 routing forest */
@@ -370,7 +433,6 @@
 	Route	*queue;			/* used as temp when reinjecting routes */
 
 	Netlog	*alog;
-	Ifclog	*ilog;
 
 	char	ndb[1024];		/* an ndb entry for this interface */
 	int	ndbvers;
@@ -377,23 +439,10 @@
 	long	ndbmtime;
 };
 
-/* one per default router known to host */
-struct V6router {
-	uchar	inuse;
-	Ipifc	*ifc;
-	int	ifcid;
-	uchar	routeraddr[IPaddrlen];
-	long	ltorigin;
-	Routerparams	rp;
-};
-
-struct V6params
+struct v6params
 {
 	Routerparams	rp;		/* v6 params, one copy per node now */
 	Hostparams	hp;
-	V6router	v6rlist[3];	/* max 3 default routers, currently */
-	int		cdrouter;	/* uses only v6rlist[cdrouter] if   */ 
-					/* cdrouter >= 0. */
 };
 
 
@@ -410,8 +459,7 @@
 char*	Fsstdbind(Conv*, char**, int);
 ulong	scalednconv(void);
 void	closeconv(Conv*);
-
-/* 
+/*
  *  logging
  */
 enum
@@ -434,7 +482,6 @@
 	Logrudpmsg=	1<<16,
 	Logesp=		1<<17,
 	Logtcpwin=	1<<18,
-	Lognat=		1<<19,
 };
 
 void	netloginit(Fs*);
@@ -449,17 +496,17 @@
 void	ifclogopen(Fs*, Chan*);
 void	ifclogclose(Fs*, Chan*);
 
+#pragma varargck argpos netlog	3
+
 /*
  *  iproute.c
  */
 typedef	struct RouteTree RouteTree;
-typedef struct Routewalk Routewalk;
 typedef struct V4route V4route;
 typedef struct V6route V6route;
 
 enum
 {
-
 	/* type bits */
 	Rv4=		(1<<0),		/* this is a version 4 route */
 	Rifc=		(1<<1),		/* this route is a directly connected interface */
@@ -468,27 +515,18 @@
 	Rbcast=		(1<<4),		/* a broadcast self address */
 	Rmulti=		(1<<5),		/* a multicast self address */
 	Rproxy=		(1<<6),		/* this route should be proxied */
+	Rsrc=		(1<<7),		/* source specific route */
 };
 
-struct Routewalk
-{
-	int	o;
-	int	h;
-	char*	p;
-	char*	e;
-	void*	state;
-	void	(*walk)(Route*, Routewalk*);
-};
-
 struct	RouteTree
 {
-	Route*	right;
-	Route*	left;
-	Route*	mid;
+	Route	*mid;
+	Route	*left;
+	Route	*right;
+	Ipifc	*ifc;
+	uchar	ifcid;		/* must match ifc->ifcid */
 	uchar	depth;
 	uchar	type;
-	uchar	ifcid;		/* must match ifc->id */
-	Ipifc	*ifc;
 	char	tag[4];
 	int	ref;
 };
@@ -497,6 +535,10 @@
 {
 	ulong	address;
 	ulong	endaddress;
+
+	ulong	source;
+	ulong	endsource;
+
 	uchar	gate[IPv4addrlen];
 };
 
@@ -504,6 +546,10 @@
 {
 	ulong	address[IPllen];
 	ulong	endaddress[IPllen];
+
+	ulong	source[IPllen];
+	ulong	endsource[IPllen];
+
 	uchar	gate[IPaddrlen];
 };
 
@@ -516,17 +562,16 @@
 		V4route v4;
 	};
 };
-extern void	v4addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type);
-extern void	v6addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type);
-extern void	v4delroute(Fs *f, uchar *a, uchar *mask, int dolock);
-extern void	v6delroute(Fs *f, uchar *a, uchar *mask, int dolock);
-extern Route*	v4lookup(Fs *f, uchar *a, Conv *c);
-extern Route*	v6lookup(Fs *f, uchar *a, Conv *c);
+
+extern void	addroute(Fs *f, uchar *a, uchar *mask, uchar *s, uchar *smask, uchar *gate, int type, Ipifc *ifc, char *tag);
+extern void	remroute(Fs *f, uchar *a, uchar *mask, uchar *s, uchar *smask, uchar *gate, int type, Ipifc *ifc, char *tag);
+extern Route*	v4lookup(Fs *f, uchar *a, uchar *s, Routehint *h);
+extern Route*	v6lookup(Fs *f, uchar *a, uchar *s, Routehint *h);
+extern Route*	v4source(Fs *f, uchar *a, uchar *s);
+extern Route*	v6source(Fs *f, uchar *a, uchar *s);
 extern long	routeread(Fs *f, char*, ulong, int);
 extern long	routewrite(Fs *f, Chan*, char*, int);
-extern void	routetype(int, char*);
-extern void	ipwalkroutes(Fs*, Routewalk*);
-extern void	convroute(Route*, uchar*, uchar*, uchar*, char*, int*);
+extern void	routetype(int type, char p[8]);
 
 /*
  *  devip.c
@@ -543,7 +588,6 @@
 };
 
 extern IPaux*	newipaux(char*, char*);
-extern void	setlport(Conv*);
 
 /*
  *  arp.c
@@ -552,18 +596,16 @@
 {
 	uchar	ip[IPaddrlen];
 	uchar	mac[MAClen];
-	Medium	*type;			/* media type */
-	Arpent*	hash;
-	Block*	hold;
-	Block*	last;
-	uint	ctime;			/* time entry was created or refreshed */
-	uint	utime;			/* time entry was last used */
-	uchar	state;
+	Arpent	*hash;
 	Arpent	*nextrxt;		/* re-transmit chain */
-	uint	rtime;			/* time for next retransmission */
-	uchar	rxtsrem;
+	Block	*hold;
+	Block	*last;
 	Ipifc	*ifc;
 	uchar	ifcid;			/* must match ifc->id */
+	uchar	state;
+	uchar	rxtsrem;		/* re-transmissions remaining */
+	ulong	ctime;			/* time entry was created or refreshed */
+	ulong	utime;			/* time entry was last used */
 };
 
 extern void	arpinit(Fs*);
@@ -572,15 +614,17 @@
 extern Arpent*	arpget(Arp*, Block *bp, int version, Ipifc *ifc, uchar *ip, uchar *h);
 extern void	arprelease(Arp*, Arpent *a);
 extern Block*	arpresolve(Arp*, Arpent *a, Medium *type, uchar *mac);
-extern void	arpenter(Fs*, int version, uchar *ip, uchar *mac, int len, int norefresh);
+extern int	arpenter(Fs*, int version, uchar *ip, uchar *mac, int n, uchar *ia, Ipifc *ifc, int refresh);
+extern void	ndpsendsol(Fs*, Ipifc*, Arpent*);
 
 /*
  * ipaux.c
  */
 
-extern int	myetheraddr(uchar*, char*);
-extern ulong	parseip(uchar*, char*);
-extern ulong	parseipmask(uchar*, char*);
+extern int	parseether(uchar*, char*);
+extern vlong	parseip(uchar*, char*);
+extern vlong	parseipmask(uchar*, char*, int);
+extern vlong	parseipandmask(uchar*, uchar*, char*, char*);
 extern char*	v4parseip(uchar*, char*);
 extern void	maskip(uchar *from, uchar *mask, uchar *to);
 extern int	parsemac(uchar *to, char *from, int len);
@@ -589,12 +633,10 @@
 extern void	v4tov6(uchar *v6, uchar *v4);
 extern int	v6tov4(uchar *v4, uchar *v6);
 extern int	eipfmt(Fmt*);
+extern int	convipvers(Conv *c);
 
 #define	ipmove(x, y) memmove(x, y, IPaddrlen)
 #define	ipcmp(x, y) ( (x)[IPaddrlen-1] != (y)[IPaddrlen-1] || memcmp(x, y, IPaddrlen) )
- 
-#define	ip4move(x, y) memmove(x, y, IPv4addrlen)
-#define	ip4cmp(x, y) ( (x)[IPv4addrlen-1] != (y)[IPv4addrlen-1] || memcmp(x, y, IPv4addrlen) )
 
 extern uchar IPv4bcast[IPaddrlen];
 extern uchar IPv4bcastobs[IPaddrlen];
@@ -612,7 +654,6 @@
 extern Medium	ethermedium;
 extern Medium	nullmedium;
 extern Medium	pktmedium;
-extern Medium	tripmedium;
 
 /*
  *  ipifc.c
@@ -619,33 +660,24 @@
  */
 extern Medium*	ipfindmedium(char *name);
 extern void	addipmedium(Medium *med);
+extern void	ipifcoput(Ipifc *ifc, Block *bp, int version, uchar *ip);
 extern int	ipforme(Fs*, uchar *addr);
-extern int	iptentative(Fs*, uchar *addr);
-extern int	ipisbm(uchar *);
-extern int	ipismulticast(uchar *);
-extern Ipifc*	findipifc(Fs*, uchar *remote, int type);
-extern void	findprimaryip(Fs*, uchar*);
+extern int	ipismulticast(uchar *ip);
+extern Ipifc*	findipifc(Fs*, uchar *local, uchar *remote, int type);
+extern Ipifc*	findipifcstr(Fs *f, char *s);
 extern void	findlocalip(Fs*, uchar *local, uchar *remote);
-extern int	ipv4local(Ipifc *ifc, uchar *addr);
-extern int	ipv6local(Ipifc *ifc, uchar *addr);
-extern int	ipv6anylocal(Ipifc *ifc, uchar *addr);
+extern int	ipv4local(Ipifc *ifc, uchar *local, int prefixlen, uchar *remote);
+extern int	ipv6local(Ipifc *ifc, uchar *local, int prefixlen, uchar *remote);
 extern Iplifc*	iplocalonifc(Ipifc *ifc, uchar *ip);
+extern Iplifc*	ipremoteonifc(Ipifc *ifc, uchar *ip);
 extern int	ipproxyifc(Fs *f, Ipifc *ifc, uchar *ip);
-extern int	ipismulticast(uchar *ip);
-extern int	ipisbooting(void);
-extern int	ipifccheckin(Ipifc *ifc, Medium *med);
-extern void	ipifccheckout(Ipifc *ifc);
-extern int	ipifcgrab(Ipifc *ifc);
-extern void	ipifcaddroute(Fs*, int, uchar*, uchar*, uchar*, int);
-extern void	ipifcremroute(Fs*, int, uchar*, uchar*);
 extern void	ipifcremmulti(Conv *c, uchar *ma, uchar *ia);
 extern void	ipifcaddmulti(Conv *c, uchar *ma, uchar *ia);
 extern char*	ipifcrem(Ipifc *ifc, char **argv, int argc);
 extern char*	ipifcadd(Ipifc *ifc, char **argv, int argc, int tentative, Iplifc *lifcp);
 extern long	ipselftabread(Fs*, char *a, ulong offset, int n);
-extern char*	ipifcaddpref6(Ipifc *ifc, char**argv, int argc);
-extern void	ipsendra6(Fs *f, int on);
-
+extern char*	ipifcadd6(Ipifc *ifc, char**argv, int argc);
+extern char*	ipifcremove6(Ipifc *ifc, char**argv, int argc);
 /*
  *  ip.c
  */
@@ -652,37 +684,26 @@
 extern void	iprouting(Fs*, int);
 extern void	icmpnoconv(Fs*, Block*);
 extern void	icmpcantfrag(Fs*, Block*, int);
-extern void	icmpttlexceeded(Fs*, uchar*, Block*);
+extern void	icmpttlexceeded(Fs*, Ipifc*, Block*);
 extern ushort	ipcsum(uchar*);
 extern void	ipiput4(Fs*, Ipifc*, Block*);
 extern void	ipiput6(Fs*, Ipifc*, Block*);
-extern int	ipoput4(Fs*, Block*, int, int, int, Conv*);
-extern int	ipoput6(Fs*, Block*, int, int, int, Conv*);
+extern int	ipoput4(Fs*, Block*, int, int, int, Routehint*);
+extern int	ipoput6(Fs*, Block*, int, int, int, Routehint*);
 extern int	ipstats(Fs*, char*, int);
 extern ushort	ptclbsum(uchar*, int);
 extern ushort	ptclcsum(Block*, int, int);
 extern void	ip_init(Fs*);
-extern void	update_mtucache(uchar*, ulong);
-extern ulong	restrict_mtu(uchar*, ulong);
+extern void	ip_init_6(Fs*);
 
 /*
  * bootp.c
  */
-char*	(*bootp)(Ipifc*);
-int	(*bootpread)(char*, ulong, int);
+extern int	bootpread(char*, ulong, int);
 
 /*
- *  iprouter.c
- */
-void	useriprouter(Fs*, Ipifc*, Block*);
-void	iprouteropen(Fs*);
-void	iprouterclose(Fs*);
-long	iprouterread(Fs*, void*, int);
-
-/*
  *  resolving inferno/plan9 differences
  */
-Chan*		commonfdtochan(int, int, int, int);
 char*		commonuser(void);
 char*		commonerror(void);
 
@@ -695,15 +716,3 @@
  *  global to all of the stack
  */
 extern void	(*igmpreportfn)(Ipifc*, uchar*);
-
-/*
- * nat.c
- */
-extern int	nato(Block*, Ipifc*, Fs*);
-extern void	nati(Block*, Ipifc*);
-extern int	natgc(uchar);
-
-extern int	addnataddr(uchar*, uchar*, Iplifc*);
-extern int	removenataddr(uchar*, uchar*, Iplifc*);
-extern void	shownataddr(void);
-extern void flushnataddr(void);
--- a/os/ip/ipaux.c
+++ b/os/ip/ipaux.c
@@ -5,49 +5,8 @@
 #include	"fns.h"
 #include	"../port/error.h"
 #include	"ip.h"
-#include  "ipv6.h"
+#include	"ipv6.h"
 
-/*
- *  well known IP addresses
- */
-uchar IPv4bcast[IPaddrlen] = {
-	0, 0, 0, 0,
-	0, 0, 0, 0,
-	0, 0, 0xff, 0xff,
-	0xff, 0xff, 0xff, 0xff
-};
-uchar IPv4allsys[IPaddrlen] = {
-	0, 0, 0, 0,
-	0, 0, 0, 0,
-	0, 0, 0xff, 0xff,
-	0xe0, 0, 0, 0x01
-};
-uchar IPv4allrouter[IPaddrlen] = {
-	0, 0, 0, 0,
-	0, 0, 0, 0,
-	0, 0, 0xff, 0xff,
-	0xe0, 0, 0, 0x02
-};
-uchar IPallbits[IPaddrlen] = {
-	0xff, 0xff, 0xff, 0xff,
-	0xff, 0xff, 0xff, 0xff,
-	0xff, 0xff, 0xff, 0xff,
-	0xff, 0xff, 0xff, 0xff
-};
-
-uchar IPnoaddr[IPaddrlen];
-
-/*
- *  prefix of all v4 addresses
- */
-uchar v4prefix[IPaddrlen] = {
-	0, 0, 0, 0,
-	0, 0, 0, 0,
-	0, 0, 0xff, 0xff,
-	0, 0, 0, 0
-};
-
-
 char *v6hdrtypes[Maxhdrtype] =
 {
 	[HBH]		"HopbyHop",
@@ -54,7 +13,7 @@
 	[ICMP]		"ICMP",
 	[IGMP]		"IGMP",
 	[GGP]		"GGP",
-	[IPINIP]		"IP",
+	[IPINIP]	"IP",
 	[ST]		"ST",
 	[TCP]		"TCP",
 	[UDP]		"UDP",
@@ -87,6 +46,7 @@
 	0, 0, 0, 0,
 	0, 0, 0, 0x01
 };
+
 uchar v6linklocal[IPaddrlen] = {
 	0xfe, 0x80, 0, 0,
 	0, 0, 0, 0,
@@ -99,26 +59,8 @@
 	0, 0, 0, 0,
 	0, 0, 0, 0
 };
-int v6llpreflen = 8;	// link-local prefix length
-uchar v6sitelocal[IPaddrlen] = {
-	0xfe, 0xc0, 0, 0,
-	0, 0, 0, 0,
-	0, 0, 0, 0,
-	0, 0, 0, 0
-};
-uchar v6sitelocalmask[IPaddrlen] = {
-	0xff, 0xff, 0xff, 0xff,
-	0xff, 0xff, 0xff, 0xff,
-	0, 0, 0, 0,
-	0, 0, 0, 0
-};
-int v6slpreflen = 6;	// site-local prefix length
-uchar v6glunicast[IPaddrlen] = {
-	0x08, 0, 0, 0,
-	0, 0, 0, 0,
-	0, 0, 0, 0,
-	0, 0, 0, 0
-};
+int v6llpreflen = 8;	/* link-local prefix length in bytes */
+
 uchar v6multicast[IPaddrlen] = {
 	0xff, 0, 0, 0,
 	0, 0, 0, 0,
@@ -131,7 +73,8 @@
 	0, 0, 0, 0,
 	0, 0, 0, 0
 };
-int v6mcpreflen = 1;	// multicast prefix length
+int v6mcpreflen = 1;	/* multicast prefix length */
+
 uchar v6allnodesN[IPaddrlen] = {
 	0xff, 0x01, 0, 0,
 	0, 0, 0, 0,
@@ -138,6 +81,12 @@
 	0, 0, 0, 0,
 	0, 0, 0, 0x01
 };
+uchar v6allroutersN[IPaddrlen] = {
+	0xff, 0x01, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0, 0x02
+};
 uchar v6allnodesNmask[IPaddrlen] = {
 	0xff, 0xff, 0, 0,
 	0, 0, 0, 0,
@@ -144,7 +93,8 @@
 	0, 0, 0, 0,
 	0, 0, 0, 0
 };
-int v6aNpreflen = 2;	// all nodes (N) prefix
+int v6aNpreflen = 2;	/* all nodes (N) prefix */
+
 uchar v6allnodesL[IPaddrlen] = {
 	0xff, 0x02, 0, 0,
 	0, 0, 0, 0,
@@ -151,19 +101,6 @@
 	0, 0, 0, 0,
 	0, 0, 0, 0x01
 };
-uchar v6allnodesLmask[IPaddrlen] = {
-	0xff, 0xff, 0, 0,
-	0, 0, 0, 0,
-	0, 0, 0, 0,
-	0, 0, 0, 0
-};
-int v6aLpreflen = 2;	// all nodes (L) prefix
-uchar v6allroutersN[IPaddrlen] = {
-	0xff, 0x01, 0, 0,
-	0, 0, 0, 0,
-	0, 0, 0, 0,
-	0, 0, 0, 0x02
-};
 uchar v6allroutersL[IPaddrlen] = {
 	0xff, 0x02, 0, 0,
 	0, 0, 0, 0,
@@ -170,12 +107,14 @@
 	0, 0, 0, 0,
 	0, 0, 0, 0x02
 };
-uchar v6allroutersS[IPaddrlen] = {
-	0xff, 0x05, 0, 0,
+uchar v6allnodesLmask[IPaddrlen] = {
+	0xff, 0xff, 0, 0,
 	0, 0, 0, 0,
 	0, 0, 0, 0,
-	0, 0, 0, 0x02
+	0, 0, 0, 0
 };
+int v6aLpreflen = 2;	/* all nodes (L) prefix */
+
 uchar v6solicitednode[IPaddrlen] = {
 	0xff, 0x02, 0, 0,
 	0, 0, 0, 0,
@@ -190,9 +129,6 @@
 };
 int v6snpreflen = 13;
 
-
-
-
 ushort
 ptclcsum(Block *bp, int offset, int len)
 {
@@ -215,7 +151,7 @@
 	if(bp->next == nil) {
 		if(blocklen < len)
 			len = blocklen;
-		return ~ptclbsum(addr, len) & 0xffff;
+		return ptclbsum(addr, len) ^ 0xffff;
 	}
 
 	losum = 0;
@@ -247,7 +183,7 @@
 	while((csum = losum>>16) != 0)
 		losum = csum + (losum & 0xffff);
 
-	return ~losum & 0xffff;
+	return losum ^ 0xffff;
 }
 
 enum
@@ -255,306 +191,9 @@
 	Isprefix= 16,
 };
 
-static uchar prefixvals[256] =
-{
-[0x00] 0 | Isprefix,
-[0x80] 1 | Isprefix,
-[0xC0] 2 | Isprefix,
-[0xE0] 3 | Isprefix,
-[0xF0] 4 | Isprefix,
-[0xF8] 5 | Isprefix,
-[0xFC] 6 | Isprefix,
-[0xFE] 7 | Isprefix,
-[0xFF] 8 | Isprefix,
-};
-
-int
-eipfmt(Fmt *f)
-{
-	char buf[5*8];
-	static char *efmt = "%.2lux%.2lux%.2lux%.2lux%.2lux%.2lux";
-	static char *ifmt = "%d.%d.%d.%d";
-	uchar *p, ip[16];
-	ulong *lp;
-	ushort s;
-	int i, j, n, eln, eli;
-
-	switch(f->r) {
-	case 'E':		/* Ethernet address */
-		p = va_arg(f->args, uchar*);
-		return fmtprint(f, efmt, p[0], p[1], p[2], p[3], p[4], p[5]);
-
-	case 'I':		/* Ip address */
-		p = va_arg(f->args, uchar*);
-common:
-		if(memcmp(p, v4prefix, 12) == 0)
-			return fmtprint(f, ifmt, p[12], p[13], p[14], p[15]);
-
-		/* find longest elision */
-		eln = eli = -1;
-		for(i = 0; i < 16; i += 2){
-			for(j = i; j < 16; j += 2)
-				if(p[j] != 0 || p[j+1] != 0)
-					break;
-			if(j > i && j - i > eln){
-				eli = i;
-				eln = j - i;
-			}
-		}
-
-		/* print with possible elision */
-		n = 0;
-		for(i = 0; i < 16; i += 2){
-			if(i == eli){
-				n += sprint(buf+n, "::");
-				i += eln;
-				if(i >= 16)
-					break;
-			} else if(i != 0)
-				n += sprint(buf+n, ":");
-			s = (p[i]<<8) + p[i+1];
-			n += sprint(buf+n, "%ux", s);
-		}
-		return fmtstrcpy(f, buf);
-
-	case 'i':		/* v6 address as 4 longs */
-		lp = va_arg(f->args, ulong*);
-		for(i = 0; i < 4; i++)
-			hnputl(ip+4*i, *lp++);
-		p = ip;
-		goto common;
-
-	case 'V':		/* v4 ip address */
-		p = va_arg(f->args, uchar*);
-		return fmtprint(f, ifmt, p[0], p[1], p[2], p[3]);
-
-	case 'M':		/* ip mask */
-		p = va_arg(f->args, uchar*);
-
-		/* look for a prefix mask */
-		for(i = 0; i < 16; i++)
-			if(p[i] != 0xff)
-				break;
-		if(i < 16){
-			if((prefixvals[p[i]] & Isprefix) == 0)
-				goto common;
-			for(j = i+1; j < 16; j++)
-				if(p[j] != 0)
-					goto common;
-			n = 8*i + (prefixvals[p[i]] & ~Isprefix);
-		} else
-			n = 8*16;
-
-		/* got one, use /xx format */
-		return fmtprint(f, "/%d", n);
-	}
-	return fmtstrcpy(f, "(eipfmt)");
-}
-
 #define CLASS(p) ((*(uchar*)(p))>>6)
 
-extern char*
-v4parseip(uchar *to, char *from)
-{
-	int i;
-	char *p;
-
-	p = from;
-	for(i = 0; i < 4 && *p; i++){
-		to[i] = strtoul(p, &p, 0);
-		if(*p == '.')
-			p++;
-	}
-	switch(CLASS(to)){
-	case 0:	/* class A - 1 uchar net */
-	case 1:
-		if(i == 3){
-			to[3] = to[2];
-			to[2] = to[1];
-			to[1] = 0;
-		} else if(i == 2){
-			to[3] = to[1];
-			to[1] = 0;
-		}
-		break;
-	case 2:	/* class B - 2 uchar net */
-		if(i == 3){
-			to[3] = to[2];
-			to[2] = 0;
-		}
-		break;
-	}
-	return p;
-}
-
-int
-isv4(uchar *ip)
-{
-	return memcmp(ip, v4prefix, IPv4off) == 0;
-}
-
-
-/*
- *  the following routines are unrolled with no memset's to speed
- *  up the usual case
- */
 void
-v4tov6(uchar *v6, uchar *v4)
-{
-	v6[0] = 0;
-	v6[1] = 0;
-	v6[2] = 0;
-	v6[3] = 0;
-	v6[4] = 0;
-	v6[5] = 0;
-	v6[6] = 0;
-	v6[7] = 0;
-	v6[8] = 0;
-	v6[9] = 0;
-	v6[10] = 0xff;
-	v6[11] = 0xff;
-	v6[12] = v4[0];
-	v6[13] = v4[1];
-	v6[14] = v4[2];
-	v6[15] = v4[3];
-}
-
-int
-v6tov4(uchar *v4, uchar *v6)
-{
-	if(v6[0] == 0
-	&& v6[1] == 0
-	&& v6[2] == 0
-	&& v6[3] == 0
-	&& v6[4] == 0
-	&& v6[5] == 0
-	&& v6[6] == 0
-	&& v6[7] == 0
-	&& v6[8] == 0
-	&& v6[9] == 0
-	&& v6[10] == 0xff
-	&& v6[11] == 0xff)
-	{
-		v4[0] = v6[12];
-		v4[1] = v6[13];
-		v4[2] = v6[14];
-		v4[3] = v6[15];
-		return 0;
-	} else {
-		memset(v4, 0, 4);
-		return -1;
-	}
-}
-
-ulong
-parseip(uchar *to, char *from)
-{
-	int i, elipsis = 0, v4 = 1;
-	ulong x;
-	char *p, *op;
-
-	memset(to, 0, IPaddrlen);
-	p = from;
-	for(i = 0; i < 16 && *p; i+=2){
-		op = p;
-		x = strtoul(p, &p, 16);
-		if(*p == '.' || (*p == 0 && i == 0)){
-			p = v4parseip(to+i, op);
-			i += 4;
-			break;
-		} else {
-			to[i] = x>>8;
-			to[i+1] = x;
-		}
-		if(*p == ':'){
-			v4 = 0;
-			if(*++p == ':'){
-				elipsis = i+2;
-				p++;
-			}
-		}
-	}
-	if(i < 16){
-		memmove(&to[elipsis+16-i], &to[elipsis], i-elipsis);
-		memset(&to[elipsis], 0, 16-i);
-	}
-	if(v4){
-		to[10] = to[11] = 0xff;
-		return nhgetl(to+12);
-	} else
-		return 6;
-}
-
-/*
- *  hack to allow ip v4 masks to be entered in the old
- *  style
- */
-ulong
-parseipmask(uchar *to, char *from)
-{
-	ulong x;
-	int i;
-	uchar *p;
-
-	if(*from == '/'){
-		/* as a number of prefix bits */
-		i = atoi(from+1);
-		if(i < 0)
-			i = 0;
-		if(i > 128)
-			i = 128;
-		memset(to, 0, IPaddrlen);
-		for(p = to; i >= 8; i -= 8)
-			*p++ = 0xff;
-		if(i > 0)
-			*p = ~((1<<(8-i))-1);
-		x = nhgetl(to+IPv4off);
-	} else {
-		/* as a straight bit mask */
-		x = parseip(to, from);
-		if(memcmp(to, v4prefix, IPv4off) == 0)
-			memset(to, 0xff, IPv4off);
-	}
-	return x;
-}
-
-void
-maskip(uchar *from, uchar *mask, uchar *to)
-{
-	int i;
-
-	for(i = 0; i < IPaddrlen; i++)
-		to[i] = from[i] & mask[i];
-}
-
-uchar classmask[4][16] = {
-	0xff,0xff,0xff,0xff,  0xff,0xff,0xff,0xff,  0xff,0xff,0xff,0xff,  0xff,0x00,0x00,0x00,
-	0xff,0xff,0xff,0xff,  0xff,0xff,0xff,0xff,  0xff,0xff,0xff,0xff,  0xff,0x00,0x00,0x00,
-	0xff,0xff,0xff,0xff,  0xff,0xff,0xff,0xff,  0xff,0xff,0xff,0xff,  0xff,0xff,0x00,0x00,
-	0xff,0xff,0xff,0xff,  0xff,0xff,0xff,0xff,  0xff,0xff,0xff,0xff,  0xff,0xff,0xff,0x00,
-};
-
-uchar*
-defmask(uchar *ip)
-{
-	if(isv4(ip))
-		return classmask[ip[IPv4off]>>6];
-	else {
-		if(ipcmp(ip, v6loopback) == 0)
-			return IPallbits;
-		else if(memcmp(ip, v6linklocal, v6llpreflen) == 0)
-			return v6linklocalmask;
-		else if(memcmp(ip, v6sitelocal, v6slpreflen) == 0)
-			return v6sitelocalmask;
-		else if(memcmp(ip, v6solicitednode, v6snpreflen) == 0)
-			return v6solicitednodemask;
-		else if(memcmp(ip, v6multicast, v6mcpreflen) == 0)
-			return v6multicastmask;
-		return IPallbits;
-	}
-}
-
-void
 ipv62smcast(uchar *smcast, uchar *a)
 {
 	assert(IPaddrlen == 16);
@@ -599,7 +238,7 @@
 ulong
 iphash(uchar *sa, ushort sp, uchar *da, ushort dp)
 {
-	return ((sa[IPaddrlen-1]<<24) ^ (sp << 16) ^ (da[IPaddrlen-1]<<8) ^ dp ) % Nhash;
+	return ((sa[IPaddrlen-1]<<24) ^ (sp << 16) ^ (da[IPaddrlen-1]<<8) ^ dp ) % Nipht;
 }
 
 void
@@ -678,7 +317,7 @@
 			return c;
 		}
 	}
-	
+
 	/* match local address and port */
 	hv = iphash(IPnoaddr, 0, da, dp);
 	for(h = ht->tab[hv]; h != nil; h = h->next){
@@ -690,7 +329,7 @@
 			return c;
 		}
 	}
-	
+
 	/* match just port */
 	hv = iphash(IPnoaddr, 0, IPnoaddr, dp);
 	for(h = ht->tab[hv]; h != nil; h = h->next){
@@ -702,7 +341,7 @@
 			return c;
 		}
 	}
-	
+
 	/* match local address */
 	hv = iphash(IPnoaddr, 0, da, 0);
 	for(h = ht->tab[hv]; h != nil; h = h->next){
@@ -714,7 +353,7 @@
 			return c;
 		}
 	}
-	
+
 	/* look for something that matches anything */
 	hv = iphash(IPnoaddr, 0, IPnoaddr, 0);
 	for(h = ht->tab[hv]; h != nil; h = h->next){
@@ -726,4 +365,13 @@
 	}
 	unlock(ht);
 	return nil;
+}
+
+int
+convipvers(Conv *c)
+{
+	if(isv4(c->raddr) && isv4(c->laddr) || ipcmp(c->raddr, IPnoaddr) == 0)
+		return V4;
+	else
+		return V6;
 }
--- a/os/ip/ipifc.c
+++ b/os/ip/ipifc.c
@@ -11,17 +11,14 @@
 #define DPRINT if(0)print
 
 enum {
-	Maxmedia = 32,
-	Nself = Maxmedia*5,
-	NHASH = (1<<6),
-	NCACHE = 256,
-	QMAX = 64*1024-1,
+	Maxmedia	= 32,
+	Nself		= Maxmedia*5,
+	NHASH		= 1<<6,
+	NCACHE		= 256,
+	QMAX		= 192*1024-1,
 };
 
-Medium *media[Maxmedia] =
-{
-	0
-};
+Medium *media[Maxmedia] = { 0 };
 
 /*
  *  cache of local addresses (addresses we answer to)
@@ -29,12 +26,10 @@
 struct Ipself
 {
 	uchar	a[IPaddrlen];
-	Ipself	*hnext;		/* next address in the hash table */
+	Ipself	*next;		/* next address in the hash table */
 	Iplink	*link;		/* binding twixt Ipself and Ipifc */
 	ulong	expire;
 	uchar	type;		/* type of address */
-	int	ref;
-	Ipself	*next;		/* free list */
 };
 
 struct Ipselftab
@@ -64,11 +59,47 @@
 
 static void	addselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a, int type);
 static void	remselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a);
-static char*	ipifcjoinmulti(Ipifc *ifc, char **argv, int argc);
-static char*	ipifcleavemulti(Ipifc *ifc, char **argv, int argc);
-static void	ipifcregisterproxy(Fs*, Ipifc*, uchar*);
-static char*	ipifcremlifc(Ipifc*, Iplifc*);
+static void	ipifcregisteraddr(Fs*, Ipifc*, Iplifc*, uchar*);
+static void	ipifcregisterproxy(Fs*, Ipifc*, uchar*, int);
+static char*	ipifcremlifc(Ipifc*, Iplifc**);
 
+static char Ebound[] = "interface already bound";
+static char Eunbound[] = "interface not bound";
+
+enum {
+	unknownv6,		/* UGH */
+	unspecifiedv6,
+	linklocalv6,
+	globalv6,
+};
+
+static int
+v6addrtype(uchar *addr)
+{
+	if(isv4(addr) || ipcmp(addr, IPnoaddr) == 0)
+		return unknownv6;
+	else if(islinklocal(addr) || ipcmp(addr, v6loopback) == 0 ||
+	    isv6mcast(addr) && (addr[1] & 0xF) <= Link_local_scop)
+		return linklocalv6;
+	else
+		return globalv6;
+}
+
+static int
+comprefixlen(uchar *a, uchar *b, int n)
+{
+	int i, c;
+
+	for(i = 0; i < n; i++){
+		if((c = a[i] ^ b[i]) == 0)
+			continue;
+		for(i <<= 3; (c & 0x80) == 0; i++)
+			c <<= 1;
+		return i;
+	}
+	return i << 3;
+}
+
 /*
  *  link in a new medium
  */
@@ -121,7 +152,7 @@
 	wlock(ifc);
 	if(ifc->m != nil){
 		wunlock(ifc);
-		return "interface already bound";	
+		return Ebound;
 	}
 	if(waserror()){
 		wunlock(ifc);
@@ -142,18 +173,14 @@
 	ifc->m = m;
 	ifc->mintu = ifc->m->mintu;
 	ifc->maxtu = ifc->m->maxtu;
+	ifc->delay = 40;
+	ifc->speed = 0;
 	if(ifc->m->unbindonclose == 0)
 		ifc->conv->inuse++;
-	ifc->rp.mflag = 0;		// default not managed
-	ifc->rp.oflag = 0;
-	ifc->rp.maxraint = 600000;	// millisecs
-	ifc->rp.minraint = 200000;
-	ifc->rp.linkmtu = 0;		// no mtu sent
-	ifc->rp.reachtime = 0;
-	ifc->rp.rxmitra = 0;
-	ifc->rp.ttl = MAXTTL;
-	ifc->rp.routerlt = 3*(ifc->rp.maxraint);
 
+	/* default router parameters */
+	ifc->rp = c->p->f->v6p->rp;
+
 	/* any ancillary structures (like routes) no longer pertain */
 	ifc->ifcid++;
 
@@ -170,29 +197,44 @@
 
 /*
  *  detach a device from an interface, close the interface
- *  called with ifc->conv closed
  */
 static char*
 ipifcunbind(Ipifc *ifc)
 {
-	char *err;
+	Medium *m;
 
-	if(waserror()){
+	wlock(ifc);
+	m = ifc->m;
+	if(m == nil){
 		wunlock(ifc);
-		nexterror();
+		return Eunbound;
 	}
-	wlock(ifc);
 
-	/* dissociate routes */
-	if(ifc->m != nil && ifc->m->unbindonclose == 0)
-		ifc->conv->inuse--;
-	ifc->ifcid++;
+	/* disassociate logical interfaces (before zeroing ifc->arg) */
+	while(ifc->lifc != nil)
+		ipifcremlifc(ifc, &ifc->lifc);
 
 	/* disassociate device */
-	if(ifc->m != nil && ifc->m->unbind)
-		(*ifc->m->unbind)(ifc);
+	if(m->unbind != nil){
+		extern Medium nullmedium;
+
+		/*
+		 * unbind() might unlock the ifc, so change the medium
+		 * to the nullmedium to prevent packets from getting
+		 * sent while the medium is shutting down.
+		 */
+		ifc->m = &nullmedium;
+
+		if(!waserror()){
+			(*m->unbind)(ifc);
+			poperror();
+		}
+	}
+
 	memset(ifc->dev, 0, sizeof(ifc->dev));
 	ifc->arg = nil;
+
+	ifc->reflect = 0;
 	ifc->reassemble = 0;
 
 	/* close queues to stop queuing of packets */
@@ -200,26 +242,22 @@
 	qclose(ifc->conv->wq);
 	qclose(ifc->conv->sq);
 
-	/* disassociate logical interfaces */
-	while(ifc->lifc){
-		err = ipifcremlifc(ifc, ifc->lifc);
-		if(err)
-			error(err);
-	}
-
+	/* dissociate routes */
+	ifc->ifcid++;
+	if(m->unbindonclose == 0)
+		ifc->conv->inuse--;
 	ifc->m = nil;
 	wunlock(ifc);
-	poperror();
+
 	return nil;
 }
 
+char sfixedformat[] = "device %s maxtu %d sendra %d recvra %d mflag %d oflag %d"
+" maxraint %d minraint %d linkmtu %d reachtime %d rxmitra %d ttl %d routerlt %d"
+" pktin %lud pktout %lud errin %lud errout %lud speed %d delay %d\n";
 
-
-char sfixedformat[] = "device %s maxtu %d sendra %d recvra %d mflag %d oflag %d maxraint %d minraint %d linkmtu %d reachtime %d rxmitra %d ttl %d routerlt %d pktin %lud pktout %lud errin %lud errout %lud\n";
-
 char slineformat[] = "	%-40I %-10M %-40I %-12lud %-12lud\n";
 
-
 static int
 ipifcstate(Conv *c, char *state, int n)
 {
@@ -228,19 +266,18 @@
 	int m;
 
 	ifc = (Ipifc*)c->ptcl;
-
 	m = snprint(state, n, sfixedformat,
 		ifc->dev, ifc->maxtu, ifc->sendra6, ifc->recvra6,
 		ifc->rp.mflag, ifc->rp.oflag, ifc->rp.maxraint,
 		ifc->rp.minraint, ifc->rp.linkmtu, ifc->rp.reachtime,
 		ifc->rp.rxmitra, ifc->rp.ttl, ifc->rp.routerlt,
-		ifc->in, ifc->out, ifc->inerr, ifc->outerr);
+		ifc->in, ifc->out, ifc->inerr, ifc->outerr,
+		ifc->speed, ifc->delay);
 
 	rlock(ifc);
-	for(lifc = ifc->lifc; lifc && n > m; lifc = lifc->next)
-		m += snprint(state+m, n - m, slineformat,
-			lifc->local, lifc->mask, lifc->remote,
-			lifc->validlt, lifc->preflt);
+	for(lifc = ifc->lifc; lifc != nil && n > m; lifc = lifc->next)
+		m += snprint(state+m, n - m, slineformat, lifc->local,
+			lifc->mask, lifc->remote, lifc->validlt, lifc->preflt);
 	if(ifc->lifc == nil)
 		m += snprint(state+m, n - m, "\n");
 	runlock(ifc);
@@ -256,13 +293,11 @@
 	int m;
 
 	ifc = (Ipifc*)c->ptcl;
-
-	m = 0;
-
 	rlock(ifc);
-	for(lifc = ifc->lifc; lifc; lifc = lifc->next){
+	m = 0;
+	for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next){
 		m += snprint(state+m, n - m, "%-40.40I ->", lifc->local);
-		for(link = lifc->link; link; link = link->lifclink)
+		for(link = lifc->link; link != nil; link = link->lifclink)
 			m += snprint(state+m, n - m, " %-40.40I", link->self->a);
 		m += snprint(state+m, n - m, "\n");
 	}
@@ -279,6 +314,59 @@
 	return ifc->m != nil;
 }
 
+static void
+ipifcadjustburst(Ipifc *ifc)
+{
+	int burst;
+
+	burst = ((vlong)ifc->delay * ifc->speed) / 8000;
+	if(burst < ifc->maxtu)
+		burst = ifc->maxtu;
+	ifc->burst = burst;
+}
+
+static void
+ipifcsetdelay(Ipifc *ifc, int delay)
+{
+	if(delay < 0)
+		delay = 0;
+	else if(delay > 1000)
+		delay = 1000;
+	ifc->delay = delay;
+	ipifcadjustburst(ifc);
+}
+
+static void
+ipifcsetspeed(Ipifc *ifc, int speed)
+{
+	if(speed < 0)
+		speed = 0;
+	ifc->speed = speed;
+	ifc->load = 0;
+	ipifcadjustburst(ifc);
+}
+
+void
+ipifcoput(Ipifc *ifc, Block *bp, int version, uchar *ip)
+{
+	if(ifc->speed){
+		ulong now = MACHP(0)->ticks;
+		int dt = TK2MS(now - ifc->ticks);
+		ifc->ticks = now;
+		ifc->load -= ((vlong)dt * ifc->speed) / 8000;
+		if(ifc->load < 0 || dt < 0 || dt > 1000)
+			ifc->load = 0;
+		else if(ifc->load > ifc->burst){
+			freeblist(bp);
+			return;
+		}
+	}
+	bp = concatblock(bp);
+	ifc->load += BLEN(bp);
+	ifc->m->bwrite(ifc, bp, version, ip);
+}
+
+
 /*
  *  called when a process writes to an interface's 'data'
  */
@@ -294,18 +382,15 @@
 		return;
 
 	ifc = (Ipifc*)c->ptcl;
-	if(!canrlock(ifc)){
-		freeb(bp);
-		return;
-	}
+	rlock(ifc);
 	if(waserror()){
 		runlock(ifc);
 		nexterror();
 	}
-	if(ifc->m == nil || ifc->m->pktin == nil)
-		freeb(bp);
-	else
+	if(ifc->m != nil && ifc->m->pktin != nil)
 		(*ifc->m->pktin)(c->p->f, ifc, bp);
+	else
+		freeb(bp);
 	runlock(ifc);
 	poperror();
 }
@@ -319,27 +404,26 @@
 	Ipifc *ifc;
 
 	c->rq = qopen(QMAX, 0, 0, 0);
-	c->sq = qopen(2*QMAX, 0, 0, 0);
 	c->wq = qopen(QMAX, Qkick, ipifckick, c);
+	c->sq = qopen(QMAX, 0, 0, 0);
+	if(c->rq == nil || c->wq == nil || c->sq == nil)
+		error(Enomem);
 	ifc = (Ipifc*)c->ptcl;
 	ifc->conv = c;
-	ifc->unbinding = 0;
 	ifc->m = nil;
+	ifc->reflect = 0;
 	ifc->reassemble = 0;
 }
 
 /*
  *  called after last close of ipifc data or ctl
- *  called with c locked, we must unlock
  */
 static void
 ipifcclose(Conv *c)
 {
-	Ipifc *ifc;
-	Medium *m;
+	Ipifc *ifc = (Ipifc*)c->ptcl;
+	Medium *m = ifc->m;
 
-	ifc = (Ipifc*)c->ptcl;
-	m = ifc->m;
 	if(m != nil && m->unbindonclose)
 		ipifcunbind(ifc);
 }
@@ -347,19 +431,17 @@
 /*
  *  change an interface's mtu
  */
-char*
-ipifcsetmtu(Ipifc *ifc, char **argv, int argc)
+static char*
+ipifcsetmtu(Ipifc *ifc, int mtu)
 {
-	int mtu;
+	Medium *m = ifc->m;
 
-	if(argc < 2)
+	if(m == nil)
+		return Eunbound;
+	if(mtu < m->mintu || mtu > m->maxtu)
 		return Ebadarg;
-	if(ifc->m == nil)
-		return Ebadarg;
-	mtu = strtoul(argv[1], 0, 0);
-	if(mtu < ifc->m->mintu || mtu > ifc->m->maxtu)
-		return Ebadarg;
 	ifc->maxtu = mtu;
+	ipifcadjustburst(ifc);
 	return nil;
 }
 
@@ -374,13 +456,8 @@
 	Iplifc *lifc, **l;
 	int i, type, mtu;
 	Fs *f;
-	int sendnbrdisc = 0;
 
-	if(ifc->m == nil)
-		return "ipifc not yet bound to device";
-
-	f = ifc->conv->p->f;
-
+	mtu = 0;
 	type = Rifc;
 	memset(ip, 0, IPaddrlen);
 	memset(mask, 0, IPaddrlen);
@@ -392,23 +469,21 @@
 		/* fall through */
 	case 5:
 		mtu = strtoul(argv[4], 0, 0);
-		if(mtu >= ifc->m->mintu && mtu <= ifc->m->maxtu)
-			ifc->maxtu = mtu;
 		/* fall through */
 	case 4:
-		parseip(ip, argv[1]);
-		parseipmask(mask, argv[2]);
-		parseip(rem, argv[3]);
+		if (parseipandmask(ip, mask, argv[1], argv[2]) == -1 || parseip(rem, argv[3]) == -1)
+			return Ebadip;
 		maskip(rem, mask, net);
 		break;
 	case 3:
-		parseip(ip, argv[1]);
-		parseipmask(mask, argv[2]);
+		if (parseipandmask(ip, mask, argv[1], argv[2]) == -1)
+			return Ebadip;
 		maskip(ip, mask, rem);
 		maskip(rem, mask, net);
 		break;
 	case 2:
-		parseip(ip, argv[1]);
+		if (parseip(ip, argv[1]) == -1)
+			return Ebadip;
 		memmove(mask, defmask(ip), IPaddrlen);
 		maskip(ip, mask, rem);
 		maskip(rem, mask, net);
@@ -415,26 +490,55 @@
 		break;
 	default:
 		return Ebadarg;
-		break;
 	}
-	if(isv4(ip))
+
+	/* check for point-to-point interface */
+	if(ipcmp(ip, v6loopback) != 0) /* skip v6 loopback, it's a special address */
+	if(ipcmp(mask, IPallbits) == 0)
+		type |= Rptpt;
+
+	if(isv4(ip) || ipcmp(ip, IPnoaddr) == 0){
+		type |= Rv4;
 		tentative = 0;
+	}
+
 	wlock(ifc);
+	if(ifc->m == nil){
+		wunlock(ifc);
+		return Eunbound;
+	}
+	f = ifc->conv->p->f;
+	if(waserror()){
+		wunlock(ifc);
+		return up->errstr;
+	}
 
+	if(mtu > 0)
+		ipifcsetmtu(ifc, mtu);
+
 	/* ignore if this is already a local address for this ifc */
-	for(lifc = ifc->lifc; lifc; lifc = lifc->next) {
-		if(ipcmp(lifc->local, ip) == 0) {
-			if(lifc->tentative != tentative)
-				lifc->tentative = tentative;
-			if(lifcp != nil) {
-				lifc->onlink = lifcp->onlink;
-				lifc->autoflag = lifcp->autoflag;
-				lifc->validlt = lifcp->validlt;
-				lifc->preflt = lifcp->preflt;
-				lifc->origint = lifcp->origint;
+	if((lifc = iplocalonifc(ifc, ip)) != nil){
+		if(lifcp != nil) {
+			if(!lifc->onlink && lifcp->onlink){
+				lifc->onlink = 1;
+				addroute(f, lifc->remote, lifc->mask, ip, IPallbits,
+					lifc->remote, lifc->type, ifc, tifc);
+				if(v6addrtype(ip) != linklocalv6)
+					addroute(f, lifc->remote, lifc->mask, ip, IPnoaddr,
+						lifc->remote, lifc->type, ifc, tifc);
 			}
-			goto out;
+			lifc->autoflag = lifcp->autoflag;
+			lifc->validlt = lifcp->validlt;
+			lifc->preflt = lifcp->preflt;
+			lifc->origint = lifcp->origint;
 		}
+		if(lifc->tentative != tentative){
+			lifc->tentative = tentative;
+			goto done;
+		}
+		wunlock(ifc);
+		poperror();
+		return nil;
 	}
 
 	/* add the address to the list of logical ifc's for this ifc */
@@ -443,6 +547,7 @@
 	ipmove(lifc->mask, mask);
 	ipmove(lifc->remote, rem);
 	ipmove(lifc->net, net);
+	lifc->type = type;
 	lifc->tentative = tentative;
 	if(lifcp != nil) {
 		lifc->onlink = lifcp->onlink;
@@ -450,39 +555,34 @@
 		lifc->validlt = lifcp->validlt;
 		lifc->preflt = lifcp->preflt;
 		lifc->origint = lifcp->origint;
+	} else {		/* default values */
+		lifc->onlink = lifc->autoflag = 1;
+		lifc->validlt = lifc->preflt = ~0UL;
+		lifc->origint = NOW / 1000;
 	}
-	else {		// default values
-		lifc->onlink = 1;
-		lifc->autoflag = 1;
-		lifc->validlt = 0xffffffff;
-		lifc->preflt = 0xffffffff;
-		lifc->origint = NOW / 10^3;
-	}
 	lifc->next = nil;
 
-	for(l = &ifc->lifc; *l; l = &(*l)->next)
+	for(l = &ifc->lifc; *l != nil; l = &(*l)->next)
 		;
 	*l = lifc;
 
-	/* check for point-to-point interface */
-	if(ipcmp(ip, v6loopback))  /* skip v6 loopback, it's a special address */
-	if(ipcmp(mask, IPallbits) == 0)
-		type |= Rptpt;
+	/* add route for this logical interface */
+	if(lifc->onlink){
+		addroute(f, rem, mask, ip, IPallbits, rem, type, ifc, tifc);
+		if(v6addrtype(ip) != linklocalv6)
+			addroute(f, rem, mask, ip, IPnoaddr, rem, type, ifc, tifc);
+	}
 
-	/* add local routes */
-	if(isv4(ip))
-		v4addroute(f, tifc, rem+IPv4off, mask+IPv4off, rem+IPv4off, type);
-	else
-		v6addroute(f, tifc, rem, mask, rem, type);
-
 	addselfcache(f, ifc, lifc, ip, Runi);
 
-	if((type & (Rproxy|Rptpt)) == (Rproxy|Rptpt)){
-		ipifcregisterproxy(f, ifc, rem);
-		goto out;
+	/* register proxy */
+	if(type & Rptpt){
+		if(type & Rproxy)
+			ipifcregisterproxy(f, ifc, rem, 1);
+		goto done;
 	}
 
-	if(isv4(ip) || ipcmp(ip, IPnoaddr) == 0) {
+	if(type & Rv4) {
 		/* add subnet directed broadcast address to the self cache */
 		for(i = 0; i < IPaddrlen; i++)
 			bcast[i] = (ip[i] & mask[i]) | ~mask[i];
@@ -504,174 +604,135 @@
 		for(i = 0; i < IPaddrlen; i++)
 			bcast[i] = (ip[i] & mask[i]) & mask[i];
 		addselfcache(f, ifc, lifc, bcast, Rbcast);
-		
+
 		addselfcache(f, ifc, lifc, IPv4bcast, Rbcast);
-	}
-	else {
+	} else {
 		if(ipcmp(ip, v6loopback) == 0) {
 			/* add node-local mcast address */
 			addselfcache(f, ifc, lifc, v6allnodesN, Rmulti);
 
 			/* add route for all node multicast */
-			v6addroute(f, tifc, v6allnodesN, v6allnodesNmask, v6allnodesN, Rmulti);
+			addroute(f, v6allnodesN, v6allnodesNmask,
+				ip, IPallbits,
+				v6allnodesN, Rmulti, ifc, tifc);
 		}
 
 		/* add all nodes multicast address */
 		addselfcache(f, ifc, lifc, v6allnodesL, Rmulti);
-		
+
 		/* add route for all nodes multicast */
-		v6addroute(f, tifc, v6allnodesL, v6allnodesLmask, v6allnodesL, Rmulti);
-		
+		addroute(f, v6allnodesL, v6allnodesLmask,
+			ip, IPallbits,
+			v6allnodesL, Rmulti, ifc, tifc);
+
 		/* add solicited-node multicast address */
 		ipv62smcast(bcast, ip);
 		addselfcache(f, ifc, lifc, bcast, Rmulti);
-
-		sendnbrdisc = 1;
 	}
 
-	/* register the address on this network for address resolution */
-	if(isv4(ip) && ifc->m->areg != nil)
-		(*ifc->m->areg)(ifc, ip);
-
-out:
+done:
 	wunlock(ifc);
-	if(tentative && sendnbrdisc)
-		icmpns(f, 0, SRC_UNSPEC, ip, TARG_MULTI, ifc->mac);
+	poperror();
+
+	rlock(ifc);
+	ipifcregisteraddr(f, ifc, lifc, ip);
+	runlock(ifc);
+
 	return nil;
 }
 
 /*
  *  remove a logical interface from an ifc
- *  always called with ifc wlock'd
+ *	called with ifc wlock'd
  */
 static char*
-ipifcremlifc(Ipifc *ifc, Iplifc *lifc)
+ipifcremlifc(Ipifc *ifc, Iplifc **l)
 {
-	Iplifc **l;
-	Fs *f;
+	Iplifc *lifc = *l;
+	Fs *f = ifc->conv->p->f;
 
-	f = ifc->conv->p->f;
-
-	/*
-	 *  find address on this interface and remove from chain.
-	 *  for pt to pt we actually specify the remote address as the
-	 *  addresss to remove.
-	 */
-	for(l = &ifc->lifc; *l != nil && *l != lifc; l = &(*l)->next)
-		;
-	if(*l == nil)
+	if(lifc == nil)
 		return "address not on this interface";
 	*l = lifc->next;
 
 	/* disassociate any addresses */
-	while(lifc->link)
+	while(lifc->link != nil)
 		remselfcache(f, ifc, lifc, lifc->link->self->a);
 
 	/* remove the route for this logical interface */
-	if(isv4(lifc->local))
-		v4delroute(f, lifc->remote+IPv4off, lifc->mask+IPv4off, 1);
-	else {
-		v6delroute(f, lifc->remote, lifc->mask, 1);
+	if(lifc->onlink){
+		remroute(f, lifc->remote, lifc->mask,
+			lifc->local, IPallbits,
+			lifc->remote, lifc->type, ifc, tifc);
+		if(v6addrtype(lifc->local) != linklocalv6)
+			remroute(f, lifc->remote, lifc->mask,
+				lifc->local, IPnoaddr,
+				lifc->remote, lifc->type, ifc, tifc);
+	}
+
+	/* unregister proxy */
+	if(lifc->type & Rptpt){
+		if(lifc->type & Rproxy)
+			ipifcregisterproxy(f, ifc, lifc->remote, 0);
+		goto done;
+	}
+
+	/* remove route for all nodes multicast */
+	if((lifc->type & Rv4) == 0){
 		if(ipcmp(lifc->local, v6loopback) == 0)
-			/* remove route for all node multicast */
-			v6delroute(f, v6allnodesN, v6allnodesNmask, 1);
-		else if(memcmp(lifc->local, v6linklocal, v6llpreflen) == 0)
-			/* remove route for all link multicast */
-			v6delroute(f, v6allnodesL, v6allnodesLmask, 1);
+			remroute(f, v6allnodesN, v6allnodesNmask,
+				lifc->local, IPallbits,
+				v6allnodesN, Rmulti, ifc, tifc);
+
+		remroute(f, v6allnodesL, v6allnodesLmask,
+			lifc->local, IPallbits,
+			v6allnodesL, Rmulti, ifc, tifc);
 	}
 
+done:
 	free(lifc);
 	return nil;
-
 }
 
 /*
  *  remove an address from an interface.
- *  called with c locked
  */
 char*
 ipifcrem(Ipifc *ifc, char **argv, int argc)
 {
-	uchar ip[IPaddrlen];
-	uchar mask[IPaddrlen];
-	uchar rem[IPaddrlen];
-	Iplifc *lifc;
-	char *rv;
+	uchar ip[IPaddrlen], mask[IPaddrlen], rem[IPaddrlen];
+	Iplifc *lifc, **l;
+	char *err;
 
 	if(argc < 3)
 		return Ebadarg;
-
-	parseip(ip, argv[1]);
-	parseipmask(mask, argv[2]);
+	if(parseipandmask(ip, mask, argv[1], argv[2]) == -1)
+		return Ebadip;
 	if(argc < 4)
 		maskip(ip, mask, rem);
-	else
-		parseip(rem, argv[3]);
+	else if(parseip(rem, argv[3]) == -1)
+		return Ebadip;
 
-	wlock(ifc);
-
 	/*
 	 *  find address on this interface and remove from chain.
 	 *  for pt to pt we actually specify the remote address as the
 	 *  addresss to remove.
 	 */
+	wlock(ifc);
+	l = &ifc->lifc;
 	for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next) {
-		if (memcmp(ip, lifc->local, IPaddrlen) == 0
-		&& memcmp(mask, lifc->mask, IPaddrlen) == 0
-		&& memcmp(rem, lifc->remote, IPaddrlen) == 0)
+		if(ipcmp(ip, lifc->local) == 0
+		&& ipcmp(mask, lifc->mask) == 0
+		&& ipcmp(rem, lifc->remote) == 0)
 			break;
+		l = &lifc->next;
 	}
-
-	rv = ipifcremlifc(ifc, lifc);
+	err = ipifcremlifc(ifc, l);
 	wunlock(ifc);
-	return rv;
+	return err;
 }
 
 /*
- * distribute routes to active interfaces like the
- * TRIP linecards
- */
-void
-ipifcaddroute(Fs *f, int vers, uchar *addr, uchar *mask, uchar *gate, int type)
-{
-	Medium *m;
-	Conv **cp, **e;
-	Ipifc *ifc;
-
-	e = &f->ipifc->conv[f->ipifc->nc];
-	for(cp = f->ipifc->conv; cp < e; cp++){
-		if(*cp != nil) {
-			ifc = (Ipifc*)(*cp)->ptcl;
-			m = ifc->m;
-			if(m == nil)
-				continue;
-			if(m->addroute != nil)
-				m->addroute(ifc, vers, addr, mask, gate, type);
-		}
-	}
-}
-
-void
-ipifcremroute(Fs *f, int vers, uchar *addr, uchar *mask)
-{
-	Medium *m;
-	Conv **cp, **e;
-	Ipifc *ifc;
-
-	e = &f->ipifc->conv[f->ipifc->nc];
-	for(cp = f->ipifc->conv; cp < e; cp++){
-		if(*cp != nil) {
-			ifc = (Ipifc*)(*cp)->ptcl;
-			m = ifc->m;
-			if(m == nil)
-				continue;
-			if(m->remroute != nil)
-				m->remroute(ifc, vers, addr, mask);
-		}
-	}
-}
-
-/*
  *  associate an address with the interface.  This wipes out any previous
  *  addresses.  This is a macro that means, remove all the old interfaces
  *  and add a new one.
@@ -679,170 +740,89 @@
 static char*
 ipifcconnect(Conv* c, char **argv, int argc)
 {
+	Ipifc *ifc = (Ipifc*)c->ptcl;
 	char *err;
-	Ipifc *ifc;
 
-	ifc = (Ipifc*)c->ptcl;
-
-	if(ifc->m == nil)
-		 return "ipifc not yet bound to device";
-
-	if(waserror()){
-		wunlock(ifc);
-		nexterror();
-	}
 	wlock(ifc);
-	while(ifc->lifc){
-		err = ipifcremlifc(ifc, ifc->lifc);
-		if(err)
-			error(err);
-	}
+	while(ifc->lifc != nil)
+		ipifcremlifc(ifc, &ifc->lifc);
 	wunlock(ifc);
-	poperror();
 
 	err = ipifcadd(ifc, argv, argc, 0, nil);
-	if(err)
+	if(err != nil)
 		return err;
 
 	Fsconnected(c, nil);
-
 	return nil;
 }
 
 char*
-ipifcsetpar6(Ipifc *ifc, char **argv, int argc)
+ipifcra6(Ipifc *ifc, char **argv, int argc)
 {
-	int i, argsleft, vmax = ifc->rp.maxraint, vmin = ifc->rp.minraint;
+	int i, argsleft;
+	uchar sendra, recvra;
+	Routerparams rp;
 
-	argsleft = argc - 1;
 	i = 1;
-
-	if(argsleft % 2 != 0)
+	argsleft = argc - 1;
+	if((argsleft % 2) != 0)
 		return Ebadarg;
 
+	sendra = ifc->sendra6;
+	recvra = ifc->recvra6;
+	rp = ifc->rp;
+
 	while (argsleft > 1) {
-		if(strcmp(argv[i],"recvra")==0)
-			ifc->recvra6 = (atoi(argv[i+1]) != 0);
-		else if(strcmp(argv[i],"sendra")==0)
-			ifc->sendra6 = (atoi(argv[i+1]) != 0);
-		else if(strcmp(argv[i],"mflag")==0)
-			ifc->rp.mflag = (atoi(argv[i+1]) != 0);
-		else if(strcmp(argv[i],"oflag")==0)
-			ifc->rp.oflag = (atoi(argv[i+1]) != 0);
-		else if(strcmp(argv[i],"maxraint")==0)
-			ifc->rp.maxraint = atoi(argv[i+1]);
-		else if(strcmp(argv[i],"minraint")==0)
-			ifc->rp.minraint = atoi(argv[i+1]);
-		else if(strcmp(argv[i],"linkmtu")==0)
-			ifc->rp.linkmtu = atoi(argv[i+1]);
-		else if(strcmp(argv[i],"reachtime")==0)
-			ifc->rp.reachtime = atoi(argv[i+1]);
-		else if(strcmp(argv[i],"rxmitra")==0)
-			ifc->rp.rxmitra = atoi(argv[i+1]);
-		else if(strcmp(argv[i],"ttl")==0)
-			ifc->rp.ttl = atoi(argv[i+1]);
-		else if(strcmp(argv[i],"routerlt")==0)
-			ifc->rp.routerlt = atoi(argv[i+1]);
+		if(strcmp(argv[i], "recvra") == 0)
+			recvra = atoi(argv[i+1]) != 0;
+		else if(strcmp(argv[i], "sendra") == 0)
+			sendra = atoi(argv[i+1]) != 0;
+		else if(strcmp(argv[i], "mflag") == 0)
+			rp.mflag = atoi(argv[i+1]) != 0;
+		else if(strcmp(argv[i], "oflag") == 0)
+			rp.oflag = atoi(argv[i+1]) != 0;
+		else if(strcmp(argv[i], "maxraint") == 0)
+			rp.maxraint = atoi(argv[i+1]);
+		else if(strcmp(argv[i], "minraint") == 0)
+			rp.minraint = atoi(argv[i+1]);
+		else if(strcmp(argv[i], "linkmtu") == 0)
+			rp.linkmtu = atoi(argv[i+1]);
+		else if(strcmp(argv[i], "reachtime") == 0)
+			rp.reachtime = atoi(argv[i+1]);
+		else if(strcmp(argv[i], "rxmitra") == 0)
+			rp.rxmitra = atoi(argv[i+1]);
+		else if(strcmp(argv[i], "ttl") == 0)
+			rp.ttl = atoi(argv[i+1]);
+		else if(strcmp(argv[i], "routerlt") == 0)
+			rp.routerlt = atoi(argv[i+1]);
 		else
-			return Ebadarg;	
+			return Ebadarg;
 
 		argsleft -= 2;
 		i += 2;
 	}
 
-	// consistency check
-	if(ifc->rp.maxraint < ifc->rp.minraint) {
-		ifc->rp.maxraint = vmax;
-		ifc->rp.minraint = vmin;
+	/* consistency check */
+	if(rp.maxraint < rp.minraint)
 		return Ebadarg;
-	}
 
-	return nil;
-}
+	ifc->rp = rp;
+	ifc->sendra6 = sendra;
+	ifc->recvra6 = recvra;
 
-char*
-ipifcsendra6(Ipifc *ifc, char **argv, int argc)
-{
-	int i;
-	
-	i = 0;
-	if(argc > 1)
-		i = atoi(argv[1]);
-	ifc->sendra6 = (i!=0);
 	return nil;
 }
 
-char*
-ipifcrecvra6(Ipifc *ifc, char **argv, int argc)
-{
-	int i;
-	
-	i = 0;
-	if(argc > 1)
-		i = atoi(argv[1]);
-	ifc->recvra6 = (i!=0);	
-	return nil;
-}
-
-char*
-ipifcnat(Ipifc *ifc, char **argv, int argc)
-{
-	uchar src[IPaddrlen], mask[IPaddrlen], dst[IPaddrlen];
-	Iplifc *lifc;
-
-	if(argc == 2){
-		if((strcmp(argv[1], "show") == 0)){
-			shownataddr();
-			return nil;
-		}else if((strcmp(argv[1], "flush") == 0)){
-			flushnataddr();
-			return nil;
-		}else
-			return Ebadarg;
-	}
-
-	if(argc != 5)
-		return Ebadarg;
-
-	if (parseip(src, argv[2]) == -1)
-		return Ebadip;
-
-	if (parseipmask(mask, argv[3]) == -1)
-		return Ebadip;
-
-	if (parseip(dst, argv[4]) == -1)
-		return Ebadip;
-
-	if((lifc=iplocalonifc(ifc, dst)) == nil)
-		return Ebadip;
-
-	if(strcmp(argv[1], "add") == 0){
-		if(addnataddr(src, mask, lifc) != 0)
-			return Ebadarg;
-	}else if(strcmp(argv[1], "remove") == 0){
-		if(removenataddr(src, mask, lifc) != 0)
-			return Ebadarg;
-	}else
-		return Ebadarg;
-
-	return nil;
-}
-
 /*
  *  non-standard control messages.
- *  called with c locked.
  */
 static char*
-ipifcctl(Conv* c, char**argv, int argc)
+ipifcctl(Conv* c, char **argv, int argc)
 {
-	Ipifc *ifc;
-	int i;
+	Ipifc *ifc = (Ipifc*)c->ptcl;
 
-	ifc = (Ipifc*)c->ptcl;
 	if(strcmp(argv[0], "add") == 0)
 		return ipifcadd(ifc, argv, argc, 0, nil);
-	else if(strcmp(argv[0], "bootp") == 0)
-		return bootp(ifc);
 	else if(strcmp(argv[0], "try") == 0)
 		return ipifcadd(ifc, argv, argc, 1, nil);
 	else if(strcmp(argv[0], "remove") == 0)
@@ -849,36 +829,38 @@
 		return ipifcrem(ifc, argv, argc);
 	else if(strcmp(argv[0], "unbind") == 0)
 		return ipifcunbind(ifc);
-	else if(strcmp(argv[0], "joinmulti") == 0)
-		return ipifcjoinmulti(ifc, argv, argc);
-	else if(strcmp(argv[0], "leavemulti") == 0)
-		return ipifcleavemulti(ifc, argv, argc);
 	else if(strcmp(argv[0], "mtu") == 0)
-		return ipifcsetmtu(ifc, argv, argc);
-	else if(strcmp(argv[0], "reassemble") == 0){
-		ifc->reassemble = 1;
+		return ipifcsetmtu(ifc, argc>1? strtoul(argv[1], 0, 0): 0);
+	else if(strcmp(argv[0], "speed") == 0){
+		ipifcsetspeed(ifc, argc>1? atoi(argv[1]): 0);
 		return nil;
 	}
+	else if(strcmp(argv[0], "delay") == 0){
+		ipifcsetdelay(ifc, argc>1? atoi(argv[1]): 0);
+		return nil;
+	}
 	else if(strcmp(argv[0], "iprouting") == 0){
-		i = 1;
-		if(argc > 1)
-			i = atoi(argv[1]);
-		iprouting(c->p->f, i);
+		iprouting(c->p->f, argc>1? atoi(argv[1]): 1);
 		return nil;
 	}
-	else if(strcmp(argv[0], "addpref6") == 0)
-		return ipifcaddpref6(ifc, argv, argc);
-	else if(strcmp(argv[0], "setpar6") == 0)
-		return ipifcsetpar6(ifc, argv, argc);
-	else if(strcmp(argv[0], "sendra6") == 0)
-		return ipifcsendra6(ifc, argv, argc);
-	else if(strcmp(argv[0], "recvra6") == 0)
-		return ipifcrecvra6(ifc, argv, argc);
-	else if(strcmp(argv[0], "nat") == 0)
-		return ipifcnat(ifc, argv, argc);
+	else if(strcmp(argv[0], "reflect") == 0){
+		ifc->reflect = argc>1? atoi(argv[1]): 1;
+		return nil;
+	}
+	else if(strcmp(argv[0], "reassemble") == 0){
+		ifc->reassemble = argc>1? atoi(argv[1]): 1;
+		return nil;
+	}
+	else if(strcmp(argv[0], "add6") == 0)
+		return ipifcadd6(ifc, argv, argc);
+	else if(strcmp(argv[0], "remove6") == 0)
+		return ipifcremove6(ifc, argv, argc);
+	else if(strcmp(argv[0], "ra6") == 0)
+		return ipifcra6(ifc, argv, argc);
 	return "unsupported ctl";
 }
 
+int
 ipifcstats(Proto *ipifc, char *buf, int len)
 {
 	return ipstats(ipifc->f, buf, len);
@@ -907,7 +889,7 @@
 	ipifc->nc = Maxmedia;
 	ipifc->ptclsize = sizeof(Ipifc);
 
-	f->ipifc = ipifc;			/* hack for ipifcremroute, findipifc, ... */
+	f->ipifc = ipifc;	/* hack for findipifc, ... */
 	f->self = smalloc(sizeof(Ipselftab));	/* hack for ipforme */
 
 	Fsproto(f, ipifc);
@@ -915,21 +897,25 @@
 
 /*
  *  add to self routing cache
- *	called with c locked
  */
 static void
 addselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a, int type)
 {
-	Ipself *p;
 	Iplink *lp;
+	Ipself *p;
 	int h;
 
+	type |= (lifc->type & Rv4);
 	qlock(f->self);
+	if(waserror()){
+		qunlock(f->self);
+		nexterror();
+	}
 
 	/* see if the address already exists */
 	h = hashipa(a);
-	for(p = f->self->hash[h]; p; p = p->next)
-		if(memcmp(a, p->a, IPaddrlen) == 0)
+	for(p = f->self->hash[h]; p != nil; p = p->next)
+		if(ipcmp(a, p->a) == 0)
 			break;
 
 	/* allocate a local address and add to hash chain */
@@ -946,7 +932,7 @@
 	}
 
 	/* look for a link for this lifc */
-	for(lp = p->link; lp; lp = lp->selflink)
+	for(lp = p->link; lp != nil; lp = lp->selflink)
 		if(lp->lifc == lifc)
 			break;
 
@@ -962,18 +948,19 @@
 		lifc->link = lp;
 
 		/* add to routing table */
-		if(isv4(a))
-			v4addroute(f, tifc, a+IPv4off, IPallbits+IPv4off, a+IPv4off, type);
-		else
-			v6addroute(f, tifc, a, IPallbits, a, type);
+		addroute(f, a, IPallbits,
+			lifc->local, 
+			((type & (Rbcast|Rmulti)) != 0 || v6addrtype(a) == linklocalv6) ?
+				IPallbits : IPnoaddr,
+			a, type, ifc, tifc);
 
 		if((type & Rmulti) && ifc->m->addmulti != nil)
 			(*ifc->m->addmulti)(ifc, a, lifc->local);
-	} else {
+	} else
 		lp->ref++;
-	}
 
 	qunlock(f->self);
+	poperror();
 }
 
 /*
@@ -992,8 +979,8 @@
 	ulong now = NOW;
 
 	l = &freeiplink;
-	for(np = *l; np; np = *l){
-		if(np->expire > now){
+	for(np = *l; np != nil; np = *l){
+		if((long)(now - np->expire) >= 0){
 			*l = np->next;
 			free(np);
 			continue;
@@ -1000,10 +987,11 @@
 		}
 		l = &np->next;
 	}
-	p->expire = now + 5000;		/* give other threads 5 secs to get out */
+	p->expire = now + 5000;	/* give other threads 5 secs to get out */
 	p->next = nil;
 	*l = p;
 }
+
 static void
 ipselffree(Ipself *p)
 {
@@ -1011,8 +999,8 @@
 	ulong now = NOW;
 
 	l = &freeipself;
-	for(np = *l; np; np = *l){
-		if(np->expire > now){
+	for(np = *l; np != nil; np = *l){
+		if((long)(now - np->expire) >= 0){
 			*l = np->next;
 			free(np);
 			continue;
@@ -1019,7 +1007,7 @@
 		}
 		l = &np->next;
 	}
-	p->expire = now + 5000;		/* give other threads 5 secs to get out */
+	p->expire = now + 5000;	/* give other threads 5 secs to get out */
 	p->next = nil;
 	*l = p;
 }
@@ -1027,7 +1015,6 @@
 /*
  *  Decrement reference for this address on this link.
  *  Unlink from selftab if this is the last ref.
- *	called with c locked
  */
 static void
 remselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a)
@@ -1039,7 +1026,7 @@
 
 	/* find the unique selftab entry */
 	l = &f->self->hash[hashipa(a)];
-	for(p = *l; p; p = *l){
+	for(p = *l; p != nil; p = *l){
 		if(ipcmp(p->a, a) == 0)
 			break;
 		l = &p->next;
@@ -1053,7 +1040,7 @@
 	 *  that matches the selftab entry
 	 */
 	l_lifc = &lifc->link;
-	for(link = *l_lifc; link; link = *l_lifc){
+	for(link = *l_lifc; link != nil; link = *l_lifc){
 		if(link->self == p)
 			break;
 		l_lifc = &link->lifclink;
@@ -1067,8 +1054,8 @@
 	 *  the one we just found
 	 */
 	l_self = &p->link;
-	for(link = *l_self; link; link = *l_self){
-		if(link == *(l_lifc))
+	for(link = *l_self; link != nil; link = *l_self){
+		if(link == *l_lifc)
 			break;
 		l_self = &link->selflink;
 	}
@@ -1079,9 +1066,20 @@
 	if(--(link->ref) != 0)
 		goto out;
 
-	if((p->type & Rmulti) && ifc->m->remmulti != nil)
-		(*ifc->m->remmulti)(ifc, a, lifc->local);
+	/* remove from routing table */
+	remroute(f, a, IPallbits,
+		lifc->local, 
+		((p->type & (Rbcast|Rmulti)) != 0 || v6addrtype(a) == linklocalv6) ?
+			IPallbits : IPnoaddr,
+		a, p->type, ifc, tifc);
 
+	if((p->type & Rmulti) && ifc->m->remmulti != nil){
+		if(!waserror()){
+			(*ifc->m->remmulti)(ifc, a, lifc->local);
+			poperror();
+		}
+	}
+
 	/* ref == 0, remove from both chains and free the link */
 	*l_lifc = link->lifclink;
 	*l_self = link->selflink;
@@ -1090,30 +1088,18 @@
 	if(p->link != nil)
 		goto out;
 
-	/* remove from routing table */
-	if(isv4(a))
-		v4delroute(f, a+IPv4off, IPallbits+IPv4off, 1);
-	else
-		v6delroute(f, a, IPallbits, 1);
-	
+	/* if null address, forget */
+	if(ipcmp(a, v4prefix) == 0 || ipcmp(a, IPnoaddr) == 0)
+		f->self->acceptall = 0;
+
 	/* no more links, remove from hash and free */
 	*l = p->next;
 	ipselffree(p);
 
-	/* if IPnoaddr, forget */
-	if(ipcmp(a, v4prefix) == 0 || ipcmp(a, IPnoaddr) == 0)
-		f->self->acceptall = 0;
-
 out:
 	qunlock(f->self);
 }
 
-static char *stformat = "%-44.44I %2.2d %4.4s\n";
-enum
-{
-	Nstformat= 41,
-};
-
 long
 ipselftabread(Fs *f, char *cp, ulong offset, int n)
 {
@@ -1124,14 +1110,14 @@
 
 	m = 0;
 	off = offset;
-	qlock(f->self);
 	for(i = 0; i < NHASH && m < n; i++){
 		for(p = f->self->hash[i]; p != nil && m < n; p = p->next){
 			nifc = 0;
-			for(link = p->link; link; link = link->selflink)
+			for(link = p->link; link != nil; link = link->selflink)
 				nifc++;
 			routetype(p->type, state);
-			m += snprint(cp + m, n - m, stformat, p->a, nifc, state);
+			m += snprint(cp + m, n - m, "%-44.44I %2.2d %4.4s\n",
+				p->a, nifc, state);
 			if(off > 0){
 				off -= m;
 				m = 0;
@@ -1138,30 +1124,15 @@
 			}
 		}
 	}
-	qunlock(f->self);
 	return m;
 }
 
-int
-iptentative(Fs *f, uchar *addr)
-{
- 	Ipself *p;
-
-	p = f->self->hash[hashipa(addr)];
-	for(; p; p = p->next){
-		if(ipcmp(addr, p->a) == 0) {
-			return p->link->lifc->tentative;
-		}
-	}
-	return 0;
-}
-
 /*
  *  returns
  *	0		- no match
  *	Runi
  *	Rbcast
- *	Rmcast
+ *	Rmulti
  */
 int
 ipforme(Fs *f, uchar *addr)
@@ -1168,11 +1139,9 @@
 {
 	Ipself *p;
 
-	p = f->self->hash[hashipa(addr)];
-	for(; p; p = p->next){
+	for(p = f->self->hash[hashipa(addr)]; p != nil; p = p->next)
 		if(ipcmp(addr, p->a) == 0)
-			return p->type;
-	}
+			return p->type & (Runi|Rbcast|Rmulti);
 
 	/* hack to say accept anything */
 	if(f->self->acceptall)
@@ -1186,254 +1155,237 @@
  *  return nil.
  */
 Ipifc*
-findipifc(Fs *f, uchar *remote, int type)
+findipifc(Fs *f, uchar *local, uchar *remote, int type)
 {
+	uchar gnet[IPaddrlen];
+	int spec, xspec;
 	Ipifc *ifc, *x;
 	Iplifc *lifc;
-	Conv **cp, **e;
-	uchar gnet[IPaddrlen];
-	uchar xmask[IPaddrlen];
+	Conv **cp;
 
-	x = nil; memset(xmask, 0, IPaddrlen);
-
-	/* find most specific match */
-	e = &f->ipifc->conv[f->ipifc->nc];
-	for(cp = f->ipifc->conv; cp < e; cp++){
-		if(*cp == 0)
-			continue;
-
+	x = nil;
+	xspec = 0;
+	for(cp = f->ipifc->conv; *cp != nil; cp++){
 		ifc = (Ipifc*)(*cp)->ptcl;
-
-		for(lifc = ifc->lifc; lifc; lifc = lifc->next){
+		if(!canrlock(ifc))
+			continue;
+		for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next){
+			if(type & Runi){
+				if(ipcmp(remote, lifc->local) == 0){
+				Found:
+					runlock(ifc);
+					return ifc;
+				}
+			} else if(type & (Rbcast|Rmulti)) {
+				if(ipcmp(local, lifc->local) == 0)
+					goto Found;
+			}
 			maskip(remote, lifc->mask, gnet);
 			if(ipcmp(gnet, lifc->net) == 0){
-				if(x == nil || ipcmp(lifc->mask, xmask) > 0){
+				spec = comprefixlen(remote, lifc->local, IPaddrlen);
+				if(spec > xspec){
 					x = ifc;
-					ipmove(xmask, lifc->mask);
+					xspec = spec;
 				}
 			}
 		}
+		runlock(ifc);
 	}
-	if(x != nil)
-		return x;
+	return x;
+}
 
-	/* for now for broadcast and multicast, just use first interface */
-	if(type & (Rbcast|Rmulti)){
-		for(cp = f->ipifc->conv; cp < e; cp++){
-			if(*cp == 0)
-				continue;
-			ifc = (Ipifc*)(*cp)->ptcl;
-			if(ifc->lifc != nil)
-				return ifc;
-		}
+Ipifc*
+findipifcstr(Fs *f, char *s)
+{
+	uchar ip[IPaddrlen];
+	Conv *c;
+	char *p;
+	long x;
+
+	x = strtol(s, &p, 10);
+	if(p > s && *p == '\0'){
+		if(x < 0)
+			return nil;
+		if(x < f->ipifc->nc && (c = f->ipifc->conv[x]) != nil && ipifcinuse(c))
+			return (Ipifc*)c->ptcl;
 	}
-		
+	if(parseip(ip, s) != -1)
+		return findipifc(f, ip, ip, Runi);
 	return nil;
 }
 
-enum {
-	unknownv6,
-	multicastv6,
-	unspecifiedv6,
-	linklocalv6,
-	sitelocalv6,
-	globalv6,
-};
-
-int
-v6addrtype(uchar *addr)
-{
-	if(isv6global(addr))
-		return globalv6;
-	if(islinklocal(addr))
-		return linklocalv6;
-	if(isv6mcast(addr))
-		return multicastv6;
-	if(issitelocal(addr))
-		return sitelocalv6;
-	return unknownv6;
-}
-
-#define v6addrcurr(lifc) (( (lifc)->origint + (lifc)->preflt >= (NOW/10^3) ) || ( (lifc)->preflt == 0xffffffff ))
-
+/*
+ *  find "best" (global > link local > unspecified)
+ *  local address; address must be current.
+ */
 static void
 findprimaryipv6(Fs *f, uchar *local)
 {
-	Conv **cp, **e;
-	Ipifc *ifc;
-	Iplifc *lifc;
+	ulong now = NOW/1000;
 	int atype, atypel;
+	Iplifc *lifc;
+	Ipifc *ifc;
+	Conv **cp;
 
 	ipmove(local, v6Unspecified);
 	atype = unspecifiedv6;
 
-	/* find "best" (global > sitelocal > link local > unspecified)
-	 * local address; address must be current */
-
-	e = &f->ipifc->conv[f->ipifc->nc];
-	for(cp = f->ipifc->conv; cp < e; cp++){
-		if(*cp == 0)
-			continue;
+	for(cp = f->ipifc->conv; *cp != nil; cp++){
 		ifc = (Ipifc*)(*cp)->ptcl;
-		for(lifc = ifc->lifc; lifc; lifc = lifc->next){
+		rlock(ifc);
+		for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next){
 			atypel = v6addrtype(lifc->local);
 			if(atypel > atype)
-			if(v6addrcurr(lifc)) {
+			if(lifc->preflt == ~0UL || lifc->preflt >= now-lifc->origint) {
 				ipmove(local, lifc->local);
 				atype = atypel;
-				if(atype == globalv6)
+				if(atype == globalv6){
+					runlock(ifc);
 					return;
+				}
 			}
 		}
+		runlock(ifc);
 	}
 }
 
 /*
- *  returns first ip address configured
+ *  returns first v4 address configured
  */
 static void
 findprimaryipv4(Fs *f, uchar *local)
 {
-	Conv **cp, **e;
-	Ipifc *ifc;
 	Iplifc *lifc;
+	Ipifc *ifc;
+	Conv **cp;
 
 	/* find first ifc local address */
-	e = &f->ipifc->conv[f->ipifc->nc];
-	for(cp = f->ipifc->conv; cp < e; cp++){
-		if(*cp == 0)
-			continue;
+	for(cp = f->ipifc->conv; *cp != nil; cp++){
 		ifc = (Ipifc*)(*cp)->ptcl;
-		if((lifc = ifc->lifc) != nil){
-			ipmove(local, lifc->local);
-			return;
+		rlock(ifc);
+		for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next){
+			if((lifc->type & Rv4) != 0){
+				ipmove(local, lifc->local);
+				runlock(ifc);
+				return;
+			}
 		}
+		runlock(ifc);
 	}
+	ipmove(local, IPnoaddr);
 }
 
 /*
- *  find the local address 'closest' to the remote system, copy it to
- *  local and return the ifc for that address
+ * ipv4local, ipv6local:
+ *  return a local address associated with an interface close to remote.
+ *  prefixlen is the number of leading bits in the local address that
+ *  have to match an interface address to be considered. this is used
+ *  by source specific routes to filter on the source address.
+ *  return non-zero on success or zero when no address was found.
+ *
+ *  for ipv4local, all addresses are 4 byte format.
  */
-void
-findlocalip(Fs *f, uchar *local, uchar *remote)
+int
+ipv4local(Ipifc *ifc, uchar *local, int prefixlen, uchar *remote)
 {
-	Ipifc *ifc;
 	Iplifc *lifc;
-	Route *r;
-	uchar gate[IPaddrlen];
-	uchar gnet[IPaddrlen];
-	int version;
-	int atype = unspecifiedv6, atypel = unknownv6;
+	int a, b;
 
-	USED(atype);
-	USED(atypel);
-	qlock(f->ipifc);
-	r = v6lookup(f, remote, nil);
- 	version = (memcmp(remote, v4prefix, IPv4off) == 0) ? V4 : V6;
-	
-	if(r != nil){
-		ifc = r->ifc;
-		if(r->type & Rv4)
-			v4tov6(gate, r->v4.gate);
-		else {
-			ipmove(gate, r->v6.gate);
-			ipmove(local, v6Unspecified);
-		}
+	b = -1;
+	for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next){
+		if((lifc->type & Rv4) == 0 || ipcmp(lifc->local, IPnoaddr) == 0)
+			continue;
 
-		/* find ifc address closest to the gateway to use */
-		switch(version) {
-		case V4:
-			for(lifc = ifc->lifc; lifc; lifc = lifc->next){
-				maskip(gate, lifc->mask, gnet);
-				if(ipcmp(gnet, lifc->net) == 0){
-					ipmove(local, lifc->local);
-					goto out;
-				}
-			}
-			break;
-		case V6:
-			for(lifc = ifc->lifc; lifc; lifc = lifc->next){
-				atypel = v6addrtype(lifc->local);
-				maskip(gate, lifc->mask, gnet);
-				if(ipcmp(gnet, lifc->net) == 0)
-				if(atypel > atype)
-				if(v6addrcurr(lifc)) {
-					ipmove(local, lifc->local);
-					atype = atypel;
-					if(atype == globalv6)
-						break;
-				}
-			}
-			if(atype > unspecifiedv6)
-				goto out;
-			break;
-		default:
-			panic("findlocalip: version %d", version);
+		if(prefixlen && comprefixlen(lifc->local+IPv4off, local, IPv4addrlen) < prefixlen)
+			continue;
+		
+		a = comprefixlen(lifc->local+IPv4off, remote, IPv4addrlen);
+		if(a > b){
+			b = a;
+			memmove(local, lifc->local+IPv4off, IPv4addrlen);
 		}
 	}
-
-	switch(version){
-	case V4:
-		findprimaryipv4(f, local);
-		break;
-	case V6:
-		findprimaryipv6(f, local);
-		break;
-	default:
-		panic("findlocalip2: version %d", version);
-	}
-
-out:
-	qunlock(f->ipifc);
+	return b >= 0;
 }
 
-/*
- *  return first v4 address associated with an interface
- */
 int
-ipv4local(Ipifc *ifc, uchar *addr)
+ipv6local(Ipifc *ifc, uchar *local, int prefixlen, uchar *remote)
 {
+	struct {
+		int	atype;
+		int	deprecated;
+		int	comprefixlen;
+	} a, b;
+	int atype;
+	ulong now;
 	Iplifc *lifc;
 
-	for(lifc = ifc->lifc; lifc; lifc = lifc->next){
-		if(isv4(lifc->local)){
-			memmove(addr, lifc->local+IPv4off, IPv4addrlen);
-			return 1;
-		}
+	if(isv4(remote)){
+		memmove(local, v4prefix, IPv4off);
+		if((prefixlen -= IPv4off*8) < 0)
+			prefixlen = 0;
+		return ipv4local(ifc, local+IPv4off, prefixlen, remote+IPv4off);
 	}
-	return 0;
-}
 
-/*
- *  return first v6 address associated with an interface
- */
-int
-ipv6local(Ipifc *ifc, uchar *addr)
-{
-	Iplifc *lifc;
+	atype = v6addrtype(remote);
+	b.atype = unknownv6;
+	b.deprecated = 1;
+	b.comprefixlen = 0;
 
-	for(lifc = ifc->lifc; lifc; lifc = lifc->next){
-		if(!isv4(lifc->local) && !(lifc->tentative)){
-			ipmove(addr, lifc->local);
-			return 1;
+	now = NOW/1000;
+	for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next){
+		if(lifc->tentative)
+			continue;
+
+		if(prefixlen && comprefixlen(lifc->local, local, IPaddrlen) < prefixlen)
+			continue;
+
+		a.atype = v6addrtype(lifc->local);
+		a.deprecated = lifc->preflt != ~0UL && lifc->preflt < now-lifc->origint;
+		a.comprefixlen = comprefixlen(lifc->local, remote, IPaddrlen);
+
+		/* prefer appropriate scope */
+		if(a.atype != b.atype){
+			if(a.atype > b.atype && b.atype < atype ||
+			   a.atype < b.atype && b.atype > atype)
+				goto Good;
+			continue;
 		}
+		/* prefer non-deprecated addresses */
+		if(a.deprecated != b.deprecated){
+			if(b.deprecated)
+				goto Good;
+			continue;
+		}
+		/* prefer longer common prefix */
+		if(a.comprefixlen != b.comprefixlen){
+			if(a.comprefixlen > b.comprefixlen)
+				goto Good;
+			continue;
+		}
+		continue;
+	Good:
+		b = a;
+		ipmove(local, lifc->local);
 	}
-	return 0;
+
+	return b.atype >= atype;
 }
 
-int
-ipv6anylocal(Ipifc *ifc, uchar *addr)
+/*
+ *  find the local address for a remote destination
+ */
+void
+findlocalip(Fs *f, uchar *local, uchar *remote)
 {
-	Iplifc *lifc;
-
-	for(lifc = ifc->lifc; lifc; lifc = lifc->next){
-		if(!isv4(lifc->local)){
-			ipmove(addr, lifc->local);
-			return SRC_UNI;
-		}
+	if(isv4(remote)) {
+		memmove(local, v4prefix, IPv4off);
+		if(v4source(f, remote+IPv4off, local+IPv4off) == nil)
+			findprimaryipv4(f, local);
+	} else {
+		if(v6source(f, remote, local) == nil)
+			findprimaryipv6(f, local);
 	}
-	return SRC_UNSPEC;
 }
 
 /*
@@ -1444,13 +1396,28 @@
 {
 	Iplifc *lifc;
 
-	for(lifc = ifc->lifc; lifc; lifc = lifc->next)
+	for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next)
 		if(ipcmp(ip, lifc->local) == 0)
 			return lifc;
+
 	return nil;
 }
 
+Iplifc*
+ipremoteonifc(Ipifc *ifc, uchar *ip)
+{
+	uchar net[IPaddrlen];
+	Iplifc *lifc;
 
+	for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next){
+		maskip(ip, lifc->mask, net);
+		if(ipcmp(net, lifc->remote) == 0)
+			return lifc;
+	}
+	return nil;
+}
+
+
 /*
  *  See if we're proxying for this address on this interface
  */
@@ -1458,24 +1425,13 @@
 ipproxyifc(Fs *f, Ipifc *ifc, uchar *ip)
 {
 	Route *r;
-	uchar net[IPaddrlen];
-	Iplifc *lifc;
 
 	/* see if this is a direct connected pt to pt address */
-	r = v6lookup(f, ip, nil);
-	if(r == nil)
+	r = v6lookup(f, ip, ip, nil);
+	if(r == nil || (r->type & (Rifc|Rproxy)) != (Rifc|Rproxy))
 		return 0;
-	if((r->type & (Rifc|Rproxy)) != (Rifc|Rproxy))
-		return 0;
 
-	/* see if this is on the right interface */
-	for(lifc = ifc->lifc; lifc; lifc = lifc->next){
-		maskip(ip, lifc->mask, net);
-		if(ipcmp(net, lifc->remote) == 0)
-			return 1;
-	}
-
-	return 0;
+	return ipremoteonifc(ifc, ip) != nil;
 }
 
 /*
@@ -1487,73 +1443,53 @@
 	if(isv4(ip)){
 		if(ip[IPv4off] >= 0xe0 && ip[IPv4off] < 0xf0)
 			return V4;
-	} else {
-		if(ip[0] == 0xff)
-			return V6;
 	}
+	else if(ip[0] == 0xff)
+		return V6;
 	return 0;
 }
 
-int
-ipisbm(uchar *ip)
-{
-	if(isv4(ip)){
-		if(ip[IPv4off] >= 0xe0 && ip[IPv4off] < 0xf0)
-			return V4;
-		if(ipcmp(ip, IPv4bcast) == 0)
-			return V4;
-	} else {
-		if(ip[0] == 0xff)
-			return V6;
-	}
-	return 0;
-}
-
-
 /*
- *  add a multicast address to an interface, called with c locked
+ *  add a multicast address to an interface.
  */
 void
 ipifcaddmulti(Conv *c, uchar *ma, uchar *ia)
 {
-	Ipifc *ifc;
-	Iplifc *lifc;
-	Conv **p;
 	Ipmulti *multi, **l;
+	Iplifc *lifc;
+	Ipifc *ifc;
 	Fs *f;
 
-	f = c->p->f;
-	
-	for(l = &c->multi; *l; l = &(*l)->next)
-		if(ipcmp(ma, (*l)->ma) == 0)
-		if(ipcmp(ia, (*l)->ia) == 0)
+	if(isv4(ma) != isv4(ia))
+		error("incompatible multicast/interface ip address");
+
+	for(l = &c->multi; *l != nil; l = &(*l)->next)
+		if(ipcmp(ma, (*l)->ma) == 0 && ipcmp(ia, (*l)->ia) == 0)
 			return;		/* it's already there */
 
-	multi = *l = smalloc(sizeof(*multi));
-	ipmove(multi->ma, ma);
-	ipmove(multi->ia, ia);
-	multi->next = nil;
-
-	for(p = f->ipifc->conv; *p; p++){
-		if((*p)->inuse == 0)
-			continue;
-		ifc = (Ipifc*)(*p)->ptcl;
+	f = c->p->f;
+	if((ifc = findipifc(f, ia, ma, Rmulti)) != nil){
+		rlock(ifc);
 		if(waserror()){
-			wunlock(ifc);
+			runlock(ifc);
 			nexterror();
 		}
-		wlock(ifc);
-		for(lifc = ifc->lifc; lifc; lifc = lifc->next)
-			if(ipcmp(ia, lifc->local) == 0)
-				addselfcache(f, ifc, lifc, ma, Rmulti);
-		wunlock(ifc);
+		if((lifc = iplocalonifc(ifc, ia)) != nil)
+			addselfcache(f, ifc, lifc, ma, Rmulti);
+		runlock(ifc);
 		poperror();
 	}
+
+	multi = smalloc(sizeof(*multi));
+	ipmove(multi->ma, ma);
+	ipmove(multi->ia, ia);
+	multi->next = nil;
+	*l = multi;
 }
 
 
 /*
- *  remove a multicast address from an interface, called with c locked
+ *  remove a multicast address from an interface.
  */
 void
 ipifcremmulti(Conv *c, uchar *ma, uchar *ia)
@@ -1560,15 +1496,11 @@
 {
 	Ipmulti *multi, **l;
 	Iplifc *lifc;
-	Conv **p;
 	Ipifc *ifc;
 	Fs *f;
 
-	f = c->p->f;
-	
-	for(l = &c->multi; *l; l = &(*l)->next)
-		if(ipcmp(ma, (*l)->ma) == 0)
-		if(ipcmp(ia, (*l)->ia) == 0)
+	for(l = &c->multi; *l != nil; l = &(*l)->next)
+		if(ipcmp(ma, (*l)->ma) == 0 && ipcmp(ia, (*l)->ia) == 0)
 			break;
 
 	multi = *l;
@@ -1576,161 +1508,101 @@
 		return; 	/* we don't have it open */
 
 	*l = multi->next;
+	multi->next = nil;
 
-	for(p = f->ipifc->conv; *p; p++){
-		if((*p)->inuse == 0)
-			continue;
-
-		ifc = (Ipifc*)(*p)->ptcl;
-		if(waserror()){
-			wunlock(ifc);
-			nexterror();
-		}
-		wlock(ifc);
-		for(lifc = ifc->lifc; lifc; lifc = lifc->next)
-			if(ipcmp(ia, lifc->local) == 0)
+	f = c->p->f;
+	if((ifc = findipifc(f, ia, ma, Rmulti)) != nil){
+		rlock(ifc);
+		if(!waserror()){
+			if((lifc = iplocalonifc(ifc, ia)) != nil)
 				remselfcache(f, ifc, lifc, ma);
-		wunlock(ifc);
-		poperror();
+			poperror();
+		}
+		runlock(ifc);
 	}
-
 	free(multi);
 }
 
-/*
- *  make lifc's join and leave multicast groups
- */
-static char*
-ipifcjoinmulti(Ipifc *ifc, char **argv, int argc)
+/* register the address on this network for address resolution */
+static void
+ipifcregisteraddr(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *ip)
 {
-	USED(ifc, argv, argc);
-	return nil;
+	if(waserror()){
+		print("ipifcregisteraddr %s %I %I: %s\n", ifc->dev, lifc->local, ip, up->errstr);
+		return;
+	}
+	if(ifc->m != nil && ifc->m->areg != nil)
+		(*ifc->m->areg)(f, ifc, lifc, ip);
+	poperror();
 }
 
-static char*
-ipifcleavemulti(Ipifc *ifc, char **argv, int argc)
-{
-	USED(ifc, argv, argc);
-	return nil;
-}
-
 static void
-ipifcregisterproxy(Fs *f, Ipifc *ifc, uchar *ip)
+ipifcregisterproxy(Fs *f, Ipifc *ifc, uchar *ip, int add)
 {
-	Conv **cp, **e;
-	Ipifc *nifc;
+	uchar a[IPaddrlen];
 	Iplifc *lifc;
-	Medium *m;
-	uchar net[IPaddrlen];
+	Ipifc *nifc;
+	Conv **cp;
 
-	/* register the address on any network that will proxy for us */
-	e = &f->ipifc->conv[f->ipifc->nc];
+	/* register the address on any interface that will proxy for the ip */
+	for(cp = f->ipifc->conv; *cp != nil; cp++){
+		nifc = (Ipifc*)(*cp)->ptcl;
+		if(nifc == ifc || !canrlock(nifc))
+			continue;
 
-	if(!isv4(ip)) { // V6
-		for(cp = f->ipifc->conv; cp < e; cp++){
-			if(*cp == nil)
-				continue;
-			nifc = (Ipifc*)(*cp)->ptcl;
-			if(nifc == ifc)
-				continue;
-	
-			rlock(nifc);
-			m = nifc->m;
-			if(m == nil || m->addmulti == nil) {
-				runlock(nifc);
-				continue;
-			}
-			for(lifc = nifc->lifc; lifc; lifc = lifc->next){
-				maskip(ip, lifc->mask, net);
-				if(ipcmp(net, lifc->remote) == 0) { /* add solicited-node multicast address */
-					ipv62smcast(net, ip);
-					addselfcache(f, nifc, lifc, net, Rmulti);
-					arpenter(f, V6, ip, nifc->mac, 6, 0);
-					//(*m->addmulti)(nifc, net, ip);
-					break;
-				}
-			}
+		if(nifc->m == nil
+		|| (lifc = ipremoteonifc(nifc, ip)) == nil
+		|| (lifc->type & Rptpt) != 0
+		|| waserror()){
 			runlock(nifc);
+			continue;
 		}
-		return;
-	}
-	else { // V4
-		for(cp = f->ipifc->conv; cp < e; cp++){
-			if(*cp == nil)
-				continue;
-			nifc = (Ipifc*)(*cp)->ptcl;
-			if(nifc == ifc)
-				continue;
-	
-			rlock(nifc);
-			m = nifc->m;
-			if(m == nil || m->areg == nil){
-				runlock(nifc);
-				continue;
-			}
-			for(lifc = nifc->lifc; lifc; lifc = lifc->next){
-				maskip(ip, lifc->mask, net);
-				if(ipcmp(net, lifc->remote) == 0){
-					(*m->areg)(nifc, ip);
-					break;
-				}
-			}
-			runlock(nifc);
+		if((lifc->type & Rv4) == 0){
+			/* add solicited-node multicast addr */
+			ipv62smcast(a, ip);
+			if(add)
+				addselfcache(f, nifc, lifc, a, Rmulti);
+			else
+				remselfcache(f, nifc, lifc, a);
 		}
+		if(add)
+			ipifcregisteraddr(f, nifc, lifc, ip);
+		runlock(nifc);
+		poperror();
 	}
 }
 
-
-// added for new v6 mesg types
-static void
-adddefroute6(Fs *f, uchar *gate, int force)
-{
-	Route *r;
-
-	r = v6lookup(f, v6Unspecified, nil);
-	if(r!=nil)
-	if(!(force) && (strcmp(r->tag,"ra")!=0))	// route entries generated
-		return;			// by all other means take
-					// precedence over router annc
-
-	v6delroute(f, v6Unspecified, v6Unspecified, 1);
-	v6addroute(f, "ra", v6Unspecified, v6Unspecified, gate, 0);
-}
-
-enum
-{
-	Ngates = 3,
-};
-
 char*
-ipifcaddpref6(Ipifc *ifc, char**argv, int argc)
+ipifcadd6(Ipifc *ifc, char **argv, int argc)
 {
-	uchar	onlink = 1;
-	uchar	autoflag = 1;
-	long 	validlt = 0xffffffff;
-	long 	preflt = 0xffffffff;
-	long	origint = NOW / 10^3;
-	uchar	prefix[IPaddrlen];
-	int	plen = 64;
-	Iplifc	*lifc;
-	char	addr[40], preflen[6];
-	char	*params[3];
+	int plen = 64;
+	char addr[40], preflen[6];
+	char *params[3];
+	uchar prefix[IPaddrlen];
+	Iplifc lifc;
+	Medium *m;
 
+	lifc.onlink = 1;
+	lifc.autoflag = 1;
+	lifc.validlt = lifc.preflt = ~0UL;
+	lifc.origint = NOW / 1000;
+
 	switch(argc) {
 	case 7:
-		preflt = atoi(argv[6]);
+		lifc.preflt = strtoul(argv[6], 0, 10);
 		/* fall through */
 	case 6:
-		validlt = atoi(argv[5]);
+		lifc.validlt = strtoul(argv[5], 0, 10);
 		/* fall through */
 	case 5:
-		autoflag =  atoi(argv[4]);
+		lifc.autoflag = atoi(argv[4]) != 0;
 		/* fall through */
 	case 4:
-		onlink = atoi(argv[3]);
+		lifc.onlink = atoi(argv[3]) != 0;
 		/* fall through */
 	case 3:
 		plen = atoi(argv[2]);
+		/* fall through */
 	case 2:
 		break;
 	default:
@@ -1737,25 +1609,16 @@
 		return Ebadarg;
 	}
 
-	if((parseip(prefix, argv[1])!=6) ||
-	 	(validlt < preflt) ||
-		(plen < 0) || (plen > 64) ||
-		(islinklocal(prefix))
-	)
+	if (parseip(prefix, argv[1]) != 6 || lifc.validlt < lifc.preflt || plen < 0 ||
+	    plen > 64 || islinklocal(prefix))
 		return Ebadarg;
 
-	lifc = smalloc(sizeof(Iplifc));
-	lifc->onlink = (onlink!=0);
-	lifc->autoflag = (autoflag!=0);
-	lifc->validlt = validlt;
-	lifc->preflt = preflt;
-	lifc->origint = origint;
+	/* issue "add" ctl msg for v6 link-local addr and prefix len */
+	m = ifc->m;
+	if(m == nil || m->pref2addr == nil)
+		return Eunbound;
+	(*m->pref2addr)(prefix, ifc->mac);	/* mac → v6 link-local addr */
 
-	if(ifc->m->pref2addr!=nil)
-		ifc->m->pref2addr(prefix, ifc->mac);
-	else
-		return Ebadarg;
-	
 	sprint(addr, "%I", prefix);
 	sprint(preflen, "/%d", plen);
 	params[0] = "add";
@@ -1762,6 +1625,28 @@
 	params[1] = addr;
 	params[2] = preflen;
 
-	return ipifcadd(ifc, params, 3, 0, lifc);
+	return ipifcadd(ifc, params, 3, 0, &lifc);
 }
 
+char*
+ipifcremove6(Ipifc *ifc, char**, int argc)
+{
+	Iplifc *lifc, **l;
+	ulong now;
+
+	if(argc != 1)
+		return Ebadarg;
+
+	wlock(ifc);
+	now = NOW/1000;
+	for(l = &ifc->lifc; (lifc = *l) != nil;) {
+		if((lifc->type & Rv4) == 0)
+		if(lifc->validlt != ~0UL && lifc->validlt < now-lifc->origint)
+			if(ipifcremlifc(ifc, l) == nil)
+				continue;
+		l = &lifc->next;
+	}
+	wunlock(ifc);
+
+	return nil;
+}
--- a/os/ip/ipmux.c
+++ b/os/ip/ipmux.c
@@ -1,3 +1,6 @@
+/*
+ * IP packet filter
+ */
 #include "u.h"
 #include "../port/lib.h"
 #include "mem.h"
@@ -6,30 +9,14 @@
 #include "../port/error.h"
 
 #include "ip.h"
-#define DPRINT if(0)print
+#include "ipv6.h"
 
 typedef struct Ipmuxrock  Ipmuxrock;
 typedef struct Ipmux      Ipmux;
-typedef struct Ip6hdr     Ip6hdr;
 
 enum
 {
-	IPHDR		= 20,		/* sizeof(Ip4hdr) */
-};
-
-struct Ip6hdr
-{
-	uchar vcf[4];		/* version, class label, and flow label */ 
-	uchar ploadlen[2];	/* payload length */
-	uchar proto;		/* next header, i.e. proto */
-	uchar ttl;		/* hop limit, i.e. ttl */
-	uchar src[16];		/* IP source */
-	uchar dst[16];		/* IP destination */
-};
-
-
-enum
-{
+	Tver,
 	Tproto,
 	Tdata,
 	Tiph,
@@ -36,28 +23,8 @@
 	Tdst,
 	Tsrc,
 	Tifc,
-
-	Cother = 0,
-	Cbyte,		/* single byte */
-	Cmbyte,		/* single byte with mask */
-	Cshort,		/* single short */
-	Cmshort,	/* single short with mask */
-	Clong,		/* single long */
-	Cmlong,		/* single long with mask */
-	Cifc,
-	Cmifc,
 };
 
-char *ftname[] = 
-{
-[Tproto]	"proto",
-[Tdata]		"data",
-[Tiph]	 	"iph",
-[Tdst]		"dst",
-[Tsrc]		"src",
-[Tifc]		"ifc",
-};
-
 /*
  *  a node in the decision tree
  */
@@ -66,16 +33,12 @@
 	Ipmux	*yes;
 	Ipmux	*no;
 	uchar	type;		/* type of field(Txxxx) */
-	uchar	ctype;		/* tupe of comparison(Cxxxx) */
 	uchar	len;		/* length in bytes of item to compare */
 	uchar	n;		/* number of items val points to */
-	short	off;		/* offset of comparison */
-	short	eoff;		/* end offset of comparison */
-	uchar	skiphdr;	/* should offset start after ipheader */
+	int	off;		/* offset of comparison */
 	uchar	*val;
 	uchar	*mask;
 	uchar	*e;		/* val+n*len*/
-
 	int	ref;		/* so we can garbage collect */
 	Conv	*conv;
 };
@@ -90,6 +53,7 @@
 
 static int	ipmuxsprint(Ipmux*, int, char*, int);
 static void	ipmuxkick(void *x);
+static void	ipmuxfree(Ipmux *f);
 
 static char*
 skipwhite(char *p)
@@ -122,27 +86,33 @@
 	Ipmux *f;
 
 	p = skipwhite(p);
-	if(strncmp(p, "dst", 3) == 0){
+	if(strncmp(p, "ver", 3) == 0){
+		type = Tver;
+		off = 0;
+		len = 1;
+		p += 3;
+	}
+	else if(strncmp(p, "dst", 3) == 0){
 		type = Tdst;
-		off = offsetof(Ip4hdr, dst[0]);
-		len = IPv4addrlen;
+		off = offsetof(Ip6hdr, dst[0]);
+		len = IPaddrlen;
 		p += 3;
 	}
 	else if(strncmp(p, "src", 3) == 0){
 		type = Tsrc;
-		off = offsetof(Ip4hdr, src[0]);
-		len = IPv4addrlen;
+		off = offsetof(Ip6hdr, src[0]);
+		len = IPaddrlen;
 		p += 3;
 	}
 	else if(strncmp(p, "ifc", 3) == 0){
 		type = Tifc;
-		off = -IPv4addrlen;
-		len = IPv4addrlen;
+		off = -IPaddrlen;
+		len = IPaddrlen;
 		p += 3;
 	}
 	else if(strncmp(p, "proto", 5) == 0){
 		type = Tproto;
-		off = offsetof(Ip4hdr, proto);
+		off = offsetof(Ip6hdr, proto);
 		len = 1;
 		p += 5;
 	}
@@ -160,7 +130,7 @@
 			return nil;
 		p++;
 		off = strtoul(p, &p, 0);
-		if(off < 0 || off > (64-IPHDR))
+		if(off < 0)
 			return nil;
 		p = skipwhite(p);
 		if(*p != ':')
@@ -189,11 +159,6 @@
 	f->mask = nil;
 	f->n = 1;
 	f->ref = 1;
-	if(type == Tdata)
-		f->skiphdr = 1;
-	else
-		f->skiphdr = 0;
-
 	return f;	
 }
 
@@ -229,7 +194,7 @@
 static Ipmux*
 parsemux(char *p)
 {
-	int n, nomask;
+	int n;
 	Ipmux *f;
 	char *val;
 	char *mask;
@@ -247,7 +212,7 @@
 		goto parseerror;
 
 	/* parse mask */
-	mask = follows(val, '&');
+	mask = follows(p, '&');
 	if(mask != nil){
 		switch(f->type){
 		case Tsrc:
@@ -254,7 +219,7 @@
 		case Tdst:
 		case Tifc:
 			f->mask = smalloc(f->len);
-			v4parseip(f->mask, mask);
+			parseipmask(f->mask, mask, 0);
 			break;
 		case Tdata:
 		case Tiph:
@@ -264,15 +229,13 @@
 		default:
 			goto parseerror;
 		}
-		nomask = 0;
-	} else {
-		nomask = 1;
+	} else if(f->type == Tver){
 		f->mask = smalloc(f->len);
-		memset(f->mask, 0xff, f->len);
+		f->mask[0] = 0xF0;
 	}
 
 	/* parse vals */
-	f->n = getfields(val, vals, sizeof(vals)/sizeof(char*), 1, "|");
+	f->n = getfields(val, vals, nelem(vals), 1, "|");
 	if(f->n == 0)
 		goto parseerror;
 	f->val = smalloc(f->n*f->len);
@@ -279,10 +242,21 @@
 	v = f->val;
 	for(n = 0; n < f->n; n++){
 		switch(f->type){
+		case Tver:
+			if(f->n != 1)
+				goto parseerror;
+			if(strcmp(vals[n], "6") == 0)
+				*v = IP_VER6;
+			else if(strcmp(vals[n], "4") == 0)
+				*v = IP_VER4;
+			else
+				goto parseerror;
+			break;
 		case Tsrc:
 		case Tdst:
 		case Tifc:
-			v4parseip(v, vals[n]);
+			if(parseip(v, vals[n]) == -1)
+				goto parseerror;
 			break;
 		case Tproto:
 		case Tdata:
@@ -292,34 +266,11 @@
 		}
 		v += f->len;
 	}
-
-	f->eoff = f->off + f->len;
 	f->e = f->val + f->n*f->len;
-	f->ctype = Cother;
-	if(f->n == 1){
-		switch(f->len){
-		case 1:
-			f->ctype = nomask ? Cbyte : Cmbyte;
-			break;
-		case 2:
-			f->ctype = nomask ? Cshort : Cmshort;
-			break;
-		case 4:
-			if(f->type == Tifc)
-				f->ctype = nomask ? Cifc : Cmifc;
-			else
-				f->ctype = nomask ? Clong : Cmlong;
-			break;
-		}
-	}
 	return f;
 
 parseerror:
-	if(f->mask)
-		free(f->mask);
-	if(f->val)
-		free(f->val);
-	free(f);
+	ipmuxfree(f);
 	return nil;
 }
 
@@ -342,8 +293,7 @@
 		return n;
 
 	/* compare offsets, call earlier ones more specific */
-	n = (a->off+((int)a->skiphdr)*offsetof(Ip4hdr, data[0])) - 
-		(b->off+((int)b->skiphdr)*offsetof(Ip4hdr, data[0]));
+	n = a->off - b->off;
 	if(n != 0)
 		return n;
 
@@ -413,6 +363,10 @@
 	*nf = *f;
 	nf->no = ipmuxcopy(f->no);
 	nf->yes = ipmuxcopy(f->yes);
+	if(f->mask != nil){
+		nf->mask = smalloc(f->len);
+		memmove(nf->mask, f->mask, f->len);
+	}
 	nf->val = smalloc(f->n*f->len);
 	nf->e = nf->val + f->len*f->n;
 	memmove(nf->val, f->val, f->n*f->len);
@@ -422,8 +376,10 @@
 static void
 ipmuxfree(Ipmux *f)
 {
-	if(f->val != nil)
-		free(f->val);
+	if(f == nil)
+		return;
+	free(f->val);
+	free(f->mask);
 	free(f);
 }
 
@@ -432,10 +388,8 @@
 {
 	if(f == nil)
 		return;
-	if(f->no != nil)
-		ipmuxfree(f->no);
-	if(f->yes != nil)
-		ipmuxfree(f->yes);
+	ipmuxfree(f->no);
+	ipmuxfree(f->yes);
 	ipmuxfree(f);
 }
 
@@ -510,6 +464,8 @@
 		return ipmuxremove(&ft->no, f);
 	}
 
+	ipmuxremove(&ft->no, f->no);
+
 	/* we found a match */
 	if(--(ft->ref) == 0){
 		/*
@@ -531,8 +487,55 @@
 }
 
 /*
+ * convert to ipv4 filter
+ */
+static Ipmux*
+ipmuxconv4(Ipmux *f)
+{
+	int i, n;
+
+	if(f == nil)
+		return nil;
+
+	switch(f->type){
+	case Tproto:
+		f->off = offsetof(Ip4hdr, proto);
+		break;
+	case Tdst:
+		f->off = offsetof(Ip4hdr, dst[0]);
+		if(0){
+	case Tsrc:
+		f->off = offsetof(Ip4hdr, src[0]);
+		}
+		if(f->len != IPaddrlen)
+			break;
+		n = 0;
+		for(i = 0; i < f->n; i++){
+			if(isv4(f->val + i*IPaddrlen)){
+				memmove(f->val + n*IPv4addrlen, f->val + i*IPaddrlen + IPv4off, IPv4addrlen);
+				n++;
+			}
+		}
+		if(n == 0){
+			ipmuxtreefree(f);
+			return nil;
+		}
+		f->n = n;
+		f->len = IPv4addrlen;
+		if(f->mask != nil)
+			memmove(f->mask, f->mask+IPv4off, IPv4addrlen);
+	}
+	f->e = f->val + f->n*f->len;
+
+	f->yes = ipmuxconv4(f->yes);
+	f->no = ipmuxconv4(f->no);
+
+	return f;
+}
+
+/*
  *  connection request is a semi separated list of filters
- *  e.g. proto=17;dat[0:4]=11aa22bb;ifc=135.104.9.2&255.255.255.0
+ *  e.g. ver=4;proto=17;data[0:4]=11aa22bb;ifc=135.104.9.2&255.255.255.0
  *
  *  there's no protection against overlapping specs.
  */
@@ -568,6 +571,18 @@
 		return Ebadarg;
 	mux->conv = c;
 
+	if(chain->type != Tver) {
+		char ver6[] = "ver=6";
+		mux = parsemux(ver6);
+		mux->yes = chain;
+		mux->no = ipmuxcopy(chain);
+		chain = mux;
+	}
+	if(*chain->val == IP_VER4)
+		chain->yes = ipmuxconv4(chain->yes);
+	else
+		chain->no = ipmuxconv4(chain->no);
+
 	/* save a copy of the chain so we can later remove it */
 	mux = ipmuxcopy(chain);
 	r = (Ipmuxrock*)(c->ptcl);
@@ -642,95 +657,84 @@
 	Block *bp;
 
 	bp = qget(c->wq);
-	if(bp == nil)
-		return;
-	else {
+	if(bp != nil) {
 		Ip4hdr *ih4 = (Ip4hdr*)(bp->rp);
-		if((ih4->vihl)&0xF0 != 0x60)
+
+		if((ih4->vihl & 0xF0) != IP_VER6)
 			ipoput4(c->p->f, bp, 0, ih4->ttl, ih4->tos, nil);
-		else {
-			Ip6hdr *ih6 = (Ip6hdr*)(bp->rp);
-			ipoput6(c->p->f, bp, 0, ih6->ttl, 0, nil);
-		}
+		else
+			ipoput6(c->p->f, bp, 0, ((Ip6hdr*)ih4)->ttl, 0, nil);
 	}
 }
 
+static int
+maskmemcmp(uchar *m, uchar *v, uchar *c, int n)
+{
+	int i;
+
+	if(m == nil)
+		return memcmp(v, c, n) != 0;
+
+	for(i = 0; i < n; i++)
+		if((v[i] & m[i]) != c[i])
+			return 1;
+	return 0;
+}
+
 static void
 ipmuxiput(Proto *p, Ipifc *ifc, Block *bp)
 {
-	int len, hl;
 	Fs *f = p->f;
-	uchar *m, *h, *v, *e, *ve, *hp;
 	Conv *c;
+	Iplifc *lifc;
 	Ipmux *mux;
-	Ip4hdr *ip;
+	uchar *v;
+	Ip4hdr *ip4;
 	Ip6hdr *ip6;
+	int off, hl;
 
-	ip = (Ip4hdr*)bp->rp;
-	hl = (ip->vihl&0x0F)<<2;
+	ip4 = (Ip4hdr*)bp->rp;
+	if((ip4->vihl & 0xF0) == IP_VER4) {
+		hl = (ip4->vihl&0x0F)<<2;
+		ip6 = nil;
+	} else {
+		hl = IP6HDR;
+		ip6 = (Ip6hdr*)ip4;
+	}
 
 	if(p->priv == nil)
 		goto nomatch;
 
-	h = bp->rp;
-	len = BLEN(bp);
+	c = nil;
+	lifc = nil;
 
-	/* run the v4 filter */
+	/* run the filter */
 	rlock(f);
-	c = nil;
 	mux = f->ipmux->priv;
 	while(mux != nil){
-		if(mux->eoff > len){
-			mux = mux->no;
-			continue;
-		}
-		hp = h + mux->off + ((int)mux->skiphdr)*hl;
-		switch(mux->ctype){
-		case Cbyte:
-			if(*mux->val == *hp)
-				goto yes;
+		switch(mux->type){
+		case Tifc:
+			if(mux->len != IPaddrlen)
+				goto no;
+			for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next)
+				for(v = mux->val; v < mux->e; v += IPaddrlen)
+					if(maskmemcmp(mux->mask, lifc->local, v, IPaddrlen) == 0)
+						goto yes;
+			goto no;
+		case Tdata:
+			off = hl;
 			break;
-		case Cmbyte:
-			if((*hp & *mux->mask) == *mux->val)
-				goto yes;
-			break;
-		case Cshort:
-			if(*((ushort*)mux->val) == *(ushort*)hp)
-				goto yes;
-			break;
-		case Cmshort:
-			if((*(ushort*)hp & (*((ushort*)mux->mask))) == *((ushort*)mux->val))
-				goto yes;
-			break;
-		case Clong:
-			if(*((ulong*)mux->val) == *(ulong*)hp)
-				goto yes;
-			break;
-		case Cmlong:
-			if((*(ulong*)hp & (*((ulong*)mux->mask))) == *((ulong*)mux->val))
-				goto yes;
-			break;
-		case Cifc:
-			if(*((ulong*)mux->val) == *(ulong*)(ifc->lifc->local + IPv4off))
-				goto yes;
-			break;
-		case Cmifc:
-			if((*(ulong*)(ifc->lifc->local + IPv4off) & (*((ulong*)mux->mask))) == *((ulong*)mux->val))
-				goto yes;
-			break;
 		default:
-			v = mux->val;
-			for(e = mux->e; v < e; v = ve){
-				m = mux->mask;
-				hp = h + mux->off;
-				for(ve = v + mux->len; v < ve; v++){
-					if((*hp++ & *m++) != *v)
-						break;
-				}
-				if(v == ve)
-					goto yes;
-			}
+			off = 0;
+			break;
 		}
+		off += mux->off;
+		if(off < 0 || off + mux->len > BLEN(bp))
+			goto no;
+		for(v = mux->val; v < mux->e; v += mux->len)
+			if(maskmemcmp(mux->mask, bp->rp + off, v, mux->len) == 0)
+				goto yes;
+no:
 		mux = mux->no;
 		continue;
 yes:
@@ -743,28 +747,24 @@
 	if(c != nil){
 		/* tack on interface address */
 		bp = padblock(bp, IPaddrlen);
-		ipmove(bp->rp, ifc->lifc->local);
-		bp = concatblock(bp);
-		if(bp != nil)
-			if(qpass(c->rq, bp) < 0)
-				print("Q");
+		if(lifc == nil)
+			lifc = ifc->lifc;
+		ipmove(bp->rp, lifc != nil ? lifc->local : IPnoaddr);
+		qpass(c->rq, concatblock(bp));
 		return;
 	}
 
 nomatch:
 	/* doesn't match any filter, hand it to the specific protocol handler */
-	ip = (Ip4hdr*)bp->rp;
-	if((ip->vihl&0xF0)==0x40) {
-		p = f->t2p[ip->proto];
-	} else {
-		ip6 = (Ip6hdr*)bp->rp;
+	if(ip6 != nil)
 		p = f->t2p[ip6->proto];
-	}
-	if(p && p->rcv)
-		(*p->rcv)(p, ifc, bp);
 	else
-		freeblist(bp);
-	return;
+		p = f->t2p[ip4->proto];
+	if(p != nil && p->rcv != nil){
+		(*p->rcv)(p, ifc, bp);
+		return;
+	}
+	freeblist(bp);
 }
 
 static int
@@ -780,11 +780,14 @@
 		n += snprint(buf+n, len-n, "\n");
 		return n;
 	}
-	n += snprint(buf+n, len-n, "h[%d:%d]&", 
-               mux->off+((int)mux->skiphdr)*((int)offsetof(Ip4hdr, data[0])), 
-               mux->off+(((int)mux->skiphdr)*((int)offsetof(Ip4hdr, data[0])))+mux->len-1);
-	for(i = 0; i < mux->len; i++)
-		n += snprint(buf+n, len - n, "%2.2ux", mux->mask[i]);
+	n += snprint(buf+n, len-n, "%s[%d:%d]", 
+		mux->type == Tdata ? "data": "iph",
+		mux->off, mux->off+mux->len-1);
+	if(mux->mask != nil){
+		n += snprint(buf+n, len-n, "&");
+		for(i = 0; i < mux->len; i++)
+			n += snprint(buf+n, len - n, "%2.2ux", mux->mask[i]);
+	}
 	n += snprint(buf+n, len-n, "=");
 	v = mux->val;
 	for(j = 0; j < mux->n; j++){
--- a/os/ip/iproute.c
+++ b/os/ip/iproute.c
@@ -12,10 +12,10 @@
 static void	calcd(Route*);
 
 /* these are used for all instances of IP */
-Route*	v4freelist;
-Route*	v6freelist;
-RWlock	routelock;
-ulong	v4routegeneration, v6routegeneration;
+static Route*	v4freelist;
+static Route*	v6freelist;
+static RWlock	routelock;
+static ulong	v4routegeneration, v6routegeneration;
 
 static void
 freeroute(Route *r)
@@ -22,6 +22,7 @@
 {
 	Route **l;
 
+	r->ref = 0;
 	r->left = nil;
 	r->right = nil;
 	if(r->type & Rv4)
@@ -35,9 +36,8 @@
 static Route*
 allocroute(int type)
 {
-	Route *r;
+	Route *r, **l;
 	int n;
-	Route **l;
 
 	if(type & Rv4){
 		n = sizeof(RouteTree) + sizeof(V4route);
@@ -72,9 +72,9 @@
 		return;
 
 	l = allocroute(r->type);
+	l->left = r;
 	l->mid = *q;
 	*q = l;
-	l->left = r;
 }
 
 /*
@@ -99,11 +99,11 @@
  */
 enum
 {
-	Rpreceeds,
-	Rfollows,
-	Requals,
-	Rcontains,
-	Rcontained,
+	Rpreceeds,	/* a left of b */
+	Rfollows,	/* a right of b */
+	Requals,	/* a equals b */
+	Rcontains,	/* a contains b */
+	Roverlaps,	/* a overlaps b */
 };
 
 static int
@@ -112,44 +112,88 @@
 	if(a->type & Rv4){
 		if(a->v4.endaddress < b->v4.address)
 			return Rpreceeds;
-
 		if(a->v4.address > b->v4.endaddress)
 			return Rfollows;
-
 		if(a->v4.address <= b->v4.address
 		&& a->v4.endaddress >= b->v4.endaddress){
 			if(a->v4.address == b->v4.address
-			&& a->v4.endaddress == b->v4.endaddress)
-				return Requals;
+			&& a->v4.endaddress == b->v4.endaddress){
+				if(a->v4.source <= b->v4.source
+				&& a->v4.endsource >= b->v4.endsource){
+					if(a->v4.source == b->v4.source
+					&& a->v4.endsource == b->v4.endsource)
+						return Requals;
+					return Rcontains;
+				}
+				return Roverlaps;
+			}
 			return Rcontains;
 		}
-		return Rcontained;
+		return Roverlaps;
 	}
 
 	if(lcmp(a->v6.endaddress, b->v6.address) < 0)
 		return Rpreceeds;
-
 	if(lcmp(a->v6.address, b->v6.endaddress) > 0)
 		return Rfollows;
-
 	if(lcmp(a->v6.address, b->v6.address) <= 0
 	&& lcmp(a->v6.endaddress, b->v6.endaddress) >= 0){
 		if(lcmp(a->v6.address, b->v6.address) == 0
-		&& lcmp(a->v6.endaddress, b->v6.endaddress) == 0)
-				return Requals;
+		&& lcmp(a->v6.endaddress, b->v6.endaddress) == 0){
+			if(lcmp(a->v6.source, b->v6.source) <= 0
+			&& lcmp(a->v6.endsource, b->v6.endsource) >= 0){
+				if(lcmp(a->v6.source, b->v6.source) == 0
+				&& lcmp(a->v6.endsource, b->v6.endsource) == 0)
+					return Requals;
+				return Rcontains;
+			}
+			return Roverlaps;
+		}
 		return Rcontains;
 	}
+	return Roverlaps;
+}
 
-	return Rcontained;
+/* return 1 if a matches b, otherwise 0 */
+static int
+matchroute(Route *a, Route *b)
+{
+	if(a == b)
+		return 1;
+
+	if((a->type^b->type) & (Rifc|Runi|Rmulti|Rbcast))
+		return 0;
+
+	if(a->type & Rv4){
+		if(memcmp(a->v4.gate, IPnoaddr+IPv4off, IPv4addrlen) != 0
+		&& memcmp(a->v4.gate, b->v4.gate, IPv4addrlen) != 0)
+			return 0;
+	} else {
+		if(ipcmp(a->v6.gate, IPnoaddr) != 0
+		&& ipcmp(a->v6.gate, b->v6.gate) != 0)
+			return 0;
+	}
+
+	if(a->ifc != nil && b->ifc != nil && (a->ifc != b->ifc || a->ifcid != b->ifcid))
+		return 0;
+
+	if(*a->tag != 0 && strncmp(a->tag, b->tag, sizeof(a->tag)) != 0)
+		return 0;
+
+	return 1;
 }
 
 static void
 copygate(Route *old, Route *new)
 {
+	old->type = new->type;
+	old->ifc = new->ifc;
+	old->ifcid = new->ifcid;
 	if(new->type & Rv4)
 		memmove(old->v4.gate, new->v4.gate, IPv4addrlen);
 	else
-		memmove(old->v6.gate, new->v6.gate, IPaddrlen);
+		ipmove(old->v6.gate, new->v6.gate);
+	strncpy(old->tag, new->tag, sizeof(new->tag));
 }
 
 /*
@@ -162,12 +206,12 @@
 
 	l = p->left;
 	r = p->right;
-	p->left = 0;
-	p->right = 0;
+	p->left = nil;
+	p->right = nil;
 	addnode(f, root, p);
-	if(l)
+	if(l != nil)
 		walkadd(f, root, l);
-	if(r)
+	if(r != nil)
 		walkadd(f, root, r);
 }
 
@@ -180,16 +224,16 @@
 	Route *q;
 	int d;
 
-	if(p) {
+	if(p != nil) {
 		d = 0;
 		q = p->left;
-		if(q)
+		if(q != nil)
 			d = q->depth;
 		q = p->right;
-		if(q && q->depth > d)
+		if(q != nil && q->depth > d)
 			d = q->depth;
 		q = p->mid;
-		if(q && q->depth > d)
+		if(q != nil && q->depth > d)
 			d = q->depth;
 		p->depth = d+1;
 	}
@@ -210,8 +254,8 @@
 	 * rotate tree node
 	 */
 	p = *cur;
-	dl = 0; if(l = p->left) dl = l->depth;
-	dr = 0; if(r = p->right) dr = r->depth;
+	dl = 0; if((l = p->left) != nil) dl = l->depth;
+	dr = 0; if((r = p->right) != nil) dr = r->depth;
 
 	if(dl > dr+1) {
 		p->left = l->right;
@@ -239,7 +283,7 @@
 	Route *p;
 
 	p = *cur;
-	if(p == 0) {
+	if(p == nil) {
 		*cur = new;
 		new->depth = 1;
 		return;
@@ -269,15 +313,13 @@
 		 *  supercede the old entry if the old one isn't
 		 *  a local interface.
 		 */
-		if((p->type & Rifc) == 0){
-			p->type = new->type;
-			p->ifcid = -1;
+		if((p->type & Rifc) == 0)
 			copygate(p, new);
-		} else if(new->type & Rifc)
+		else if(new->type & Rifc)
 			p->ref++;
 		freeroute(new);
 		break;
-	case Rcontained:
+	case Roverlaps:
 		addnode(f, &p->mid, new);
 		break;
 	}
@@ -285,241 +327,316 @@
 	balancetree(cur);
 }
 
-#define	V4H(a)	((a&0x07ffffff)>>(32-Lroot-5))
-
-void
-v4addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type)
+/*
+ *  find node matching r
+ */
+static Route**
+looknode(Route **cur, Route *r)
 {
 	Route *p;
-	ulong sa;
-	ulong m;
-	ulong ea;
-	int h, eh;
 
-	m = nhgetl(mask);
-	sa = nhgetl(a) & m;
-	ea = sa | ~m;
+	for(;;){
+		p = *cur;
+		if(p == nil)
+			return nil;
+		switch(rangecompare(r, p)){
+		case Rcontains:
+			return nil;
+		case Rpreceeds:
+			cur = &p->left;
+			break;
+		case Rfollows:
+			cur = &p->right;
+			break;
+		case Roverlaps:
+			cur = &p->mid;
+			break;
+		case Requals:
+			if((p->type & Rifc) == 0 && !matchroute(r, p))
+				return nil;
+			return cur;
+		}
+	}
+}
 
-	eh = V4H(ea);
-	for(h=V4H(sa); h<=eh; h++) {
-		p = allocroute(Rv4 | type);
-		p->v4.address = sa;
-		p->v4.endaddress = ea;
-		memmove(p->v4.gate, gate, sizeof(p->v4.gate));
-		memmove(p->tag, tag, sizeof(p->tag));
+static Route*
+looknodetag(Route *r, char *tag)
+{
+	Route *x;
 
-		wlock(&routelock);
-		addnode(f, &f->v4root[h], p);
-		while(p = f->queue) {
-			f->queue = p->mid;
-			walkadd(f, &f->v4root[h], p->left);
-			freeroute(p);
-		}
-		wunlock(&routelock);
+	if(r == nil)
+		return nil;
+
+	if((x = looknodetag(r->mid, tag)) != nil)
+		return x;
+	if((x = looknodetag(r->left, tag)) != nil)
+		return x;
+	if((x = looknodetag(r->right, tag)) != nil)
+		return x;
+
+	if((r->type & Rifc) == 0){
+		if(tag == nil || strncmp(tag, r->tag, sizeof(r->tag)) == 0)
+			return r;
 	}
-	v4routegeneration++;
 
-	ipifcaddroute(f, Rv4, a, mask, gate, type);
+	return nil;
 }
 
-#define	V6H(a)	(((a)[IPllen-1] & 0x07ffffff)>>(32-Lroot-5))
-#define ISDFLT(a, mask, tag) ((ipcmp((a),v6Unspecified)==0) && (ipcmp((mask),v6Unspecified)==0) && (strcmp((tag), "ra")!=0))
+#define	V4H(a)	((a&0x07ffffff)>>(32-Lroot-5))
+#define	V6H(a)	(((a)[IPllen-1]&0x07ffffff)>>(32-Lroot-5))
 
-void
-v6addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type)
+static void
+routeadd(Fs *f, Route *r)
 {
-	Route *p;
-	ulong sa[IPllen], ea[IPllen];
-	ulong x, y;
-	int h, eh;
+	Route **h, **e, *p;
 
-	/*
-	if(ISDFLT(a, mask, tag))
-		f->v6p->cdrouter = -1;
-	*/
+	if(r->type & Rv4){
+		h = &f->v4root[V4H(r->v4.address)];
+		e = &f->v4root[V4H(r->v4.endaddress)];
+	} else {
+		h = &f->v6root[V6H(r->v6.address)];
+		e = &f->v6root[V6H(r->v6.endaddress)];
+	}
 
+	for(; h <= e; h++) {
+		p = allocroute(r->type);
 
-	for(h = 0; h < IPllen; h++){
-		x = nhgetl(a+4*h);
-		y = nhgetl(mask+4*h);
-		sa[h] = x & y;
-		ea[h] = x | ~y;
-	}
+		p->ifc = r->ifc;
+		p->ifcid = r->ifcid;
 
-	eh = V6H(ea);
-	for(h = V6H(sa); h <= eh; h++) {
-		p = allocroute(type);
-		memmove(p->v6.address, sa, IPaddrlen);
-		memmove(p->v6.endaddress, ea, IPaddrlen);
-		memmove(p->v6.gate, gate, IPaddrlen);
-		memmove(p->tag, tag, sizeof(p->tag));
+		if(r->type & Rv4)
+			memmove(&p->v4, &r->v4, sizeof(r->v4));
+		else
+			memmove(&p->v6, &r->v6, sizeof(r->v6));
 
-		wlock(&routelock);
-		addnode(f, &f->v6root[h], p);
-		while(p = f->queue) {
+		memmove(p->tag, r->tag, sizeof(r->tag));
+
+		addnode(f, h, p);
+		while((p = f->queue) != nil) {
 			f->queue = p->mid;
-			walkadd(f, &f->v6root[h], p->left);
+			walkadd(f, h, p->left);
 			freeroute(p);
 		}
-		wunlock(&routelock);
 	}
-	v6routegeneration++;
 
-	ipifcaddroute(f, 0, a, mask, gate, type);
+	if(r->type & Rv4)
+		v4routegeneration++;
+	else
+		v6routegeneration++;
 }
 
-Route**
-looknode(Route **cur, Route *r)
+static void
+routerem(Fs *f, Route *r)
 {
-	Route *p;
+	Route **h, **e, **l, *p;
 
-	for(;;){
-		p = *cur;
-		if(p == 0)
-			return 0;
-	
-		switch(rangecompare(r, p)){
-		case Rcontains:
-			return 0;
-		case Rpreceeds:
-			cur = &p->left;
-			break;
-		case Rfollows:
-			cur = &p->right;
-			break;
-		case Rcontained:
-			cur = &p->mid;
-			break;
-		case Requals:
-			return cur;
+	if(r->type & Rv4){
+		h = &f->v4root[V4H(r->v4.address)];
+		e = &f->v4root[V4H(r->v4.endaddress)];
+	} else {
+		h = &f->v6root[V6H(r->v6.address)];
+		e = &f->v6root[V6H(r->v6.endaddress)];
+	}
+
+	for(; h <= e; h++) {
+		if((l = looknode(h, r)) == nil)
+			continue;
+		p = *l;
+		if(--(p->ref) != 0)
+			continue;
+		*l = nil;
+		addqueue(&f->queue, p->left);
+		addqueue(&f->queue, p->mid);
+		addqueue(&f->queue, p->right);
+		freeroute(p);
+
+		while((p = f->queue) != nil) {
+			f->queue = p->mid;
+			walkadd(f, h, p->left);
+			freeroute(p);
 		}
 	}
+
+	if(r->type & Rv4)
+		v4routegeneration++;
+	else
+		v6routegeneration++;
 }
 
-void
-v4delroute(Fs *f, uchar *a, uchar *mask, int dolock)
+static Route
+mkroute(uchar *a, uchar *mask, uchar *s, uchar *smask, uchar *gate, int type, Ipifc *ifc, char *tag)
 {
-	Route **r, *p;
-	Route rt;
-	int h, eh;
-	ulong m;
+	ulong x, y;
+	Route r;
+	int h;
 
-	m = nhgetl(mask);
-	rt.v4.address = nhgetl(a) & m;
-	rt.v4.endaddress = rt.v4.address | ~m;
-	rt.type = Rv4;
+	memset(&r, 0, sizeof(r));
 
-	eh = V4H(rt.v4.endaddress);
-	for(h=V4H(rt.v4.address); h<=eh; h++) {
-		if(dolock)
-			wlock(&routelock);
-		r = looknode(&f->v4root[h], &rt);
-		if(r) {
-			p = *r;
-			if(--(p->ref) == 0){
-				*r = 0;
-				addqueue(&f->queue, p->left);
-				addqueue(&f->queue, p->mid);
-				addqueue(&f->queue, p->right);
-				freeroute(p);
-				while(p = f->queue) {
-					f->queue = p->mid;
-					walkadd(f, &f->v4root[h], p->left);
-					freeroute(p);
-				}
-			}
+	r.type = type;
+
+	if(type & Rv4){
+		x = nhgetl(a+IPv4off);
+		y = nhgetl(mask+IPv4off);
+		r.v4.address = x & y;
+		r.v4.endaddress = x | ~y;
+
+		x = nhgetl(s+IPv4off);
+		y = nhgetl(smask+IPv4off);
+		if(y != 0)
+			r.type |= Rsrc;
+		r.v4.source = x & y;
+		r.v4.endsource = x | ~y;
+
+		memmove(r.v4.gate, gate+IPv4off, IPv4addrlen);
+	} else {
+		for(h = 0; h < IPllen; h++){
+			x = nhgetl(a+4*h);
+			y = nhgetl(mask+4*h);
+			r.v6.address[h] = x & y;
+			r.v6.endaddress[h] = x | ~y;
+
+			x = nhgetl(s+4*h);
+			y = nhgetl(smask+4*h);
+			if(y != 0)
+				r.type |= Rsrc;
+			r.v6.source[h] = x & y;
+			r.v6.endsource[h] = x | ~y;
 		}
-		if(dolock)
-			wunlock(&routelock);
+
+		memmove(r.v6.gate, gate, IPaddrlen);
 	}
-	v4routegeneration++;
 
-	ipifcremroute(f, Rv4, a, mask);
+	if(ifc != nil){
+		r.ifc = ifc;
+		r.ifcid = ifc->ifcid;
+	}
+
+	if(tag != nil)
+		strncpy(r.tag, tag, sizeof(r.tag));
+
+	return r;
 }
 
 void
-v6delroute(Fs *f, uchar *a, uchar *mask, int dolock)
+addroute(Fs *f, uchar *a, uchar *mask, uchar *s, uchar *smask, uchar *gate, int type, Ipifc *ifc, char *tag)
 {
-	Route **r, *p;
-	Route rt;
-	int h, eh;
-	ulong x, y;
+	Route r = mkroute(a, mask, s, smask, gate, type, ifc, tag);
+	wlock(&routelock);
+	routeadd(f, &r);
+	wunlock(&routelock);
+}
 
-	for(h = 0; h < IPllen; h++){
-		x = nhgetl(a+4*h);
-		y = nhgetl(mask+4*h);
-		rt.v6.address[h] = x & y;
-		rt.v6.endaddress[h] = x | ~y;
+void
+remroute(Fs *f, uchar *a, uchar *mask, uchar *s, uchar *smask, uchar *gate, int type, Ipifc *ifc, char *tag)
+{
+	Route r = mkroute(a, mask, s, smask, gate, type, ifc, tag);
+	wlock(&routelock);
+	routerem(f, &r);
+	wunlock(&routelock);
+}
+
+/* get the outgoing interface for route r */
+static Ipifc*
+routefindipifc(Route *r, Fs *f)
+{
+	uchar local[IPaddrlen], gate[IPaddrlen];
+	Ipifc *ifc;
+	int i;
+
+	ifc = r->ifc;
+	if(ifc != nil && ifc->ifcid == r->ifcid)
+		return ifc;
+
+	if(r->type & Rsrc) {
+		if(r->type & Rv4) {
+			hnputl(local+IPv4off, r->v4.source);
+			memmove(local, v4prefix, IPv4off);
+		} else {
+			for(i = 0; i < IPllen; i++)
+				hnputl(local+4*i, r->v6.source[i]);
+		}
+	} else {
+		ipmove(local, IPnoaddr);
 	}
-	rt.type = 0;
 
-	eh = V6H(rt.v6.endaddress);
-	for(h=V6H(rt.v6.address); h<=eh; h++) {
-		if(dolock)
-			wlock(&routelock);
-		r = looknode(&f->v6root[h], &rt);
-		if(r) {
-			p = *r;
-			if(--(p->ref) == 0){
-				*r = 0;
-				addqueue(&f->queue, p->left);
-				addqueue(&f->queue, p->mid);
-				addqueue(&f->queue, p->right);
-				freeroute(p);
-				while(p = f->queue) {
-					f->queue = p->mid;
-					walkadd(f, &f->v6root[h], p->left);
-					freeroute(p);
-				}
-			}
+	if(r->type & Rifc) {
+		if(r->type & Rv4) {
+			hnputl(gate+IPv4off, r->v4.address);
+			memmove(gate, v4prefix, IPv4off);
+		} else {
+			for(i = 0; i < IPllen; i++)
+				hnputl(gate+4*i, r->v6.address[i]);
 		}
-		if(dolock)
-			wunlock(&routelock);
+	} else {
+		if(r->type & Rv4)
+			v4tov6(gate, r->v4.gate);
+		else
+			ipmove(gate, r->v6.gate);
 	}
-	v6routegeneration++;
 
-	ipifcremroute(f, 0, a, mask);
+	if((ifc = findipifc(f, local, gate, r->type)) == nil)
+		return nil;
+
+	r->ifc = ifc;
+	r->ifcid = ifc->ifcid;
+	return ifc;
 }
 
+/*
+ * v4lookup, v6lookup:
+ *  lookup a route to destination address a from source address s
+ *  and return the route. returns nil if no route was found.
+ *  an optional Routehint can be passed in rh to cache the lookup.
+ *
+ *  for v4lookup, addresses are in 4 byte format.
+ */
 Route*
-v4lookup(Fs *f, uchar *a, Conv *c)
+v4lookup(Fs *f, uchar *a, uchar *s, Routehint *rh)
 {
+	ulong la, ls;
 	Route *p, *q;
-	ulong la;
-	uchar gate[IPaddrlen];
 	Ipifc *ifc;
 
-	if(c != nil && c->r != nil && c->r->ifc != nil && c->rgen == v4routegeneration)
-		return c->r;
+	if(rh != nil
+	&& rh->rgen == v4routegeneration
+	&& (q = rh->r) != nil
+	&& (ifc = q->ifc) != nil
+	&& q->ifcid == ifc->ifcid
+	&& q->ref > 0)
+		return q;
 
 	la = nhgetl(a);
+	ls = nhgetl(s);
 	q = nil;
-	for(p=f->v4root[V4H(la)]; p;)
-		if(la >= p->v4.address) {
-			if(la <= p->v4.endaddress) {
-				q = p;
-				p = p->mid;
-			} else
-				p = p->right;
-		} else
+	for(p = f->v4root[V4H(la)]; p != nil;){
+		if(la < p->v4.address){
 			p = p->left;
-
-	if(q && (q->ifc == nil || q->ifcid != q->ifc->ifcid)){
-		if(q->type & Rifc) {
-			hnputl(gate+IPv4off, q->v4.address);
-			memmove(gate, v4prefix, IPv4off);
-		} else
-			v4tov6(gate, q->v4.gate);
-		ifc = findipifc(f, gate, q->type);
-		if(ifc == nil)
-			return nil;
-		q->ifc = ifc;
-		q->ifcid = ifc->ifcid;
+			continue;
+		}
+		if(la > p->v4.endaddress){
+			p = p->right;
+			continue;
+		}
+		if(p->type & Rsrc){
+			if(ls < p->v4.source){
+				p = p->mid;
+				continue;
+			}
+			if(ls > p->v4.endsource){
+				p = p->mid;
+				continue;
+			}
+		}
+		q = p;
+		p = p->mid;
 	}
 
-	if(c != nil){
-		c->r = q;
-		c->rgen = v4routegeneration;
+	if(q == nil || q->ref == 0 || routefindipifc(q, f) == nil)
+		return nil;
+
+	if(rh != nil){
+		rh->r = q;
+		rh->rgen = v4routegeneration;
 	}
 
 	return q;
@@ -526,29 +643,35 @@
 }
 
 Route*
-v6lookup(Fs *f, uchar *a, Conv *c)
+v6lookup(Fs *f, uchar *a, uchar *s, Routehint *rh)
 {
-	Route *p, *q;
-	ulong la[IPllen];
-	int h;
+	ulong la[IPllen], ls[IPllen];
 	ulong x, y;
-	uchar gate[IPaddrlen];
+	Route *p, *q;
 	Ipifc *ifc;
+	int h;
 
-	if(memcmp(a, v4prefix, IPv4off) == 0){
-		q = v4lookup(f, a+IPv4off, c);
-		if(q != nil)
-			return q;
+	if(isv4(s)){
+		if(isv4(a))
+			return v4lookup(f, a+IPv4off, s+IPv4off, rh);
+		return nil;
 	}
 
-	if(c != nil && c->r != nil && c->r->ifc != nil && c->rgen == v6routegeneration)
-		return c->r;
+	if(rh != nil
+	&& rh->rgen == v6routegeneration
+	&& (q = rh->r) != nil
+	&& (ifc = q->ifc) != nil
+	&& q->ifcid == ifc->ifcid
+	&& q->ref > 0)
+		return q;
 
-	for(h = 0; h < IPllen; h++)
+	for(h = 0; h < IPllen; h++){
 		la[h] = nhgetl(a+4*h);
+		ls[h] = nhgetl(s+4*h);
+	}
 
-	q = 0;
-	for(p=f->v6root[V6H(la)]; p;){
+	q = nil;
+	for(p = f->v6root[V6H(la)]; p != nil;){
 		for(h = 0; h < IPllen; h++){
 			x = la[h];
 			y = p->v6.address[h];
@@ -571,42 +694,202 @@
 			}
 			break;
 		}
+		if(p->type & Rsrc){
+			for(h = 0; h < IPllen; h++){
+				x = ls[h];
+				y = p->v6.source[h];
+				if(x == y)
+					continue;
+				if(x < y){
+					p = p->mid;
+					goto next;
+				}
+				break;
+			}
+			for(h = 0; h < IPllen; h++){
+				x = ls[h];
+				y = p->v6.endsource[h];
+				if(x == y)
+					continue;
+				if(x > y){
+					p = p->mid;
+					goto next;
+				}
+				break;
+			}
+		}
 		q = p;
 		p = p->mid;
 next:		;
 	}
 
-	if(q && (q->ifc == nil || q->ifcid != q->ifc->ifcid)){
-		if(q->type & Rifc) {
-			for(h = 0; h < IPllen; h++)
-				hnputl(gate+4*h, q->v6.address[h]);
-			ifc = findipifc(f, gate, q->type);
-		} else
-			ifc = findipifc(f, q->v6.gate, q->type);
-		if(ifc == nil)
-			return nil;
-		q->ifc = ifc;
-		q->ifcid = ifc->ifcid;
+	if(q == nil || q->ref == 0 || routefindipifc(q, f) == nil)
+		return nil;
+
+	if(rh != nil){
+		rh->r = q;
+		rh->rgen = v6routegeneration;
 	}
-	if(c != nil){
-		c->r = q;
-		c->rgen = v6routegeneration;
-	}
 	
 	return q;
 }
 
+/*
+ * v4source, v6source:
+ *  lookup a route to destination address a and also find
+ *  a suitable source address s on the outgoing interface.
+ *  return the route on success or nil when no route
+ *  was found.
+ *
+ *  for v4source, addresses are in 4 byte format.
+ */
+Route*
+v4source(Fs *f, uchar *a, uchar *s)
+{
+	uchar src[IPv4addrlen];
+	int splen;
+	ulong x, la;
+	Route *p, *q;
+	Ipifc *ifc;
+
+	q = nil;
+	la = nhgetl(a);
+	rlock(&routelock);
+	for(p = f->v4root[V4H(la)]; p != nil;){
+		if(la < p->v4.address){
+			p = p->left;
+			continue;
+		}
+		if(la > p->v4.endaddress){
+			p = p->right;
+			continue;
+		}
+		splen = 0;
+		if(p->type & Rsrc){
+			/* calculate local prefix length for source specific routes */
+			for(x = ~(p->v4.endsource ^ p->v4.source); x & 0x80000000UL; x <<= 1)
+				splen++;
+			hnputl(src, p->v4.source);
+		}
+		if((ifc = routefindipifc(p, f)) == nil
+		|| !ipv4local(ifc, src, splen, (p->type & (Rifc|Rbcast|Rmulti|Rv4))==Rv4? p->v4.gate: a)){
+			p = p->mid;
+			continue;
+		}
+		memmove(s, src, IPv4addrlen);
+		q = p;
+		p = p->mid;
+	}
+	runlock(&routelock);
+	return q;
+}
+
+Route*
+v6source(Fs *f, uchar *a, uchar *s)
+{
+	uchar src[IPaddrlen];
+	int splen, h;
+	ulong x, y, la[IPllen];
+	Route *p, *q;
+	Ipifc *ifc;
+
+	q = nil;
+	for(h = 0; h < IPllen; h++)
+		la[h] = nhgetl(a+4*h);
+	rlock(&routelock);
+	for(p = f->v6root[V6H(la)]; p != nil;){
+		for(h = 0; h < IPllen; h++){
+			x = la[h];
+			y = p->v6.address[h];
+			if(x == y)
+				continue;
+			if(x < y){
+				p = p->left;
+				goto next;
+			}
+			break;
+		}
+		for(h = 0; h < IPllen; h++){
+			x = la[h];
+			y = p->v6.endaddress[h];
+			if(x == y)
+				continue;
+			if(x > y){
+				p = p->right;
+				goto next;
+			}
+			break;
+		}
+		splen = 0;
+		if(p->type & Rsrc){
+			/* calculate local prefix length for source specific routes */
+			for(h = 0; h < IPllen; h++){
+				hnputl(src+4*h, p->v6.source[h]);
+				if((x = ~(p->v6.endsource[h] ^ p->v6.source[h])) != ~0UL){
+					for(; x & 0x80000000UL; x <<= 1)
+						splen++;
+					break;
+				}
+				splen += 32;
+			}
+		}
+		if((ifc = routefindipifc(p, f)) == nil
+		|| !ipv6local(ifc, src, splen, a)){
+			p = p->mid;
+			continue;
+		}
+		ipmove(s, src);
+		q = p;
+		p = p->mid;
+next:		;
+	}
+	runlock(&routelock);
+	return q;
+}
+
+static int
+parseroutetype(char *p)
+{
+	int type = 0;
+	switch(*p++){
+	default:	return -1;	
+	case '4':	type |= Rv4;
+	case '6':	break;
+	}
+	for(;;) switch(*p++){
+	default: 
+		return -1;
+	case 'i':
+		if(((type ^= Rifc) & Rifc) != Rifc) return -1;
+		break;
+	case 'u':
+		if(((type ^= Runi) & (Runi|Rbcast|Rmulti)) != Runi) return -1;
+		break;
+	case 'b':
+		if(((type ^= Rbcast) & (Runi|Rbcast|Rmulti)) != Rbcast) return -1;
+		break;
+	case 'm':
+		if(((type ^= Rmulti) & (Runi|Rbcast|Rmulti)) != Rmulti) return -1;
+		break;
+	case 'p':
+		if(((type ^= Rptpt) & Rptpt) != Rptpt) return -1;
+		break;
+	case '\0':
+		return type;
+	}
+}
+
 void
-routetype(int type, char *p)
+routetype(int type, char p[8])
 {
-	memset(p, ' ', 4);
-	p[4] = 0;
 	if(type & Rv4)
 		*p++ = '4';
 	else
 		*p++ = '6';
+
 	if(type & Rifc)
 		*p++ = 'i';
+
 	if(type & Runi)
 		*p++ = 'u';
 	else if(type & Rbcast)
@@ -613,14 +896,14 @@
 		*p++ = 'b';
 	else if(type & Rmulti)
 		*p++ = 'm';
+
 	if(type & Rptpt)
-		*p = 'p';
+		*p++ = 'p';
+	*p = 0;
 }
 
-char *rformat = "%-15I %-4M %-15I %4.4s %4.4s %3s\n";
-
-void
-convroute(Route *r, uchar *addr, uchar *mask, uchar *gate, char *t, int *nifc)
+static void
+convroute(Route *r, uchar *addr, uchar *mask, uchar *src, uchar *smask, uchar *gate)
 {
 	int i;
 
@@ -627,8 +910,16 @@
 	if(r->type & Rv4){
 		memmove(addr, v4prefix, IPv4off);
 		hnputl(addr+IPv4off, r->v4.address);
+
 		memset(mask, 0xff, IPv4off);
 		hnputl(mask+IPv4off, ~(r->v4.endaddress ^ r->v4.address));
+
+		memmove(src, v4prefix, IPv4off);
+		hnputl(src+IPv4off, r->v4.source);
+
+		memset(smask, 0xff, IPv4off);
+		hnputl(smask+IPv4off, ~(r->v4.endsource ^ r->v4.source));
+
 		memmove(gate, v4prefix, IPv4off);
 		memmove(gate+IPv4off, r->v4.gate, IPv4addrlen);
 	} else {
@@ -635,162 +926,186 @@
 		for(i = 0; i < IPllen; i++){
 			hnputl(addr + 4*i, r->v6.address[i]);
 			hnputl(mask + 4*i, ~(r->v6.endaddress[i] ^ r->v6.address[i]));
+			hnputl(src + 4*i, r->v6.source[i]);
+			hnputl(smask + 4*i, ~(r->v6.endsource[i] ^ r->v6.source[i]));
 		}
 		memmove(gate, r->v6.gate, IPaddrlen);
 	}
+}
 
-	routetype(r->type, t);
+static char*
+seprintroute(char *p, char *e, Route *r)
+{
+	uchar addr[IPaddrlen], mask[IPaddrlen], src[IPaddrlen], smask[IPaddrlen], gate[IPaddrlen];
+	char type[8], ifbuf[4], *iname;
 
-	if(r->ifc)
-		*nifc = r->ifc->conv->x;
+	convroute(r, addr, mask, src, smask, gate);
+	routetype(r->type, type);
+	if(r->ifc != nil && r->ifcid == r->ifc->ifcid)
+		snprint(iname = ifbuf, sizeof ifbuf, "%d", r->ifc->conv->x);
 	else
-		*nifc = -1;
+		iname = "-";
+	return seprint(p, e, "%-15I %-4M %-15I %-4s %4.4s %3s %-15I %-4M\n",
+		addr, mask, gate, type, r->tag, iname, src, smask);
 }
 
-/*
- *  this code is not in rr to reduce stack size
- */
-static void
-sprintroute(Route *r, Routewalk *rw)
+typedef struct Routewalk Routewalk;
+struct Routewalk
 {
-	int nifc, n;
-	char t[5], *iname, ifbuf[5];
-	uchar addr[IPaddrlen], mask[IPaddrlen], gate[IPaddrlen];
-	char *p;
+	int	o;
+	int	h;
+	char*	p;
+	char*	e;
+};
 
-	convroute(r, addr, mask, gate, t, &nifc);
-	iname = "-";
-	if(nifc != -1) {
-		iname = ifbuf;
-		snprint(ifbuf, sizeof ifbuf, "%d", nifc);
-	}
-	p = seprint(rw->p, rw->e, rformat, addr, mask, gate, t, r->tag, iname);
+static int
+rr1(Routewalk *rw, Route *r)
+{
+	int n = seprintroute(rw->p, rw->e, r) - rw->p;
 	if(rw->o < 0){
-		n = p - rw->p;
 		if(n > -rw->o){
-			memmove(rw->p, rw->p-rw->o, n+rw->o);
-			rw->p = p + rw->o;
+			memmove(rw->p, rw->p - rw->o, n + rw->o);
+			rw->p += n + rw->o;
 		}
 		rw->o += n;
 	} else
-		rw->p = p;
+		rw->p += n;
+	return rw->p < rw->e;
 }
 
-/*
- *  recurse descending tree, applying the function in Routewalk
- */
 static int
 rr(Route *r, Routewalk *rw)
 {
 	int h;
 
-	if(rw->e <= rw->p)
-		return 0;
 	if(r == nil)
 		return 1;
-
 	if(rr(r->left, rw) == 0)
 		return 0;
-
 	if(r->type & Rv4)
 		h = V4H(r->v4.address);
 	else
 		h = V6H(r->v6.address);
-
-	if(h == rw->h)
-		rw->walk(r, rw);
-
+	if(h == rw->h){
+		if(rr1(rw, r) == 0)
+			return 0;
+	}
 	if(rr(r->mid, rw) == 0)
 		return 0;
-
 	return rr(r->right, rw);
 }
 
-void
-ipwalkroutes(Fs *f, Routewalk *rw)
+long
+routeread(Fs *f, char *p, ulong offset, int n)
 {
+	Routewalk rw[1];
+
+	rw->p = p;
+	rw->e = p+n;
+	rw->o = -offset;
+	if(rw->o > 0)
+		return 0;
+
 	rlock(&routelock);
-	if(rw->e > rw->p) {
+	if(rw->p < rw->e) {
 		for(rw->h = 0; rw->h < nelem(f->v4root); rw->h++)
 			if(rr(f->v4root[rw->h], rw) == 0)
 				break;
 	}
-	if(rw->e > rw->p) {
+	if(rw->p < rw->e) {
 		for(rw->h = 0; rw->h < nelem(f->v6root); rw->h++)
 			if(rr(f->v6root[rw->h], rw) == 0)
 				break;
 	}
 	runlock(&routelock);
-}
 
-long
-routeread(Fs *f, char *p, ulong offset, int n)
-{
-	Routewalk rw;
-
-	rw.p = p;
-	rw.e = p+n;
-	rw.o = -offset;
-	rw.walk = sprintroute;
-
-	ipwalkroutes(f, &rw);
-
-	return rw.p - p;
+	return rw->p - p;
 }
 
 /*
- *  this code is not in routeflush to reduce stack size
+ *	4	add	addr	mask	gate
+ *	5	add	addr	mask	gate			ifc
+ *	6	add	addr	mask	gate				src	smask
+ *	7	add	addr	mask	gate			ifc	src	smask
+ *	8	add	addr	mask	gate		tag	ifc	src	smask
+ *	9	add	addr	mask	gate	type	tag	ifc	src	smask
+ *	3	remove	addr	mask
+ *	4	remove	addr	mask	gate
+ *	5	remove	addr	mask					src	smask
+ *	6	remove	addr	mask	gate				src	smask
+ *	7	remove	addr	mask	gate			ifc	src	smask
+ *	8	remove	addr	mask	gate		tag	ifc	src	smask
+ *	9	remove	addr	mask	gate	type	tag	ifc	src	smask
  */
-void
-delroute(Fs *f, Route *r, int dolock)
+static Route
+parseroute(Fs *f, char **argv, int argc)
 {
-	uchar addr[IPaddrlen];
-	uchar mask[IPaddrlen];
+	uchar addr[IPaddrlen], mask[IPaddrlen];
+	uchar src[IPaddrlen], smask[IPaddrlen];
 	uchar gate[IPaddrlen];
-	char t[5];
-	int nifc;
+	Ipifc *ifc;
+	char *tag;
+	int type;
 
-	convroute(r, addr, mask, gate, t, &nifc);
-	if(r->type & Rv4)
-		v4delroute(f, addr+IPv4off, mask+IPv4off, dolock);
-	else
-		v6delroute(f, addr, mask, dolock);
-}
+	type = 0;
+	tag = nil;
+	ifc = nil;
+	ipmove(gate, IPnoaddr);
+	ipmove(src, IPnoaddr);
+	ipmove(smask, IPnoaddr);
 
-/*
- *  recurse until one route is deleted
- *    returns 0 if nothing is deleted, 1 otherwise
- */
-int
-routeflush(Fs *f, Route *r, char *tag)
-{
-	if(r == nil)
-		return 0;
-	if(routeflush(f, r->mid, tag))
-		return 1;
-	if(routeflush(f, r->left, tag))
-		return 1;
-	if(routeflush(f, r->right, tag))
-		return 1;
-	if((r->type & Rifc) == 0){
-		if(tag == nil || strncmp(tag, r->tag, sizeof(r->tag)) == 0){
-			delroute(f, r, 0);
-			return 1;
-		}
+	if(argc < 3)
+		error(Ebadctl);
+	if(parseipandmask(addr, mask, argv[1], argv[2]) == -1)
+		error(Ebadip);
+
+	if(strcmp(argv[0], "add") == 0 || (argc > 3 && argc != 5)){
+		if(argc < 4)
+			error(Ebadctl);
+		if(parseip(gate, argv[3]) == -1)
+			error(Ebadip);
 	}
-	return 0;
+	if(argc > 4 && (strcmp(argv[0], "add") != 0 || argc != 5)){
+		if(parseipandmask(src, smask, argv[argc-2], argv[argc-1]) == -1)
+			error(Ebadip);
+	}
+	if(argc == 5 && strcmp(argv[0], "add") == 0)
+		ifc = findipifcstr(f, argv[4]);
+	if(argc > 6)
+		ifc = findipifcstr(f, argv[argc-3]);
+	if(argc > 7)
+		tag = argv[argc-4];
+	if(argc > 8){
+		if((type = parseroutetype(argv[argc-5])) < 0)
+			error(Ebadctl);
+	} else {
+		if(isv4(addr))
+			type |= Rv4;
+	}
+	if(argc > 9)
+		error(Ebadctl);
+
+	if(type & Rv4){
+		if(!isv4(addr))
+			error(Ebadip);
+		if(ipcmp(smask, IPnoaddr) != 0 && !isv4(src))
+			error(Ebadip);
+		if(ipcmp(gate, IPnoaddr) != 0 && !isv4(gate))
+			error(Ebadip);
+	} else {
+		if(isv4(addr))
+			error(Ebadip);
+	}
+
+	return mkroute(addr, mask, src, smask, gate, type, ifc, tag);	
 }
 
 long
 routewrite(Fs *f, Chan *c, char *p, int n)
 {
-	int h, changed;
-	char *tag;
 	Cmdbuf *cb;
-	uchar addr[IPaddrlen];
-	uchar mask[IPaddrlen];
-	uchar gate[IPaddrlen];
-	IPaux *a, *na;
+	IPaux *a;
+	Route *x, r;
 
 	cb = parsecmd(p, n);
 	if(waserror()){
@@ -797,54 +1112,44 @@
 		free(cb);
 		nexterror();
 	}
-
+	if(cb->nf < 1)
+		error("short control request");
 	if(strcmp(cb->f[0], "flush") == 0){
-		tag = cb->f[1];
+		char *tag = cb->nf < 2 ? nil : cb->f[1];
+		int h;
+
+		wlock(&routelock);
 		for(h = 0; h < nelem(f->v4root); h++)
-			for(changed = 1; changed;){
-				wlock(&routelock);
-				changed = routeflush(f, f->v4root[h], tag);
-				wunlock(&routelock);
+			while((x = looknodetag(f->v4root[h], tag)) != nil){
+				memmove(&r, x, sizeof(RouteTree) + sizeof(V4route));
+				routerem(f, &r);
 			}
 		for(h = 0; h < nelem(f->v6root); h++)
-			for(changed = 1; changed;){
-				wlock(&routelock);
-				changed = routeflush(f, f->v6root[h], tag);
-				wunlock(&routelock);
+			while((x = looknodetag(f->v6root[h], tag)) != nil){
+				memmove(&r, x, sizeof(RouteTree) + sizeof(V6route));
+				routerem(f, &r);
 			}
-	} else if(strcmp(cb->f[0], "remove") == 0){
-		if(cb->nf < 3)
-			error(Ebadarg);
-		parseip(addr, cb->f[1]);
-		parseipmask(mask, cb->f[2]);
-		if(memcmp(addr, v4prefix, IPv4off) == 0)
-			v4delroute(f, addr+IPv4off, mask+IPv4off, 1);
-		else
-			v6delroute(f, addr, mask, 1);
-	} else if(strcmp(cb->f[0], "add") == 0){
-		if(cb->nf < 4)
-			error(Ebadarg);
-		parseip(addr, cb->f[1]);
-		parseipmask(mask, cb->f[2]);
-		parseip(gate, cb->f[3]);
-		tag = "none";
-		if(c != nil){
+		wunlock(&routelock);
+	} else if(strcmp(cb->f[0], "add") == 0 || strcmp(cb->f[0], "remove") == 0){
+		r = parseroute(f, cb->f, cb->nf);
+		if(*r.tag == 0){
 			a = c->aux;
-			tag = a->tag;
+			strncpy(r.tag, a->tag, sizeof(r.tag));
 		}
-		if(memcmp(addr, v4prefix, IPv4off) == 0)
-			v4addroute(f, tag, addr+IPv4off, mask+IPv4off, gate+IPv4off, 0);
+		wlock(&routelock);
+		if(strcmp(cb->f[0], "add") == 0)
+			routeadd(f, &r);
 		else
-			v6addroute(f, tag, addr, mask, gate, 0);
+			routerem(f, &r);
+		wunlock(&routelock);
 	} else if(strcmp(cb->f[0], "tag") == 0) {
 		if(cb->nf < 2)
 			error(Ebadarg);
-
 		a = c->aux;
-		na = newipaux(a->owner, cb->f[1]);
-		c->aux = na;
+		c->aux = newipaux(a->owner, cb->f[1]);
 		free(a);
-	}
+	} else
+		error(Ebadctl);
 
 	poperror();
 	free(cb);
--- a/os/ip/ipv6.c
+++ b/os/ip/ipv6.c
@@ -8,250 +8,127 @@
 #include	"ip.h"
 #include	"ipv6.h"
 
-enum
-{
-	IP4HDR		= 20,		/* sizeof(Ip4hdr) */
-	IP6HDR		= 40,		/* sizeof(Ip6hdr) */
-	IP_HLEN4	= 0x05,		/* Header length in words */
-	IP_DF		= 0x4000,	/* Don't fragment */
-	IP_MF		= 0x2000,	/* More fragments */
-	IP6FHDR		= 8, 		/* sizeof(Fraghdr6) */
-	IP_MAX		= (32*1024),	/* Maximum Internet packet size */
-};
-
-#define IPV6CLASS(hdr) ((hdr->vcf[0]&0x0F)<<2 | (hdr->vcf[1]&0xF0)>>2)
-#define BLKIPVER(xp)	(((Ip6hdr*)((xp)->rp))->vcf[0]&0xF0)
-/*
- * This sleazy macro is stolen shamelessly from ip.c, see comment there.
- */
-#define BKFG(xp)	((Ipfrag*)((xp)->base))
-
-typedef struct	IP	IP;
-typedef struct	Fragment4	Fragment4;
-typedef struct	Fragment6	Fragment6;
-typedef struct	Ipfrag	Ipfrag;
-
-Block*		ip6reassemble(IP*, int, Block*, Ip6hdr*);
-void		ipfragfree6(IP*, Fragment6*);
-Fragment6*	ipfragallo6(IP*);
+static Block*		ip6reassemble(IP*, int, Block*);
+static Fragment6*	ipfragallo6(IP*);
+static void		ipfragfree6(IP*, Fragment6*);
+static Block*		procopts(Block *bp);
 static Block*		procxtns(IP *ip, Block *bp, int doreasm);
-int		unfraglen(Block *bp, uchar *nexthdr, int setfh);
-Block*		procopts(Block *bp);
+static int		unfraglen(Block *bp, uchar *nexthdr, int setfh, int popfh);
 
-/* MIB II counters */
-enum
+void
+ip_init_6(Fs *f)
 {
-	Forwarding,
-	DefaultTTL,
-	InReceives,
-	InHdrErrors,
-	InAddrErrors,
-	ForwDatagrams,
-	InUnknownProtos,
-	InDiscards,
-	InDelivers,
-	OutRequests,
-	OutDiscards,
-	OutNoRoutes,
-	ReasmTimeout,
-	ReasmReqds,
-	ReasmOKs,
-	ReasmFails,
-	FragOKs,
-	FragFails,
-	FragCreates,
+	v6params *v6p;
 
-	Nstats,
-};
+	v6p = smalloc(sizeof(v6params));
 
-static char *statnames[] =
-{
-[Forwarding]	"Forwarding",
-[DefaultTTL]	"DefaultTTL",
-[InReceives]	"InReceives",
-[InHdrErrors]	"InHdrErrors",
-[InAddrErrors]	"InAddrErrors",
-[ForwDatagrams]	"ForwDatagrams",
-[InUnknownProtos]	"InUnknownProtos",
-[InDiscards]	"InDiscards",
-[InDelivers]	"InDelivers",
-[OutRequests]	"OutRequests",
-[OutDiscards]	"OutDiscards",
-[OutNoRoutes]	"OutNoRoutes",
-[ReasmTimeout]	"ReasmTimeout",
-[ReasmReqds]	"ReasmReqds",
-[ReasmOKs]	"ReasmOKs",
-[ReasmFails]	"ReasmFails",
-[FragOKs]	"FragOKs",
-[FragFails]	"FragFails",
-[FragCreates]	"FragCreates",
-};
+	v6p->rp.mflag		= 0;		/* default not managed */
+	v6p->rp.oflag		= 0;
+	v6p->rp.maxraint	= 600000;	/* millisecs */
+	v6p->rp.minraint	= 200000;
+	v6p->rp.linkmtu		= 0;		/* no mtu sent */
+	v6p->rp.reachtime	= 0;
+	v6p->rp.rxmitra		= 0;
+	v6p->rp.ttl		= MAXTTL;
+	v6p->rp.routerlt	= (3 * v6p->rp.maxraint) / 1000;
 
-struct Fragment4
-{
-	Block*	blist;
-	Fragment4*	next;
-	ulong 	src;
-	ulong 	dst;
-	ushort	id;
-	ulong 	age;
-};
+	v6p->hp.rxmithost	= 1000;		/* v6 RETRANS_TIMER */
 
-struct Fragment6
-{
-	Block*	blist;
-	Fragment6*	next;
-	uchar 	src[IPaddrlen];
-	uchar 	dst[IPaddrlen];
-	uint	id;
-	ulong 	age;
-};
+	f->v6p			= v6p;
+}
 
-struct Ipfrag
-{
-	ushort	foff;
-	ushort	flen;
-};
-
-/* an instance of IP */
-struct IP
-{
-	ulong		stats[Nstats];
-
-	QLock		fraglock4;
-	Fragment4*	flisthead4;
-	Fragment4*	fragfree4;
-	Ref		id4;
-
-	QLock		fraglock6;
-	Fragment6*	flisthead6;
-	Fragment6*	fragfree6;
-	Ref		id6;
-
-	int		iprouting;	/* true if we route like a gateway */
-};
-
 int
-ipoput6(Fs *f, Block *bp, int gating, int ttl, int tos, Conv *c)
+ipoput6(Fs *f, Block *bp, int gating, int ttl, int tos, Routehint *rh)
 {
-	int tentative;
-	Ipifc *ifc;
+	int medialen, len, chunk, uflen, flen, seglen, lid, offset, fragoff;
+	int morefrags, blklen, rv = 0;
 	uchar *gate, nexthdr;
-	Ip6hdr *eh;
-	int medialen, len, chunk, uflen, flen, seglen, lid, offset, fragoff, morefrags, blklen;
-	Route *r, *sr;
-	Fraghdr6 fraghdr;
 	Block *xp, *nb;
+	Fraghdr6 fraghdr;
 	IP *ip;
-	int rv = 0;
+	Ip6hdr *eh;
+	Ipifc *ifc;
+	Route *r;
 
 	ip = f->ip;
-
-	/* Fill out the ip header */
-	eh = (Ip6hdr*)(bp->rp);
-
 	ip->stats[OutRequests]++;
 
-	/* Number of uchars in data and ip header to write */
+	/* Fill out the ip header */
+	eh = (Ip6hdr*)bp->rp;
+	assert(BLEN(bp) >= IP6HDR);
 	len = blocklen(bp);
-	
-	tentative = iptentative(f, eh->src);
-	if(tentative){
-		netlog(f, Logip, "reject tx of packet with tentative src address\n");
-		goto free;
-	}
-
-	if(gating){
-		chunk = nhgets(eh->ploadlen);
-		if(chunk > len){
-			ip->stats[OutDiscards]++;
-			netlog(f, Logip, "short gated packet\n");
-			goto free;
-		}
-		if(chunk + IPV6HDR_LEN < len)
-			len = chunk + IPV6HDR_LEN;
-	}
-
 	if(len >= IP_MAX){
-//		print("len > IP_MAX, free\n");
 		ip->stats[OutDiscards]++;
-		netlog(f, Logip, "exceeded ip max size %I\n", eh->dst);
+		netlog(f, Logip, "%I -> %I: exceeded ip max size: %d\n", eh->src, eh->dst, len);
 		goto free;
 	}
 
-	r = v6lookup(f, eh->dst, c);
-	if(r == nil){
-//		print("no route for %I, src %I free\n", eh->dst, eh->src);
+	r = v6lookup(f, eh->dst, eh->src, rh);
+	if(r == nil || (r->type & Rv4) != 0 || (ifc = r->ifc) == nil){
 		ip->stats[OutNoRoutes]++;
-		netlog(f, Logip, "no interface %I\n", eh->dst);
+		netlog(f, Logip, "%I -> %I: no interface\n", eh->src, eh->dst);
 		rv = -1;
 		goto free;
 	}
 
-	ifc = r->ifc;
-	if(r->type & (Rifc|Runi))
+	if(r->type & (Rifc|Runi|Rbcast|Rmulti))
 		gate = eh->dst;
 	else
-	if(r->type & (Rbcast|Rmulti)) {
-		gate = eh->dst;
-		sr = v6lookup(f, eh->src, nil);
-		if(sr != nil && (sr->type & Runi))
-			ifc = sr->ifc;
-	}
-	else
 		gate = r->v6.gate;
 
-	if(!gating)
-		eh->vcf[0] = IP_VER6;
-	eh->ttl = ttl;
-	if(!gating) {
-		eh->vcf[0] |= (tos >> 4);
-		eh->vcf[1] = (tos << 4);
-	}
-
-	if(!canrlock(ifc)) {
+	if(!canrlock(ifc)){
+		ip->stats[OutDiscards]++;
 		goto free;
 	}
-
 	if(waserror()){
 		runlock(ifc);
 		nexterror();
 	}
 
-	if(ifc->m == nil) {
+	if(ifc->m == nil)
 		goto raise;
+
+	if(!gating){
+		eh->vcf[0] = IP_VER6;
+		eh->vcf[0] |= tos >> 4;
+		eh->vcf[1]  = tos << 4;
 	}
+	eh->ttl = ttl;
 
 	/* If we dont need to fragment just send it */
 	medialen = ifc->maxtu - ifc->m->hsize;
 	if(len <= medialen) {
-		hnputs(eh->ploadlen, len-IPV6HDR_LEN);
-		ifc->m->bwrite(ifc, bp, V6, gate);
+		hnputs(eh->ploadlen, len - IP6HDR);
+		ipifcoput(ifc, bp, V6, gate);
 		runlock(ifc);
 		poperror();
 		return 0;
 	}
 
-	if(gating) 
-	if(ifc->reassemble <= 0) {
-
-		/* v6 intermediate nodes are not supposed to fragment pkts;
-		   we fragment if ifc->reassemble is turned on; an exception
-		   needed for nat.
+	if(gating && !ifc->reassemble) {
+		/*
+		 * v6 intermediate nodes are not supposed to fragment pkts;
+		 * we fragment if ifc->reassemble is turned on; an exception
+		 * needed for nat.
 		 */
-
 		ip->stats[OutDiscards]++;
 		icmppkttoobig6(f, ifc, bp);
-		netlog(f, Logip, "%I: gated pkts not fragmented\n", eh->dst);
+		netlog(f, Logip, "%I -> %I: gated pkts not fragmented\n", eh->src, eh->dst);
 		goto raise;
 	}
-		
+
 	/* start v6 fragmentation */
-	uflen = unfraglen(bp, &nexthdr, 1);
+	uflen = unfraglen(bp, &nexthdr, 1, 0);
+	if(uflen < IP6HDR || nexthdr == FH) {
+		ip->stats[FragFails]++;
+		ip->stats[OutDiscards]++;
+		netlog(f, Logip, "%I -> %I: fragment header botch\n", eh->src, eh->dst);
+		goto raise;
+	}
 	if(uflen > medialen) {
 		ip->stats[FragFails]++;
 		ip->stats[OutDiscards]++;
-		netlog(f, Logip, "%I: unfragmentable part too big\n", eh->dst);
+		netlog(f, Logip, "%I -> %I: unfragmentable part too big: %d\n", eh->src, eh->dst, uflen);
 		goto raise;
 	}
 
@@ -260,7 +137,7 @@
 	if(seglen < 8) {
 		ip->stats[FragFails]++;
 		ip->stats[OutDiscards]++;
-		netlog(f, Logip, "%I: seglen < 8\n", eh->dst);
+		netlog(f, Logip, "%I -> %I: seglen < 8\n", eh->src, eh->dst);
 		goto raise;
 	}
 
@@ -271,13 +148,13 @@
 
 	xp = bp;
 	offset = uflen;
-	while (xp != nil && offset && offset >= BLEN(xp)) {
+	while (offset && offset >= BLEN(xp)) {
 		offset -= BLEN(xp);
 		xp = xp->next;
 	}
 	xp->rp += offset;
 
-	fragoff = 0; 
+	fragoff = 0;
 	morefrags = 1;
 
 	for(; fragoff < flen; fragoff += seglen) {
@@ -292,7 +169,7 @@
 		memmove(nb->wp, eh, uflen);
 		nb->wp += uflen;
 
-		hnputs(fraghdr.offsetRM, fragoff); // last 3 bits must be 0
+		hnputs(fraghdr.offsetRM, fragoff); /* last 3 bits must be 0 */
 		fraghdr.offsetRM[1] |= morefrags;
 		memmove(nb->wp, &fraghdr, IP6FHDR);
 		nb->wp += IP6FHDR;
@@ -300,11 +177,11 @@
 		/* Copy data */
 		chunk = seglen;
 		while (chunk) {
-			if(!xp) {
+			if(xp == nil) {
 				ip->stats[OutDiscards]++;
 				ip->stats[FragFails]++;
 				freeblist(nb);
-				netlog(f, Logip, "!xp: chunk in v6%d\n", chunk);
+				netlog(f, Logip, "xp == nil: chunk in v6%d\n", chunk);
 				goto raise;
 			}
 			blklen = chunk;
@@ -316,10 +193,9 @@
 			xp->rp += blklen;
 			chunk -= blklen;
 			if(xp->rp == xp->wp)
-				xp = xp->next; 
+				xp = xp->next;
 		}
-
-		ifc->m->bwrite(ifc, nb, V6, gate);
+		ipifcoput(ifc, nb, V6, gate);
 		ip->stats[FragCreates]++;
 	}
 	ip->stats[FragOKs]++;
@@ -328,7 +204,7 @@
 	runlock(ifc);
 	poperror();
 free:
-	freeblist(bp);	
+	freeblist(bp);
 	return rv;
 }
 
@@ -335,16 +211,10 @@
 void
 ipiput6(Fs *f, Ipifc *ifc, Block *bp)
 {
-	int hl;
-	int hop, tos;
-	uchar proto;
+	int hl, len, hop, tos;
+	IP *ip;
 	Ip6hdr *h;
 	Proto *p;
-	int notforme;
-	int tentative;
-	uchar v6dst[IPaddrlen];
-	IP *ip;
-	Route *r, *sr;
 
 	ip = f->ip;
 	ip->stats[InReceives]++;
@@ -365,40 +235,44 @@
 			return;
 	}
 
-	h = (Ip6hdr *)(bp->rp);
-
-	memmove(&v6dst[0], &(h->dst)[0], IPaddrlen);
-	notforme = ipforme(f, v6dst) == 0;
-	tentative = iptentative(f, v6dst);
-  
-	if(tentative && (h->proto != ICMPv6)) {
-		print("tentative addr, drop\n");
-		freeblist(bp);
-		return;
-	}
-
 	/* Check header version */
-	if(BLKIPVER(bp) != IP_VER6) {
+	h = (Ip6hdr*)bp->rp;
+	if((h->vcf[0] & 0xF0) != IP_VER6) {
 		ip->stats[InHdrErrors]++;
 		netlog(f, Logip, "ip: bad version %ux\n", (h->vcf[0]&0xF0)>>2);
-		freeblist(bp);
+		goto drop;
+	}
+	len = IP6HDR + nhgets(h->ploadlen);
+	if((bp = trimblock(bp, 0, len)) == nil){
+		ip->stats[InHdrErrors]++;
+		netlog(f, Logip, "%I -> %I: bogus packet length: %d\n", h->src, h->dst, len);
 		return;
 	}
+	h = (Ip6hdr*)bp->rp;
 
 	/* route */
-	if(notforme) {
-		if(!ip->iprouting){
-			freeb(bp);
-			return;
+	if(!ipforme(f, h->dst)) {
+		Route *r;
+		Routehint rh;
+		Ipifc *nifc;
+
+		if(!ip->iprouting)
+			goto drop;
+
+		/* don't forward to link-local destinations */
+		if(islinklocal(h->dst) ||
+		   (isv6mcast(h->dst) && (h->dst[1]&0xF) <= Link_local_scop)){
+			ip->stats[OutDiscards]++;
+			goto drop;
 		}
+			
 		/* don't forward to source's network */
-		sr = v6lookup(f, h->src, nil);
-		r = v6lookup(f, h->dst, nil);
-
-		if(r == nil || sr == r){
+		rh.r = nil;
+		r  = v6lookup(f, h->dst, h->src, &rh);
+		if(r == nil || (nifc = r->ifc) == nil || (r->type & Rv4) != 0
+		|| (nifc == ifc && !ifc->reflect)){
 			ip->stats[OutDiscards]++;
-			freeblist(bp);
-			return;
+			goto drop;
 		}
 
 		/* don't forward if packet has timed out */
@@ -406,33 +280,29 @@
 		if(hop < 1) {
 			ip->stats[InHdrErrors]++;
 			icmpttlexceeded6(f, ifc, bp);
-			freeblist(bp);
-			return;
+			goto drop;
 		}
 
 		/* process headers & reassemble if the interface expects it */
-		bp = procxtns(ip, bp, r->ifc->reassemble);
-
+		bp = procxtns(ip, bp, nifc->reassemble);
 		if(bp == nil)
 			return;
 
 		ip->stats[ForwDatagrams]++;
-		h = (Ip6hdr *) (bp->rp);
-		tos = IPV6CLASS(h);
+		h = (Ip6hdr*)bp->rp;
+		tos = (h->vcf[0]&0x0F)<<2 | (h->vcf[1]&0xF0)>>2;
 		hop = h->ttl;
-		ipoput6(f, bp, 1, hop-1, tos, nil);
+		ipoput6(f, bp, 1, hop-1, tos, &rh);
 		return;
 	}
 
 	/* reassemble & process headers if needed */
 	bp = procxtns(ip, bp, 1);
-
 	if(bp == nil)
 		return;
 
-	h = (Ip6hdr *) (bp->rp);
-	proto = h->proto;
-	p = Fsrcvpcol(f, proto);
+	h = (Ip6hdr*)bp->rp;
+	p = Fsrcvpcol(f, h->proto);
 	if(p != nil && p->rcv != nil) {
 		ip->stats[InDelivers]++;
 		(*p->rcv)(p, ifc, bp);
@@ -441,6 +311,7 @@
 
 	ip->stats[InDiscards]++;
 	ip->stats[InUnknownProtos]++;
+drop:
 	freeblist(bp);
 }
 
@@ -447,20 +318,20 @@
 /*
  * ipfragfree6 - copied from ipfragfree4 - assume hold fraglock6
  */
-void
+static void
 ipfragfree6(IP *ip, Fragment6 *frag)
 {
 	Fragment6 *fl, **l;
 
-	if(frag->blist)
+	if(frag->blist != nil)
 		freeblist(frag->blist);
-
-	memset(frag->src, 0, IPaddrlen);
-	frag->id = 0;
 	frag->blist = nil;
+	frag->id = 0;
+	memset(frag->src, 0, IPaddrlen);
+	memset(frag->dst, 0, IPaddrlen);
 
 	l = &ip->flisthead6;
-	for(fl = *l; fl; fl = fl->next) {
+	for(fl = *l; fl != nil; fl = fl->next) {
 		if(fl == frag) {
 			*l = frag->next;
 			break;
@@ -470,13 +341,12 @@
 
 	frag->next = ip->fragfree6;
 	ip->fragfree6 = frag;
-
 }
 
 /*
  * ipfragallo6 - copied from ipfragalloc4
  */
-Fragment6*
+static Fragment6*
 ipfragallo6(IP *ip)
 {
 	Fragment6 *f;
@@ -483,7 +353,7 @@
 
 	while(ip->fragfree6 == nil) {
 		/* free last entry on fraglist */
-		for(f = ip->flisthead6; f->next; f = f->next)
+		for(f = ip->flisthead6; f->next != nil; f = f->next)
 			;
 		ipfragfree6(ip, f);
 	}
@@ -497,108 +367,109 @@
 }
 
 static Block*
-procxtns(IP *ip, Block *bp, int doreasm) {
-
-	int offset;
+procxtns(IP *ip, Block *bp, int doreasm)
+{
 	uchar proto;
-	Ip6hdr *h;
+	int offset;
 
-	h = (Ip6hdr *) (bp->rp);
-	offset = unfraglen(bp, &proto, 0);
-
-	if((proto == FH) && (doreasm != 0)) {
-		bp = ip6reassemble(ip, offset, bp, h);
-		if(bp == nil) 
-			return nil; 
-		offset = unfraglen(bp, &proto, 0);
+	offset = unfraglen(bp, &proto, 0, doreasm);
+	if(offset >= IP6HDR && proto == FH && doreasm) {
+		bp = ip6reassemble(ip, offset, bp);
+		if(bp == nil)
+			return nil;
+		offset = unfraglen(bp, &proto, 0, 0);
+		if(proto == FH)
+			offset = -1;
 	}
-
-	if(proto == DOH || offset > IP6HDR) 
+	if(offset < IP6HDR){
+		ip->stats[InHdrErrors]++;
+		ip->stats[InDiscards]++;
+		freeblist(bp);
+		return nil;
+	}
+	if(proto == DOH || offset > IP6HDR)
 		bp = procopts(bp);
-
 	return bp;
 }
 
-
-/*	returns length of "Unfragmentable part", i.e., sum of lengths of ipv6 hdr,
- *	hop-by-hop & routing headers if present; *nexthdr is set to nexthdr value
- *	of the last header in the "Unfragmentable part"; if setfh != 0, nexthdr
- *	field of the last header in the "Unfragmentable part" is set to FH.
+/*
+ * returns length of "Unfragmentable part", i.e., sum of lengths of ipv6 hdr,
+ * hop-by-hop & routing headers if present; *nexthdr is set to nexthdr value
+ * of the last header in the "Unfragmentable part"; if setfh != 0, nexthdr
+ * field of the last header in the "Unfragmentable part" is set to FH.
+ * When the last header is a fragment header and popfh != 0 then set
+ * the nexthdr value of the previous header to the nexthdr value of the
+ * fragment header. returns -1 on error.
  */
-int
-unfraglen(Block *bp, uchar *nexthdr, int setfh)
+static int
+unfraglen(Block *bp, uchar *nexthdr, int setfh, int popfh)
 {
-	uchar *p, *q;
-	int ufl, hs;
+	uchar *e, *p, *q;
 
+	e = bp->wp;
 	p = bp->rp;
-	q = p+6;	/* proto, = p+sizeof(Ip6hdr.vcf)+sizeof(Ip6hdr.ploadlen) */
+	q = p+6;   /* proto, = p+sizeof(Ip6hdr.vcf)+sizeof(Ip6hdr.ploadlen) */
 	*nexthdr = *q;
-	ufl = IP6HDR;
-	p += ufl;
-
-	for(;;) {
-		if(*nexthdr == HBH || *nexthdr == RH) {
-			*nexthdr = *p;
-			hs = ((int)*(p+1) + 1) * 8;
-			ufl += hs;
-			q = p;
-			p += hs;
-		}
-		else
-			break;
+	p += IP6HDR;
+	while(*nexthdr == HBH || *nexthdr == RH){
+		if(p+2 > e)
+			return -1;
+		q = p;
+		*nexthdr = *q;
+		p += ((int)p[1] + 1) * 8;
 	}
-
-	if(*nexthdr == FH)
-		*q = *p;
-
-	if(setfh)
+	if(p > e)
+		return -1;
+	if(*nexthdr == FH){
+		if(p+IP6FHDR > e || *p == FH)
+			return -1;
+		if(popfh)
+			*q = *p;
+	} else if(setfh)
 		*q = FH;
-
-	return ufl;
+	return p - bp->rp;
 }
 
-Block*
+static Block*
 procopts(Block *bp)
 {
 	return bp;
 }
 
-Block*
-ip6reassemble(IP* ip, int uflen, Block* bp, Ip6hdr* ih)
+static Block*
+ip6reassemble(IP* ip, int uflen, Block* bp)
 {
-
-	int fend, offset;
+	int offset, ovlap, fragsize, len;
+	uchar src[IPaddrlen], dst[IPaddrlen];
 	uint id;
-	Fragment6 *f, *fnext;
+	Block *bl, **l, *prev;
 	Fraghdr6 *fraghdr;
-	uchar src[IPaddrlen], dst[IPaddrlen];
-	Block *bl, **l, *last, *prev;
-	int ovlap, len, fragsize, pktposn;
+	Fragment6 *f, *fnext;
+	Ipfrag *fp, *fq;
+	Ip6hdr* ih;
 
-	fraghdr = (Fraghdr6 *) (bp->rp + uflen);
-	memmove(src, ih->src, IPaddrlen);
-	memmove(dst, ih->dst, IPaddrlen);
-	id = nhgetl(fraghdr->id);
-	offset = nhgets(fraghdr->offsetRM) & ~7;
-
 	/*
-	 *  block lists are too hard, pullupblock into a single block
+	 *  block lists are too hard, concatblock into a single block
 	 */
-	if(bp->next){
-		bp = pullupblock(bp, blocklen(bp));
-		ih = (Ip6hdr *)(bp->rp);
-	}
+	bp = concatblock(bp);
 
+	ih = (Ip6hdr*)bp->rp;
+	fraghdr = (Fraghdr6*)(bp->rp + uflen);
+	id = nhgetl(fraghdr->id);
+	offset = nhgets(fraghdr->offsetRM);
+	fragsize = BLEN(bp) - uflen - IP6FHDR;
 
+	memmove(src, ih->src, IPaddrlen);
+	memmove(dst, ih->dst, IPaddrlen);
+
 	qlock(&ip->fraglock6);
 
 	/*
 	 *  find a reassembly queue for this fragment
 	 */
-	for(f = ip->flisthead6; f; f = fnext){
+	for(f = ip->flisthead6; f != nil; f = fnext){
 		fnext = f->next;
-		if(ipcmp(f->src, src)==0 && ipcmp(f->dst, dst)==0 && f->id == id)
+		if(f->id == id && ipcmp(f->src, src) == 0 && ipcmp(f->dst, dst) == 0)
 			break;
 		if(f->age < NOW){
 			ip->stats[ReasmTimeout]++;
@@ -606,28 +477,35 @@
 		}
 	}
 
-
 	/*
 	 *  if this isn't a fragmented packet, accept it
 	 *  and get rid of any fragments that might go
 	 *  with it.
 	 */
-	if(nhgets(fraghdr->offsetRM)==0) {	// first frag is also the last
+	if((offset & ~6) == 0) {	/* 1st frag is also last */
 		if(f != nil) {
-			ipfragfree6(ip, f);
 			ip->stats[ReasmFails]++;
+			ipfragfree6(ip, f);
 		}
 		qunlock(&ip->fraglock6);
+
+		/* get rid of frag header */
+		memmove(bp->rp + IP6FHDR, bp->rp, uflen);
+		bp->rp += IP6FHDR;
+		ih = (Ip6hdr*)bp->rp;
+		hnputs(ih->ploadlen, BLEN(bp)-IP6HDR);
+
 		return bp;
 	}
 
-	if(bp->base+sizeof(Ipfrag) >= bp->rp){
-		bp = padblock(bp, sizeof(Ipfrag));
-		bp->rp += sizeof(Ipfrag);
+	if(bp->base+IPFRAGSZ > bp->rp){
+		bp = padblock(bp, IPFRAGSZ);
+		bp->rp += IPFRAGSZ;
 	}
 
-	BKFG(bp)->foff = offset;
-	BKFG(bp)->flen = nhgets(ih->ploadlen) + IP6HDR - uflen - IP6FHDR;
+	fp = (Ipfrag*)bp->base;
+	fp->foff = offset & ~7;
+	fp->flen = fragsize;
 
 	/* First fragment allocates a reassembly queue */
 	if(f == nil) {
@@ -638,8 +516,9 @@
 
 		f->blist = bp;
 
-		qunlock(&ip->fraglock6);
 		ip->stats[ReasmReqds]++;
+		qunlock(&ip->fraglock6);
+
 		return nil;
 	}
 
@@ -649,7 +528,7 @@
 	prev = nil;
 	l = &f->blist;
 	bl = f->blist;
-	while(bl != nil && BKFG(bp)->foff > BKFG(bl)->foff) {
+	while(bl != nil && fp->foff > ((Ipfrag*)bl->base)->foff) {
 		prev = bl;
 		l = &bl->next;
 		bl = bl->next;
@@ -656,15 +535,16 @@
 	}
 
 	/* Check overlap of a previous fragment - trim away as necessary */
-	if(prev) {
-		ovlap = BKFG(prev)->foff + BKFG(prev)->flen - BKFG(bp)->foff;
+	if(prev != nil) {
+		fq = (Ipfrag*)prev->base;
+		ovlap = fq->foff + fq->flen - fp->foff;
 		if(ovlap > 0) {
-			if(ovlap >= BKFG(bp)->flen) {
-				freeblist(bp);
+			if(ovlap >= fp->flen) {
 				qunlock(&ip->fraglock6);
+				freeb(bp);
 				return nil;
 			}
-			BKFG(prev)->flen -= ovlap;
+			fq->flen -= ovlap;
 		}
 	}
 
@@ -673,29 +553,27 @@
 	*l = bp;
 
 	/* Check to see if succeeding segments overlap */
-	if(bp->next) {
+	if(bp->next != nil) {
 		l = &bp->next;
-		fend = BKFG(bp)->foff + BKFG(bp)->flen;
+		offset = fp->foff + fp->flen;
 
 		/* Take completely covered segments out */
-
-		while(*l) {
-			ovlap = fend - BKFG(*l)->foff;
-
-			if(ovlap <= 0) 
-				break; 
-			if(ovlap < BKFG(*l)->flen) {
-				BKFG(*l)->flen -= ovlap;
-				BKFG(*l)->foff += ovlap;
-				/* move up ih hdrs */
-				memmove((*l)->rp + ovlap, (*l)->rp, uflen);
-				(*l)->rp += ovlap;
+		while((bl = *l) != nil) {
+			fq = (Ipfrag*)bl->base;
+			ovlap = offset - fq->foff;
+			if(ovlap <= 0)
 				break;
+			if(ovlap < fq->flen) {
+				/* move up ip and frag header */
+				memmove(bl->rp + ovlap, bl->rp, BLEN(bl) - fq->flen);
+				bl->rp += ovlap;
+				fq->flen -= ovlap;
+				fq->foff += ovlap;
+				break;
 			}
-			last = (*l)->next;
-			(*l)->next = nil;
-			freeblist(*l);
-			*l = last;
+			*l = bl->next;
+			bl->next = nil;
+			freeb(bl);
 		}
 	}
 
@@ -703,45 +581,55 @@
 	 *  look for a complete packet.  if we get to a fragment
 	 *  with the trailing bit of fraghdr->offsetRM[1] set, we're done.
 	 */
-	pktposn = 0;
-	for(bl = f->blist; bl; bl = bl->next) {
-		if(BKFG(bl)->foff != pktposn)
+	offset = 0;
+	for(bl = f->blist; bl != nil; bl = bl->next, offset += fp->flen) {
+		fp = (Ipfrag*)bl->base;
+		if(fp->foff != offset)
 			break;
-	
-		fraghdr = (Fraghdr6 *) (bl->rp + uflen);
-		if((fraghdr->offsetRM[1] & 1) == 0) {
 
-			bl = f->blist;
+		fraghdr = (Fraghdr6*)(bl->wp - fp->flen - IP6FHDR);
+		if(fraghdr->offsetRM[1] & 1)
+			continue;
 
-			/* get rid of frag header in first fragment */
+		bl = f->blist;
+		fq = (Ipfrag*)bl->base;
 
-			memmove(bl->rp + IP6FHDR, bl->rp, uflen);
-			bl->rp += IP6FHDR;
-			len = nhgets(((Ip6hdr*)(bl->rp))->ploadlen) - IP6FHDR;
-			bl->wp = bl->rp + len + IP6HDR;
+		/* get rid of frag header in first fragment */
+		memmove(bl->rp + IP6FHDR, bl->rp, BLEN(bl) - fq->flen - IP6FHDR);
+		bl->rp += IP6FHDR;
+		len = BLEN(bl);
 
-			/* Pullup all the fragment headers and
-			 * return a complete packet
-			 */
-			for(bl = bl->next; bl; bl = bl->next) {
-				fragsize = BKFG(bl)->flen;
-				len += fragsize;
-				bl->rp += uflen + IP6FHDR;
-				bl->wp = bl->rp + fragsize;
-			}
+		/*
+		 * Pullup all the fragment headers and
+		 * return a complete packet
+		 */
+		for(bl = bl->next; bl != nil && len < IP_MAX; bl = bl->next) {
+			fq = (Ipfrag*)bl->base;
+			fragsize = fq->flen;
+			bl->rp = bl->wp - fragsize;
+			len += fragsize;
+		}
 
-			bl = f->blist;
-			f->blist = nil;
+		if(len >= IP_MAX){
 			ipfragfree6(ip, f);
-			ih = (Ip6hdr*)(bl->rp);
-			hnputs(ih->ploadlen, len);
+			ip->stats[ReasmFails]++;
 			qunlock(&ip->fraglock6);
-			ip->stats[ReasmOKs]++;
-			return bl;		
+
+			return nil;
 		}
-		pktposn += BKFG(bl)->flen;
+
+		bl = f->blist;
+		f->blist = nil;
+		ipfragfree6(ip, f);
+
+		ih = (Ip6hdr*)bl->rp;
+		hnputs(ih->ploadlen, len-IP6HDR);
+
+		ip->stats[ReasmOKs]++;
+		qunlock(&ip->fraglock6);
+
+		return bl;
 	}
 	qunlock(&ip->fraglock6);
 	return nil;
 }
-
--- a/os/ip/ipv6.h
+++ b/os/ip/ipv6.h
@@ -1,53 +1,31 @@
-#undef MIN
-#define MIN(a, b) ((a) <= (b) ? (a) : (b))
-
-/* rfc 3513 defines the address prefices */
+/*
+ * Internet Protocol Version 6
+ *
+ * rfc2460 defines the protocol, rfc2461 neighbour discovery, and
+ * rfc2462 address autoconfiguration.  rfc4443 defines ICMP; was rfc2463.
+ * rfc4291 defines the address architecture (including prefices), was rfc3513.
+ * rfc4007 defines the scoped address architecture.
+ *
+ * global unicast is anything but unspecified (::), loopback (::1),
+ * multicast (ff00::/8), and link-local unicast (fe80::/10).
+ *
+ * site-local (fec0::/10) is now deprecated, originally by rfc3879.
+ *
+ * Unique Local IPv6 Unicast Addresses are defined by rfc4193.
+ * prefix is fc00::/7, scope is global, routing is limited to roughly a site.
+ */
 #define isv6mcast(addr)	  ((addr)[0] == 0xff)
 #define islinklocal(addr) ((addr)[0] == 0xfe && ((addr)[1] & 0xc0) == 0x80)
-#define issitelocal(addr) ((addr)[0] == 0xfe && ((addr)[1] & 0xc0) == 0xc0)
-#define isv6global(addr) (((addr)[0] & 0xe0) == 0x20)
 
-#define optexsts(np) (nhgets((np)->ploadlen) > 24)
-#define issmcast(addr) (memcmp((addr), v6solicitednode, 13) == 0)
+#define optexsts(np)	(nhgets((np)->ploadlen) > 24)
+#define issmcast(addr)	(memcmp((addr), v6solicitednode, 13) == 0)
 
-/* from RFC 2460 */
+#ifndef MIN
+#define MIN(a, b) ((a) <= (b)? (a): (b))
+#endif
 
-typedef struct Ip6hdr     Ip6hdr;
-typedef struct Opthdr     Opthdr;
-typedef struct Routinghdr Routinghdr;
-typedef struct Fraghdr6    Fraghdr6;
-
-struct Ip6hdr {
-	uchar vcf[4];       	// version:4, traffic class:8, flow label:20
-	uchar ploadlen[2];  	// payload length: packet length - 40
-	uchar proto;		// next header type
-	uchar ttl;          	// hop limit
-	uchar src[IPaddrlen];
-	uchar dst[IPaddrlen];
-};
-
-struct Opthdr {
-	uchar nexthdr;
-	uchar len;
-};
-
-struct Routinghdr {
-	uchar nexthdr;
-	uchar len;
-	uchar rtetype;
-	uchar segrem;
-};
-
-struct Fraghdr6 {
-	uchar nexthdr;
-	uchar res;
-	uchar offsetRM[2];	// Offset, Res, M flag
-	uchar id[4];
-};
-
-
-enum {			/* Header Types */
-	HBH		= 0,	//?
+enum {				/* Header Types */
+	HBH		= 0,	/* hop-by-hop multicast routing protocol */
 	ICMP		= 1,
 	IGMP		= 2,
 	GGP		= 3,
@@ -72,89 +50,113 @@
 	Maxhdrtype	= 256,
 };
 
-
 enum {
-	//	multicast flgs and scop
+	/* multicast flags and scopes */
 
-	well_known_flg				= 0,
-	transient_flg				= 1,
+//	Well_known_flg	= 0,
+//	Transient_flg	= 1,
 
-	node_local_scop 			= 1,
-	link_local_scop 			= 2,
-	site_local_scop 			= 5,
-	org_local_scop				= 8,
-	global_scop				= 14,
+//	Interface_local_scop = 1,
+	Link_local_scop	= 2,
+//	Site_local_scop	= 5,
+//	Org_local_scop	= 8,
+	Global_scop	= 14,
 
-	//	various prefix lengths
+	/* various prefix lengths */
+	SOLN_PREF_LEN	= 13,
 
-	SOLN_PREF_LEN				= 13,
+	/* icmpv6 unreachability codes */
+	Icmp6_no_route		= 0,
+	Icmp6_ad_prohib		= 1,
+	Icmp6_out_src_scope	= 2,
+	Icmp6_adr_unreach	= 3,
+	Icmp6_port_unreach	= 4,
+	Icmp6_gress_src_fail	= 5,
+	Icmp6_rej_route		= 6,
+	Icmp6_unknown		= 7,  /* our own invention for internal use */
 
-	//	icmpv6 unreach codes
-	icmp6_no_route				= 0,
-	icmp6_ad_prohib				= 1,
-	icmp6_unassigned			= 2,
-	icmp6_adr_unreach			= 3,
-	icmp6_port_unreach			= 4,
-	icmp6_unkn_code				= 5,
+	/* various flags & constants */
+	v6MINTU		= 1280,
+	HOP_LIMIT	= 255,
+	IP6HDR		= 40,		/* sizeof(Ip6hdr) = 8 + 2*16 */
+	IP6FHDR		= 8, 		/* sizeof(Fraghdr6) */
 
-	// 	various flags & constants
+	/* option types */
 
-	v6MINTU      				= 1280,
-	HOP_LIMIT    				= 255,
-	ETHERHDR_LEN 				= 14,
-	IPV6HDR_LEN  				= 40,
-	IPV4HDR_LEN  				= 20,
+	/* neighbour discovery */
+	SRC_LLADDR	= 1,
+	TARGET_LLADDR	= 2,
+	PREFIX_INFO	= 3,
+	REDIR_HEADER	= 4,
+	MTU_OPTION	= 5,
+	/* new since rfc2461; see iana.org/assignments/icmpv6-parameters */
+	V6nd_home	= 8,
+	V6nd_srcaddrs	= 9,		/* rfc3122 */
+	V6nd_ip		= 17,
+	/* /lib/rfc/drafts/draft-jeong-dnsop-ipv6-dns-discovery-12.txt */
+	V6nd_rdns	= 25,
+	/* plan 9 extensions */
+	V6nd_9fs	= 250,
+	V6nd_9auth	= 251,
 
-	// 	option types
+	SRC_UNSPEC	= 0,
+	SRC_UNI		= 1,
+	TARG_UNI	= 2,
+	TARG_MULTI	= 3,
 
-	SRC_LLADDRESS    			= 1,
-	TARGET_LLADDRESS 			= 2,
-	PREFIX_INFO      			= 3,
-	REDIR_HEADER     			= 4,
-	MTU_OPTION       			= 5,
+	Tunitent	= 1,
+	Tuniproxy	= 2,
+	Tunirany	= 3,
 
-	SRC_UNSPEC  				= 0,
-	SRC_UNI     				= 1,
-	TARG_UNI    				= 2,
-	TARG_MULTI  				= 3,
+	/* Node constants */
+	MAX_MULTICAST_SOLICIT	= 3,
+	RETRANS_TIMER		= 1000,
+};
 
-	t_unitent   				= 1,
-	t_uniproxy  				= 2,
-	t_unirany   				= 3,
+typedef struct Ip6hdr	Ip6hdr;
+typedef struct Opthdr	Opthdr;
+typedef struct Routinghdr Routinghdr;
+typedef struct Fraghdr6	Fraghdr6;
 
-	//	Router constants (all times in milliseconds)
+/* we do this in case there's padding at the end of Ip6hdr */
+#define IPV6HDR \
+	uchar	vcf[4];		/* version:4, traffic class:8, flow label:20 */\
+	uchar	ploadlen[2];	/* payload length: packet length - 40 */ \
+	uchar	proto;		/* next header type */ \
+	uchar	ttl;		/* hop limit */ \
+	uchar	src[IPaddrlen]; \
+	uchar	dst[IPaddrlen]
 
-	MAX_INITIAL_RTR_ADVERT_INTERVAL 	= 16000,
-	MAX_INITIAL_RTR_ADVERTISEMENTS  	= 3,
-	MAX_FINAL_RTR_ADVERTISEMENTS    	= 3,
-	MIN_DELAY_BETWEEN_RAS 			= 3000,
-	MAX_RA_DELAY_TIME     			= 500,
+struct	Ip6hdr {
+	IPV6HDR;
+	uchar	payload[];
+};
 
-	//	Host constants
+struct	Opthdr {		/* unused */
+	uchar	nexthdr;
+	uchar	len;
+};
 
-	MAX_RTR_SOLICITATION_DELAY 		= 1000,
-	RTR_SOLICITATION_INTERVAL  		= 4000,
-	MAX_RTR_SOLICITATIONS      		= 3,
+/*
+ * Beware routing header type 0 (loose source routing); see
+ * http://www.secdev.org/conf/IPv6_RH_security-csw07.pdf.
+ * Type 1 is unused.  Type 2 is for MIPv6 (mobile IPv6) filtering
+ * against type 0 header.
+ */
+struct	Routinghdr {		/* unused */
+	uchar	nexthdr;
+	uchar	len;
+	uchar	rtetype;
+	uchar	segrem;
+};
 
-	//	Node constants
-
-	MAX_MULTICAST_SOLICIT   		= 3,
-	MAX_UNICAST_SOLICIT     		= 3,
-	MAX_ANYCAST_DELAY_TIME  		= 1000,
-	MAX_NEIGHBOR_ADVERTISEMENT 		= 3,
-	REACHABLE_TIME 				= 30000,
-	RETRANS_TIMER  				= 1000,
-	DELAY_FIRST_PROBE_TIME 			= 5000,
-
+struct	Fraghdr6 {
+	uchar	nexthdr;
+	uchar	res;
+	uchar	offsetRM[2];	/* Offset, Res, M flag */
+	uchar	id[4];
 };
 
-extern void ipv62smcast(uchar *, uchar *);
-extern void icmpns(Fs *f, uchar* src, int suni, uchar* targ, int tuni, uchar* mac);
-extern void icmpna(Fs *f, uchar* src, uchar* dst, uchar* targ, uchar* mac, uchar flags);
-extern void icmpttlexceeded6(Fs *f, Ipifc *ifc, Block *bp);
-extern void icmppkttoobig6(Fs *f, Ipifc *ifc, Block *bp);
-extern void icmphostunr(Fs *f, Ipifc *ifc, Block *bp, int code, int free);
-
 extern uchar v6allnodesN[IPaddrlen];
 extern uchar v6allnodesL[IPaddrlen];
 extern uchar v6allroutersN[IPaddrlen];
@@ -161,23 +163,16 @@
 extern uchar v6allroutersL[IPaddrlen];
 extern uchar v6allnodesNmask[IPaddrlen];
 extern uchar v6allnodesLmask[IPaddrlen];
-extern uchar v6allroutersS[IPaddrlen];
 extern uchar v6solicitednode[IPaddrlen];
 extern uchar v6solicitednodemask[IPaddrlen];
 extern uchar v6Unspecified[IPaddrlen];
 extern uchar v6loopback[IPaddrlen];
-extern uchar v6loopbackmask[IPaddrlen];
 extern uchar v6linklocal[IPaddrlen];
 extern uchar v6linklocalmask[IPaddrlen];
-extern uchar v6sitelocal[IPaddrlen];
-extern uchar v6sitelocalmask[IPaddrlen];
-extern uchar v6glunicast[IPaddrlen];
 extern uchar v6multicast[IPaddrlen];
 extern uchar v6multicastmask[IPaddrlen];
 
 extern int v6llpreflen;
-extern int v6slpreflen;
-extern int v6lbpreflen;
 extern int v6mcpreflen;
 extern int v6snpreflen;
 extern int v6aNpreflen;
@@ -184,3 +179,10 @@
 extern int v6aLpreflen;
 
 extern int ReTransTimer;
+
+void ipv62smcast(uchar *, uchar *);
+void icmpns(Fs *f, uchar* src, int suni, uchar* targ, int tuni, uchar* mac);
+void icmpna(Fs *f, uchar* src, uchar* dst, uchar* targ, uchar* mac, uchar flags);
+void icmpttlexceeded6(Fs *f, Ipifc *ifc, Block *bp);
+void icmppkttoobig6(Fs *f, Ipifc *ifc, Block *bp);
+void icmphostunr6(Fs *f, Ipifc *ifc, Block *bp, int code, int tome);
--- a/os/ip/loopbackmedium.c
+++ b/os/ip/loopbackmedium.c
@@ -28,13 +28,12 @@
 	LB *lb;
 
 	lb = smalloc(sizeof(*lb));
+	lb->readp = (void*)-1;
 	lb->f = ifc->conv->p->f;
-	/* TO DO: make queue size a function of kernel memory */
-	lb->q = qopen(128*1024, Qmsg, nil, nil);
+	lb->q = qopen(1024*1024, Qmsg, nil, nil);
 	ifc->arg = lb;
-	ifc->mbps = 1000;
 
-	kproc("loopbackread", loopbackread, ifc, 0);
+	kproc("loopbackread", loopbackread, ifc);
 
 }
 
@@ -43,13 +42,29 @@
 {
 	LB *lb = ifc->arg;
 
-	if(lb->readp)
+	while(waserror())
+		;
+
+	/* wait for reader to start */
+	while(lb->readp == (void*)-1)
+		tsleep(&up->sleep, return0, 0, 300);
+		
+	if(lb->readp != nil)
 		postnote(lb->readp, 1, "unbind", 0);
 
+	poperror();
+
+	wunlock(ifc);
+	while(waserror())
+		;
+
 	/* wait for reader to die */
-	while(lb->readp != 0)
+	while(lb->readp != nil)
 		tsleep(&up->sleep, return0, 0, 300);
 
+	poperror();
+	wlock(ifc);
+
 	/* clean up */
 	qfree(lb->q);
 	free(lb);
@@ -76,23 +91,14 @@
 	ifc = a;
 	lb = ifc->arg;
 	lb->readp = up;	/* hide identity under a rock for unbind */
-	if(waserror()){
-		lb->readp = 0;
-		pexit("hangup", 1);
-	}
-	for(;;){
-		bp = qbread(lb->q, Maxtu);
-		if(bp == nil)
-			continue;
-		ifc->in++;
-		if(!canrlock(ifc)){
-			freeb(bp);
-			continue;
-		}
+	if(!waserror())
+	while((bp = qbread(lb->q, Maxtu)) != nil){
+		rlock(ifc);
 		if(waserror()){
 			runlock(ifc);
 			nexterror();
 		}
+		ifc->in++;
 		if(ifc->lifc == nil)
 			freeb(bp);
 		else
@@ -100,6 +106,8 @@
 		runlock(ifc);
 		poperror();
 	}
+	lb->readp = nil;
+	pexit("hangup", 1);
 }
 
 Medium loopbackmedium =
--- a/os/ip/netdevmedium.c
+++ b/os/ip/netdevmedium.c
@@ -49,12 +49,13 @@
 	mchan = namec(argv[2], Aopen, ORDWR, 0);
 
 	er = smalloc(sizeof(*er));
+	er->readp = (void*)-1;
 	er->mchan = mchan;
 	er->f = ifc->conv->p->f;
 
 	ifc->arg = er;
 
-	kproc("netdevread", netdevread, ifc, 0);
+	kproc("netdevread", netdevread, ifc);
 }
 
 /*
@@ -65,13 +66,29 @@
 {
 	Netdevrock *er = ifc->arg;
 
+	while(waserror())
+		;
+
+	/* wait for reader to start */
+	while(er->readp == (void*)-1)
+		tsleep(&up->sleep, return0, 0, 300);
+
 	if(er->readp != nil)
 		postnote(er->readp, 1, "unbind", 0);
 
-	/* wait for readers to die */
+	poperror();
+
+	wunlock(ifc);
+	while(waserror())
+		;
+
+	/* wait for reader to die */
 	while(er->readp != nil)
 		tsleep(&up->sleep, return0, 0, 300);
 
+	poperror();
+	wlock(ifc);
+
 	if(er->mchan != nil)
 		cclose(er->mchan);
 
@@ -86,8 +103,6 @@
 {
 	Netdevrock *er = ifc->arg;
 
-	if(bp->next)
-		bp = concatblock(bp);
 	if(BLEN(bp) < ifc->mintu)
 		bp = adjustblock(bp, ifc->mintu);
 
@@ -104,34 +119,22 @@
 	Ipifc *ifc;
 	Block *bp;
 	Netdevrock *er;
-	char *argv[1];
 
 	ifc = a;
 	er = ifc->arg;
 	er->readp = up;	/* hide identity under a rock for unbind */
-	if(waserror()){
-		er->readp = nil;
-		pexit("hangup", 1);
-	}
+	if(!waserror())
 	for(;;){
 		bp = devtab[er->mchan->type]->bread(er->mchan, ifc->maxtu, 0);
 		if(bp == nil){
-			/*
-			 * get here if mchan is a pipe and other side hangs up
-			 * clean up this interface & get out
-ZZZ is this a good idea?
-			 */
 			poperror();
-			er->readp = nil;
-			argv[0] = "unbind";
-			if(!waserror())
+			if(!waserror()){
+				static char *argv[]  = { "unbind" };
 				ifc->conv->p->ctl(ifc->conv, argv, 1);
-			pexit("hangup", 1);
+			}
+			break;
 		}
-		if(!canrlock(ifc)){
-			freeb(bp);
-			continue;
-		}
+		rlock(ifc);
 		if(waserror()){
 			runlock(ifc);
 			nexterror();
@@ -144,6 +147,8 @@
 		runlock(ifc);
 		poperror();
 	}
+	er->readp = nil;
+	pexit("hangup", 1);
 }
 
 void
--- a/os/ip/netlog.c
+++ b/os/ip/netlog.c
@@ -7,7 +7,7 @@
 #include	"../ip/ip.h"
 
 enum {
-	Nlog		= 4*1024,
+	Nlog		= 16*1024,
 };
 
 /*
@@ -39,12 +39,12 @@
 	{ "ppp",	Logppp, },
 	{ "ip",		Logip, },
 	{ "fs",		Logfs, },
-	{ "tcp",	Logtcp, },
 	{ "il",		Logil, },
+	{ "tcp",	Logtcp, },
 	{ "icmp",	Logicmp, },
 	{ "udp",	Logudp, },
 	{ "compress",	Logcompress, },
-	{ "ilmsg",	Logil|Logilmsg, },
+	{ "logilmsg",	Logilmsg, },
 	{ "gre",	Loggre, },
 	{ "tcpwin",	Logtcp|Logtcpwin, },
 	{ "tcprxmt",	Logtcp|Logtcprxmt, },
@@ -85,8 +85,11 @@
 		nexterror();
 	}
 	if(f->alog->opens == 0){
-		if(f->alog->buf == nil)
+		if(f->alog->buf == nil){
 			f->alog->buf = malloc(Nlog);
+			if(f->alog->buf == nil)
+				error(Enomem);
+		}
 		f->alog->rptr = f->alog->buf;
 		f->alog->end = f->alog->buf + Nlog;
 	}
@@ -202,6 +205,7 @@
 		else
 			f->alog->iponlyset = 1;
 		free(cb);
+		poperror();
 		return;
 
 	default:
@@ -227,7 +231,7 @@
 void
 netlog(Fs *f, int mask, char *fmt, ...)
 {
-	char buf[128], *t, *fp;
+	char buf[256], *t, *fp;
 	int i, n;
 	va_list arg;
 
--- a/os/ip/nullmedium.c
+++ b/os/ip/nullmedium.c
@@ -19,8 +19,9 @@
 }
 
 static void
-nullbwrite(Ipifc*, Block*, int, uchar*)
+nullbwrite(Ipifc*, Block *bp, int, uchar*)
 {
+	freeb(bp);
 	error("nullbwrite");
 }
 
--- a/os/ip/pktmedium.c
+++ b/os/ip/pktmedium.c
@@ -16,10 +16,10 @@
 Medium pktmedium =
 {
 .name=		"pkt",
-.hsize=		14,
-.mintu=		40,
+.hsize=		0,
+.mintu=		0,
 .maxtu=		4*1024,
-.maclen=	6,
+.maclen=	0,
 .bind=		pktbind,
 .unbind=	pktunbind,
 .bwrite=	pktbwrite,
@@ -28,12 +28,13 @@
 };
 
 /*
- *  called to bind an IP ifc to an ethernet device
+ *  called to bind an IP ifc to a packet device
  *  called with ifc wlock'd
  */
 static void
-pktbind(Ipifc*, int, char**)
+pktbind(Ipifc*, int argc, char **argv)
 {
+	USED(argc, argv);
 }
 
 /*
@@ -51,7 +52,6 @@
 pktbwrite(Ipifc *ifc, Block *bp, int, uchar*)
 {
 	/* enqueue onto the conversation's rq */
-	bp = concatblock(bp);
 	if(ifc->conv->snoopers.ref > 0)
 		qpass(ifc->conv->sq, copyblock(bp, BLEN(bp)));
 	qpass(ifc->conv->rq, bp);
--- a/os/ip/rudp.c
+++ b/os/ip/rudp.c
@@ -1,4 +1,5 @@
 /*
+ *  Reliable User Datagram Protocol, currently only for IPv4.
  *  This protocol is compatible with UDP's packet format.
  *  It could be done over UDP if need be.
  */
@@ -25,20 +26,17 @@
 
 enum
 {
-	UDP_HDRSIZE	= 20,	/* pseudo header + udp header */
 	UDP_PHDRSIZE	= 12,	/* pseudo header */
+//	UDP_HDRSIZE	= 20,	/* pseudo header + udp header */
 	UDP_RHDRSIZE	= 36,	/* pseudo header + udp header + rudp header */
 	UDP_IPHDR	= 8,	/* ip header */
 	IP_UDPPROTO	= 254,
-	UDP_USEAD7	= 52,
-	UDP_USEAD6	= 36,
-	UDP_USEAD4	= 12,
+	UDP_USEAD7	= 52,	/* size of new ipv6 headers struct */
 
 	Rudprxms	= 200,
 	Rudptickms	= 50,
 	Rudpmaxxmit	= 10,
 	Maxunacked	= 100,
-
 };
 
 #define Hangupgen	0xffffffff	/* used only in hangup messages */
@@ -205,7 +203,7 @@
 		qlock(&rpriv->apl);
 		if(rpriv->ackprocstarted == 0){
 			sprint(kpname, "#I%drudpack", rudp->f->dev);
-			kproc(kpname, relackproc, rudp, 0);
+			kproc(kpname, relackproc, rudp);
 			rpriv->ackprocstarted = 1;
 		}
 		qunlock(&rpriv->apl);
@@ -240,6 +238,7 @@
 	qlock(ucb);
 	for(r = ucb->r; r; r = r->next)
 		m += snprint(state+m, n-m, " %I/%ld", r->addr, UNACKED(r));
+	m += snprint(state+m, n-m, "\n");
 	qunlock(ucb);
 	return m;
 }
@@ -281,7 +280,7 @@
 	/* force out any delayed acks */
 	ucb = (Rudpcb*)c->ptcl;
 	qlock(ucb);
-	for(r = ucb->r; r; r = r->next){
+	for(r = ucb->r; r != nil; r = r->next){
 		if(r->acksent != r->rcvseq)
 			relsendack(c, r, 0);
 	}
@@ -374,27 +373,10 @@
 		rport = nhgets(bp->rp);
 		bp->rp += 2+2;			/* Ignore local port */
 		break;
-	case 6:
-		/* get user specified addresses */
-		bp = pullupblock(bp, UDP_USEAD6);
-		if(bp == nil)
-			return;
-		ipmove(raddr, bp->rp);
-		bp->rp += IPaddrlen;
-		ipmove(laddr, bp->rp);
-		bp->rp += IPaddrlen;
-		/* pick interface closest to dest */
-		if(ipforme(f, laddr) != Runi)
-			findlocalip(f, laddr, raddr);
-		rport = nhgets(bp->rp);
-
-		bp->rp += 4;			/* Igonore local port */
-		break;
 	default:
 		ipmove(raddr, c->raddr);
 		ipmove(laddr, c->laddr);
 		rport = c->rport;
-
 		break;
 	}
 
@@ -402,9 +384,6 @@
 
 	/* Make space to fit rudp & ip header */
 	bp = padblock(bp, UDP_IPHDR+UDP_RHDRSIZE);
-	if(bp == nil)
-		return;
-
 	uh = (Udphdr *)(bp->rp);
 	uh->vihl = IP_VER4;
 
@@ -417,7 +396,6 @@
 	uh->frag[1] = 0;
 	hnputs(uh->udpplen, ptcllen);
 	switch(ucb->headers){
-	case 6:
 	case 7:
 		v6tov4(uh->udpdst, raddr);
 		hnputs(uh->udpdport, rport);
@@ -528,7 +506,7 @@
 
 	c = iphtlook(&upriv->ht, raddr, rport, laddr, lport);
 	if(c == nil){
-		/* no converstation found */
+		/* no conversation found */
 		upriv->ustats.rudpNoPorts++;
 		qunlock(rudp);
 		netlog(f, Logudp, "udp: no conv %I!%d -> %I!%d\n", raddr, rport,
@@ -574,45 +552,32 @@
 		p = bp->rp;
 		ipmove(p, raddr); p += IPaddrlen;
 		ipmove(p, laddr); p += IPaddrlen;
-		ipmove(p, ifc->lifc->local); p += IPaddrlen;
+		if(!ipv6local(ifc, p, 0, raddr))
+			ipmove(p, ifc->lifc != nil ? ifc->lifc->local : IPnoaddr);
+		p += IPaddrlen;
 		hnputs(p, rport); p += 2;
 		hnputs(p, lport);
 		break;
-	case 6:
-		/* pass the src address */
-		bp = padblock(bp, UDP_USEAD6);
-		p = bp->rp;
-		ipmove(p, raddr); p += IPaddrlen;
-		ipmove(p, ipforme(f, laddr)==Runi ? laddr : ifc->lifc->local); p += IPaddrlen;
-		hnputs(p, rport); p += 2;
-		hnputs(p, lport);
-		break;
 	default:
 		/* connection oriented rudp */
 		if(ipcmp(c->raddr, IPnoaddr) == 0){
-			/* save the src address in the conversation */
+			/* reply with the same ip address (if not broadcast) */
+			if(ipforme(f, laddr) != Runi)
+				ipv6local(ifc, laddr, 0, raddr);
+			ipmove(c->laddr, laddr);
 		 	ipmove(c->raddr, raddr);
 			c->rport = rport;
-
-			/* reply with the same ip address (if not broadcast) */
-			if(ipforme(f, laddr) == Runi)
-				ipmove(c->laddr, laddr);
-			else
-				v4tov6(c->laddr, ifc->lifc->local);
 		}
 		break;
 	}
-	if(bp->next)
-		bp = concatblock(bp);
 
 	if(qfull(c->rq)) {
-		netlog(f, Logrudp, "rudp: qfull %I.%d -> %I.%d\n", raddr, rport,
-			laddr, lport);
+		netlog(f, Logrudp, "rudp: qfull %I.%d -> %I.%d\n",
+			raddr, rport, laddr, lport);
 		freeblist(bp);
+	} else {
+		qpass(c->rq, concatblock(bp));
 	}
-	else
-		qpass(c->rq, bp);
-	
 	qunlock(ucb);
 }
 
@@ -629,16 +594,14 @@
 	if(n < 1)
 		return rudpunknown;
 
-	if(strcmp(f[0], "headers++4") == 0){
-		ucb->headers = 7;
+	if(strcmp(f[0], "headers") == 0){
+		ucb->headers = 7;		/* new headers format */
 		return nil;
-	} else if(strcmp(f[0], "headers") == 0){
-		ucb->headers = 6;
-		return nil;
 	} else if(strcmp(f[0], "hangup") == 0){
 		if(n < 3)
 			return "bad syntax";
-		parseip(ip, f[1]);
+		if (parseip(ip, f[1]) == -1)
+			return Ebadip;
 		x = atoi(f[2]);
 		qlock(ucb);
 		relforget(c, ip, x, 1);
@@ -645,7 +608,7 @@
 		qunlock(ucb);
 		return nil;
 	} else if(strcmp(f[0], "randdrop") == 0){
-		x = 10;		/* default is 10% */
+		x = 10;			/* default is 10% */
 		if(n > 1)
 			x = atoi(f[1]);
 		if(x > 100 || x < 0)
@@ -672,12 +635,13 @@
 	pdest = nhgets(h->udpdport);
 
 	/* Look for a connection */
-	for(p = rudp->conv; *p; p++) {
-		s = *p;
+	for(p = rudp->conv; (s = *p) != nil; p++) {
 		if(s->rport == pdest)
 		if(s->lport == psource)
 		if(ipcmp(s->raddr, dest) == 0)
 		if(ipcmp(s->laddr, source) == 0){
+			if(s->ignoreadvice)
+				break;
 			qhangup(s->rq, msg);
 			qhangup(s->wq, msg);
 			break;
@@ -701,12 +665,6 @@
 		upriv->orders);
 }
 
-int
-rudpgc(Proto *rudp)
-{
-	return natgc(rudp->ipproto);
-}
-
 void
 rudpinit(Fs *fs)
 {
@@ -725,9 +683,8 @@
 	rudp->rcv = rudpiput;
 	rudp->advise = rudpadvise;
 	rudp->stats = rudpstats;
-	rudp->gc = rudpgc;
 	rudp->ipproto = IP_UDPPROTO;
-	rudp->nc = 16;
+	rudp->nc = 32;
 	rudp->ptclsize = sizeof(Rudpcb);
 
 	Fsproto(fs, rudp);
@@ -770,6 +727,8 @@
 
 	rudp = (Proto *)a;
 
+	while(waserror())
+		;
 loop:
 	tsleep(&up->sleep, return0, 0, Rudptickms);
 
@@ -989,8 +948,6 @@
 	Fs *f;
 
 	bp = allocb(UDP_IPHDR + UDP_RHDRSIZE);
-	if(bp == nil)
-		return;
 	bp->wp += UDP_IPHDR + UDP_RHDRSIZE;
 	f = c->p->f;
 	uh = (Udphdr *)(bp->rp);
--- a/os/ip/tcp.c
+++ b/os/ip/tcp.c
@@ -41,13 +41,13 @@
 	EOLOPT		= 0,
 	NOOPOPT		= 1,
 	MSSOPT		= 2,
-	MSS_LENGTH	= 4,		/* Mean segment size */
+	MSS_LENGTH	= 4,		/* Maximum segment size */
 	WSOPT		= 3,
 	WS_LENGTH	= 3,		/* Bits to scale window size by */
 	MSL2		= 10,
 	MSPTICK		= 50,		/* Milliseconds per timer tick */
-	DEF_MSS		= 1460,		/* Default mean segment */
-	DEF_MSS6	= 1280,		/* Default mean segment (min) for v6 */
+	DEF_MSS		= 1460,		/* Default maximum segment */
+	DEF_MSS6	= 1220,		/* Default maximum segment (min) for v6 */
 	DEF_RTT		= 500,		/* Default round trip */
 	DEF_KAT		= 120000,	/* Default time (ms) between keep alives */
 	TCP_LISTEN	= 0,		/* Listen connection */
@@ -81,7 +81,13 @@
 	NLHT		= 256,		/* hash table size, must be a power of 2 */
 	LHTMASK		= NLHT-1,
 
-	HaveWS		= 1<<8,
+	/*
+	 * window is 64kb · 2ⁿ
+	 * these factors determine the ultimate bandwidth-delay product.
+	 * 64kb · 2⁵ = 2mb, or 2x overkill for 100mbps · 70ms.
+	 */
+	Maxqscale	= 4,		/* maximum queuing scale */
+	Defadvscale	= 4,		/* default advertisement */
 };
 
 /* Must correspond to the enumeration above */
@@ -169,8 +175,9 @@
 	ulong	seq;
 	ulong	ack;
 	uchar	flags;
-	ushort	ws;	/* window scale option (if not zero) */
-	ulong	wnd;
+	uchar	update;
+	ushort	ws;	/* window scale option */
+	ulong	wnd;	/* prescaled window */
 	ushort	urg;
 	ushort	mss;	/* max segment size option (if not zero) */
 	ushort	len;	/* size of data */
@@ -205,44 +212,53 @@
 		ulong	wnd;		/* Tcp send window */
 		ulong	urg;		/* Urgent data pointer */
 		ulong	wl2;
-		int	scale;		/* how much to right shift window in xmitted packets */
+		uint	scale;		/* how much to right shift window in xmitted packets */
 		/* to implement tahoe and reno TCP */
 		ulong	dupacks;	/* number of duplicate acks rcvd */
+		ulong	partialack;
 		int	recovery;	/* loss recovery flag */
-		ulong	rxt;		/* right window marker for recovery */
+		int	retransmit;	/* retransmit 1 packet @ una flag */
+		int	rto;
+		ulong	rxt;		/* right window marker for recovery "recover" rfc3782 */
 	} snd;
 	struct {
 		ulong	nxt;		/* Receive pointer to next uchar slot */
 		ulong	wnd;		/* Receive window incoming */
+		ulong	wsnt;		/* Last wptr sent.  important to track for large bdp */
+		ulong	wptr;
 		ulong	urg;		/* Urgent pointer */
+		ulong	ackptr;		/* last acked sequence */
 		int	blocked;
-		int	una;		/* unacked data segs */
-		int	scale;		/* how much to left shift window in rcved packets */
+		uint	scale;		/* how much to left shift window in rcv'd packets */
 	} rcv;
 	ulong	iss;			/* Initial sequence number */
-	int	sawwsopt;		/* true if we saw a wsopt on the incoming SYN */
 	ulong	cwind;			/* Congestion window */
-	int	scale;			/* desired snd.scale */
-	ushort	ssthresh;		/* Slow start threshold */
+	ulong	abcbytes;		/* appropriate byte counting rfc 3465 */
+	uint	scale;			/* desired snd.scale */
+	ulong	ssthresh;		/* Slow start threshold */
 	int	resent;			/* Bytes just resent */
 	int	irs;			/* Initial received squence */
-	ushort	mss;			/* Mean segment size */
+	ushort	mss;			/* Maximum segment size */
 	int	rerecv;			/* Overlap of data rerecevived */
-	ulong	window;			/* Recevive window */
+	ulong	window;			/* Our receive window (queue) */
+	uint	qscale;			/* Log2 of our receive window (queue) */
 	uchar	backoff;		/* Exponential backoff counter */
 	int	backedoff;		/* ms we've backed off for rexmits */
 	uchar	flags;			/* State flags */
 	Reseq	*reseq;			/* Resequencing queue */
+	int	nreseq;
+	int	reseqlen;
 	Tcptimer	timer;			/* Activity timer */
 	Tcptimer	acktimer;		/* Acknowledge timer */
 	Tcptimer	rtt_timer;		/* Round trip timer */
 	Tcptimer	katimer;		/* keep alive timer */
 	ulong	rttseq;			/* Round trip sequence */
-	int	srtt;			/* Shortened round trip */
+	int	srtt;			/* Smoothed round trip */
 	int	mdev;			/* Mean deviation of round trip */
 	int	kacounter;		/* count down for keep alive */
 	uint	sndsyntime;		/* time syn sent */
 	ulong	time;			/* time Finwait2 or Syn_received was sent */
+	ulong	timeuna;			/* snd.una when time was set */
 	int	nochecksum;		/* non-zero means don't send checksums */
 	int	flgcnt;			/* number of flags in the sequence (FIN,SEQ) */
 
@@ -285,11 +301,11 @@
 };
 
 int	tcp_irtt = DEF_RTT;	/* Initial guess at round trip time */
-ushort	tcp_mss = DEF_MSS;	/* Maximum segment size to be sent */
 
 enum {
 	/* MIB stats */
 	MaxConn,
+	Mss,
 	ActiveOpens,
 	PassiveOpens,
 	EstabResets,
@@ -297,6 +313,7 @@
 	InSegs,
 	OutSegs,
 	RetransSegs,
+	RetransSegsSent,
 	RetransTimeouts,
 	InErrs,
 	OutRsts,
@@ -305,14 +322,27 @@
 	CsumErrs,
 	HlenErrs,
 	LenErrs,
+	Resequenced,
 	OutOfOrder,
+	ReseqBytelim,
+	ReseqPktlim,
+	Delayack,
+	Wopenack,
 
+	Recovery,
+	RecoveryDone,
+	RecoveryRTO,
+	RecoveryNoSeq,
+	RecoveryCwind,
+	RecoveryPA,
+
 	Nstats
 };
 
-static char *statnames[] =
+static char *statnames[Nstats] =
 {
 [MaxConn]	"MaxConn",
+[Mss]		"MaxSegment",
 [ActiveOpens]	"ActiveOpens",
 [PassiveOpens]	"PassiveOpens",
 [EstabResets]	"EstabResets",
@@ -320,6 +350,7 @@
 [InSegs]	"InSegs",
 [OutSegs]	"OutSegs",
 [RetransSegs]	"RetransSegs",
+[RetransSegsSent]	"RetransSegsSent",
 [RetransTimeouts]	"RetransTimeouts",
 [InErrs]	"InErrs",
 [OutRsts]	"OutRsts",
@@ -327,6 +358,19 @@
 [HlenErrs]	"HlenErrs",
 [LenErrs]	"LenErrs",
 [OutOfOrder]	"OutOfOrder",
+[Resequenced]	"Resequenced",
+[ReseqBytelim]	"ReseqBytelim",
+[ReseqPktlim]	"ReseqPktlim",
+[Delayack]	"Delayack",
+[Wopenack]	"Wopenack",
+
+[Recovery]	"Recovery",
+[RecoveryDone]	"RecoveryDone",
+[RecoveryRTO]	"RecoveryRTO",
+
+[RecoveryNoSeq]	"RecoveryNoSeq",
+[RecoveryCwind]	"RecoveryCwind",
+[RecoveryPA]	"RecoveryPA",
 };
 
 typedef struct Tcppriv Tcppriv;
@@ -347,7 +391,7 @@
 	QLock	apl;
 	int	ackprocstarted;
 
-	ulong	stats[Nstats];
+	uvlong	stats[Nstats];
 };
 
 /*
@@ -356,34 +400,34 @@
  *  of DoS attack.
  *
  *  To avoid stateless Conv hogs, we pick a sequence number at random.  If
- *  it that number gets acked by the other end, we shut down the connection.
- *  Look for tcpporthogedefense in the code.
+ *  that number gets acked by the other end, we shut down the connection.
+ *  Look for tcpporthogdefense in the code.
  */
 int tcpporthogdefense = 0;
 
-int	addreseq(Tcpctl*, Tcppriv*, Tcp*, Block*, ushort);
-void	getreseq(Tcpctl*, Tcp*, Block**, ushort*);
-void	localclose(Conv*, char*);
-void	procsyn(Conv*, Tcp*);
-void	tcpiput(Proto*, Ipifc*, Block*);
-void	tcpoutput(Conv*);
-int	tcptrim(Tcpctl*, Tcp*, Block**, ushort*);
-void	tcpstart(Conv*, int);
-void	tcptimeout(void*);
-void	tcpsndsyn(Conv*, Tcpctl*);
-void	tcprcvwin(Conv*);
-void	tcpacktimer(void*);
-void	tcpkeepalive(void*);
-void	tcpsetkacounter(Tcpctl*);
-void	tcprxmit(Conv*);
-void	tcpsettimer(Tcpctl*);
-void	tcpsynackrtt(Conv*);
-void	tcpsetscale(Conv*, Tcpctl*, ushort, ushort);
+static	int	addreseq(Fs*, Tcpctl*, Tcppriv*, Tcp*, Block*, ushort);
+static	int	dumpreseq(Tcpctl*);
+static	void	getreseq(Tcpctl*, Tcp*, Block**, ushort*);
+static	void	limbo(Conv*, uchar*, uchar*, Tcp*, int);
+static	void	limborexmit(Proto*);
+static	void	localclose(Conv*, char*);
+static	void	procsyn(Conv*, Tcp*);
+static	void	tcpacktimer(void*);
+static	void	tcpiput(Proto*, Ipifc*, Block*);
+static	void	tcpkeepalive(void*);
+static	void	tcpoutput(Conv*);
+static	void	tcprcvwin(Conv*);
+static	void	tcprxmit(Conv*);
+static	void	tcpsetkacounter(Tcpctl*);
+static	void	tcpsetscale(Conv*, Tcpctl*, ushort, ushort);
+static	void	tcpsettimer(Tcpctl*);
+static	void	tcpsndsyn(Conv*, Tcpctl*);
+static	void	tcpstart(Conv*, int);
+static	void	tcpsynackrtt(Conv*);
+static	void	tcptimeout(void*);
+static	int	tcptrim(Tcpctl*, Tcp*, Block**, ushort*);
 
-static void limborexmit(Proto*);
-static void limbo(Conv*, uchar*, uchar*, Tcp*, int);
-
-void
+static void
 tcpsetstate(Conv *s, uchar newstate)
 {
 	Tcpctl *tcb;
@@ -403,11 +447,6 @@
 	if(newstate == Established)
 		tpriv->stats[CurrEstab]++;
 
-	/**
-	print( "%d/%d %s->%s CurrEstab=%d\n", s->lport, s->rport,
-		tcpstates[oldstate], tcpstates[newstate], tpriv->tstats.tcpCurrEstab );
-	**/
-
 	switch(newstate) {
 	case Closed:
 		qclose(s->rq);
@@ -430,7 +469,12 @@
 tcpconnect(Conv *c, char **argv, int argc)
 {
 	char *e;
+	Tcpctl *tcb;
 
+	tcb = (Tcpctl*)(c->ptcl);
+	if(tcb->state != Closed)
+		return Econinuse;
+
 	e = Fsstdconnect(c, argv, argc);
 	if(e != nil)
 		return e;
@@ -447,12 +491,14 @@
 	s = (Tcpctl*)(c->ptcl);
 
 	return snprint(state, n,
-		"%s qin %d qout %d srtt %d mdev %d cwin %lud swin %lud>>%d rwin %lud>>%d timer.start %d timer.count %d rerecv %d katimer.start %d katimer.count %d\n",
+		"%s qin %d qout %d rq %d.%d srtt %d mdev %d sst %lud cwin %lud swin %lud>>%d rwin %lud>>%d qscale %d timer.start %d timer.count %d rerecv %d katimer.start %d katimer.count %d\n",
 		tcpstates[s->state],
 		c->rq ? qlen(c->rq) : 0,
 		c->wq ? qlen(c->wq) : 0,
-		s->srtt, s->mdev,
-		s->cwind, s->snd.wnd, s->rcv.scale, s->rcv.wnd, s->snd.scale,
+		s->nreseq, s->reseqlen,
+		s->srtt, s->mdev, s->ssthresh,
+		s->cwind, s->snd.wnd, s->snd.scale, s->rcv.wnd, s->rcv.scale,
+		s->qscale,
 		s->timer.start, s->timer.count, s->rerecv,
 		s->katimer.start, s->katimer.count);
 }
@@ -470,7 +516,12 @@
 tcpannounce(Conv *c, char **argv, int argc)
 {
 	char *e;
+	Tcpctl *tcb;
 
+	tcb = (Tcpctl*)(c->ptcl);
+	if(tcb->state != Closed)
+		return Econinuse;
+
 	e = Fsstdannounce(c, argv, argc);
 	if(e != nil)
 		return e;
@@ -524,7 +575,7 @@
 	}
 }
 
-void
+static void
 tcpkick(void *x)
 {
 	Conv *s = x;
@@ -546,7 +597,6 @@
 		/*
 		 * Push data
 		 */
-		tcprcvwin(s);
 		tcpoutput(s);
 		break;
 	default:
@@ -558,7 +608,9 @@
 	poperror();
 }
 
-void
+static int seq_lt(ulong, ulong);
+
+static void
 tcprcvwin(Conv *s)				/* Call with tcb locked */
 {
 	int w;
@@ -568,12 +620,20 @@
 	w = tcb->window - qlen(s->rq);
 	if(w < 0)
 		w = 0;
-	tcb->rcv.wnd = w;
-	if(w == 0)
+	/* RFC 1122 § 4.2.2.17 do not move right edge of window left */
+	if(seq_lt(tcb->rcv.nxt + w, tcb->rcv.wptr))
+		w = tcb->rcv.wptr - tcb->rcv.nxt;
+	if(w != tcb->rcv.wnd)
+	if(w>>tcb->rcv.scale == 0 || tcb->window > 4*tcb->mss && w < tcb->mss/4){
 		tcb->rcv.blocked = 1;
+		netlog(s->p->f, Logtcp, "tcprcvwin: window %lud qlen %d ws %ud lport %d\n",
+			tcb->window, qlen(s->rq), tcb->rcv.scale, s->lport);
+	}
+	tcb->rcv.wnd = w;
+	tcb->rcv.wptr = tcb->rcv.nxt + w;
 }
 
-void
+static void
 tcpacktimer(void *v)
 {
 	Tcpctl *tcb;
@@ -589,7 +649,6 @@
 	qlock(s);
 	if(tcb->state != Closed){
 		tcb->flags |= FORCE;
-		tcprcvwin(s);
 		tcpoutput(s);
 	}
 	qunlock(s);
@@ -597,10 +656,52 @@
 }
 
 static void
+tcpcongestion(Tcpctl *tcb)
+{
+	ulong inflight;
+
+	inflight = tcb->snd.nxt - tcb->snd.una;
+	if(inflight > tcb->cwind)
+		inflight = tcb->cwind;
+	tcb->ssthresh = inflight / 2;
+	if(tcb->ssthresh < 2*tcb->mss)
+		tcb->ssthresh = 2*tcb->mss;
+}
+
+enum {
+	L		= 2,		/* aggressive slow start; legal values ∈ [1, 2] (rfc3465) */
+};
+
+static void
+tcpabcincr(Tcpctl *tcb, uint acked)
+{
+	uint limit;
+
+	tcb->abcbytes += acked;
+	if(tcb->cwind < tcb->ssthresh){
+		/* slow start */
+		if(tcb->snd.rto)
+			limit = 1*tcb->mss;
+		else
+			limit = L*tcb->mss;
+		tcb->cwind += MIN(tcb->abcbytes, limit);
+		tcb->abcbytes = 0;
+	}
+	else{
+		tcb->snd.rto = 0;
+		/* avoidance */
+		if(tcb->abcbytes >= tcb->cwind){
+			tcb->abcbytes -= tcb->cwind;
+			tcb->cwind += tcb->mss;
+		}
+	}
+}
+
+static void
 tcpcreate(Conv *c)
 {
 	c->rq = qopen(QMAX, Qcoalesce, tcpacktimer, c);
-	c->wq = qopen((3*QMAX)/2, Qkick, tcpkick, c);
+	c->wq = qopen(QMAX, Qkick, tcpkick, c);
 }
 
 static void
@@ -608,7 +709,7 @@
 {
 	if(newstate != TcptimerON){
 		if(t->state == TcptimerON){
-			// unchain
+			/* unchain */
 			if(priv->timers == t){
 				priv->timers = t->next;
 				if(t->prev != nil)
@@ -622,7 +723,7 @@
 		}
 	} else {
 		if(t->state != TcptimerON){
-			// chain
+			/* chain */
 			if(t->prev != nil || t->next != nil)
 				panic("timerstate2");
 			t->prev = nil;
@@ -635,7 +736,7 @@
 	t->state = newstate;
 }
 
-void
+static void
 tcpackproc(void *a)
 {
 	Tcptimer *t, *tp, *timeo;
@@ -646,6 +747,9 @@
 	tcp = a;
 	priv = tcp->priv;
 
+	while(waserror())
+		;
+
 	for(;;) {
 		tsleep(&up->sleep, return0, 0, MSPTICK);
 
@@ -681,7 +785,7 @@
 	}
 }
 
-void
+static void
 tcpgo(Tcppriv *priv, Tcptimer *t)
 {
 	if(t == nil || t->start == 0)
@@ -693,7 +797,7 @@
 	qunlock(&priv->tl);
 }
 
-void
+static void
 tcphalt(Tcppriv *priv, Tcptimer *t)
 {
 	if(t == nil)
@@ -704,17 +808,16 @@
 	qunlock(&priv->tl);
 }
 
-int
+static int
 backoff(int n)
 {
 	return 1 << n;
 }
 
-void
+static void
 localclose(Conv *s, char *reason)	/* called with tcb locked */
 {
 	Tcpctl *tcb;
-	Reseq *rp,*rp1;
 	Tcppriv *tpriv;
 
 	tpriv = s->p->priv;
@@ -728,12 +831,7 @@
 	tcphalt(tpriv, &tcb->katimer);
 
 	/* Flush reassembly queue; nothing more can arrive */
-	for(rp = tcb->reseq; rp != nil; rp = rp1) {
-		rp1 = rp->next;
-		freeblist(rp->bp);
-		free(rp);
-	}
-	tcb->reseq = nil;
+	dumpreseq(tcb);
 
 	if(tcb->state == Syn_sent)
 		Fsconnected(s, reason);
@@ -747,45 +845,46 @@
 }
 
 /* mtu (- TCP + IP hdr len) of 1st hop */
-int
-tcpmtu(Proto *tcp, uchar *addr, int version, int *scale)
+static int
+tcpmtu(Route *r, int version, uint *scale)
 {
 	Ipifc *ifc;
 	int mtu;
 
-	ifc = findipifc(tcp->f, addr, 0);
-	switch(version){
-	default:
-	case V4:
-		mtu = DEF_MSS;
-		if(ifc != nil)
-			mtu = ifc->maxtu - ifc->m->hsize - (TCP4_PKT + TCP4_HDRSIZE);
-		break;
-	case V6:
-		mtu = DEF_MSS6;
-		if(ifc != nil)
-			mtu = ifc->maxtu - ifc->m->hsize - (TCP6_PKT + TCP6_HDRSIZE);
-		break;
-	}
-	if(ifc != nil){
-		if(ifc->mbps > 100)
-			*scale = HaveWS | 3;
-		else if(ifc->mbps > 10)
-			*scale = HaveWS | 1;
-		else
-			*scale = HaveWS | 0;
-	} else
-		*scale = HaveWS | 0;
+	/*
+	 * set the ws.  it doesn't commit us to anything.
+	 * ws is the ultimate limit to the bandwidth-delay product.
+	 */
+	*scale = Defadvscale;
 
-	return mtu;
+	/*
+	 * currently we do not implement path MTU discovery
+	 * so use interface MTU *only* if directly reachable
+	 * or when we use V4 which allows routers to fragment.
+	 * otherwise, we use the default MSS which assumes a
+	 * safe minimum MTU of 1280 bytes for V6.
+	 */  
+	if(r != nil && (ifc = r->ifc) != nil){
+		mtu = ifc->maxtu - ifc->m->hsize;
+		if(version == V4)
+			return mtu - (TCP4_PKT + TCP4_HDRSIZE);
+		mtu -= TCP6_PKT + TCP6_HDRSIZE;
+		if((r->type & (Rifc|Runi)) != 0 || mtu <= DEF_MSS6)
+			return mtu;
+	}
+	if(version == V6)
+		return DEF_MSS6;
+	else
+		return DEF_MSS;
 }
 
-void
+static void
 inittcpctl(Conv *s, int mode)
 {
 	Tcpctl *tcb;
 	Tcp4hdr* h4;
 	Tcp6hdr* h6;
+	Tcppriv *tpriv;
 	int mss;
 
 	tcb = (Tcpctl*)s->ptcl;
@@ -792,7 +891,7 @@
 
 	memset(tcb, 0, sizeof(Tcpctl));
 
-	tcb->ssthresh = 65535;
+	tcb->ssthresh = QMAX;			/* reset by tcpsetscale() */
 	tcb->srtt = tcp_irtt<<LOGAGAIN;
 	tcb->mdev = 0;
 
@@ -841,19 +940,18 @@
 	}
 
 	tcb->mss = tcb->cwind = mss;
+	tcb->abcbytes = 0;
+	tpriv = s->p->priv;
+	tpriv->stats[Mss] = tcb->mss;
 
 	/* default is no window scaling */
-	tcb->window = QMAX;
-	tcb->rcv.wnd = QMAX;
-	tcb->rcv.scale = 0;
-	tcb->snd.scale = 0;
-	qsetlimit(s->rq, QMAX);
+	tcpsetscale(s, tcb, 0, 0);
 }
 
 /*
  *  called with s qlocked
  */
-void
+static void
 tcpstart(Conv *s, int mode)
 {
 	Tcpctl *tcb;
@@ -865,8 +963,8 @@
 	if(tpriv->ackprocstarted == 0){
 		qlock(&tpriv->apl);
 		if(tpriv->ackprocstarted == 0){
-			sprint(kpname, "#I%dtcpack", s->p->f->dev);
-			kproc(kpname, tcpackproc, s->p, 0);
+			snprint(kpname, sizeof(kpname), "#I%dtcpack", s->p->f->dev);
+			kproc(kpname, tcpackproc, s->p);
 			tpriv->ackprocstarted = 1;
 		}
 		qunlock(&tpriv->apl);
@@ -895,28 +993,28 @@
 }
 
 static char*
-tcpflag(ushort flag)
+tcpflag(char *buf, char *e, ushort flag)
 {
-	static char buf[128];
+	char *p;
 
-	sprint(buf, "%d", flag>>10);	/* Head len */
+	p = seprint(buf, e, "%d", flag>>10);	/* Head len */
 	if(flag & URG)
-		strcat(buf, " URG");
+		p = seprint(p, e, " URG");
 	if(flag & ACK)
-		strcat(buf, " ACK");
+		p = seprint(p, e, " ACK");
 	if(flag & PSH)
-		strcat(buf, " PSH");
+		p = seprint(p, e, " PSH");
 	if(flag & RST)
-		strcat(buf, " RST");
+		p = seprint(p, e, " RST");
 	if(flag & SYN)
-		strcat(buf, " SYN");
+		p = seprint(p, e, " SYN");
 	if(flag & FIN)
-		strcat(buf, " FIN");
-
+		p = seprint(p, e, " FIN");
+	USED(p);
 	return buf;
 }
 
-Block *
+static Block*
 htontcp6(Tcp *tcph, Block *data, Tcp6hdr *ph, Tcpctl *tcb)
 {
 	int dlen;
@@ -940,14 +1038,10 @@
 	if(data) {
 		dlen = blocklen(data);
 		data = padblock(data, hdrlen + TCP6_PKT);
-		if(data == nil)
-			return nil;
 	}
 	else {
 		dlen = 0;
 		data = allocb(hdrlen + TCP6_PKT + 64);	/* the 64 pad is to meet mintu's */
-		if(data == nil)
-			return nil;
 		data->wp += hdrlen + TCP6_PKT;
 	}
 
@@ -1000,7 +1094,7 @@
 	return data;
 }
 
-Block *
+static Block*
 htontcp4(Tcp *tcph, Block *data, Tcp4hdr *ph, Tcpctl *tcb)
 {
 	int dlen;
@@ -1013,7 +1107,7 @@
 	if(tcph->flags & SYN){
 		if(tcph->mss)
 			hdrlen += MSS_LENGTH;
-		if(tcph->ws)
+		if(1)
 			hdrlen += WS_LENGTH;
 		optpad = hdrlen & 3;
 		if(optpad)
@@ -1024,14 +1118,10 @@
 	if(data) {
 		dlen = blocklen(data);
 		data = padblock(data, hdrlen + TCP4_PKT);
-		if(data == nil)
-			return nil;
 	}
 	else {
 		dlen = 0;
 		data = allocb(hdrlen + TCP4_PKT + 64);	/* the 64 pad is to meet mintu's */
-		if(data == nil)
-			return nil;
 		data->wp += hdrlen + TCP4_PKT;
 	}
 
@@ -1055,7 +1145,8 @@
 			hnputs(opt, tcph->mss);
 			opt += 2;
 		}
-		if(tcph->ws != 0){
+		/* always offer.  rfc1323 §2.2 */
+		if(1){
 			*opt++ = WSOPT;
 			*opt++ = WS_LENGTH;
 			*opt++ = tcph->ws;
@@ -1074,7 +1165,7 @@
 	return data;
 }
 
-int
+static int
 ntohtcp6(Tcp *tcph, Block **bpp)
 {
 	Tcp6hdr *h;
@@ -1103,6 +1194,7 @@
 	tcph->urg = nhgets(h->tcpurg);
 	tcph->mss = 0;
 	tcph->ws = 0;
+	tcph->update = 0;
 	tcph->len = nhgets(h->ploadlen) - hdrlen;
 
 	*bpp = pullupblock(*bpp, hdrlen+TCP6_PKT);
@@ -1127,7 +1219,7 @@
 			break;
 		case WSOPT:
 			if(optlen == WS_LENGTH && *(optr+2) <= 14)
-				tcph->ws = HaveWS | *(optr+2);
+				tcph->ws = *(optr+2);
 			break;
 		}
 		n -= optlen;
@@ -1136,7 +1228,7 @@
 	return hdrlen;
 }
 
-int
+static int
 ntohtcp4(Tcp *tcph, Block **bpp)
 {
 	Tcp4hdr *h;
@@ -1166,6 +1258,7 @@
 	tcph->urg = nhgets(h->tcpurg);
 	tcph->mss = 0;
 	tcph->ws = 0;
+	tcph->update = 0;
 	tcph->len = nhgets(h->length) - (hdrlen + TCP4_PKT);
 
 	*bpp = pullupblock(*bpp, hdrlen+TCP4_PKT);
@@ -1190,7 +1283,7 @@
 			break;
 		case WSOPT:
 			if(optlen == WS_LENGTH && *(optr+2) <= 14)
-				tcph->ws = HaveWS | *(optr+2);
+				tcph->ws = *(optr+2);
 			break;
 		}
 		n -= optlen;
@@ -1200,16 +1293,19 @@
 }
 
 /*
- *  For outgiing calls, generate an initial sequence
+ *  For outgoing calls, generate an initial sequence
  *  number and put a SYN on the send queue
  */
-void
+static void
 tcpsndsyn(Conv *s, Tcpctl *tcb)
 {
+	Tcppriv *tpriv;
+
 	tcb->iss = (nrand(1<<16)<<16)|nrand(1<<16);
 	tcb->rttseq = tcb->iss;
 	tcb->snd.wl2 = tcb->iss;
 	tcb->snd.una = tcb->iss;
+	tcb->snd.rxt = tcb->iss;
 	tcb->snd.ptr = tcb->rttseq;
 	tcb->snd.nxt = tcb->rttseq;
 	tcb->flgcnt++;
@@ -1217,7 +1313,9 @@
 	tcb->sndsyntime = NOW;
 
 	/* set desired mss and scale */
-	tcb->mss = tcpmtu(s->p, s->laddr, s->ipversion, &tcb->scale);
+	tcb->mss = tcpmtu(v6lookup(s->p->f, s->raddr, s->laddr, s), s->ipversion, &tcb->scale);
+	tpriv = s->p->priv;
+	tpriv->stats[Mss] = tcb->mss;
 }
 
 void
@@ -1229,7 +1327,7 @@
 	Tcp4hdr ph4;
 	Tcp6hdr ph6;
 
-	netlog(tcp->f, Logtcp, "sndrst: %s", reason);
+	netlog(tcp->f, Logtcp, "sndrst: %s\n", reason);
 
 	tpriv = tcp->priv;
 
@@ -1307,7 +1405,7 @@
  *  send a reset to the remote side and close the conversation
  *  called with s qlocked
  */
-char*
+static char*
 tcphangup(Conv *s)
 {
 	Tcp seg;
@@ -1322,7 +1420,7 @@
 			memset(&seg, 0, sizeof seg);
 			seg.flags = RST | ACK;
 			seg.ack = tcb->rcv.nxt;
-			tcb->rcv.una = 0;
+			tcb->rcv.ackptr = seg.ack;
 			seg.seq = tcb->snd.ptr;
 			seg.wnd = 0;
 			seg.urg = 0;
@@ -1353,7 +1451,7 @@
 /*
  *  (re)send a SYN ACK
  */
-int
+static int
 sndsynack(Proto *tcp, Limbo *lp)
 {
 	Block *hbp;
@@ -1360,7 +1458,7 @@
 	Tcp4hdr ph4;
 	Tcp6hdr ph6;
 	Tcp seg;
-	int scale;
+	uint scale;
 
 	/* make pseudo header */
 	switch(lp->version) {
@@ -1388,11 +1486,12 @@
 		panic("sndrst: version %d", lp->version);
 	}
 
+	memset(&seg, 0, sizeof seg);
 	seg.seq = lp->iss;
 	seg.ack = lp->irs+1;
 	seg.flags = SYN|ACK;
 	seg.urg = 0;
-	seg.mss = tcpmtu(tcp, lp->laddr, lp->version, &scale);
+	seg.mss = tcpmtu(v6lookup(tcp->f, lp->raddr, lp->laddr, nil), lp->version, &scale);
 	seg.wnd = QMAX;
 
 	/* if the other side set scale, we should too */
@@ -1570,6 +1669,18 @@
 	}
 }
 
+static void
+initialwindow(Tcpctl *tcb)
+{
+	/* RFC 3390 initial window */
+	if(tcb->mss < 1095)
+		tcb->cwind = 4*tcb->mss;
+	else if(tcb->mss < 2190)
+		tcb->cwind = 4380;
+	else
+		tcb->cwind = 2*tcb->mss;
+}
+
 /*
  *  come here when we finally get an ACK to our SYN-ACK.
  *  lookup call in limbo.  if found, create a new conversation
@@ -1596,7 +1707,7 @@
 	/* find a call in limbo */
 	h = hashipa(src, segp->source);
 	for(l = &tpriv->lht[h]; (lp = *l) != nil; l = &lp->next){
-		netlog(s->p->f, Logtcp, "tcpincoming s %I,%ux/%I,%ux d %I,%ux/%I,%ux v %d/%d",
+		netlog(s->p->f, Logtcp, "tcpincoming s %I!%ud/%I!%ud d %I!%ud/%I!%ud v %d/%d\n",
 			src, segp->source, lp->raddr, lp->rport,
 			dst, segp->dest, lp->laddr, lp->lport,
 			version, lp->version
@@ -1611,7 +1722,7 @@
 
 		/* we're assuming no data with the initial SYN */
 		if(segp->seq != lp->irs+1 || segp->ack != lp->iss+1){
-			netlog(s->p->f, Logtcp, "tcpincoming s %lux/%lux a %lux %lux",
+			netlog(s->p->f, Logtcp, "tcpincoming s %lux/%lux a %lux %lux\n",
 				segp->seq, lp->irs+1, segp->ack, lp->iss+1);
 			lp = nil;
 		} else {
@@ -1641,6 +1752,8 @@
 
 	tcb->irs = lp->irs;
 	tcb->rcv.nxt = tcb->irs+1;
+	tcb->rcv.wptr = tcb->rcv.nxt;
+	tcb->rcv.wsnt = 0;
 	tcb->rcv.urg = tcb->rcv.nxt;
 
 	tcb->iss = lp->iss;
@@ -1649,19 +1762,24 @@
 	tcb->snd.una = tcb->iss+1;
 	tcb->snd.ptr = tcb->iss+1;
 	tcb->snd.nxt = tcb->iss+1;
+	tcb->snd.rxt = tcb->iss+1;
 	tcb->flgcnt = 0;
 	tcb->flags |= SYNACK;
 
+	/* set desired mss and scale */
+	tcb->mss = tcpmtu(v6lookup(s->p->f, src, dst, s), version, &tcb->scale);
+
 	/* our sending max segment size cannot be bigger than what he asked for */
 	if(lp->mss != 0 && lp->mss < tcb->mss)
 		tcb->mss = lp->mss;
+	tpriv->stats[Mss] = tcb->mss;
 
 	/* window scaling */
 	tcpsetscale(new, tcb, lp->rcvscale, lp->sndscale);
 
-	/* the congestion window always starts out as a single segment */
+	/* congestion window */
 	tcb->snd.wnd = segp->wnd;
-	tcb->cwind = tcb->mss;
+	initialwindow(tcb);
 
 	/* set initial round trip time */
 	tcb->sndsyntime = lp->lastsend+lp->rexmits*SYNACK_RXTIMER;
@@ -1700,7 +1818,7 @@
 	return new;
 }
 
-int
+static int
 seq_within(ulong x, ulong low, ulong high)
 {
 	if(low <= high){
@@ -1714,25 +1832,25 @@
 	return 0;
 }
 
-int
+static int
 seq_lt(ulong x, ulong y)
 {
 	return (int)(x-y) < 0;
 }
 
-int
+static int
 seq_le(ulong x, ulong y)
 {
 	return (int)(x-y) <= 0;
 }
 
-int
+static int
 seq_gt(ulong x, ulong y)
 {
 	return (int)(x-y) > 0;
 }
 
-int
+static int
 seq_ge(ulong x, ulong y)
 {
 	return (int)(x-y) >= 0;
@@ -1742,7 +1860,7 @@
  *  use the time between the first SYN and it's ack as the
  *  initial round trip time
  */
-void
+static void
 tcpsynackrtt(Conv *s)
 {
 	Tcpctl *tcb;
@@ -1760,46 +1878,59 @@
 	tcphalt(tpriv, &tcb->rtt_timer);
 }
 
-void
+static void
 update(Conv *s, Tcp *seg)
 {
 	int rtt, delta;
 	Tcpctl *tcb;
 	ulong acked;
-	ulong expand;
 	Tcppriv *tpriv;
 
+	if(seg->update)
+		return;
+	seg->update = 1;
+
 	tpriv = s->p->priv;
 	tcb = (Tcpctl*)s->ptcl;
 
-	/* if everything has been acked, force output(?) */
-	if(seq_gt(seg->ack, tcb->snd.nxt)) {
-		tcb->flags |= FORCE;
-		return;
+	/* catch zero-window updates, update window & recover */
+	if(tcb->snd.wnd == 0 && seg->wnd > 0)
+	if(seq_lt(seg->ack,  tcb->snd.ptr)){
+		netlog(s->p->f, Logtcp, "tcp: zwu ack %lud una %lud ptr %lud win %lud\n",
+			seg->ack,  tcb->snd.una, tcb->snd.ptr, seg->wnd);
+		tcb->snd.wnd = seg->wnd;
+		goto recovery;
 	}
 
-	/* added by Dong Lin for fast retransmission */
-	if(seg->ack == tcb->snd.una
-	&& tcb->snd.una != tcb->snd.nxt
-	&& seg->len == 0
-	&& seg->wnd == tcb->snd.wnd) {
-
-		/* this is a pure ack w/o window update */
-		netlog(s->p->f, Logtcprxmt, "dupack %lud ack %lud sndwnd %d advwin %d\n",
-			tcb->snd.dupacks, seg->ack, tcb->snd.wnd, seg->wnd);
-
-		if(++tcb->snd.dupacks == TCPREXMTTHRESH) {
-			/*
-			 *  tahoe tcp rxt the packet, half sshthresh,
- 			 *  and set cwnd to one packet
-			 */
+	/* newreno fast retransmit */
+	if(seg->ack == tcb->snd.una)
+	if(tcb->snd.una != tcb->snd.nxt)
+	if(++tcb->snd.dupacks == 3){
+recovery:
+		if(tcb->snd.recovery){
+			tpriv->stats[RecoveryCwind]++;
+			tcb->cwind += tcb->mss;
+		}else if(seq_le(tcb->snd.rxt, seg->ack)){
+			tpriv->stats[Recovery]++;
+			tcb->abcbytes = 0;
 			tcb->snd.recovery = 1;
+			tcb->snd.partialack = 0;
 			tcb->snd.rxt = tcb->snd.nxt;
-			netlog(s->p->f, Logtcprxmt, "fast rxt %lud, nxt %lud\n", tcb->snd.una, tcb->snd.nxt);
+			tcpcongestion(tcb);
+			tcb->cwind = tcb->ssthresh + 3*tcb->mss;
+			netlog(s->p->f, Logtcpwin, "recovery inflate %ld ss %ld @%lud\n",
+				tcb->cwind, tcb->ssthresh, tcb->snd.rxt);
 			tcprxmit(s);
-		} else {
-			/* do reno tcp here. */
+		}else{
+			tpriv->stats[RecoveryNoSeq]++;
+			netlog(s->p->f, Logtcpwin, "!recov %lud not ≤ %lud %ld\n",
+				tcb->snd.rxt, seg->ack, tcb->snd.rxt - seg->ack);
+			/* do not enter fast retransmit */
+			/* do not change ssthresh */
 		}
+	}else if(tcb->snd.recovery){
+		tpriv->stats[RecoveryCwind]++;
+		tcb->cwind += tcb->mss;
 	}
 
 	/*
@@ -1807,6 +1938,9 @@
 	 */
 	if(seq_gt(seg->ack, tcb->snd.wl2)
 	|| (tcb->snd.wl2 == seg->ack && seg->wnd > tcb->snd.wnd)){
+		/* clear dupack if we advance wl2 */
+		if(tcb->snd.wl2 != seg->ack)
+			tcb->snd.dupacks = 0;
 		tcb->snd.wnd = seg->wnd;
 		tcb->snd.wl2 = seg->ack;
 	}
@@ -1816,22 +1950,11 @@
 		 *  don't let us hangup if sending into a closed window and
 		 *  we're still getting acks
 		 */
-		if((tcb->flags&RETRAN) && tcb->snd.wnd == 0){
+		if((tcb->flags&RETRAN) && tcb->snd.wnd == 0)
 			tcb->backedoff = MAXBACKMS/4;
-		}
 		return;
 	}
 
-	/*
-	 *  any positive ack turns off fast rxt,
-	 *  (should we do new-reno on partial acks?)
-	 */
-	if(!tcb->snd.recovery || seq_ge(seg->ack, tcb->snd.rxt)) {
-		tcb->snd.dupacks = 0;
-		tcb->snd.recovery = 0;
-	} else
-		netlog(s->p->f, Logtcp, "rxt next %lud, cwin %ud\n", seg->ack, tcb->cwind);
-
 	/* Compute the new send window size */
 	acked = seg->ack - tcb->snd.una;
 
@@ -1843,24 +1966,41 @@
 		goto done;
 	}
 
-	/* slow start as long as we're not recovering from lost packets */
-	if(tcb->cwind < tcb->snd.wnd && !tcb->snd.recovery) {
-		if(tcb->cwind < tcb->ssthresh) {
-			expand = tcb->mss;
-			if(acked < expand)
-				expand = acked;
+	/*
+	 *  congestion control
+	 */
+	if(tcb->snd.recovery){
+		if(seq_ge(seg->ack, tcb->snd.rxt)){
+			/* recovery finished; deflate window */
+			tpriv->stats[RecoveryDone]++;
+			tcb->snd.dupacks = 0;
+			tcb->snd.recovery = 0;
+			tcb->cwind = (tcb->snd.nxt - tcb->snd.una) + tcb->mss;
+			if(tcb->ssthresh < tcb->cwind)
+				tcb->cwind = tcb->ssthresh;
+			netlog(s->p->f, Logtcpwin, "recovery deflate %ld %ld\n",
+				tcb->cwind, tcb->ssthresh);
+		} else {
+			/* partial ack; we lost more than one segment */
+			tpriv->stats[RecoveryPA]++;
+			if(tcb->cwind > acked)
+				tcb->cwind -= acked;
+			else{
+				netlog(s->p->f, Logtcpwin, "partial ack neg\n");
+				tcb->cwind = tcb->mss;
+			}
+			netlog(s->p->f, Logtcpwin, "partial ack %ld left %ld cwind %ld\n",
+				acked, tcb->snd.rxt - seg->ack, tcb->cwind);
+
+			if(acked >= tcb->mss)
+				tcb->cwind += tcb->mss;
+			tcb->snd.partialack++;
 		}
-		else
-			expand = ((int)tcb->mss * tcb->mss) / tcb->cwind;
+	} else
+		tcpabcincr(tcb, acked);
 
-		if(tcb->cwind + expand < tcb->cwind)
-			expand = tcb->snd.wnd - tcb->cwind;
-		if(tcb->cwind + expand > tcb->snd.wnd)
-			expand = tcb->snd.wnd - tcb->cwind;
-		tcb->cwind += expand;
-	}
-
 	/* Adjust the timers according to the round trip time */
+	/* todo: fix sloppy treatment of overflow cases here. */
 	if(tcb->rtt_timer.state == TcptimerON && seq_ge(seg->ack, tcb->rttseq)) {
 		tcphalt(tpriv, &tcb->rtt_timer);
 		if((tcb->flags&RETRAN) == 0) {
@@ -1891,13 +2031,23 @@
 done:
 	if(qdiscard(s->wq, acked) < acked)
 		tcb->flgcnt--;
-
 	tcb->snd.una = seg->ack;
+
+	/* newreno fast recovery */
+	if(tcb->snd.recovery)
+		tcprxmit(s);
+
 	if(seq_gt(seg->ack, tcb->snd.urg))
 		tcb->snd.urg = seg->ack;
 
-	if(tcb->snd.una != tcb->snd.nxt)
-		tcpgo(tpriv, &tcb->timer);
+	if(tcb->snd.una != tcb->snd.nxt){
+		/* “impatient” variant */
+		if(!tcb->snd.recovery || tcb->snd.partialack == 1){
+			tcb->time = NOW;
+			tcb->timeuna = tcb->snd.una;
+			tcpgo(tpriv, &tcb->timer);
+		}
+	}
 	else
 		tcphalt(tpriv, &tcb->timer);
 
@@ -1904,12 +2054,13 @@
 	if(seq_lt(tcb->snd.ptr, tcb->snd.una))
 		tcb->snd.ptr = tcb->snd.una;
 
-	tcb->flags &= ~RETRAN;
+	if(!tcb->snd.recovery)
+		tcb->flags &= ~RETRAN;
 	tcb->backoff = 0;
 	tcb->backedoff = 0;
 }
 
-void
+static void
 tcpiput(Proto *tcp, Ipifc*, Block *bp)
 {
 	Tcp seg;
@@ -1917,7 +2068,7 @@
 	Tcp6hdr *h6;
 	int hdrlen;
 	Tcpctl *tcb;
-	ushort length;
+	ushort length, csum;
 	uchar source[IPaddrlen], dest[IPaddrlen];
 	Conv *s;
 	Fs *f;
@@ -1980,10 +2131,12 @@
 		h6->ttl = proto;
 		hnputl(h6->vcf, length);
 		if((h6->tcpcksum[0] || h6->tcpcksum[1]) &&
-			ptclcsum(bp, TCP6_IPLEN, length+TCP6_PHDRSIZE)) {
+		    (csum = ptclcsum(bp, TCP6_IPLEN, length+TCP6_PHDRSIZE)) != 0) {
 			tpriv->stats[CsumErrs]++;
 			tpriv->stats[InErrs]++;
-			netlog(f, Logtcp, "bad tcp proto cksum\n");
+			netlog(f, Logtcp,
+			    "bad tcpv6 proto cksum: got %#ux, computed %#ux\n",
+				h6->tcpcksum[0]<<8 | h6->tcpcksum[1], csum);
 			freeblist(bp);
 			return;
 		}
@@ -1995,7 +2148,7 @@
 		if(hdrlen < 0){
 			tpriv->stats[HlenErrs]++;
 			tpriv->stats[InErrs]++;
-			netlog(f, Logtcp, "bad tcp hdr len\n");
+			netlog(f, Logtcp, "bad tcpv6 hdr len\n");
 			return;
 		}
 
@@ -2005,7 +2158,7 @@
 		if(bp == nil){
 			tpriv->stats[LenErrs]++;
 			tpriv->stats[InErrs]++;
-			netlog(f, Logtcp, "tcp len < 0 after trim\n");
+			netlog(f, Logtcp, "tcpv6 len < 0 after trim\n");
 			return;
 		}
 	}
@@ -2016,7 +2169,8 @@
 	/* Look for a matching conversation */
 	s = iphtlook(&tpriv->ht, source, seg.source, dest, seg.dest);
 	if(s == nil){
-		netlog(f, Logtcp, "iphtlook failed");
+		netlog(f, Logtcp, "iphtlook(src %I!%d, dst %I!%d) failed\n",
+			source, seg.source, dest, seg.dest);
 reset:
 		qunlock(tcp);
 		sndrst(tcp, source, dest, length, &seg, version, "no conversation");
@@ -2136,8 +2290,12 @@
 	}
 
 	/* Cut the data to fit the receive window */
+	tcprcvwin(s);
 	if(tcptrim(tcb, &seg, &bp, &length) == -1) {
-		netlog(f, Logtcp, "tcp len < 0, %lud %d\n", seg.seq, length);
+		if(seg.seq+1 != tcb->rcv.nxt || length != 1)
+		netlog(f, Logtcp, "tcp: trim: !inwind: seq %lud-%lud win %lud-%lud l %d from %I\n", 
+			seg.seq, seg.seq + length - 1, 
+			tcb->rcv.nxt, tcb->rcv.nxt + tcb->rcv.wnd-1, length, s->raddr);
 		update(s, &seg);
 		if(qlen(s->wq)+tcb->flgcnt == 0 && tcb->state == Closing) {
 			tcphalt(tpriv, &tcb->rtt_timer);
@@ -2168,12 +2326,15 @@
 	if(seg.seq != tcb->rcv.nxt)
 	if(length != 0 || (seg.flags & (SYN|FIN))) {
 		update(s, &seg);
-		if(addreseq(tcb, tpriv, &seg, bp, length) < 0)
+		if(addreseq(f, tcb, tpriv, &seg, bp, length) < 0)
 			print("reseq %I.%d -> %I.%d\n", s->raddr, s->rport, s->laddr, s->lport);
-		tcb->flags |= FORCE;
+		tcb->flags |= FORCE;		/* force duplicate ack; RFC 5681 §3.2 */
 		goto output;
 	}
 
+	if(tcb->nreseq > 0)
+		tcb->flags |= FORCE;		/* filled hole in sequence space; RFC 5681 §3.2 */
+
 	/*
 	 *  keep looping till we've processed this packet plus any
 	 *  adjacent packets in the resequence queue
@@ -2238,7 +2399,8 @@
 				goto raise;
 			}
 		case Time_wait:
-			tcb->flags |= FORCE;
+			if(seg.flags & FIN)
+				tcb->flags |= FORCE;
 			if(tcb->timer.state != TcptimerON)
 				tcpgo(tpriv, &tcb->timer);
 		}
@@ -2272,34 +2434,12 @@
 				 * receive queue
 				 */
 				if(bp) {
-					bp = packblock(bp);
-					if(bp == nil)
-						panic("tcp packblock");
-					qpassnolim(s->rq, bp);
+					qpassnolim(s->rq, packblock(bp));
 					bp = nil;
-
-					/*
-					 *  Force an ack every 2 data messages.  This is
-					 *  a hack for rob to make his home system run
-					 *  faster.
-					 *
-					 *  this also keeps the standard TCP congestion
-					 *  control working since it needs an ack every
-					 *  2 max segs worth.  This is not quite that,
-					 *  but under a real stream is equivalent since
-					 *  every packet has a max seg in it.
-					 */
-					if(++(tcb->rcv.una) >= 2)
-						tcb->flags |= FORCE;
 				}
 				tcb->rcv.nxt += length;
 
 				/*
-				 *  update our rcv window
-				 */
-				tcprcvwin(s);
-
-				/*
 				 *  turn on the acktimer if there's something
 				 *  to ack
 				 */
@@ -2373,8 +2513,11 @@
 
 			getreseq(tcb, &seg, &bp, &length);
 
-			if(tcptrim(tcb, &seg, &bp, &length) == 0)
+			tcprcvwin(s);
+			if(tcptrim(tcb, &seg, &bp, &length) == 0){
+				tcb->flags |= FORCE;
 				break;
+			}
 		}
 	}
 output:
@@ -2394,15 +2537,15 @@
  *  the lock to ipoput the packet so some care has to be
  *  taken by callers.
  */
-void
+static void
 tcpoutput(Conv *s)
 {
 	Tcp seg;
-	int msgs;
+	uint msgs;
 	Tcpctl *tcb;
 	Block *hbp, *bp;
-	int sndcnt, n;
-	ulong ssize, dsize, usable, sent;
+	int sndcnt;
+	ulong ssize, dsize, sent;
 	Fs *f;
 	Tcppriv *tpriv;
 	uchar version;
@@ -2411,9 +2554,26 @@
 	tpriv = s->p->priv;
 	version = s->ipversion;
 
-	for(msgs = 0; msgs < 100; msgs++) {
-		tcb = (Tcpctl*)s->ptcl;
+	tcb = (Tcpctl*)s->ptcl;
 
+	/* force ack every 2*mss */
+	if((tcb->flags & FORCE) == 0)
+	if(tcb->rcv.nxt - tcb->rcv.ackptr >= 2*tcb->mss){
+		tpriv->stats[Delayack]++;
+		tcb->flags |= FORCE;
+	}
+
+	/* force ack if window opening */
+	if(0)
+	if((tcb->flags & FORCE) == 0){
+		tcprcvwin(s);
+		if((int)(tcb->rcv.wptr - tcb->rcv.wsnt) >= 2*tcb->mss){
+			tpriv->stats[Wopenack]++;
+			tcb->flags |= FORCE;
+		}
+	}
+
+	for(msgs = 0; msgs < 100; msgs++) {
 		switch(tcb->state) {
 		case Listen:
 		case Closed:
@@ -2421,7 +2581,12 @@
 			return;
 		}
 
+		/* Don't send anything else until our SYN has been acked */
+		if(tcb->snd.ptr != tcb->iss && (tcb->flags & SYNACK) == 0)
+			break;
+
 		/* force an ack when a window has opened up */
+		tcprcvwin(s);
 		if(tcb->rcv.blocked && tcb->rcv.wnd > 0){
 			tcb->rcv.blocked = 0;
 			tcb->flags |= FORCE;
@@ -2429,54 +2594,57 @@
 
 		sndcnt = qlen(s->wq)+tcb->flgcnt;
 		sent = tcb->snd.ptr - tcb->snd.una;
-
-		/* Don't send anything else until our SYN has been acked */
-		if(tcb->snd.ptr != tcb->iss && (tcb->flags & SYNACK) == 0)
-			break;
-
-		/* Compute usable segment based on offered window and limit
-		 * window probes to one
-		 */
+		ssize = sndcnt;
 		if(tcb->snd.wnd == 0){
-			if(sent != 0) {
-				if((tcb->flags&FORCE) == 0)
-					break;
-//				tcb->snd.ptr = tcb->snd.una;
+			/* zero window probe */
+			if(sent > 0)
+			if(!(tcb->flags & FORCE))
+				break;	/* already probing, rto re-probes */
+			if(ssize < sent)
+				ssize = 0;
+			else{
+				ssize -= sent;
+				if(ssize > 0)
+					ssize = 1;
 			}
-			usable = 1;
+		} else {
+			/* calculate usable segment size */
+			if(ssize > tcb->cwind)
+				ssize = tcb->cwind;
+			if(ssize > tcb->snd.wnd)
+				ssize = tcb->snd.wnd;
+
+			if(ssize < sent)
+				ssize = 0;
+			else {
+				ssize -= sent;
+				if(ssize > tcb->mss)
+					ssize = tcb->mss;
+			}
 		}
-		else {
-			usable = tcb->cwind;
-			if(tcb->snd.wnd < usable)
-				usable = tcb->snd.wnd;
-			usable -= sent;
-		}
-		ssize = sndcnt-sent;
-		if(ssize && usable < 2)
-			netlog(s->p->f, Logtcp, "throttled snd.wnd %lud cwind %lud\n",
-				tcb->snd.wnd, tcb->cwind);
-		if(usable < ssize)
-			ssize = usable;
-		if(tcb->mss < ssize)
-			ssize = tcb->mss;
+
 		dsize = ssize;
 		seg.urg = 0;
 
-		if(ssize == 0)
-		if((tcb->flags&FORCE) == 0)
-			break;
+		if(!(tcb->flags & FORCE)){
+			if(ssize == 0)
+				break;
+			if(ssize < tcb->mss)
+			if(tcb->snd.nxt == tcb->snd.ptr)
+			if(sent > TCPREXMTTHRESH*tcb->mss)
+				break;
+		}
 
 		tcb->flags &= ~FORCE;
-		tcprcvwin(s);
 
 		/* By default we will generate an ack */
 		tcphalt(tpriv, &tcb->acktimer);
-		tcb->rcv.una = 0;
 		seg.source = s->lport;
 		seg.dest = s->rport;
 		seg.flags = ACK;
 		seg.mss = 0;
 		seg.ws = 0;
+		seg.update = 0;
 		switch(tcb->state){
 		case Syn_sent:
 			seg.flags = 0;
@@ -2516,20 +2684,9 @@
 			}
 		}
 
-		if(sent+dsize == sndcnt)
+		if(sent+dsize == sndcnt && dsize)
 			seg.flags |= PSH;
 
-		/* keep track of balance of resent data */
-		if(seq_lt(tcb->snd.ptr, tcb->snd.nxt)) {
-			n = tcb->snd.nxt - tcb->snd.ptr;
-			if(ssize < n)
-				n = ssize;
-			tcb->resent += n;
-			netlog(f, Logtcp, "rexmit: %I.%d -> %I.%d ptr %lux nxt %lux\n",
-				s->raddr, s->rport, s->laddr, s->lport, tcb->snd.ptr, tcb->snd.nxt);
-			tpriv->stats[RetransSegs]++;
-		}
-
 		tcb->snd.ptr += ssize;
 
 		/* Pull up the send pointer so we can accept acks
@@ -2565,13 +2722,17 @@
 		 * expect acknowledges
 		 */
 		if(ssize != 0){
-			if(tcb->timer.state != TcptimerON)
+			if(tcb->timer.state != TcptimerON){
+				tcb->time = NOW;
+				tcb->timeuna = tcb->snd.una;
 				tcpgo(tpriv, &tcb->timer);
+			}
 
 			/*  If round trip timer isn't running, start it.
 			 *  measure the longest packet only in case the
 			 *  transmission time dominates RTT
 			 */
+			if(tcb->snd.retransmit == 0)
 			if(tcb->rtt_timer.state != TcptimerON)
 			if(ssize == tcb->mss) {
 				tcpgo(tpriv, &tcb->rtt_timer);
@@ -2580,6 +2741,10 @@
 		}
 
 		tpriv->stats[OutSegs]++;
+		if(tcb->snd.retransmit)
+			tpriv->stats[RetransSegsSent]++;
+		tcb->rcv.ackptr = seg.ack;
+		tcb->rcv.wsnt = tcb->rcv.wptr;
 
 		/* put off the next keep alive */
 		tcpgo(tpriv, &tcb->katimer);
@@ -2600,9 +2765,8 @@
 		default:
 			panic("tcpoutput2: version %d", version);
 		}
-		if((msgs%4) == 1){
+		if((msgs%4) == 3){
 			qunlock(s);
-			sched();
 			qlock(s);
 		}
 	}
@@ -2611,7 +2775,7 @@
 /*
  *  the BSD convention (hack?) for keep alives.  resend last uchar acked.
  */
-void
+static void
 tcpsendka(Conv *s)
 {
 	Tcp seg;
@@ -2621,6 +2785,7 @@
 	tcb = (Tcpctl*)s->ptcl;
 
 	dbp = nil;
+	memset(&seg, 0, sizeof seg);
 	seg.urg = 0;
 	seg.source = s->lport;
 	seg.dest = s->rport;
@@ -2632,7 +2797,8 @@
 	else
 		seg.seq = tcb->snd.una-1;
 	seg.ack = tcb->rcv.nxt;
-	tcb->rcv.una = 0;
+	tcb->rcv.ackptr = seg.ack;
+	tcprcvwin(s);
 	seg.wnd = tcb->rcv.wnd;
 	if(tcb->state == Finwait2){
 		seg.flags |= FIN;
@@ -2666,7 +2832,7 @@
 /*
  *  set connection to time out after 12 minutes
  */
-void
+static void
 tcpsetkacounter(Tcpctl *tcb)
 {
 	tcb->kacounter = (12 * 60 * 1000) / (tcb->katimer.start*MSPTICK);
@@ -2678,7 +2844,7 @@
  *  if we've timed out, close the connection
  *  otherwise, send a keepalive and restart the timer
  */
-void
+static void
 tcpkeepalive(void *v)
 {
 	Tcpctl *tcb;
@@ -2706,7 +2872,7 @@
 /*
  *  start keepalive timer
  */
-char*
+static char*
 tcpstartka(Conv *s, char **f, int n)
 {
 	Tcpctl *tcb;
@@ -2729,7 +2895,7 @@
 /*
  *  turn checksums on/off
  */
-char*
+static char*
 tcpsetchecksum(Conv *s, char **f, int)
 {
 	Tcpctl *tcb;
@@ -2740,30 +2906,38 @@
 	return nil;
 }
 
-void
+/*
+ *  retransmit (at most) one segment at snd.una.
+ *  preserve cwind & snd.ptr
+ */
+static void
 tcprxmit(Conv *s)
 {
 	Tcpctl *tcb;
+	Tcppriv *tpriv;
+	ulong tcwind, tptr;
 
 	tcb = (Tcpctl*)s->ptcl;
-
 	tcb->flags |= RETRAN|FORCE;
-	tcb->snd.ptr = tcb->snd.una;
 
-	/*
-	 *  We should be halving the slow start threshhold (down to one
-	 *  mss) but leaving it at mss seems to work well enough
-	 */
- 	tcb->ssthresh = tcb->mss;
-
-	/*
-	 *  pull window down to a single packet
-	 */
+	tptr = tcb->snd.ptr;
+	tcwind = tcb->cwind;
+	tcb->snd.ptr = tcb->snd.una;
 	tcb->cwind = tcb->mss;
+	tcb->snd.retransmit = 1;
 	tcpoutput(s);
+	tcb->snd.retransmit = 0;
+	tcb->cwind = tcwind;
+	tcb->snd.ptr = tptr;
+
+	tpriv = s->p->priv;
+	tpriv->stats[RetransSegs]++;
 }
 
-void
+/*
+ *  todo: RFC 4138 F-RTO
+ */
+static void
 tcptimeout(void *arg)
 {
 	Conv *s;
@@ -2792,11 +2966,29 @@
 			localclose(s, Etimedout);
 			break;
 		}
-		netlog(s->p->f, Logtcprxmt, "timeout rexmit 0x%lux %d/%d\n", tcb->snd.una, tcb->timer.start, NOW);
+		netlog(s->p->f, Logtcprxmt, "rxm %d/%d %ldms %lud rto %d %lud %s\n",
+			tcb->srtt, tcb->mdev, NOW-tcb->time,
+			tcb->snd.una-tcb->timeuna, tcb->snd.rto, tcb->snd.ptr,
+			tcpstates[s->state]);
 		tcpsettimer(tcb);
+		if(tcb->snd.rto == 0)
+			tcpcongestion(tcb);
 		tcprxmit(s);
+		tcb->snd.ptr = tcb->snd.una;
+		tcb->cwind = tcb->mss;
+		tcb->snd.rto = 1;
 		tpriv->stats[RetransTimeouts]++;
-		tcb->snd.dupacks = 0;
+
+		if(tcb->snd.recovery){
+			tcb->snd.dupacks = 0;			/* reno rto */
+			tcb->snd.recovery = 0;
+			tpriv->stats[RecoveryRTO]++;
+			tcb->snd.rxt = tcb->snd.nxt;
+			netlog(s->p->f, Logtcpwin,
+				"rto recovery rxt @%lud\n", tcb->snd.nxt);
+		}
+
+		tcb->abcbytes = 0;
 		break;
 	case Time_wait:
 		localclose(s, nil);
@@ -2808,7 +3000,7 @@
 	poperror();
 }
 
-int
+static int
 inwindow(Tcpctl *tcb, int seq)
 {
 	return seq_within(seq, tcb->rcv.nxt, tcb->rcv.nxt+tcb->rcv.wnd-1);
@@ -2817,36 +3009,83 @@
 /*
  *  set up state for a received SYN (or SYN ACK) packet
  */
-void
+static void
 procsyn(Conv *s, Tcp *seg)
 {
 	Tcpctl *tcb;
+	Tcppriv *tpriv;
 
 	tcb = (Tcpctl*)s->ptcl;
 	tcb->flags |= FORCE;
 
 	tcb->rcv.nxt = seg->seq + 1;
+	tcb->rcv.wptr = tcb->rcv.nxt;
+	tcb->rcv.wsnt = 0;
 	tcb->rcv.urg = tcb->rcv.nxt;
 	tcb->irs = seg->seq;
 
 	/* our sending max segment size cannot be bigger than what he asked for */
-	if(seg->mss != 0 && seg->mss < tcb->mss)
+	if(seg->mss != 0 && seg->mss < tcb->mss) {
 		tcb->mss = seg->mss;
+		tpriv = s->p->priv;
+		tpriv->stats[Mss] = tcb->mss;
+	}
 
-	/* the congestion window always starts out as a single segment */
+	/* if the server does not support ws option, disable window scaling */
+	if(seg->ws == 0){
+		tcb->scale = 0;
+		tcb->snd.scale = 0;
+	}
+
 	tcb->snd.wnd = seg->wnd;
-	tcb->cwind = tcb->mss;
+	initialwindow(tcb);
 }
 
-int
-addreseq(Tcpctl *tcb, Tcppriv *tpriv, Tcp *seg, Block *bp, ushort length)
+static int
+dumpreseq(Tcpctl *tcb)
 {
-	Reseq *rp, *rp1;
-	int i, rqlen, qmax;
+	Reseq *r, *next;
 
+	for(r = tcb->reseq; r != nil; r = next){
+		next = r->next;
+		freeblist(r->bp);
+		free(r);
+	}
+	tcb->reseq = nil;
+	tcb->nreseq = 0;
+	tcb->reseqlen = 0;
+	return -1;
+}
+
+/* log the holes in, and the end of, a resequence list; n is the seq expected at its head */
+static void
+logreseq(Fs *f, Reseq *r, ulong n)
+{
+	char *tag;
+
+	while(r != nil){
+		/* a hole precedes r when r does not start at the expected seq n */
+		if(r->next == nil)
+			tag = r->seg.seq != n ? "hole/end" : "end";
+		else
+			tag = r->seg.seq != n ? "hole" : nil;
+		if(tag != nil)
+			netlog(f, Logtcp, "%s %lud-%lud (%ld) %#ux\n", tag,
+				n, r->seg.seq, r->seg.seq-n, r->seg.flags);
+		n = r->seg.seq + r->seg.len;	/* seq expected from the next entry */
+		r = r->next;
+	}
+}
+
+static int
+addreseq(Fs *f, Tcpctl *tcb, Tcppriv *tpriv, Tcp *seg, Block *bp, ushort length)
+{
+	Reseq *rp, **rr;
+	int qmax;
+
 	rp = malloc(sizeof(Reseq));
 	if(rp == nil){
-		freeblist(bp);	/* bp always consumed by add_reseq */
+		freeblist(bp);	/* bp always consumed by addreseq */
 		return 0;
 	}
 
@@ -2854,56 +3093,39 @@
 	rp->bp = bp;
 	rp->length = length;
 
-	/* Place on reassembly list sorting by starting seq number */
-	rp1 = tcb->reseq;
-	if(rp1 == nil || seq_lt(seg->seq, rp1->seg.seq)) {
-		rp->next = rp1;
-		tcb->reseq = rp;
-		if(rp->next != nil)
-			tpriv->stats[OutOfOrder]++;
-		return 0;
-	}
+	tcb->reseqlen += length;
+	tcb->nreseq++;
 
-	rqlen = 0;
-	for(i = 0;; i++) {
-		rqlen += rp1->length;
-		if(rp1->next == nil || seq_lt(seg->seq, rp1->next->seg.seq)) {
-			rp->next = rp1->next;
-			rp1->next = rp;
+	/* Place on reassembly list sorting by starting seq number */
+	for(rr = &tcb->reseq;; rr = &(*rr)->next)
+		if(*rr == nil || seq_lt(seg->seq, (*rr)->seg.seq)){
+			rp->next = *rr;
+			*rr = rp;
+			tpriv->stats[Resequenced]++;
 			if(rp->next != nil)
 				tpriv->stats[OutOfOrder]++;
 			break;
 		}
-		rp1 = rp1->next;
-	}
-	qmax = QMAX<<tcb->rcv.scale;
-	if(rqlen > qmax){
-		print("resequence queue > window: %d > %d\n", rqlen, qmax);
-		i = 0;
-	  	for(rp1 = tcb->reseq; rp1 != nil; rp1 = rp1->next){
-	  		print("%#lux %#lux %#ux\n", rp1->seg.seq,
-	  			rp1->seg.ack, rp1->seg.flags);
-			if(i++ > 10){
-				print("...\n");
-				break;
-			}
-		}
 
-		// delete entire reassembly queue; wait for retransmit.
-		// - should we be smarter and only delete the tail?
-		for(rp = tcb->reseq; rp != nil; rp = rp1){
-			rp1 = rp->next;
-			freeblist(rp->bp);
-			free(rp);
-		}
-		tcb->reseq = nil;
-
-	  	return -1;
+	qmax = tcb->window;
+	if(tcb->reseqlen > qmax){
+		netlog(f, Logtcp, "tcp: reseq: queue > window: %d > %d; %d packets\n", tcb->reseqlen, qmax, tcb->nreseq);
+		logreseq(f, tcb->reseq, tcb->rcv.nxt);
+		tpriv->stats[ReseqBytelim]++;
+		return dumpreseq(tcb);
 	}
+	qmax = tcb->window / tcb->mss;		/* ~190 for qscale==2, 390 for qscale=3 */
+	if(tcb->nreseq > qmax){
+		netlog(f, Logtcp, "resequence queue > packets: %d %d; %d bytes\n", tcb->nreseq, qmax, tcb->reseqlen);
+		logreseq(f, tcb->reseq, tcb->rcv.nxt);
+		tpriv->stats[ReseqPktlim]++;
+		return dumpreseq(tcb);
+	}
+
 	return 0;
 }
 
-void
+static void
 getreseq(Tcpctl *tcb, Tcp *seg, Block **bp, ushort *length)
 {
 	Reseq *rp;
@@ -2918,10 +3140,13 @@
 	*bp = rp->bp;
 	*length = rp->length;
 
+	tcb->nreseq--;
+	tcb->reseqlen -= rp->length;
+
 	free(rp);
 }
 
-int
+static int
 tcptrim(Tcpctl *tcb, Tcp *seg, Block **bp, ushort *length)
 {
 	ushort len;
@@ -2992,7 +3217,7 @@
 	return 0;
 }
 
-void
+static void
 tcpadvise(Proto *tcp, Block *bp, char *msg)
 {
 	Tcp4hdr *h4;
@@ -3011,8 +3236,7 @@
 		v4tov6(source, h4->tcpsrc);
 		psource = nhgets(h4->tcpsport);
 		pdest = nhgets(h4->tcpdport);
-	}
-	else {
+	} else {
 		ipmove(dest, h6->tcpdst);
 		ipmove(source, h6->tcpsrc);
 		psource = nhgets(h6->tcpsport);
@@ -3021,8 +3245,7 @@
 
 	/* Look for a connection */
 	qlock(tcp);
-	for(p = tcp->conv; *p; p++) {
-		s = *p;
+	for(p = tcp->conv; (s = *p) != nil; p++) {
 		tcb = (Tcpctl*)s->ptcl;
 		if(s->rport == pdest)
 		if(s->lport == psource)
@@ -3029,6 +3252,8 @@
 		if(tcb->state != Closed)
 		if(ipcmp(s->raddr, dest) == 0)
 		if(ipcmp(s->laddr, source) == 0){
+			if(s->ignoreadvice)
+				break;
 			qlock(s);
 			qunlock(tcp);
 			switch(tcb->state){
@@ -3058,9 +3283,11 @@
 }
 
 /* called with c qlocked */
-char*
+static char*
 tcpctl(Conv* c, char** f, int n)
 {
+	if(n == 1 && strcmp(f[0], "close") == 0)
+		return tcpclose(c), nil;
 	if(n == 1 && strcmp(f[0], "hangup") == 0)
 		return tcphangup(c);
 	if(n >= 1 && strcmp(f[0], "keepalive") == 0)
@@ -3072,7 +3299,7 @@
 	return "unknown control request";
 }
 
-int
+static int
 tcpstats(Proto *tcp, char *buf, int len)
 {
 	Tcppriv *priv;
@@ -3083,7 +3310,7 @@
 	p = buf;
 	e = p+len;
 	for(i = 0; i < Nstats; i++)
-		p = seprint(p, e, "%s: %lud\n", statnames[i], priv->stats[i]);
+		p = seprint(p, e, "%s: %llud\n", statnames[i], priv->stats[i]);
 	return p - buf;
 }
 
@@ -3096,7 +3323,7 @@
  *  of questionable validity so we try to use them only when we're
  *  up against the wall.
  */
-int
+static int
 tcpgc(Proto *tcp)
 {
 	Conv *c, **pp, **ep;
@@ -3104,7 +3331,7 @@
 	Tcpctl *tcb;
 
 
-	n = natgc(tcp->ipproto);
+	n = 0;
 	ep = &tcp->conv[tcp->nc];
 	for(pp = tcp->conv; pp < ep; pp++) {
 		c = *pp;
@@ -3116,13 +3343,13 @@
 		switch(tcb->state){
 		case Syn_received:
 			if(NOW - tcb->time > 5000){
-				localclose(c, "timed out");
+				localclose(c, Etimedout);
 				n++;
 			}
 			break;
 		case Finwait2:
 			if(NOW - tcb->time > 5*60*1000){
-				localclose(c, "timed out");
+				localclose(c, Etimedout);
 				n++;
 			}
 			break;
@@ -3132,7 +3359,7 @@
 	return n;
 }
 
-void
+static void
 tcpsettimer(Tcpctl *tcb)
 {
 	int x;
@@ -3141,9 +3368,9 @@
 	x = backoff(tcb->backoff) *
 		(tcb->mdev + (tcb->srtt>>LOGAGAIN) + MSPTICK) / MSPTICK;
 
-	/* bounded twixt 1/2 and 64 seconds */
-	if(x < 500/MSPTICK)
-		x = 500/MSPTICK;
+	/* bounded twixt 0.3 and 64 seconds */
+	if(x < 300/MSPTICK)
+		x = 300/MSPTICK;
 	else if(x > (64000/MSPTICK))
 		x = 64000/MSPTICK;
 	tcb->timer.start = x;
@@ -3177,18 +3404,37 @@
 	Fsproto(fs, tcp);
 }
 
-void
+static void
 tcpsetscale(Conv *s, Tcpctl *tcb, ushort rcvscale, ushort sndscale)
 {
-	if(rcvscale){
-		tcb->rcv.scale = rcvscale & 0xff;
-		tcb->snd.scale = sndscale & 0xff;
-		tcb->window = QMAX<<tcb->snd.scale;
-		qsetlimit(s->rq, tcb->window);
-	} else {
-		tcb->rcv.scale = 0;
-		tcb->snd.scale = 0;
-		tcb->window = QMAX;
-		qsetlimit(s->rq, tcb->window);
-	}
+	/*
+	 * guess at reasonable queue sizes.  there's no current way 
+	 * to know how many nic receive buffers we can safely tie up in the
+	 * tcp stack, and we don't adjust our queues to maximize throughput
+	 * and minimize bufferbloat.  n.b. the offer (rcvscale) needs to be
+	 * respected, but we still control our own buffer commitment by
+	 * keeping a separate qscale.
+	 */
+	tcb->rcv.scale = rcvscale & 0xff;
+	tcb->snd.scale = sndscale & 0xff;
+	tcb->qscale = rcvscale & 0xff;
+	if(rcvscale > Maxqscale)
+		tcb->qscale = Maxqscale;
+
+	if(rcvscale != tcb->rcv.scale)
+		netlog(s->p->f, Logtcp, "tcpsetscale: window %lud qlen %d >> window %ud lport %d\n",
+			tcb->window, qlen(s->rq), QMAX<<tcb->qscale, s->lport);
+	tcb->window = QMAX<<tcb->qscale;
+	tcb->ssthresh = tcb->window;
+
+	/*
+	 * it's important to set wq large enough to cover the full
+	 * bandwidth-delay product.  it's possible to be in loss
+	 * recovery with a big window, and we need to keep sending
+	 * into the inflated window.  the difference can be huge
+	 * for even modest (70ms) ping times.
+	 */
+	qsetlimit(s->rq, QMAX<<tcb->qscale);
+	qsetlimit(s->wq, QMAX<<tcb->qscale);
+	tcprcvwin(s);
 }
--- a/os/ip/udp.c
+++ b/os/ip/udp.c
@@ -24,7 +24,6 @@
 
 	IP_UDPPROTO	= 17,
 	UDP_USEAD7	= 52,
-	UDP_USEAD6	= 36,
 
 	Udprxms		= 200,
 	Udptickms	= 100,
@@ -40,7 +39,7 @@
 	uchar	length[2];	/* packet length */
 	uchar	id[2];		/* Identification */
 	uchar	frag[2];	/* Fragment information */
-	uchar	Unused;	
+	uchar	Unused;
 	uchar	udpproto;	/* Protocol */
 	uchar	udpplen[2];	/* Header plus data length */
 	uchar	udpsrc[IPv4addrlen];	/* Ip source */
@@ -73,10 +72,10 @@
 typedef struct Udpstats Udpstats;
 struct Udpstats
 {
-	ulong	udpInDatagrams;
+	uvlong	udpInDatagrams;
 	ulong	udpNoPorts;
 	ulong	udpInErrors;
-	ulong	udpOutDatagrams;
+	uvlong	udpOutDatagrams;
 };
 
 typedef struct Udppriv Udppriv;
@@ -101,7 +100,6 @@
 typedef struct Udpcb Udpcb;
 struct Udpcb
 {
-	QLock;
 	uchar	headers;
 };
 
@@ -125,7 +123,7 @@
 static int
 udpstate(Conv *c, char *state, int n)
 {
-	return snprint(state, n, "%s qin %d qout %d",
+	return snprint(state, n, "%s qin %d qout %d\n",
 		c->inuse ? "Open" : "Closed",
 		c->rq ? qlen(c->rq) : 0,
 		c->wq ? qlen(c->wq) : 0
@@ -151,7 +149,7 @@
 static void
 udpcreate(Conv *c)
 {
-	c->rq = qopen(64*1024, Qmsg, 0, 0);
+	c->rq = qopen(512*1024, Qmsg, 0, 0);
 	c->wq = qbypass(udpkick, c);
 }
 
@@ -175,8 +173,6 @@
 
 	ucb = (Udpcb*)c->ptcl;
 	ucb->headers = 0;
-
-	qunlock(c);
 }
 
 void
@@ -192,12 +188,13 @@
 	Udppriv *upriv;
 	Fs *f;
 	int version;
-	Conv *rc;
+	Routehint *rh;
+	ushort csum;
 
 	upriv = c->p->priv;
 	f = c->p->f;
 
-	netlog(c->p->f, Logudp, "udp: kick\n");
+//	netlog(c->p->f, Logudp, "udp: kick\n");	/* frequent and uninteresting */
 	if(bp == nil)
 		return;
 
@@ -219,21 +216,6 @@
 		rport = nhgets(bp->rp);
 		bp->rp += 2+2;			/* Ignore local port */
 		break;
-	case 6:
-		/* get user specified addresses */
-		bp = pullupblock(bp, UDP_USEAD6);
-		if(bp == nil)
-			return;
-		ipmove(raddr, bp->rp);
-		bp->rp += IPaddrlen;
-		ipmove(laddr, bp->rp);
-		bp->rp += IPaddrlen;
-		/* pick interface closest to dest */
-		if(ipforme(f, laddr) != Runi)
-			findlocalip(f, laddr, raddr);
-		rport = nhgets(bp->rp);
-		bp->rp += 2+2;			/* Ignore local port */
-		break;
 	default:
 		rport = 0;
 		break;
@@ -240,18 +222,12 @@
 	}
 
 	if(ucb->headers) {
-		if(memcmp(laddr, v4prefix, IPv4off) == 0 ||
-		    ipcmp(laddr, IPnoaddr) == 0)
+		if(isv4(laddr) || ipcmp(laddr, IPnoaddr) == 0)
 			version = V4;
 		else
 			version = V6;
 	} else {
-		if( (memcmp(c->raddr, v4prefix, IPv4off) == 0 &&
-			memcmp(c->laddr, v4prefix, IPv4off) == 0)
-			|| ipcmp(c->raddr, IPnoaddr) == 0)
-			version = V4;
-		else
-			version = V6;
+		version = convipvers(c);
 	}
 
 	dlen = blocklen(bp);
@@ -260,9 +236,6 @@
 	switch(version){
 	case V4:
 		bp = padblock(bp, UDP4_IPHDR_SZ+UDP_UDPHDR_SZ);
-		if(bp == nil)
-			return;
-
 		uh4 = (Udp4hdr *)(bp->rp);
 		ptcllen = dlen + UDP_UDPHDR_SZ;
 		uh4->Unused = 0;
@@ -274,7 +247,7 @@
 			v6tov4(uh4->udpdst, raddr);
 			hnputs(uh4->udpdport, rport);
 			v6tov4(uh4->udpsrc, laddr);
-			rc = nil;
+			rh = nil;
 		} else {
 			v6tov4(uh4->udpdst, c->raddr);
 			hnputs(uh4->udpdport, c->rport);
@@ -281,25 +254,26 @@
 			if(ipcmp(c->laddr, IPnoaddr) == 0)
 				findlocalip(f, c->laddr, c->raddr);
 			v6tov4(uh4->udpsrc, c->laddr);
-			rc = c;
+			rh = c;
 		}
 		hnputs(uh4->udpsport, c->lport);
 		hnputs(uh4->udplen, ptcllen);
 		uh4->udpcksum[0] = 0;
 		uh4->udpcksum[1] = 0;
-		hnputs(uh4->udpcksum, 
-		       ptclcsum(bp, UDP4_PHDR_OFF, dlen+UDP_UDPHDR_SZ+UDP4_PHDR_SZ));
+		csum = ptclcsum(bp, UDP4_PHDR_OFF, dlen+UDP_UDPHDR_SZ+UDP4_PHDR_SZ);
+		if(csum == 0)
+			csum = 0xffff;	/* -0 */
+		hnputs(uh4->udpcksum, csum);
 		uh4->vihl = IP_VER4;
-		ipoput4(f, bp, 0, c->ttl, c->tos, rc);
+		ipoput4(f, bp, 0, c->ttl, c->tos, rh);
 		break;
 
 	case V6:
+		/*
+		 * using the v6 ip header to create pseudo header
+		 * first then reset it to the normal ip header
+		 */
 		bp = padblock(bp, UDP6_IPHDR_SZ+UDP_UDPHDR_SZ);
-		if(bp == nil)
-			return;
-
-		// using the v6 ip header to create pseudo header 
-		// first then reset it to the normal ip header
 		uh6 = (Udp6hdr *)(bp->rp);
 		memset(uh6, 0, 8);
 		ptcllen = dlen + UDP_UDPHDR_SZ;
@@ -309,7 +283,7 @@
 			ipmove(uh6->udpdst, raddr);
 			hnputs(uh6->udpdport, rport);
 			ipmove(uh6->udpsrc, laddr);
-			rc = nil;
+			rh = nil;
 		} else {
 			ipmove(uh6->udpdst, c->raddr);
 			hnputs(uh6->udpdport, c->rport);
@@ -316,19 +290,21 @@
 			if(ipcmp(c->laddr, IPnoaddr) == 0)
 				findlocalip(f, c->laddr, c->raddr);
 			ipmove(uh6->udpsrc, c->laddr);
-			rc = c;
+			rh = c;
 		}
 		hnputs(uh6->udpsport, c->lport);
 		hnputs(uh6->udplen, ptcllen);
 		uh6->udpcksum[0] = 0;
 		uh6->udpcksum[1] = 0;
-		hnputs(uh6->udpcksum, 
-		       ptclcsum(bp, UDP6_PHDR_OFF, dlen+UDP_UDPHDR_SZ+UDP6_PHDR_SZ));
+		csum = ptclcsum(bp, UDP6_PHDR_OFF, dlen+UDP_UDPHDR_SZ+UDP6_PHDR_SZ);
+		if(csum == 0)
+			csum = 0xffff;	/* -0 */
+		hnputs(uh6->udpcksum, csum);
 		memset(uh6, 0, 8);
 		uh6->viclfl[0] = IP_VER6;
 		hnputs(uh6->len, ptcllen);
 		uh6->nextheader = IP_UDPPROTO;
-		ipoput6(f, bp, 0, c->ttl, c->tos, rc);
+		ipoput6(f, bp, 0, c->ttl, c->tos, rh);
 		break;
 
 	default:
@@ -360,10 +336,8 @@
 	uh4 = (Udp4hdr*)(bp->rp);
 	version = ((uh4->vihl&0xF0)==IP_VER6) ? V6 : V4;
 
-	/*
-	 * Put back pseudo header for checksum 
-	 * (remember old values for icmpnoconv())
-	 */
+	/* Put back pseudo header for checksum
+	 * (remember old values for icmpnoconv()) */
 	switch(version) {
 	case V4:
 		ottl = uh4->Unused;
@@ -423,7 +397,7 @@
 
 	c = iphtlook(&upriv->ht, raddr, rport, laddr, lport);
 	if(c == nil){
-		/* no converstation found */
+		/* no conversation found */
 		upriv->ustats.udpNoPorts++;
 		qunlock(udp);
 		netlog(f, Logudp, "udp: no conv %I!%d -> %I!%d\n", raddr, rport,
@@ -434,7 +408,7 @@
 			icmpnoconv(f, bp);
 			break;
 		case V6:
-			icmphostunr(f, ifc, bp, icmp6_port_unreach, 0);
+			icmphostunr6(f, ifc, bp, Icmp6_port_unreach, 0);
 			break;
 		default:
 			panic("udpiput2: version %d", version);
@@ -448,18 +422,8 @@
 	if(c->state == Announced){
 		if(ucb->headers == 0){
 			/* create a new conversation */
-			if(ipforme(f, laddr) != Runi) {
-				switch(version){
-				case V4:
-					v4tov6(laddr, ifc->lifc->local);
-					break;
-				case V6:
-					ipmove(laddr, ifc->lifc->local);
-					break;
-				default:
-					panic("udpiput3: version %d", version);
-				}
-			}
+			if(ipforme(f, laddr) != Runi)
+				ipv6local(ifc, laddr, 0, raddr);
 			c = Fsnewcall(c, raddr, rport, laddr, lport, version);
 			if(c == nil){
 				qunlock(udp);
@@ -507,33 +471,21 @@
 		p = bp->rp;
 		ipmove(p, raddr); p += IPaddrlen;
 		ipmove(p, laddr); p += IPaddrlen;
-		ipmove(p, ifc->lifc->local); p += IPaddrlen;
+		if(!ipv6local(ifc, p, 0, raddr))
+			ipmove(p, ifc->lifc != nil ? ifc->lifc->local : IPnoaddr);
+		p += IPaddrlen;
 		hnputs(p, rport); p += 2;
 		hnputs(p, lport);
 		break;
-	case 6:
-		/* pass the src address */
-		bp = padblock(bp, UDP_USEAD6);
-		p = bp->rp;
-		ipmove(p, raddr); p += IPaddrlen;
-		ipmove(p, ipforme(f, laddr)==Runi ? laddr : ifc->lifc->local); p += IPaddrlen;
-		hnputs(p, rport); p += 2;
-		hnputs(p, lport);
-		break;
 	}
 
-	if(bp->next)
-		bp = concatblock(bp);
-
 	if(qfull(c->rq)){
-		qunlock(c);
-		netlog(f, Logudp, "udp: qfull %I.%d -> %I.%d\n", raddr, rport,
-		       laddr, lport);
+		netlog(f, Logudp, "udp: qfull %I.%d -> %I.%d\n",
+			raddr, rport, laddr, lport);
 		freeblist(bp);
-		return;
+	} else {
+		qpass(c->rq, concatblock(bp));
 	}
-
-	qpass(c->rq, bp);
 	qunlock(c);
 
 }
@@ -545,11 +497,13 @@
 
 	ucb = (Udpcb*)c->ptcl;
 	if(n == 1){
-		if(strcmp(f[0], "oldheaders") == 0){
-			ucb->headers = 6;
+		if(strcmp(f[0], "hangup") == 0){
+			qhangup(c->rq, nil);
+			qhangup(c->wq, nil);
 			return nil;
-		} else if(strcmp(f[0], "headers") == 0){
-			ucb->headers = 7;
+		}
+		if(strcmp(f[0], "headers") == 0){
+			ucb->headers = 7;	/* new headers format */
 			return nil;
 		}
 	}
@@ -564,34 +518,25 @@
 	uchar source[IPaddrlen], dest[IPaddrlen];
 	ushort psource, pdest;
 	Conv *s, **p;
-	int version;
 
 	h4 = (Udp4hdr*)(bp->rp);
-	version = ((h4->vihl&0xF0)==IP_VER6) ? V6 : V4;
+	h6 = (Udp6hdr*)(bp->rp);
 
-	switch(version) {
-	case V4:
+	if((h4->vihl&0xF0)==IP_VER4) {
 		v4tov6(dest, h4->udpdst);
 		v4tov6(source, h4->udpsrc);
 		psource = nhgets(h4->udpsport);
 		pdest = nhgets(h4->udpdport);
-		break;
-	case V6:
-		h6 = (Udp6hdr*)(bp->rp);
+	} else {
 		ipmove(dest, h6->udpdst);
 		ipmove(source, h6->udpsrc);
 		psource = nhgets(h6->udpsport);
 		pdest = nhgets(h6->udpdport);
-		break;
-	default:
-		panic("udpadvise: version %d", version);
-		return;  /* to avoid a warning */
 	}
 
 	/* Look for a connection */
 	qlock(udp);
-	for(p = udp->conv; *p; p++) {
-		s = *p;
+	for(p = udp->conv; (s = *p) != nil; p++) {
 		if(s->rport == pdest)
 		if(s->lport == psource)
 		if(ipcmp(s->raddr, dest) == 0)
@@ -617,7 +562,8 @@
 	Udppriv *upriv;
 
 	upriv = udp->priv;
-	return snprint(buf, len, "InDatagrams: %lud\nNoPorts: %lud\nInErrors: %lud\nOutDatagrams: %lud\n",
+	return snprint(buf, len, "InDatagrams: %llud\nNoPorts: %lud\n"
+		"InErrors: %lud\nOutDatagrams: %llud\n",
 		upriv->ustats.udpInDatagrams,
 		upriv->ustats.udpNoPorts,
 		upriv->ustats.udpInErrors,
@@ -624,12 +570,6 @@
 		upriv->ustats.udpOutDatagrams);
 }
 
-int
-udpgc(Proto *udp)
-{
-	return natgc(udp->ipproto);
-}
-
 void
 udpinit(Fs *fs)
 {
@@ -647,7 +587,6 @@
 	udp->rcv = udpiput;
 	udp->advise = udpadvise;
 	udp->stats = udpstats;
-	udp->gc = udpgc;
 	udp->ipproto = IP_UDPPROTO;
 	udp->nc = Nchans;
 	udp->ptclsize = sizeof(Udpcb);
diff -u a/os/ip//arp.c b/os/ip//arp.c
--- a/os/ip//arp.c
+++ b/os/ip//arp.c
@@ -47,7 +47,8 @@
 
 #define haship(s) ((s)[IPaddrlen-1]%NHASH)
 
-extern int 	ReTransTimer = RETRANS_TIMER;
+int 	ReTransTimer = RETRANS_TIMER;
+
 static void 	rxmitproc(void *v);
 
 void
@@ -57,145 +58,121 @@
 	f->arp->f = f;
 	f->arp->rxmt = nil;
 	f->arp->dropf = f->arp->dropl = nil;
-	kproc("rxmitproc", rxmitproc, f->arp, 0);
+	kproc("rxmitproc", rxmitproc, f->arp);
 }
 
-/*
- *  create a new arp entry for an ip address.
- */
-static Arpent*
-newarp6(Arp *arp, uchar *ip, Ipifc *ifc, int addrxt)
+static void
+freeblistchain(Block *bp)
 {
-	uint t;
-	Block *next, *xp;
-	Arpent *a, *e, *f, **l;
-	Medium *m = ifc->m;
-	int empty;
+	Block *next;
 
-	/* find oldest entry */
-	e = &arp->cache[NCACHE];
-	a = arp->cache;
-	t = a->utime;
-	for(f = a; f < e; f++){
-		if(f->utime < t){
-			t = f->utime;
-			a = f;
-		}
+	while(bp != nil){
+		next = bp->list;
+		freeblist(bp);
+		bp = next;
 	}
+}
 
-	/* dump waiting packets */
-	xp = a->hold;
-	a->hold = nil;
+/* take out of re-transmit chain */
+static Arpent**
+rxmtunchain(Arp *arp, Arpent *a)
+{
+	Arpent **l;
 
-	if(isv4(a->ip)){
-		while(xp){
-			next = xp->list;
-			freeblist(xp);
-			xp = next;
+	for(l = &arp->rxmt; *l != nil; l = &((*l)->nextrxt)){
+		if(*l == a){
+			*l = a->nextrxt;
+			break;
 		}
 	}
-	else {	// queue icmp unreachable for rxmitproc later on, w/o arp lock
-		if(xp){
-			if(arp->dropl == nil) 
-				arp->dropf = xp;
-			else
-				arp->dropl->list = xp;
+	a->nextrxt = nil;
+	return l;
+}
 
-			for(next = xp->list; next; next = next->list)
-				xp = next;
-			arp->dropl = xp;
-			wakeup(&arp->rxmtq);
-		}
-	}
+static void
+cleanarpent(Arp *arp, Arpent *a)
+{
+	Arpent **l;
+	Block *bp;
 
 	/* take out of current chain */
-	l = &arp->hash[haship(a->ip)];
-	for(f = *l; f; f = f->hash){
-		if(f == a){
+	for(l = &arp->hash[haship(a->ip)]; *l != nil; l = &((*l)->hash)){
+		if(*l == a){
 			*l = a->hash;
 			break;
 		}
-		l = &f->hash;
 	}
+	a->hash = nil;
 
-	/* insert into new chain */
-	l = &arp->hash[haship(ip)];
-	a->hash = *l;
-	*l = a;
+	/* dump waiting packets */
+	bp = a->hold;
+	a->hold = nil;
+	if(isv4(a->ip))
+		freeblistchain(bp);
+	else {
+		rxmtunchain(arp, a);
 
-	memmove(a->ip, ip, sizeof(a->ip));
-	a->utime = NOW;
-	a->ctime = 0;
-	a->type = m;
+		/* queue icmp unreachable for rxmitproc later on, w/o arp lock */
+		if(bp != nil){
+			if(arp->dropf == nil)
+				arp->dropf = bp;
+			else
+				arp->dropl->list = bp;
+			arp->dropl = a->last;
 
-	a->rtime = NOW + ReTransTimer;
-	a->rxtsrem = MAX_MULTICAST_SOLICIT;
-	a->ifc = ifc;
-	a->ifcid = ifc->ifcid;
-
-	/* put to the end of re-transmit chain; addrxt is 0 when isv4(a->ip) */
-	if(!ipismulticast(a->ip) && addrxt){
-		l = &arp->rxmt;
-		empty = (*l==nil);
-
-		for(f = *l; f; f = f->nextrxt){
-			if(f == a){
-				*l = a->nextrxt;
-				break;
-			}
-			l = &f->nextrxt;
+			if(bp == arp->dropf)
+				wakeup(&arp->rxmtq);
 		}
-		for(f = *l; f; f = f->nextrxt){
-			l = &f->nextrxt;
-		}
-		*l = a;
-		if(empty) 
-			wakeup(&arp->rxmtq);
 	}
+	a->last = nil;
 
-	a->nextrxt = nil;
+	a->ifc = nil;
+	a->ifcid = 0;
 
-	return a;
-}
+	a->state = 0;
+	a->rxtsrem = 0;
 
-/* called with arp qlocked */
+	a->utime = 0;
+	a->ctime = 0;
 
-void
-cleanarpent(Arp *arp, Arpent *a)
+	memset(a->ip, 0, sizeof(a->ip));
+	memset(a->mac, 0, sizeof(a->mac));
+}
+
+/*
+ *  create a new arp entry for an ip address on ifc.
+ */
+static Arpent*
+newarpent(Arp *arp, uchar *ip, Ipifc *ifc)
 {
-	Arpent *f, **l;
+	Arpent *a, *e, *f, **l;
+	ulong t;
 
-	a->utime = 0;
-	a->ctime = 0;
-	a->type = 0;
-	a->state = 0;
-	
-	/* take out of current chain */
-	l = &arp->hash[haship(a->ip)];
-	for(f = *l; f; f = f->hash){
-		if(f == a){
-			*l = a->hash;
-			break;
+	/* find oldest entry */
+	e = &arp->cache[NCACHE];
+	a = arp->cache;
+	t = a->utime;
+	for(f = a; f < e; f++){
+		if(f->utime < t){
+			t = f->utime;
+			a = f;
 		}
-		l = &f->hash;
 	}
+	cleanarpent(arp, a);
 
-	/* take out of re-transmit chain */
-	l = &arp->rxmt;
-	for(f = *l; f; f = f->nextrxt){
-		if(f == a){
-			*l = a->nextrxt;
-			break;
-		}
-		l = &f->nextrxt;
-	}
-	a->nextrxt = nil;
-	a->hash = nil;
-	a->hold = nil;
-	a->last = nil;
-	a->ifc = nil;
+	ipmove(a->ip, ip);
+	a->ifc = ifc;
+	a->ifcid = ifc->ifcid;
+
+	/* insert into new chain */
+	l = &arp->hash[haship(ip)];
+	a->hash = *l;
+	*l = a;
+
+	return a;
 }
 
+
 /*
  *  fill in the media address if we have it.  Otherwise return an
  *  Arpent that represents the state of the address resolution FSM
@@ -207,7 +184,6 @@
 {
 	int hash;
 	Arpent *a;
-	Medium *type = ifc->m;
 	uchar v6ip[IPaddrlen];
 
 	if(version == V4){
@@ -217,30 +193,28 @@
 
 	qlock(arp);
 	hash = haship(ip);
-	for(a = arp->hash[hash]; a; a = a->hash){
-		if(memcmp(ip, a->ip, sizeof(a->ip)) == 0)
-		if(type == a->type)
+	for(a = arp->hash[hash]; a != nil; a = a->hash){
+		if(a->ifc == ifc && a->ifcid == ifc->ifcid && ipcmp(ip, a->ip) == 0)
 			break;
 	}
-
 	if(a == nil){
-		a = newarp6(arp, ip, ifc, (version != V4));
+		a = newarpent(arp, ip, ifc);
 		a->state = AWAIT;
 	}
 	a->utime = NOW;
 	if(a->state == AWAIT){
 		if(bp != nil){
-			if(a->hold)
-				a->last->list = bp;
-			else
+			bp->list = nil; 
+			if(a->hold == nil)
 				a->hold = bp;
+			else
+				a->last->list = bp;
 			a->last = bp;
-			bp->list = nil; 
 		}
 		return a;		/* return with arp qlocked */
 	}
 
-	memmove(mac, a->mac, a->type->maclen);
+	memmove(mac, a->mac, ifc->m->maclen);
 
 	/* remove old entries */
 	if(NOW - a->ctime > 15*60*1000)
@@ -269,132 +243,82 @@
 arpresolve(Arp *arp, Arpent *a, Medium *type, uchar *mac)
 {
 	Block *bp;
-	Arpent *f, **l;
 
-	if(!isv4(a->ip)){
-		l = &arp->rxmt;
-		for(f = *l; f; f = f->nextrxt){
-			if(f == a){
-				*l = a->nextrxt;
-				break;
-			}
-			l = &f->nextrxt;
-		}
-	}
-
 	memmove(a->mac, mac, type->maclen);
-	a->type = type;
+	if(a->state == AWAIT && !isv4(a->ip)){
+		rxmtunchain(arp, a);
+		a->rxtsrem = 0;
+	}
 	a->state = AOK;
-	a->utime = NOW;
+	a->ctime = a->utime = NOW;
 	bp = a->hold;
-	a->hold = nil;
+	a->hold = a->last = nil;
 	qunlock(arp);
 
 	return bp;
 }
 
-void
-arpenter(Fs *fs, int version, uchar *ip, uchar *mac, int n, int refresh)
+int
+arpenter(Fs *fs, int version, uchar *ip, uchar *mac, int n, uchar *ia, Ipifc *ifc, int refresh)
 {
-	Arp *arp;
-	Route *r;
-	Arpent *a, *f, **l;
-	Ipifc *ifc;
-	Medium *type;
-	Block *bp, *next;
 	uchar v6ip[IPaddrlen];
+	Block *bp, *next;
+	Arpent *a;
+	Route *r;
+	Arp *arp;
 
-	arp = fs->arp;
+	if(ifc->m == nil || ifc->m->maclen != n || ifc->m->maclen == 0)
+		return -1;
 
-	if(n != 6){
-//		print("arp: len = %d\n", n);
-		return;
-	}
-
 	switch(version){
 	case V4:
-		r = v4lookup(fs, ip, nil);
+		r = v4lookup(fs, ip, ia, nil);
 		v4tov6(v6ip, ip);
 		ip = v6ip;
 		break;
 	case V6:
-		r = v6lookup(fs, ip, nil);
+		r = v6lookup(fs, ip, ia, nil);
 		break;
 	default:
 		panic("arpenter: version %d", version);
-		return;	/* to supress warnings */
+		return -1;	/* to suppress warnings */
 	}
 
-	if(r == nil){
-//		print("arp: no route for entry\n");
-		return;
-	}
+	if(r == nil || r->ifc != ifc || (r->type & (Rbcast|Rmulti)) != 0)
+		return -1;
 
-	ifc = r->ifc;
-	type = ifc->m;
-
+	arp = fs->arp;
 	qlock(arp);
-	for(a = arp->hash[haship(ip)]; a; a = a->hash){
-		if(a->type != type || (a->state != AWAIT && a->state != AOK))
+	for(a = arp->hash[haship(ip)]; a != nil; a = a->hash){
+		if(a->ifc != ifc || a->ifcid != ifc->ifcid)
 			continue;
-
 		if(ipcmp(a->ip, ip) == 0){
-			a->state = AOK;
-			memmove(a->mac, mac, type->maclen);
-
-			if(version == V6){
-				/* take out of re-transmit chain */
-				l = &arp->rxmt;
-				for(f = *l; f; f = f->nextrxt){
-					if(f == a){
-						*l = a->nextrxt;
-						break;
-					}
-					l = &f->nextrxt;
-				}
-			}
-
-			a->ifc = ifc;
-			a->ifcid = ifc->ifcid;
-			bp = a->hold;
-			a->hold = nil;
 			if(version == V4)
 				ip += IPv4off;
-			a->utime = NOW;
-			a->ctime = a->utime;
-			qunlock(arp);
-
-			while(bp){
+			bp = arpresolve(arp, a, ifc->m, mac);	/* unlocks arp */
+			for(; bp != nil; bp = next){
 				next = bp->list;
-				if(ifc != nil){
-					if(waserror()){
-						runlock(ifc);
-						nexterror();
-					}
-					rlock(ifc);
-					if(ifc->m != nil)
-						ifc->m->bwrite(ifc, bp, version, ip);
-					else
-						freeb(bp);
-					runlock(ifc);
-					poperror();
-				} else
-					freeb(bp);
-				bp = next;
+				bp->list = nil;
+				if(waserror()){
+					freeblistchain(next);
+					break;
+				}
+				ipifcoput(ifc, bp, version, ip);
+				poperror();
 			}
-			return;
+			return 1;
 		}
 	}
 
 	if(refresh == 0){
-		a = newarp6(arp, ip, ifc, 0);
+		a = newarpent(arp, ip, ifc);
 		a->state = AOK;
-		a->type = type;
-		a->ctime = NOW;
-		memmove(a->mac, mac, type->maclen);
+		a->ctime = a->utime = NOW;
+		memmove(a->mac, mac, n);
 	}
-
 	qunlock(arp);
+
+	return refresh == 0;
 }
 
 int
@@ -401,13 +325,12 @@
 arpwrite(Fs *fs, char *s, int len)
 {
 	int n;
-	Route *r;
 	Arp *arp;
-	Block *bp;
-	Arpent *a, *fl, **l;
+	Arpent *a, *x;
 	Medium *m;
-	char *f[4], buf[256];
-	uchar ip[IPaddrlen], mac[MAClen];
+	Ipifc *ifc;
+	char *f[5], buf[256];
+	uchar ip[IPaddrlen], ia[IPaddrlen], mac[MAClen];
 
 	arp = fs->arp;
 
@@ -420,7 +343,7 @@
 	if(len > 0 && buf[len-1] == '\n')
 		buf[len-1] = 0;
 
-	n = getfields(buf, f, 4, 1, " ");
+	n = getfields(buf, f, nelem(f), 1, " ");
 	if(strcmp(f[0], "flush") == 0){
 		qlock(arp);
 		for(a = arp->cache; a < &arp->cache[NCACHE]; a++){
@@ -427,19 +350,20 @@
 			memset(a->ip, 0, sizeof(a->ip));
 			memset(a->mac, 0, sizeof(a->mac));
 			a->hash = nil;
+			a->nextrxt = nil;
+			a->ifc = nil;
+			a->ifcid = 0;
 			a->state = 0;
+			a->rxtsrem = 0;
+			a->ctime = 0;
 			a->utime = 0;
-			while(a->hold != nil){
-				bp = a->hold->list;
-				freeblist(a->hold);
-				a->hold = bp;
-			}
+			freeblistchain(a->hold);
+			a->hold = a->last = nil;
 		}
 		memset(arp->hash, 0, sizeof(arp->hash));
-// clear all pkts on these lists (rxmt, dropf/l)
+		freeblistchain(arp->dropf);
+		arp->dropf = arp->dropl = nil;
 		arp->rxmt = nil;
-		arp->dropf = nil;
-		arp->dropl = nil;
 		qunlock(arp);
 	} else if(strcmp(f[0], "add") == 0){
 		switch(n){
@@ -446,64 +370,53 @@
 		default:
 			error(Ebadarg);
 		case 3:
-			parseip(ip, f[1]);
-			if(isv4(ip))
-				r = v4lookup(fs, ip+IPv4off, nil);
-			else
-				r = v6lookup(fs, ip, nil);
-			if(r == nil)
-				error("Destination unreachable");
-			m = r->ifc->m;
-			n = parsemac(mac, f[2], m->maclen);
+			if(parseip(ip, f[1]) == -1)
+				error(Ebadip);
+			if((n = parsemac(mac, f[2], sizeof(mac))) <= 0)
+				error(Ebadarp);
+			findlocalip(fs, ia, ip);
 			break;
 		case 4:
 			m = ipfindmedium(f[1]);
-			if(m == nil)
+			if(m == nil || m->maclen == 0)
 				error(Ebadarp);
-			parseip(ip, f[2]);
-			n = parsemac(mac, f[3], m->maclen);
+			if(parseip(ip, f[2]) == -1)
+				error(Ebadip);
+			if((n = parsemac(mac, f[3], sizeof(mac))) != m->maclen)
+				error(Ebadarp);
+			findlocalip(fs, ia, ip);
 			break;
+		case 5:
+			m = ipfindmedium(f[1]);
+			if(m == nil || m->maclen == 0)
+				error(Ebadarp);
+			if(parseip(ip, f[2]) == -1)
+				error(Ebadip);
+			if((n = parsemac(mac, f[3], sizeof(mac))) != m->maclen)
+				error(Ebadarp);
+			if(parseip(ia, f[4]) == -1)
+				error(Ebadip);
+			break;
 		}
-
-		if(m->ares == nil)
-			error(Ebadarp);
-
-		m->ares(fs, V6, ip, mac, n, 0);
+		if((ifc = findipifc(fs, ia, ia, Runi)) == nil)
+			error("no interface");
+		rlock(ifc);
+		if(!ipv6local(ifc, ia, 0, ip) || arpenter(fs, V6, ip, mac, n, ia, ifc, 0) < 0){
+			runlock(ifc);
+			error("destination unreachable");
+		}
+		runlock(ifc);
 	} else if(strcmp(f[0], "del") == 0){
-		if(n != 2)
+		if (n != 2)
 			error(Ebadarg);
-
-		parseip(ip, f[1]);
+		if (parseip(ip, f[1]) == -1)
+			error(Ebadip);
 		qlock(arp);
-
-		l = &arp->hash[haship(ip)];
-		for(a = *l; a; a = a->hash){
-			if(memcmp(ip, a->ip, sizeof(a->ip)) == 0){
-				*l = a->hash;
-				break;
-			}
-			l = &a->hash;
+		for(a = arp->hash[haship(ip)]; a != nil; a = x){
+			x = a->hash;
+			if(ipcmp(ip, a->ip) == 0)
+				cleanarpent(arp, a);
 		}
-	
-		if(a){
-			/* take out of re-transmit chain */
-			l = &arp->rxmt;
-			for(fl = *l; fl; fl = fl->nextrxt){
-				if(fl == a){
-					*l = a->nextrxt;
-					break;
-				}
-				l = &fl->nextrxt;
-			}
-
-			a->nextrxt = nil;
-			a->hash = nil;
-			a->hold = nil;
-			a->last = nil;
-			a->ifc = nil;
-			memset(a->ip, 0, sizeof(a->ip));
-			memset(a->mac, 0, sizeof(a->mac));
-		}
 		qunlock(arp);
 	} else
 		error(Ebadarp);
@@ -511,13 +424,6 @@
 	return len;
 }
 
-enum
-{
-	Alinelen=	90,
-};
-
-char *aformat = "%-6.6s %-8.8s %-40.40I %-32.32s\n";
-
 static void
 convmac(char *p, uchar *mac, int n)
 {
@@ -526,136 +432,136 @@
 }
 
 int
-arpread(Arp *arp, char *p, ulong offset, int len)
+arpread(Arp *arp, char *s, ulong offset, int len)
 {
+	char mac[2*MAClen+1], *state, *mname, *p;
+	uchar ip[IPaddrlen], ia[IPaddrlen];
+	Ipifc *ifc;
 	Arpent *a;
-	int n;
-	char mac[2*MAClen+1];
+	long n, o;
 
-	if(offset % Alinelen)
-		return 0;
-
-	offset = offset/Alinelen;
-	len = len/Alinelen;
-
-	n = 0;
+	p = s;
+	o = -offset;
 	for(a = arp->cache; len > 0 && a < &arp->cache[NCACHE]; a++){
-		if(a->state == 0)
+		if(a->state == 0 || (ifc = a->ifc) == nil)
 			continue;
-		if(offset > 0){
-			offset--;
+
+		rlock(ifc);
+		qlock(arp);
+		state = arpstate[a->state];
+		ipmove(ip, a->ip);
+		if(ifc->m == nil || a->ifcid != ifc->ifcid || !ipv6local(ifc, ia, 0, ip)){
+			qunlock(arp);
+			runlock(ifc);
 			continue;
 		}
-		len--;
-		qlock(arp);
-		convmac(mac, a->mac, a->type->maclen);
-		n += sprint(p+n, aformat, a->type->name, arpstate[a->state], a->ip, mac);
+		mname = ifc->m->name;
+		convmac(mac, a->mac, ifc->m->maclen);
 		qunlock(arp);
+		runlock(ifc);
+
+		n = snprint(up->genbuf, sizeof up->genbuf,
+			"%-6.6s %-4.4s %-40.40I %-16.16s %I\n",
+			mname, state, ip, mac, ia);
+		o += n;
+		if(o <= 0)
+			continue;
+		if(n > len)
+			break;
+		memmove(p, up->genbuf, n);
+		len -= n;
+		p += n;
 	}
 
-	return n;
+	return p - s;
 }
 
-extern int
-rxmitsols(Arp *arp)
+void
+ndpsendsol(Fs *f, Ipifc *ifc, Arpent *a)
 {
-	uint sflag;
-	Block *next, *xp;
-	Arpent *a, *b, **l;
-	Fs *f;
-	uchar ipsrc[IPaddrlen];
-	Ipifc *ifc = nil;
-	long nrxt;
+	uchar targ[IPaddrlen], src[IPaddrlen];
+	Arpent **l;
 
-	qlock(arp);
-	f = arp->f;
+	a->ctime = NOW;
+	if(a->rxtsrem == 0)
+		a->rxtsrem = MAX_MULTICAST_SOLICIT;
+	else
+		a->rxtsrem--;
 
-	a = arp->rxmt;
-	if(a==nil){
-		nrxt = 0;
-		goto dodrops; 		//return nrxt;
-	}
-	nrxt = a->rtime - NOW;
-	if(nrxt > 3*ReTransTimer/4) 
-		goto dodrops; 		//return nrxt;
+	/* put on end of re-transmit chain */
+	for(l = rxmtunchain(f->arp, a); *l != nil; l = &(*l)->nextrxt)
+		;
+	*l = a;
 
-	for(; a; a = a->nextrxt){
-		ifc = a->ifc;
-		assert(ifc != nil);
-		if((a->rxtsrem <= 0) || !(canrlock(ifc)) || (a->ifcid != ifc->ifcid)){
-			xp = a->hold;
-			a->hold = nil;
+	if(l == &f->arp->rxmt)
+		wakeup(&f->arp->rxmtq);
 
-			if(xp){
-				if(arp->dropl == nil) 
-					arp->dropf = xp;
-				else
-					arp->dropl->list = xp;
-			}
+	/* try to use source address of original packet */
+	ipmove(targ, a->ip);
+	if(a->last != nil){
+		ipmove(src, ((Ip6hdr*)a->last->rp)->src);
+		arprelease(f->arp, a);
 
-			cleanarpent(arp, a);
-		}
-		else
-			break;
+		if(iplocalonifc(ifc, src) != nil || ipproxyifc(f, ifc, src))
+			goto send;
+	} else {
+		arprelease(f->arp, a);
 	}
-	if(a == nil)
-		goto dodrops;
+	if(!ipv6local(ifc, src, 0, targ))
+		return;
+send:
+	if(!waserror()){
+		icmpns(f, src, SRC_UNI, targ, TARG_MULTI, ifc->mac);
+		poperror();
+	}
+}
 
+static void
+rxmitsols(Arp *arp)
+{
+	Block *next, *bp;
+	Arpent *a;
+	Ipifc *ifc;
+	Route *r;
 
-	qunlock(arp);	/* for icmpns */
-	if((sflag = ipv6anylocal(ifc, ipsrc)) != SRC_UNSPEC) 
-		icmpns(f, ipsrc, sflag, a->ip, TARG_MULTI, ifc->mac); 
-
-	runlock(ifc);
-	qlock(arp);	
-
-	/* put to the end of re-transmit chain */
-	l = &arp->rxmt;
-	for(b = *l; b; b = b->nextrxt){
-		if(b == a){
-			*l = a->nextrxt;
-			break;
+	qlock(arp);
+	while((a = arp->rxmt) != nil && NOW - a->ctime > 3*ReTransTimer/4){
+		if(a->rxtsrem > 0 && (ifc = a->ifc) != nil && canrlock(ifc)){
+			if(a->ifcid == ifc->ifcid){
+				ndpsendsol(arp->f, ifc, a);	/* unlocks arp */
+				runlock(ifc);
+				qlock(arp);
+				continue;
+			}
+			runlock(ifc);
 		}
-		l = &b->nextrxt;
+		cleanarpent(arp, a);
 	}
-	for(b = *l; b; b = b->nextrxt){
-		l = &b->nextrxt;
-	}
-	*l = a;
-	a->rxtsrem--;
-	a->nextrxt = nil;
-	a->rtime = NOW + ReTransTimer;
-
-	a = arp->rxmt;
-	if(a==nil)
-		nrxt = 0;
-	else 
-		nrxt = a->rtime - NOW;
-
-dodrops:
-	xp = arp->dropf;
-	arp->dropf = nil;
-	arp->dropl = nil;
+	bp = arp->dropf;
+	arp->dropf = arp->dropl = nil;
 	qunlock(arp);
 
-	for(; xp; xp = next){
-		next = xp->list;
-		icmphostunr(f, ifc, xp, icmp6_adr_unreach, 1);
+	for(; bp != nil; bp = next){
+		next = bp->list;
+		bp->list = nil;
+		r = v6lookup(arp->f, ((Ip6hdr*)bp->rp)->src, ((Ip6hdr*)bp->rp)->dst, nil);
+		if(r != nil && (ifc = r->ifc) != nil && canrlock(ifc)){
+			if(!waserror()){
+				icmphostunr6(arp->f, ifc, bp, Icmp6_adr_unreach, (r->type & Runi) != 0);
+				poperror();
+			}
+			runlock(ifc);
+		}
+		freeblist(bp);
 	}
-
-	return nrxt;
-
 }
 
 static int
 rxready(void *v)
 {
-	Arp *arp = (Arp *) v;
-	int x;
+	Arp *arp = (Arp *)v;
 
-	x = ((arp->rxmt != nil) || (arp->dropf != nil));
-
-	return x;
+	return arp->rxmt != nil || arp->dropf != nil;
 }
 
 static void
@@ -662,20 +568,15 @@
 rxmitproc(void *v)
 {
 	Arp *arp = v;
-	long wakeupat;
 
 	arp->rxmitp = up;
-	//print("arp rxmitproc started\n");
 	if(waserror()){
-		arp->rxmitp = 0;
+		arp->rxmitp = nil;
 		pexit("hangup", 1);
 	}
 	for(;;){
-		wakeupat = rxmitsols(arp);
-		if(wakeupat == 0) 
-			sleep(&arp->rxmtq, rxready, v); 
-		else if(wakeupat > ReTransTimer/4) 
-			tsleep(&arp->rxmtq, return0, 0, wakeupat); 
+		sleep(&arp->rxmtq, rxready, v);
+		rxmitsols(arp);
+		tsleep(&arp->rxmtq, return0, nil, ReTransTimer/4);
 	}
 }
-
diff -u a/os/ip//devip.c b/os/ip//devip.c
--- a/os/ip//devip.c
+++ b/os/ip//devip.c
@@ -14,7 +14,6 @@
 	Qbootp,
 	Qndb,
 	Qiproute,
-	Qiprouter,
 	Qipselftab,
 	Qlog,
 
@@ -43,11 +42,11 @@
 	Maskproto=	(1<<Logproto)-1,
 	Shiftproto=	Logtype + Logconv,
 
-	Nfs=		32,
+	Nfs=		128,
 };
-#define TYPE(x) 	( ((u32)(x).path) & Masktype )
-#define CONV(x) 	( (((u32)(x).path) >> Shiftconv) & Maskconv )
-#define PROTO(x) 	( (((u32)(x).path) >> Shiftproto) & Maskproto )
+#define TYPE(x) 	( ((ulong)(x).path) & Masktype )
+#define CONV(x) 	( (((ulong)(x).path) >> Shiftconv) & Maskconv )
+#define PROTO(x) 	( (((ulong)(x).path) >> Shiftproto) & Maskproto )
 #define QID(p, c, y) 	( ((p)<<(Shiftproto)) | ((c)<<Shiftconv) | (y) )
 
 static char network[] = "network";
@@ -58,8 +57,7 @@
 
 extern	void nullmediumlink(void);
 extern	void pktmediumlink(void);
-static	long ndbwrite(Fs*, char*, ulong, int);
-extern void    closeconv(Conv*);
+	long ndbwrite(Fs *f, char *a, ulong off, int n);
 
 static int
 ip3gen(Chan *c, int i, Dir *dp)
@@ -121,7 +119,7 @@
 		mkqid(&q, QID(PROTO(c->qid), 0, Qstats), 0, QTFILE);
 		devdir(c, q, "stats", 0, network, 0444, dp);
 		return 1;
-	}	
+	}
 	return -1;
 }
 
@@ -144,11 +142,10 @@
 		return -1;
 	case Qarp:
 		p = "arp";
+		prot = 0664;
 		break;
 	case Qbootp:
 		p = "bootp";
-		if(bootp == nil)
-			return 0;
 		break;
 	case Qndb:
 		p = "ndb";
@@ -157,14 +154,12 @@
 		break;
 	case Qiproute:
 		p = "iproute";
+		prot = 0664;
 		break;
 	case Qipselftab:
 		p = "ipselftab";
 		prot = 0444;
 		break;
-	case Qiprouter:
-		p = "iprouter";
-		break;
 	case Qlog:
 		p = "log";
 		break;
@@ -188,7 +183,7 @@
 	case Qtopdir:
 		if(s == DEVDOTDOT){
 			mkqid(&q, QID(0, 0, Qtopdir), 0, QTDIR);
-			sprint(up->genbuf, "#I%ud", c->dev);
+			snprint(up->genbuf, sizeof up->genbuf, "#I%lud", c->dev);
 			devdir(c, q, up->genbuf, 0, network, 0555, dp);
 			return 1;
 		}
@@ -206,19 +201,18 @@
 	case Qndb:
 	case Qlog:
 	case Qiproute:
-	case Qiprouter:
 	case Qipselftab:
 		return ip1gen(c, TYPE(c->qid), dp);
 	case Qprotodir:
 		if(s == DEVDOTDOT){
 			mkqid(&q, QID(0, 0, Qtopdir), 0, QTDIR);
-			sprint(up->genbuf, "#I%ud", c->dev);
+			snprint(up->genbuf, sizeof up->genbuf, "#I%lud", c->dev);
 			devdir(c, q, up->genbuf, 0, network, 0555, dp);
 			return 1;
 		}
 		if(s < f->p[PROTO(c->qid)]->ac) {
 			cv = f->p[PROTO(c->qid)]->conv[s];
-			sprint(up->genbuf, "%d", s);
+			snprint(up->genbuf, sizeof up->genbuf, "%d", s);
 			mkqid(&q, QID(PROTO(c->qid), s, Qconvdir), 0, QTDIR);
 			devdir(c, q, up->genbuf, 0, cv->owner, 0555, dp);
 			return 1;
@@ -262,45 +256,14 @@
 	fmtinstall('M', eipfmt);
 }
 
-static Fs*
-ipgetfs(int dev)
-{
-	extern void (*ipprotoinit[])(Fs*);
-	Fs *f;
-	int i;
-
-	if(dev >= Nfs)
-		return nil;
-
-	qlock(&fslock);
-	if(ipfs[dev] == nil){
-		f = smalloc(sizeof(Fs));
-		ip_init(f);
-		arpinit(f);
-		netloginit(f);
-		for(i = 0; ipprotoinit[i]; i++)
-			ipprotoinit[i](f);
-		f->dev = dev;
-		ipfs[dev] = f;
-	}
-	qunlock(&fslock);
-
-	return ipfs[dev];
-}
-
 IPaux*
 newipaux(char *owner, char *tag)
 {
 	IPaux *a;
-	int n;
 
 	a = smalloc(sizeof(*a));
 	kstrdup(&a->owner, owner);
-	memset(a->tag, ' ', sizeof(a->tag));
-	n = strlen(tag);
-	if(n > sizeof(a->tag))
-		n = sizeof(a->tag);
-	memmove(a->tag, tag, n);
+	strncpy(a->tag, tag, sizeof(a->tag));
 	return a;
 }
 
@@ -310,13 +273,29 @@
 ipattach(char* spec)
 {
 	Chan *c;
-	int dev;
+	ulong dev;
 
-	dev = atoi(spec);
+	dev = strtoul(spec, nil, 10);
 	if(dev >= Nfs)
-		error("bad specification");
+		error(Enodev);
 
-	ipgetfs(dev);
+	qlock(&fslock);
+	if(ipfs[dev] == nil){
+		extern void (*ipprotoinit[])(Fs*);
+		Fs *f;
+		int i;
+
+		f = smalloc(sizeof(Fs));
+		ip_init(f);
+		arpinit(f);
+		netloginit(f);
+		for(i = 0; ipprotoinit[i]; i++)
+			ipprotoinit[i](f);
+		f->dev = dev;
+		ipfs[dev] = f;
+	}
+	qunlock(&fslock);
+
 	c = devattach('I', spec);
 	mkqid(&c->qid, QID(0, 0, Qtopdir), 0, QTDIR);
 	c->dev = dev;
@@ -327,7 +306,7 @@
 }
 
 static Walkqid*
-ipwalk(Chan* c, Chan *nc, char **name, s32 nname)
+ipwalk(Chan* c, Chan *nc, char **name, int nname)
 {
 	IPaux *a = c->aux;
 	Walkqid* w;
@@ -338,8 +317,9 @@
 	return w;
 }
 
-static s32
-ipstat(Chan* c, uchar* db, s32 n)
+
+static int
+ipstat(Chan* c, uchar* db, int n)
 {
 	return devstat(c, db, n, nil, 0, ipgen);
 }
@@ -360,7 +340,7 @@
 };
 
 static Chan*
-ipopen(Chan* c, u32 omode)
+ipopen(Chan* c, int omode)
 {
 	Conv *cv, *nc;
 	Proto *p;
@@ -375,7 +355,7 @@
 	default:
 		break;
 	case Qndb:
-		if(omode & (OWRITE|OTRUNC) && !iseve())
+		if((omode & (OWRITE|OTRUNC)) != 0 && !iseve())
 			error(Eperm);
 		if((omode & (OWRITE|OTRUNC)) == (OWRITE|OTRUNC))
 			f->ndb[0] = 0;
@@ -383,10 +363,10 @@
 	case Qlog:
 		netlogopen(f);
 		break;
-	case Qiprouter:
-		iprouteropen(f);
-		break;
 	case Qiproute:
+	case Qarp:
+		if(omode != OREAD && !iseve())
+			error(Eperm);
 		break;
 	case Qtopdir:
 	case Qprotodir:
@@ -412,13 +392,8 @@
 	case Qclone:
 		p = f->p[PROTO(c->qid)];
 		qlock(p);
-		if(waserror()){
-			qunlock(p);
-			nexterror();
-		}
 		cv = Fsprotoclone(p, ATTACHER(c));
 		qunlock(p);
-		poperror();
 		if(cv == nil) {
 			error(Enodev);
 			break;
@@ -437,15 +412,12 @@
 			qunlock(p);
 			nexterror();
 		}
-		if((perm & (cv->perm>>6)) != perm) {
-			if(strcmp(ATTACHER(c), cv->owner) != 0)
-				error(Eperm);
-		 	if((perm & cv->perm) != perm)
-				error(Eperm); 
+		if(strcmp(ATTACHER(c), cv->owner) == 0)
+			perm <<= 6;
+		if((perm & cv->perm) != perm && !iseve())
+			error(Eperm);
 
-		}
-		cv->inuse++;
-		if(cv->inuse == 1){
+		if(++cv->inuse == 1){
 			kstrdup(&cv->owner, ATTACHER(c));
 			cv->perm = 0660;
 		}
@@ -455,24 +427,26 @@
 		break;
 	case Qlisten:
 		cv = f->p[PROTO(c->qid)]->conv[CONV(c->qid)];
-		if((perm & (cv->perm>>6)) != perm) {
-			if(strcmp(ATTACHER(c), cv->owner) != 0)
-				error(Eperm);
-		 	if((perm & cv->perm) != perm)
-				error(Eperm); 
-
+		qlock(cv);
+		if(waserror()){
+			qunlock(cv);
+			nexterror();
 		}
+		if(strcmp(ATTACHER(c), cv->owner) == 0)
+			perm <<= 6;
+		if((perm & cv->perm) != perm && !iseve())
+			error(Eperm);
 
 		if(cv->state != Announced)
 			error("not announced");
 
+		cv->inuse++;
+		qunlock(cv);
+		poperror();
 		if(waserror()){
 			closeconv(cv);
 			nexterror();
 		}
-		qlock(cv);
-		cv->inuse++;
-		qunlock(cv);
 
 		nc = nil;
 		while(nc == nil) {
@@ -494,7 +468,6 @@
 			if(nc != nil){
 				cv->incall = nc->next;
 				mkqid(&c->qid, QID(PROTO(c->qid), nc->x, Qctl), 0, QTFILE);
-				kstrdup(&cv->owner, ATTACHER(c));
 			}
 			qunlock(cv);
 
@@ -511,13 +484,25 @@
 	return c;
 }
 
-static s32
-ipwstat(Chan *c, uchar *dp, s32 n)
+static Chan*
+ipcreate(Chan*, char*, int, ulong)
 {
-	Dir *d;
+	error(Eperm);
+	return 0;
+}
+
+static void
+ipremove(Chan*)
+{
+	error(Eperm);
+}
+
+static int
+ipwstat(Chan *c, uchar *dp, int n)
+{
+	Dir *dir;
 	Conv *cv;
 	Fs *f;
-	Proto *p;
 
 	f = ipfs[c->dev];
 	switch(TYPE(c->qid)) {
@@ -529,28 +514,40 @@
 		break;
 	}
 
-	d = smalloc(sizeof(*d)+n);
+	dir = smalloc(sizeof(Dir)+n);
 	if(waserror()){
-		free(d);
+		free(dir);
 		nexterror();
 	}
-	n = convM2D(dp, n, d, (char*)&d[1]);
+	n = convM2D(dp, n, &dir[0], (char*)&dir[1]);
 	if(n == 0)
 		error(Eshortstat);
-	p = f->p[PROTO(c->qid)];
-	cv = p->conv[CONV(c->qid)];
-	if(!iseve() && strcmp(ATTACHER(c), cv->owner) != 0)
+
+	cv = f->p[PROTO(c->qid)]->conv[CONV(c->qid)];
+	qlock(cv);
+	if(waserror()){
+		qunlock(cv);
+		nexterror();
+	}
+	if(strcmp(ATTACHER(c), cv->owner) != 0 && !iseve())
 		error(Eperm);
-	if(!emptystr(d->uid))
-		kstrdup(&cv->owner, d->uid);
-	if(d->mode != ~0UL)
-		cv->perm = d->mode & 0777;
+	if(!emptystr(dir->uid)){
+		if(strcmp(dir->uid, commonuser()) != 0 && !iseve())
+			error(Eperm);
+		kstrdup(&cv->owner, dir->uid);
+	}
+	if(dir->mode != ~0UL)
+		cv->perm = dir->mode & 0666;
+	qunlock(cv);
 	poperror();
-	free(d);
+
+	free(dir);
+	poperror();
+
 	return n;
 }
 
-extern void
+void
 closeconv(Conv *cv)
 {
 	Conv *nc;
@@ -564,7 +561,7 @@
 	}
 
 	/* close all incoming calls since no listen will ever happen */
-	for(nc = cv->incall; nc; nc = cv->incall){
+	for(nc = cv->incall; nc != nil; nc = cv->incall){
 		cv->incall = nc->next;
 		closeconv(nc);
 	}
@@ -576,9 +573,9 @@
 	while((mp = cv->multi) != nil)
 		ipifcremmulti(cv, mp->ma, mp->ia);
 
-	cv->r = nil;
-	cv->rgen = 0;
-	cv->p->close(cv);
+	if(cv->p->close != nil)
+		(*cv->p->close)(cv);
+
 	cv->state = Idle;
 	qunlock(cv);
 }
@@ -596,10 +593,6 @@
 		if(c->flag & COPEN)
 			netlogclose(f);
 		break;
-	case Qiprouter:
-		if(c->flag & COPEN)
-			iprouterclose(f);
-		break;
 	case Qdata:
 	case Qctl:
 	case Qerr:
@@ -620,13 +613,13 @@
 	Statelen=	32*1024,
 };
 
-static s32
-ipread(Chan *ch, void *a, s32 n, s64 off)
+static long
+ipread(Chan *ch, void *a, long n, vlong off)
 {
 	Conv *c;
 	Proto *x;
 	char *buf, *p;
-	s32 rv;
+	long rv;
 	Fs *f;
 	ulong offset = off;
 
@@ -648,21 +641,22 @@
 		return readstr(offset, a, n, f->ndb);
 	case Qiproute:
 		return routeread(f, a, offset, n);
-	case Qiprouter:
-		return iprouterread(f, a, n);
 	case Qipselftab:
 		return ipselftabread(f, a, offset, n);
 	case Qlog:
 		return netlogread(f, a, offset, n);
 	case Qctl:
-		sprint(up->genbuf, "%ud", CONV(ch->qid));
-		return readstr(offset, p, n, up->genbuf);
+		buf = smalloc(16);
+		snprint(buf, 16, "%lud", CONV(ch->qid));
+		rv = readstr(offset, p, n, buf);
+		free(buf);
+		return rv;
 	case Qremote:
 		buf = smalloc(Statelen);
 		x = f->p[PROTO(ch->qid)];
 		c = x->conv[CONV(ch->qid)];
 		if(x->remote == nil) {
-			sprint(buf, "%I!%d\n", c->raddr, c->rport);
+			snprint(buf, Statelen, "%I!%d\n", c->raddr, c->rport);
 		} else {
 			(*x->remote)(c, buf, Statelen-2);
 		}
@@ -674,7 +668,7 @@
 		x = f->p[PROTO(ch->qid)];
 		c = x->conv[CONV(ch->qid)];
 		if(x->local == nil) {
-			sprint(buf, "%I!%d\n", c->laddr, c->lport);
+			snprint(buf, Statelen, "%I!%d\n", c->laddr, c->lport);
 		} else {
 			(*x->local)(c, buf, Statelen-2);
 		}
@@ -711,7 +705,7 @@
 }
 
 static Block*
-ipbread(Chan* ch, s32 n, u32 offset)
+ipbread(Chan* ch, long n, ulong offset)
 {
 	Conv *c;
 	Proto *x;
@@ -740,7 +734,7 @@
 /*
  *  set a local port making sure the quad of raddr,rport,laddr,lport is unique
  */
-static char*
+char*
 setluniqueport(Conv* c, int lport)
 {
 	Proto *p;
@@ -771,51 +765,63 @@
 }
 
 /*
+ * is lport in use by anyone?
+ */
+static int	/* 1 if some conversation of p already has local port lport, else 0 */
+lportinuse(Proto *p, ushort lport)
+{
+	int x;
+
+	for(x = 0; x < p->nc && p->conv[x]; x++)	/* stop at nc or at the first nil slot */
+		if(p->conv[x]->lport == lport)
+			return 1;
+	return 0;
+}
+
+/*
  *  pick a local port and set it
  */
-extern void
+char *	/* returns nil once c->lport is set, else an error string */
 setlport(Conv* c)
 {
 	Proto *p;
-	ushort *pp;
-	int x, found;
+	int i, port;
 
 	p = c->p;
-	if(c->restricted)
-		pp = &p->nextrport;
-	else
-		pp = &p->nextport;
 	qlock(p);
-	for(;;(*pp)++){
+	if(c->restricted){
+		/* Restricted ports cycle through 600..1023; give up after one full lap. */
+		for(i=0; i<1024-600; i++){
+			if(p->nextrport >= 1024 || p->nextrport < 600)
+				p->nextrport = 600;
+			port = p->nextrport++;
+			if(!lportinuse(p, port))
+				goto chosen;
+		}
+	}else{
 		/*
-		 * Fsproto initialises p->nextport to 0 and the restricted
-		 * ports (p->nextrport) to 600.
-		 * Restricted ports must lie between 600 and 1024.
-		 * For the initial condition or if the unrestricted port number
-		 * has wrapped round, select a random port between 5000 and 1<<15
-		 * to start at.
+		 * Unrestricted ports are chosen randomly
+		 * between 2^15 and 2^16.  There are at most
+		 * 4*Nchan = 4096 ports in use at any given time,
+		 * so even in the worst case, a random probe has a
+		 * 1 - 4096/2^15 = 87% chance of success.
+		 * If 64 successive probes fail, there is a bug somewhere
+		 * (or a once in 10^58 event has happened, but that's
+		 * less likely than a venti collision).
 		 */
-		if(c->restricted){
-			if(*pp >= 1024)
-				*pp = 600;
+		for(i=0; i<64; i++){	/* at most 64 random probes */
+			port = (1<<15) + nrand(1<<15);	/* presumably nrand(n) yields 0..n-1, i.e. 32768..65535; TODO confirm */
+			if(!lportinuse(p, port))
+				goto chosen;
 		}
-		else while(*pp < 5000)
-			*pp = nrand(1<<15);
-
-		found = 0;
-		for(x = 0; x < p->nc; x++){
-			if(p->conv[x] == nil)
-				break;
-			if(p->conv[x]->lport == *pp){
-				found = 1;
-				break;
-			}
-		}
-		if(found == 0)
-			break;
 	}
-	c->lport = (*pp)++;
 	qunlock(p);
+	return "no ports available";	/* every probe found the port already in use */
+
+chosen:
+	c->lport = port;	/* still holding the qlock on p taken above */
+	qunlock(p);
+	return nil;
 }
 
 /*
@@ -822,7 +828,7 @@
  *  set a local address and port from a string of the form
  *	[address!]port[!r]
  */
-static char*
+char*
 setladdrport(Conv* c, char* str, int announcing)
 {
 	char *p;
@@ -830,8 +836,6 @@
 	ushort lport;
 	uchar addr[IPaddrlen];
 
-	rv = nil;
-
 	/*
 	 *  ignore restricted part if it exists.  it's
 	 *  meaningless on local ports.
@@ -854,8 +858,9 @@
 		if(strcmp(str, "*") == 0)
 			ipmove(c->laddr, IPnoaddr);
 		else {
-			parseip(addr, str);
-			if(ipforme(c->p->f, addr))
+			if(parseip(addr, str) == -1)
+				return Ebadip;
+			if(ipforme(c->p->f, addr) != 0 || ipismulticast(addr))
 				ipmove(c->laddr, addr);
 			else
 				return "not a local IP address";
@@ -869,9 +874,13 @@
 		return setluniqueport(c, 0);
 	}
 
-	lport = atoi(p);
+	str = p;
+	lport = strtol(str, &p, 10);
+	if(p <= str || strchr("!", *p) == nil)
+		return "bad numeric port";
+
 	if(lport <= 0)
-		setlport(c);
+		rv = setlport(c);
 	else
 		rv = setluniqueport(c, lport);
 	return rv;
@@ -886,13 +895,17 @@
 	if(p == nil)
 		return "malformed address";
 	*p++ = 0;
-	parseip(c->raddr, str);
-	c->rport = atoi(p);
-	p = strchr(p, '!');
-	if(p){
-		if(strstr(p, "!r") != nil)
-			c->restricted = 1;
-	}
+	if(parseip(c->raddr, str) == -1)
+		return Ebadip;
+
+	str = p;
+	c->rport = strtol(str, &p, 10);
+	if(p <= str || strchr("!", *p) == nil)
+		return "bad numeric port";
+
+	if(strstr(p, "!r") != nil)
+		c->restricted = 1;
+
 	return nil;
 }
 
@@ -912,7 +925,9 @@
 		if(p != nil)
 			return p;
 		setladdr(c);
-		setlport(c);
+		p = setlport(c);
+		if (p != nil)
+			return p;
 		break;
 	case 3:
 		p = setraddrport(c, argv[1]);
@@ -923,12 +938,7 @@
 			return p;
 	}
 
-	if((memcmp(c->raddr, v4prefix, IPv4off) == 0 &&
-		memcmp(c->laddr, v4prefix, IPv4off) == 0)
-		|| ipcmp(c->raddr, IPnoaddr) == 0)
-		c->ipversion = V4;
-	else
-		c->ipversion = V6;
+	c->ipversion = convipvers(c);
 
 	return nil;
 }
@@ -978,10 +988,11 @@
 	c->rport = 0;
 	switch(argc){
 	default:
-		return "bad args to announce";
+		break;
 	case 2:
 		return setladdrport(c, argv[1], 1);
 	}
+	return "bad args to announce";
 }
 
 /*
@@ -1028,10 +1039,11 @@
 {
 	switch(argc){
 	default:
-		return "bad args to bind";
+		break;
 	case 2:
 		return setladdrport(c, argv[1], 0);
 	}
+	return "bad args to bind";
 }
 
 static void
@@ -1042,7 +1054,7 @@
 	if(x->bind == nil)
 		p = Fsstdbind(c, cb->f, cb->nf);
 	else
-		p = x->bind(c, cb->f, cb->nf);
+		p = (*x->bind)(c, cb->f, cb->nf);
 	if(p != nil)
 		error(p);
 }
@@ -1065,8 +1077,8 @@
 		c->ttl = atoi(cb->f[1]);
 }
 
-static s32
-ipwrite(Chan* ch, void *v, s32 n, s64 off)
+static long
+ipwrite(Chan* ch, void *v, long n, vlong off)
 {
 	Conv *c;
 	Proto *x;
@@ -1075,6 +1087,7 @@
 	uchar ia[IPaddrlen], ma[IPaddrlen];
 	Fs *f;
 	char *a;
+	ulong offset = off;
 
 	a = v;
 	f = ipfs[ch->dev];
@@ -1099,7 +1112,8 @@
 		netlogctl(f, a, n);
 		return n;
 	case Qndb:
-		return ndbwrite(f, a, off, n);
+		return ndbwrite(f, a, offset, n);
+		break;
 	case Qctl:
 		x = f->p[PROTO(ch->qid)];
 		c = x->conv[CONV(ch->qid)];
@@ -1131,13 +1145,15 @@
 			if(cb->nf == 2){
 				if(!ipismulticast(c->raddr))
 					error("addmulti for a non multicast address");
-				parseip(ia, cb->f[1]);
+				if (parseip(ia, cb->f[1]) == -1)
+					error(Ebadip);
 				ipifcaddmulti(c, c->raddr, ia);
 			} else {
-				parseip(ma, cb->f[2]);
+				if (parseip(ia, cb->f[1]) == -1 ||
+				    parseip(ma, cb->f[2]) == -1)
+					error(Ebadip);
 				if(!ipismulticast(ma))
 					error("addmulti for a non multicast address");
-				parseip(ia, cb->f[1]);
 				ipifcaddmulti(c, ma, ia);
 			}
 		} else if(strcmp(cb->f[0], "remmulti") == 0){
@@ -1145,10 +1161,11 @@
 				error("remmulti needs interface address");
 			if(!ipismulticast(c->raddr))
 				error("remmulti for a non multicast address");
-			parseip(ia, cb->f[1]);
+			if (parseip(ia, cb->f[1]) == -1)
+				error(Ebadip);
 			ipifcremmulti(c, c->raddr, ia);
 		} else if(x->ctl != nil) {
-			p = x->ctl(c, cb->f, cb->nf);
+			p = (*x->ctl)(c, cb->f, cb->nf);
 			if(p != nil)
 				error(p);
 		} else
@@ -1160,13 +1177,12 @@
 	return n;
 }
 
-static s32
-ipbwrite(Chan* ch, Block* bp, u32 offset)
+static long
+ipbwrite(Chan* ch, Block* bp, ulong offset)
 {
 	Conv *c;
 	Proto *x;
 	Fs *f;
-	int n;
 
 	switch(TYPE(ch->qid)){
 	case Qdata:
@@ -1177,11 +1193,7 @@
 		if(c->wq == nil)
 			error(Eperm);
 
-		if(bp->next)
-			bp = concatblock(bp);
-		n = BLEN(bp);
-		qbwrite(c->wq, bp);
-		return n;
+		return qbwrite(c->wq, bp);
 	default:
 		return devbwrite(ch, bp, offset);
 	}
@@ -1198,13 +1210,13 @@
 	ipwalk,
 	ipstat,
 	ipopen,
-	devcreate,
+	ipcreate,
 	ipclose,
 	ipread,
 	ipbread,
 	ipwrite,
 	ipbwrite,
-	devremove,
+	ipremove,
 	ipwstat,
 };
 
@@ -1224,12 +1236,15 @@
 
 	p->qid.type = QTDIR;
 	p->qid.path = QID(f->np, 0, Qprotodir);
+	if(p->nc > Maskconv+1){
+		print("Fsproto: %s nc %d > %d\n", p->name, p->nc, Maskconv+1);
+		p->nc = Maskconv+1;
+	}
 	p->conv = malloc(sizeof(Conv*)*(p->nc+1));
 	if(p->conv == nil)
 		panic("Fsproto");
 
 	p->x = f->np;
-	p->nextport = 0;
 	p->nextrport = 600;
 	f->p[f->np++] = p;
 
@@ -1262,21 +1277,33 @@
 		if(c == nil){
 			c = malloc(sizeof(Conv));
 			if(c == nil)
-				error(Enomem);
-			qlock(c);
+				return nil;
+			if(waserror()){
+				qfree(c->rq);
+				qfree(c->wq);
+				qfree(c->eq);
+				qfree(c->sq);
+				free(c->ptcl);
+				free(c);
+				return nil;
+			}
 			c->p = p;
 			c->x = pp - p->conv;
 			if(p->ptclsize != 0){
 				c->ptcl = malloc(p->ptclsize);
-				if(c->ptcl == nil) {
-					free(c);
+				if(c->ptcl == nil)
 					error(Enomem);
-				}
 			}
-			*pp = c;
-			p->ac++;
 			c->eq = qopen(1024, Qmsg, 0, 0);
+			if(c->eq == nil)
+				error(Enomem);
 			(*p->create)(c);
+			if(c->rq == nil || c->wq == nil)
+				error(Enomem);
+			poperror();
+			qlock(c);
+			*pp = c;
+			p->ac++;
 			break;
 		}
 		if(canqlock(c)){
@@ -1291,8 +1318,11 @@
 		}
 	}
 	if(pp >= ep) {
-		if(p->gc != nil && (*p->gc)(p))
-			goto retry;
+		if(p->gc != nil){
+			print("Fsprotoclone: garbage collecting %s Convs\n", p->name);
+			if((*p->gc)(p))
+				goto retry;
+		}
 		return nil;
 	}
 
@@ -1307,8 +1337,9 @@
 	c->lport = 0;
 	c->rport = 0;
 	c->restricted = 0;
+	c->ignoreadvice = 0;
 	c->ttl = MAXTTL;
-	c->tos = DFLTTOS;
+	c->tos = 0;
 	qreopen(c->rq);
 	qreopen(c->wq);
 	qreopen(c->eq);
@@ -1321,7 +1352,7 @@
 Fsconnected(Conv* c, char* msg)
 {
 	if(msg != nil && *msg != '\0')
-		kstrcpy(c->cerr, msg, sizeof(c->cerr));
+		strncpy(c->cerr, msg, ERRMAX-1);
 
 	switch(c->state){
 
@@ -1368,12 +1399,19 @@
 	for(l = &c->incall; *l; l = &(*l)->next)
 		i++;
 	if(i >= Maxincall) {
+		static int beenhere;
+
 		qunlock(c);
+		if (!beenhere) {
+			beenhere = 1;
+			print("Fsnewcall: incall queue full (%d) on port %d\n",
+				i, c->lport);
+		}
 		return nil;
 	}
 
 	/* find a free conversation */
-	nc = Fsprotoclone(c->p, network);
+	nc = Fsprotoclone(c->p, c->owner);
 	if(nc == nil) {
 		qunlock(c);
 		return nil;
@@ -1394,12 +1432,12 @@
 	return nc;
 }
 
-static long
+long
 ndbwrite(Fs *f, char *a, ulong off, int n)
 {
 	if(off > strlen(f->ndb))
 		error(Eio);
-	if(off+n >= sizeof(f->ndb)-1)
+	if(off+n >= sizeof(f->ndb))
 		error(Eio);
 	memmove(f->ndb+off, a, n);
 	f->ndb[off+n] = 0;
@@ -1411,7 +1449,7 @@
 ulong
 scalednconv(void)
 {
-	if(conf.npage*BY2PG >= 128*MB)
+	if(cpuserver && conf.npage*BY2PG >= 128*MB)
 		return Nchans*4;
 	return Nchans;
 }
diff -u a/os/ip//esp.c b/os/ip//esp.c
--- a/os/ip//esp.c
+++ b/os/ip//esp.c
@@ -1,3 +1,11 @@
+/*
+ * Encapsulating Security Payload for IPsec for IPv4, rfc1827.
+ * extended to IPv6.
+ * rfc2104 defines hmac computation.
+ *	currently only implements tunnel mode.
+ * TODO: verify aes algorithms;
+ *	transport mode (host-to-host)
+ */
 #include	"u.h"
 #include	"../port/lib.h"
 #include	"mem.h"
@@ -6,47 +14,79 @@
 #include	"../port/error.h"
 
 #include	"ip.h"
+#include	"ipv6.h"
+#include	<libsec.h>
 
-#include	"libsec.h"
+#define BITS2BYTES(bi) (((bi) + BI2BY - 1) / BI2BY)
+#define BYTES2BITS(by)  ((by) * BI2BY)
 
+typedef struct Algorithm Algorithm;
+typedef struct Esp4hdr Esp4hdr;
+typedef struct Esp6hdr Esp6hdr;
+typedef struct Espcb Espcb;
 typedef struct Esphdr Esphdr;
+typedef struct Esppriv Esppriv;
 typedef struct Esptail Esptail;
 typedef struct Userhdr Userhdr;
-typedef struct Esppriv Esppriv;
-typedef struct Espcb Espcb;
-typedef struct Algorithm Algorithm;
-typedef struct Esprc4 Esprc4;
 
-#define DPRINT if(0)print
+enum {
+	Encrypt,
+	Decrypt,
 
-enum
-{
-	IP_ESPPROTO	= 50,
-	EsphdrSize	= 28,	// includes IP header
-	IphdrSize	= 20,	// options have been striped
-	EsptailSize	= 2,	// does not include pad or auth data
-	UserhdrSize	= 4,	// user visable header size - if enabled
+	IP_ESPPROTO	= 50,	/* IP v4 and v6 protocol number */
+	Esp4hdrlen	= IP4HDR + 8,
+	Esp6hdrlen	= IP6HDR + 8,
+
+	Esptaillen	= 2,	/* does not include pad or auth data */
+	Userhdrlen	= 4,	/* user-visible header size - if enabled */
+
+	Desblk	 = BITS2BYTES(64),
+	Des3keysz = BITS2BYTES(192),
+
+	Aesblk	 = BITS2BYTES(128),
+	Aeskeysz = BITS2BYTES(128),
 };
 
 struct Esphdr
 {
-	/* ip header */
+	uchar	espspi[4];	/* Security parameter index */
+	uchar	espseq[4];	/* Sequence number */
+	uchar	payload[];
+};
+
+/*
+ * tunnel-mode (network-to-network, etc.) layout is:
+ * new IP hdrs | ESP hdr |
+ *	 enc { orig IP hdrs | TCP/UDP hdr | user data | ESP trailer } | ESP ICV
+ *
+ * transport-mode (host-to-host) layout would be:
+ *	orig IP hdrs | ESP hdr |
+ *			enc { TCP/UDP hdr | user data | ESP trailer } | ESP ICV
+ */
+struct Esp4hdr
+{
+	/* ipv4 header */
 	uchar	vihl;		/* Version and header length */
 	uchar	tos;		/* Type of service */
 	uchar	length[2];	/* packet length */
 	uchar	id[2];		/* Identification */
 	uchar	frag[2];	/* Fragment information */
-	uchar	Unused;	
+	uchar	Unused;
 	uchar	espproto;	/* Protocol */
 	uchar	espplen[2];	/* Header plus data length */
 	uchar	espsrc[4];	/* Ip source */
 	uchar	espdst[4];	/* Ip destination */
 
-	/* esp header */
-	uchar	espspi[4];	/* Security parameter index */
-	uchar	espseq[4];	/* Sequence number */
+	Esphdr;
 };
 
+/* tunnel-mode layout */
+struct Esp6hdr
+{
+	IPV6HDR;
+	Esphdr;
+};
+
 struct Esptail
 {
 	uchar	pad;
@@ -53,16 +93,28 @@
 	uchar	nexthdr;
 };
 
+/* IP-version-dependent data: header sizes from getverslens, spi and addresses from getpktspiaddrs */
+typedef struct Versdep Versdep;
+struct Versdep
+{
+	ulong	version;	/* V4 or V6 */
+	ulong	iphdrlen;	/* IP4HDR or IP6HDR */
+	ulong	hdrlen;		/* iphdrlen + esp hdr len */
+	ulong	spi;		/* security parameter index read from the packet */
+	uchar	laddr[IPaddrlen];	/* taken from the packet destination field */
+	uchar	raddr[IPaddrlen];	/* taken from the packet source field */
+};
+
 /* header as seen by the user */
 struct Userhdr
 {
-	uchar	nexthdr;	// next protocol
+	uchar	nexthdr;	/* next protocol */
 	uchar	unused[3];
 };
 
 struct Esppriv
 {
-	ulong	in;
+	uvlong	in;
 	ulong	inerrors;
 };
 
@@ -72,77 +124,68 @@
 struct Espcb
 {
 	int	incoming;
-	int	header;		// user user level header
+	int	header;		/* nonzero: user data carries a Userhdr */
 	ulong	spi;
-	ulong	seq;		// last seq sent
-	ulong	window;		// for replay attacks
+	ulong	seq;		/* last seq sent */
+	ulong	window;		/* for replay attacks */
+
 	char	*espalg;
-	void	*espstate;	// other state for esp
-	int	espivlen;	// in bytes
+	void	*espstate;	/* other state for esp */
+	int	espivlen;	/* in bytes */
 	int	espblklen;
 	int	(*cipher)(Espcb*, uchar *buf, int len);
+
 	char	*ahalg;
-	void	*ahstate;	// other state for esp
-	int	ahlen;		// auth data length in bytes
+	void	*ahstate;	/* other state for ah (shaahinit keeps the key here) */
+	int	ahlen;		/* auth data length in bytes */
 	int	ahblklen;
 	int	(*auth)(Espcb*, uchar *buf, int len, uchar *hash);
+	DigestState *ds;
 };
 
 struct Algorithm
 {
 	char 	*name;
-	int	keylen;		// in bits
-	void	(*init)(Espcb*, char* name, uchar *key, int keylen);
+	int	keylen;		/* in bits */
+	void	(*init)(Espcb*, char* name, uchar *key, unsigned keylen);
 };
 
-
-enum {
-	RC4forward	= 10*1024*1024,	// maximum skip forward
-	RC4back = 100*1024,		// maximum look back
-};
-
-struct Esprc4
-{
-	ulong cseq;	// current byte sequence number
-	RC4state current;
-
-	int ovalid;	// old is valid
-	ulong lgseq; // last good sequence
-	ulong oseq;	// old byte sequence number
-	RC4state old;
-};
-
 static	Conv* convlookup(Proto *esp, ulong spi);
 static	char *setalg(Espcb *ecb, char **f, int n, Algorithm *alg);
-static	void nullespinit(Espcb*, char*, uchar *key, int keylen);
-static	void nullahinit(Espcb*, char*, uchar *key, int keylen);
-static	void shaahinit(Espcb*, char*, uchar *key, int keylen);
-static	void md5ahinit(Espcb*, char*, uchar *key, int keylen);
-static	void desespinit(Espcb *ecb, char *name, uchar *k, int n);
-static	void rc4espinit(Espcb *ecb, char *name, uchar *k, int n);
 static	void espkick(void *x);
 
+static	void nullespinit(Espcb*, char*, uchar *key, unsigned keylen);
+static	void des3espinit(Espcb*, char*, uchar *key, unsigned keylen);
+static	void aescbcespinit(Espcb*, char*, uchar *key, unsigned keylen);
+static	void aesctrespinit(Espcb*, char*, uchar *key, unsigned keylen);
+static	void desespinit(Espcb *ecb, char *name, uchar *k, unsigned n);
+
+static	void nullahinit(Espcb*, char*, uchar *key, unsigned keylen);
+static	void shaahinit(Espcb*, char*, uchar *key, unsigned keylen);
+static	void md5ahinit(Espcb*, char*, uchar *key, unsigned keylen);
+
 static Algorithm espalg[] =
 {
-	"null",			0,	nullespinit,
-	"des_56_cbc",		64,	desespinit,
-	"rc4_128",		128,	rc4espinit,
-	nil,			0,	nil,
+	"null",		0,	nullespinit,
+	"des3_cbc",	192,	des3espinit,	/* new rfc2451, des-ede3 */
+	"aes_128_cbc",	128,	aescbcespinit,	/* new rfc3602 */
+	"aes_ctr",	128,	aesctrespinit,	/* new rfc3686 */
+	"des_56_cbc",	64,	desespinit,	/* rfc2405, deprecated */
+	nil,		0,	nil,
 };
 
 static Algorithm ahalg[] =
 {
-	"null",			0,	nullahinit,
-	"hmac_sha1_96",		128,	shaahinit,
-	"hmac_md5_96",		128,	md5ahinit,
-	nil,			0,	nil,
+	"null",		0,	nullahinit,
+	"hmac_sha1_96",	128,	shaahinit,	/* rfc2404 */
+	"hmac_md5_96",	128,	md5ahinit,	/* rfc2403 */
+	nil,		0,	nil,
 };
 
 static char*
 espconnect(Conv *c, char **argv, int argc)
 {
-	char *p, *pp;
-	char *e = nil;
+	char *p, *pp, *e = nil;
 	ulong spi;
 	Espcb *ecb = (Espcb*)c->ptcl;
 
@@ -157,7 +200,10 @@
 			break;
 		}
 		*p++ = 0;
-		parseip(c->raddr, argv[1]);
+		if (parseip(c->raddr, argv[1]) == -1) {
+			e = Ebadip;
+			break;
+		}
 		findlocalip(c->p->f, c->laddr, c->raddr);
 		ecb->incoming = 0;
 		ecb->seq = 0;
@@ -215,26 +261,86 @@
 	ipmove(c->raddr, IPnoaddr);
 
 	ecb = (Espcb*)c->ptcl;
-	free(ecb->espstate);
-	free(ecb->ahstate);
+	secfree(ecb->espstate);
+	secfree(ecb->ahstate);
 	memset(ecb, 0, sizeof(Espcb));
 }
 
+static int	/* V4 or V6 from the version nibble of the first header byte; 0 if the pullup fails */
+pktipvers(Fs *f, Block **bpp)
+{
+	if (*bpp == nil || BLEN(*bpp) == 0) {
+		/* get enough to identify the IP version */
+		*bpp = pullupblock(*bpp, IP4HDR);
+		if(*bpp == nil) {
+			netlog(f, Logesp, "esp: short packet\n");
+			return 0;	/* NOTE(review): callers hand this to getverslens, which panics unless it is V4 or V6 - confirm */
+		}
+	}
+	return (((Esp4hdr*)(*bpp)->rp)->vihl & 0xf0) == IP_VER4? V4: V6;	/* anything that is not IP_VER4 is reported as V6 */
+}
+
 static void
+getverslens(int version, Versdep *vp)	/* record version in vp and set the matching header lengths */
+{
+	vp->version = version;
+	switch(vp->version) {
+	case V4:
+		vp->iphdrlen = IP4HDR;
+		vp->hdrlen   = Esp4hdrlen;	/* IP4HDR + 8 per the enum above */
+		break;
+	case V6:
+		vp->iphdrlen = IP6HDR;
+		vp->hdrlen   = Esp6hdrlen;	/* IP6HDR + 8 per the enum above */
+		break;
+	default:
+		panic("esp: getverslens version %d wrong", version);	/* any other value is treated as fatal */
+	}
+}
+
+static void	/* fill vp->spi, vp->raddr and vp->laddr from the header at pkt; vp->version must already be set */
+getpktspiaddrs(uchar *pkt, Versdep *vp)
+{
+	Esp4hdr *eh4;
+	Esp6hdr *eh6;
+
+	switch(vp->version) {
+	case V4:
+		eh4 = (Esp4hdr*)pkt;
+		v4tov6(vp->raddr, eh4->espsrc);	/* remote = packet source */
+		v4tov6(vp->laddr, eh4->espdst);	/* local = packet destination */
+		vp->spi = nhgetl(eh4->espspi);
+		break;
+	case V6:
+		eh6 = (Esp6hdr*)pkt;
+		ipmove(vp->raddr, eh6->src);	/* remote = packet source */
+		ipmove(vp->laddr, eh6->dst);	/* local = packet destination */
+		vp->spi = nhgetl(eh6->espspi);
+		break;
+	default:
+		panic("esp: getpktspiaddrs vp->version %ld wrong", vp->version);	/* any other value is treated as fatal */
+	}
+}
+
+/*
+ * encapsulate next IP packet on x's write queue in IP/ESP packet
+ * and initiate output of the result.
+ */
+static void
 espkick(void *x)
 {
+	int nexthdr, payload, pad, align;
+	uchar *auth;
+	Block *bp;
 	Conv *c = x;
-	Esphdr *eh;
+	Esp4hdr *eh4;
+	Esp6hdr *eh6;
+	Espcb *ecb;
 	Esptail *et;
 	Userhdr *uh;
-	Espcb *ecb;
-	Block *bp;
-	int nexthdr;
-	int payload;
-	int pad;
-	int align;
-	uchar *auth;
+	Versdep vers;
 
+	getverslens(convipvers(c), &vers);
 	bp = qget(c->wq);
 	if(bp == nil)
 		return;
@@ -244,7 +350,7 @@
 
 	if(ecb->header) {
 		/* make sure the message has a User header */
-		bp = pullupblock(bp, UserhdrSize);
+		bp = pullupblock(bp, Userhdrlen);
 		if(bp == nil) {
 			qunlock(c);
 			return;
@@ -251,15 +357,16 @@
 		}
 		uh = (Userhdr*)bp->rp;
 		nexthdr = uh->nexthdr;
-		bp->rp += UserhdrSize;
+		bp->rp += Userhdrlen;
 	} else {
-		nexthdr = 0;  // what should this be?
+		nexthdr = 0;	/* what should this be? */
 	}
 
 	payload = BLEN(bp) + ecb->espivlen;
 
 	/* Make space to fit ip header */
-	bp = padblock(bp, EsphdrSize + ecb->espivlen);
+	bp = padblock(bp, vers.hdrlen + ecb->espivlen);
+	getpktspiaddrs(bp->rp, &vers);
 
 	align = 4;
 	if(ecb->espblklen > align)
@@ -266,7 +373,7 @@
 		align = ecb->espblklen;
 	if(align % ecb->ahblklen != 0)
 		panic("espkick: ahblklen is important after all");
-	pad = (align-1) - (payload + EsptailSize-1)%align;
+	pad = (align-1) - (payload + Esptaillen-1)%align;
 
 	/*
 	 * Make space for tail
@@ -273,70 +380,88 @@
 	 * this is done by calling padblock with a negative size
 	 * Padblock does not change bp->wp!
 	 */
-	bp = padblock(bp, -(pad+EsptailSize+ecb->ahlen));
-	bp->wp += pad+EsptailSize+ecb->ahlen;
+	bp = padblock(bp, -(pad+Esptaillen+ecb->ahlen));
+	bp->wp += pad+Esptaillen+ecb->ahlen;
 
-	eh = (Esphdr *)(bp->rp);
-	et = (Esptail*)(bp->rp + EsphdrSize + payload + pad);
+	et = (Esptail*)(bp->rp + vers.hdrlen + payload + pad);
 
-	// fill in tail
+	/* fill in tail */
 	et->pad = pad;
 	et->nexthdr = nexthdr;
 
-	ecb->cipher(ecb, bp->rp+EsphdrSize, payload+pad+EsptailSize);
-	auth = bp->rp + EsphdrSize + payload + pad + EsptailSize;
+	/* encrypt the payload */
+	ecb->cipher(ecb, bp->rp + vers.hdrlen, payload + pad + Esptaillen);
+	auth = bp->rp + vers.hdrlen + payload + pad + Esptaillen;
 
-	// fill in head
-	eh->vihl = IP_VER4;
-	hnputl(eh->espspi, ecb->spi);
-	hnputl(eh->espseq, ++ecb->seq);
-	v6tov4(eh->espsrc, c->laddr);
-	v6tov4(eh->espdst, c->raddr);
-	eh->espproto = IP_ESPPROTO;
-	eh->frag[0] = 0;
-	eh->frag[1] = 0;
+	/* fill in head; construct a new IP header and an ESP header */
+	if (vers.version == V4) {
+		eh4 = (Esp4hdr *)bp->rp;
+		eh4->vihl = IP_VER4;
+		v6tov4(eh4->espsrc, c->laddr);
+		v6tov4(eh4->espdst, c->raddr);
+		eh4->espproto = IP_ESPPROTO;
+		eh4->frag[0] = 0;
+		eh4->frag[1] = 0;
 
-	ecb->auth(ecb, bp->rp+IphdrSize, (EsphdrSize-IphdrSize)+payload+pad+EsptailSize, auth);
+		hnputl(eh4->espspi, ecb->spi);
+		hnputl(eh4->espseq, ++ecb->seq);
+	} else {
+		eh6 = (Esp6hdr *)bp->rp;
+		eh6->vcf[0] = IP_VER6;
+		ipmove(eh6->src, c->laddr);
+		ipmove(eh6->dst, c->raddr);
+		eh6->proto = IP_ESPPROTO;
 
+		hnputl(eh6->espspi, ecb->spi);
+		hnputl(eh6->espseq, ++ecb->seq);
+	}
+
+	/* compute secure hash */
+	ecb->auth(ecb, bp->rp + vers.iphdrlen, (vers.hdrlen - vers.iphdrlen) +
+		payload + pad + Esptaillen, auth);
+
 	qunlock(c);
-	//print("esp: pass down: %uld\n", BLEN(bp));
-	ipoput4(c->p->f, bp, 0, c->ttl, c->tos, c);
+	/* print("esp: pass down: %uld\n", BLEN(bp)); */
+	if (vers.version == V4)
+		ipoput4(c->p->f, bp, 0, c->ttl, c->tos, c);
+	else
+		ipoput6(c->p->f, bp, 0, c->ttl, c->tos, c);
 }
 
+/*
+ * decapsulate IP packet from IP/ESP packet in bp and
+ * pass the result up the spi's Conv's read queue.
+ */
 void
 espiput(Proto *esp, Ipifc*, Block *bp)
 {
-	Esphdr *eh;
-	Esptail *et;
-	Userhdr *uh;
+	int payload, nexthdr;
+	uchar *auth, *espspi;
 	Conv *c;
 	Espcb *ecb;
-	uchar raddr[IPaddrlen], laddr[IPaddrlen];
+	Esptail *et;
 	Fs *f;
-	uchar *auth;
-	ulong spi;
-	int payload, nexthdr;
+	Userhdr *uh;
+	Versdep vers;
 
 	f = esp->f;
 
-	bp = pullupblock(bp, EsphdrSize+EsptailSize);
+	getverslens(pktipvers(f, &bp), &vers);
+
+	bp = pullupblock(bp, vers.hdrlen + Esptaillen);
 	if(bp == nil) {
 		netlog(f, Logesp, "esp: short packet\n");
 		return;
 	}
+	getpktspiaddrs(bp->rp, &vers);
 
-	eh = (Esphdr*)(bp->rp);
-	spi = nhgetl(eh->espspi);
-	v4tov6(raddr, eh->espsrc);
-	v4tov6(laddr, eh->espdst);
-
 	qlock(esp);
 	/* Look for a conversation structure for this port */
-	c = convlookup(esp, spi);
+	c = convlookup(esp, vers.spi);
 	if(c == nil) {
 		qunlock(esp);
-		netlog(f, Logesp, "esp: no conv %I -> %I!%d\n", raddr,
-			laddr, spi);
+		netlog(f, Logesp, "esp: no conv %I -> %I!%lud\n", vers.raddr,
+			vers.laddr, vers.spi);
 		icmpnoconv(f, bp);
 		freeblist(bp);
 		return;
@@ -346,76 +471,83 @@
 	qunlock(esp);
 
 	ecb = c->ptcl;
-	// too hard to do decryption/authentication on block lists
-	if(bp->next)
+	/* too hard to do decryption/authentication on block lists */
+	if(bp->next != nil)
 		bp = concatblock(bp);
 
-	if(BLEN(bp) < EsphdrSize + ecb->espivlen + EsptailSize + ecb->ahlen) {
+	if(BLEN(bp) < vers.hdrlen + ecb->espivlen + Esptaillen + ecb->ahlen) {
 		qunlock(c);
-		netlog(f, Logesp, "esp: short block %I -> %I!%d\n", raddr,
-			laddr, spi);
+		netlog(f, Logesp, "esp: short block %I -> %I!%lud\n", vers.raddr,
+			vers.laddr, vers.spi);
 		freeb(bp);
 		return;
 	}
 
-	eh = (Esphdr*)(bp->rp);
 	auth = bp->wp - ecb->ahlen;
-	if(!ecb->auth(ecb, eh->espspi, auth-eh->espspi, auth)) {
+	espspi = vers.version == V4?	((Esp4hdr*)bp->rp)->espspi:
+					((Esp6hdr*)bp->rp)->espspi;
+
+	/* compute secure hash and authenticate */
+	if(!ecb->auth(ecb, espspi, auth - espspi, auth)) {
 		qunlock(c);
-print("esp: bad auth %I -> %I!%ld\n", raddr, laddr, spi);
-		netlog(f, Logesp, "esp: bad auth %I -> %I!%d\n", raddr,
-			laddr, spi);
+print("esp: bad auth %I -> %I!%ld\n", vers.raddr, vers.laddr, vers.spi);
+		netlog(f, Logesp, "esp: bad auth %I -> %I!%lud\n", vers.raddr,
+			vers.laddr, vers.spi);
 		freeb(bp);
 		return;
 	}
 
-	payload = BLEN(bp)-EsphdrSize-ecb->ahlen;
-	if(payload<=0 || payload%4 != 0 || payload%ecb->espblklen!=0) {
+	payload = BLEN(bp) - vers.hdrlen - ecb->ahlen;
+	if(payload <= 0 || payload % 4 != 0 || payload % ecb->espblklen != 0) {
 		qunlock(c);
-		netlog(f, Logesp, "esp: bad length %I -> %I!%d payload=%d BLEN=%d\n", raddr,
-			laddr, spi, payload, BLEN(bp));
+		netlog(f, Logesp, "esp: bad length %I -> %I!%lud payload=%d BLEN=%zd\n",
+			vers.raddr, vers.laddr, vers.spi, payload, BLEN(bp));
 		freeb(bp);
 		return;
 	}
-	if(!ecb->cipher(ecb, bp->rp+EsphdrSize, payload)) {
+
+	/* decrypt payload */
+	if(!ecb->cipher(ecb, bp->rp + vers.hdrlen, payload)) {
 		qunlock(c);
-print("esp: cipher failed %I -> %I!%ld: %r\n", raddr, laddr, spi);
-		netlog(f, Logesp, "esp: cipher failed %I -> %I!%d: %r\n", raddr,
-			laddr, spi);
+print("esp: cipher failed %I -> %I!%ld: %s\n", vers.raddr, vers.laddr, vers.spi, up->errstr);
+		netlog(f, Logesp, "esp: cipher failed %I -> %I!%lud: %s\n",
+			vers.raddr, vers.laddr, vers.spi, up->errstr);
 		freeb(bp);
 		return;
 	}
 
-	payload -= EsptailSize;
-	et = (Esptail*)(bp->rp + EsphdrSize + payload);
+	payload -= Esptaillen;
+	et = (Esptail*)(bp->rp + vers.hdrlen + payload);
 	payload -= et->pad + ecb->espivlen;
 	nexthdr = et->nexthdr;
 	if(payload <= 0) {
 		qunlock(c);
-		netlog(f, Logesp, "esp: short packet after decrypt %I -> %I!%d\n", raddr,
-			laddr, spi);
+		netlog(f, Logesp, "esp: short packet after decrypt %I -> %I!%lud\n",
+			vers.raddr, vers.laddr, vers.spi);
 		freeb(bp);
 		return;
 	}
 
-	// trim packet
-	bp->rp += EsphdrSize + ecb->espivlen;
+	/* trim packet */
+	bp->rp += vers.hdrlen + ecb->espivlen; /* toss original IP & ESP hdrs */
 	bp->wp = bp->rp + payload;
 	if(ecb->header) {
-		// assume UserhdrSize < EsphdrSize
-		bp->rp -= UserhdrSize;
+		/* assume Userhdrlen < Esp4hdrlen < Esp6hdrlen */
+		bp->rp -= Userhdrlen;
 		uh = (Userhdr*)bp->rp;
-		memset(uh, 0, UserhdrSize);
+		memset(uh, 0, Userhdrlen);
 		uh->nexthdr = nexthdr;
 	}
 
+	/* ingress filtering here? */
+
 	if(qfull(c->rq)){
-		netlog(f, Logesp, "esp: qfull %I -> %I.%uld\n", raddr,
-			laddr, spi);
+		netlog(f, Logesp, "esp: qfull %I -> %I.%uld\n", vers.raddr,
+			vers.laddr, vers.spi);
 		freeblist(bp);
 	}else {
-//print("esp: pass up: %uld\n", BLEN(bp));
-		qpass(c->rq, bp);
+//		print("esp: pass up: %uld\n", BLEN(bp));
+		qpass(c->rq, bp);	/* pass packet up the read queue */
 	}
 
 	qunlock(c);
@@ -440,19 +572,19 @@
 	return e;
 }
 
+/* called from icmp(v6) for unreachable hosts, time exceeded, etc. */
 void
 espadvise(Proto *esp, Block *bp, char *msg)
 {
-	Esphdr *h;
 	Conv *c;
-	ulong spi;
+	Versdep vers;
 
-	h = (Esphdr*)(bp->rp);
+	getverslens(pktipvers(esp->f, &bp), &vers);
+	getpktspiaddrs(bp->rp, &vers);
 
-	spi = nhgets(h->espspi);
 	qlock(esp);
-	c = convlookup(esp, spi);
-	if(c != nil) {
+	c = convlookup(esp, vers.spi);
+	if(c != nil && !c->ignoreadvice) {
 		qhangup(c->rq, msg);
 		qhangup(c->wq, msg);
 	}
@@ -466,7 +598,7 @@
 	Esppriv *upriv;
 
 	upriv = esp->priv;
-	return snprint(buf, len, "%lud %lud\n",
+	return snprint(buf, len, "%llud %lud\n",
 		upriv->in,
 		upriv->inerrors);
 }
@@ -520,10 +652,10 @@
 setalg(Espcb *ecb, char **f, int n, Algorithm *alg)
 {
 	uchar *key;
-	int i, nbyte, nchar;
-	int c;
+	int c, nbyte, nchar;
+	uint i;
 
-	if(n < 2)
+	if(n < 2 || n > 3)
 		return "bad format";
 	for(; alg->name; alg++)
 		if(strcmp(f[1], alg->name) == 0)
@@ -531,10 +663,14 @@
 	if(alg->name == nil)
 		return "unknown algorithm";
 
-	if(n != 3)
-		return "bad format";
 	nbyte = (alg->keylen + 7) >> 3;
-	nchar = strlen(f[2]);
+	if (n == 2)
+		nchar = 0;
+	else
+		nchar = strlen(f[2]);
+	if(nchar != 2 * nbyte)			/* TODO: maybe < is ok */
+		return "key not required length";
+	/* convert hex digits from ascii, in place */
 	for(i=0; i<nchar; i++) {
 		c = f[2][i];
 		if(c >= '0' && c <= '9')
@@ -544,21 +680,27 @@
 		else if(c >= 'A' && c <= 'F')
 			f[2][i] -= 'A'-10;
 		else
-			return "bad character in key";
+			return "non-hex character in key";
 	}
-	key = smalloc(nbyte);
-	for(i=0; i<nchar && i*2<nbyte; i++) {
+	/* collapse hex digits into complete bytes in reverse order in key */
+	key = secalloc(nbyte);
+	for(i = 0; i < nchar && i/2 < nbyte; i++) {
 		c = f[2][nchar-i-1];
 		if(i&1)
 			c <<= 4;
-		key[i>>1] |= c;
+		key[i/2] |= c;
 	}
-
+	memset(f[2], 0, nchar);
 	alg->init(ecb, alg->name, key, alg->keylen);
-	free(key);
+	secfree(key);
 	return nil;
 }
 
+
+/*
+ * null encryption
+ */
+
 static int
 nullcipher(Espcb*, uchar*, int)
 {
@@ -566,7 +708,7 @@
 }
 
 static void
-nullespinit(Espcb *ecb, char *name, uchar*, int)
+nullespinit(Espcb *ecb, char *name, uchar*, unsigned)
 {
 	ecb->espalg = name;
 	ecb->espblklen = 1;
@@ -581,7 +723,7 @@
 }
 
 static void
-nullahinit(Espcb *ecb, char *name, uchar*, int)
+nullahinit(Espcb *ecb, char *name, uchar*, unsigned)
 {
 	ecb->ahalg = name;
 	ecb->ahblklen = 1;
@@ -589,26 +731,28 @@
 	ecb->auth = nullauth;
 }
 
-void
+
+/*
+ * sha1
+ */
+
+static void	/* hash = sha1(opad-block, sha1(ipad-block, t)), both pads xored with key */
 seanq_hmac_sha1(uchar hash[SHA1dlen], uchar *t, long tlen, uchar *key, long klen)
 {
-	uchar ipad[65], opad[65];
 	int i;
+	uchar ipad[Hmacblksz+1], opad[Hmacblksz+1], innerhash[SHA1dlen];
 	DigestState *digest;
-	uchar innerhash[SHA1dlen];
 
-	for(i=0; i<64; i++){
-		ipad[i] = 0x36;
-		opad[i] = 0x5c;
-	}
-	ipad[64] = opad[64] = 0;
-	for(i=0; i<klen; i++){
+	memset(ipad, 0x36, Hmacblksz);
+	memset(opad, 0x5c, Hmacblksz);
+	ipad[Hmacblksz] = opad[Hmacblksz] = 0;
+	for(i = 0; i < klen; i++){	/* NOTE(review): klen is not checked against Hmacblksz; shaauth passes BITS2BYTES(128) */
 		ipad[i] ^= key[i];
 		opad[i] ^= key[i];
 	}
-	digest = sha1(ipad, 64, nil, nil);
+	digest = sha1(ipad, Hmacblksz, nil, nil);	/* inner hash starts with the ipad block */
 	sha1(t, tlen, innerhash, digest);
-	digest = sha1(opad, 64, nil, nil);
+	digest = sha1(opad, Hmacblksz, nil, nil);	/* outer hash starts with the opad block */
 	sha1(innerhash, SHA1dlen, hash, digest);
 }
 
@@ -615,11 +759,11 @@
 static int
 shaauth(Espcb *ecb, uchar *t, int tlen, uchar *auth)
 {
-	uchar hash[SHA1dlen];
 	int r;
+	uchar hash[SHA1dlen];
 
 	memset(hash, 0, SHA1dlen);
-	seanq_hmac_sha1(hash, t, tlen, (uchar*)ecb->ahstate, 16);
+	seanq_hmac_sha1(hash, t, tlen, (uchar*)ecb->ahstate, BITS2BYTES(128));
 	r = memcmp(auth, hash, ecb->ahlen) == 0;
 	memmove(auth, hash, ecb->ahlen);
 	return r;
@@ -626,40 +770,162 @@
 }
 
 static void
-shaahinit(Espcb *ecb, char *name, uchar *key, int klen)
+shaahinit(Espcb *ecb, char *name, uchar *key, unsigned klen)
 {
 	if(klen != 128)
 		panic("shaahinit: bad keylen");
-	klen >>= 8;	// convert to bytes
+	klen /= BI2BY;
 
 	ecb->ahalg = name;
 	ecb->ahblklen = 1;
-	ecb->ahlen = 12;
+	ecb->ahlen = BITS2BYTES(96);
 	ecb->auth = shaauth;
-	ecb->ahstate = smalloc(klen);
+	ecb->ahstate = secalloc(klen);
 	memmove(ecb->ahstate, key, klen);
 }
 
-void
+
+/*
+ * aes
+ */
+static int
+aescbccipher(Espcb *ecb, uchar *p, int n)	/* AES CBC over p[0..n) in place, 128-bit blocks; first block carries the IV; always returns 1 */
+{
+	uchar tmp[AESbsize], q[AESbsize];
+	uchar *pp, *tp, *ip, *eip, *ep;
+	AESstate *ds = ecb->espstate;
+
+	ep = p + n;
+	if(ecb->incoming) {
+		memmove(ds->ivec, p, AESbsize);	/* leading block of the buffer is the IV */
+		p += AESbsize;
+		while(p < ep){
+			memmove(tmp, p, AESbsize);	/* keep the ciphertext: it chains into the next block */
+			aes_decrypt(ds->dkey, ds->rounds, p, q);
+			memmove(p, q, AESbsize);
+			tp = tmp;
+			ip = ds->ivec;
+			for(eip = ip + AESbsize; ip < eip; ){
+				*p++ ^= *ip;	/* xor with previous ciphertext (or IV); also advances p one block */
+				*ip++ = *tp++;	/* saved ciphertext becomes the next chaining value */
+			}
+		}
+	} else {
+		memmove(p, ds->ivec, AESbsize);	/* emit the current chaining value as the leading block */
+		for(p += AESbsize; p < ep; p += AESbsize){
+			pp = p;
+			ip = ds->ivec;
+			for(eip = ip + AESbsize; ip < eip; )	/* xor plaintext with the chaining value */
+				*pp++ ^= *ip++;
+			aes_encrypt(ds->ekey, ds->rounds, p, q);
+			memmove(ds->ivec, q, AESbsize);	/* ciphertext chains into the next block (and next call) */
+			memmove(p, q, AESbsize);
+		}
+	}
+	return 1;
+}
+
+static void
+aescbcespinit(Espcb *ecb, char *name, uchar *k, unsigned n)	/* install aescbccipher on ecb with key k */
+{
+	uchar key[Aeskeysz], ivec[Aeskeysz];
+
+	n = BITS2BYTES(n);	/* from here on n is a byte count */
+	if(n > Aeskeysz)	/* clamp so the copy below fits key[] */
+		n = Aeskeysz;
+	memset(key, 0, sizeof(key));	/* short keys end up zero padded */
+	memmove(key, k, n);
+	prng(ivec, Aeskeysz);	/* initial ivec comes from prng */
+	ecb->espalg = name;
+	ecb->espblklen = Aesblk;
+	ecb->espivlen = Aesblk;
+	ecb->cipher = aescbccipher;
+	ecb->espstate = secalloc(sizeof(AESstate));
+	setupAESstate(ecb->espstate, key, n /* keybytes */, ivec);
+	memset(ivec, 0, sizeof(ivec));	/* wipe the stack copies of ivec and key */
+	memset(key, 0, sizeof(key));
+}
+
+static int
+aesctrcipher(Espcb *ecb, uchar *p, int n)	/* 128-bit blocks; NOTE(review): body is identical to aescbccipher (CBC chaining, aes_decrypt on input), not counter mode -- confirm intent */
+{
+	uchar tmp[AESbsize], q[AESbsize];
+	uchar *pp, *tp, *ip, *eip, *ep;
+	AESstate *ds = ecb->espstate;
+
+	ep = p + n;
+	if(ecb->incoming) {
+		memmove(ds->ivec, p, AESbsize);	/* leading block of the buffer is the IV */
+		p += AESbsize;
+		while(p < ep){
+			memmove(tmp, p, AESbsize);	/* keep the ciphertext: it chains into the next block */
+			aes_decrypt(ds->dkey, ds->rounds, p, q);
+			memmove(p, q, AESbsize);
+			tp = tmp;
+			ip = ds->ivec;
+			for(eip = ip + AESbsize; ip < eip; ){
+				*p++ ^= *ip;	/* xor with previous ciphertext (or IV); also advances p one block */
+				*ip++ = *tp++;	/* saved ciphertext becomes the next chaining value */
+			}
+		}
+	} else {
+		memmove(p, ds->ivec, AESbsize);	/* emit the current chaining value as the leading block */
+		for(p += AESbsize; p < ep; p += AESbsize){
+			pp = p;
+			ip = ds->ivec;
+			for(eip = ip + AESbsize; ip < eip; )	/* xor plaintext with the chaining value */
+				*pp++ ^= *ip++;
+			aes_encrypt(ds->ekey, ds->rounds, p, q);
+			memmove(ds->ivec, q, AESbsize);	/* ciphertext chains into the next block (and next call) */
+			memmove(p, q, AESbsize);
+		}
+	}
+	return 1;
+}
+
+static void
+aesctrespinit(Espcb *ecb, char *name, uchar *k, unsigned n)	/* install aesctrcipher on ecb with key k */
+{
+	uchar key[Aesblk], ivec[Aesblk];	/* NOTE(review): key is sized Aesblk but n is clamped to Aeskeysz below -- overflows if Aeskeysz > Aesblk; confirm the constants */
+
+	n = BITS2BYTES(n);	/* from here on n is a byte count */
+	if(n > Aeskeysz)
+		n = Aeskeysz;
+	memset(key, 0, sizeof(key));	/* short keys end up zero padded */
+	memmove(key, k, n);
+	prng(ivec, Aesblk);	/* initial ivec comes from prng */
+	ecb->espalg = name;
+	ecb->espblklen = Aesblk;
+	ecb->espivlen = Aesblk;
+	ecb->cipher = aesctrcipher;
+	ecb->espstate = secalloc(sizeof(AESstate));
+	setupAESstate(ecb->espstate, key, n /* keybytes */, ivec);
+	memset(ivec, 0, sizeof(ivec));	/* wipe the stack copies of ivec and key */
+	memset(key, 0, sizeof(key));
+}
+
+
+/*
+ * md5
+ */
+
+static void
 seanq_hmac_md5(uchar hash[MD5dlen], uchar *t, long tlen, uchar *key, long klen)
 {
-	uchar ipad[65], opad[65];
 	int i;
+	uchar ipad[Hmacblksz+1], opad[Hmacblksz+1], innerhash[MD5dlen];
 	DigestState *digest;
-	uchar innerhash[MD5dlen];
 
-	for(i=0; i<64; i++){
-		ipad[i] = 0x36;
-		opad[i] = 0x5c;
-	}
-	ipad[64] = opad[64] = 0;
-	for(i=0; i<klen; i++){
+	memset(ipad, 0x36, Hmacblksz);
+	memset(opad, 0x5c, Hmacblksz);
+	ipad[Hmacblksz] = opad[Hmacblksz] = 0;
+	for(i = 0; i < klen; i++){
 		ipad[i] ^= key[i];
 		opad[i] ^= key[i];
 	}
-	digest = md5(ipad, 64, nil, nil);
+	digest = md5(ipad, Hmacblksz, nil, nil);
 	md5(t, tlen, innerhash, digest);
-	digest = md5(opad, 64, nil, nil);
+	digest = md5(opad, Hmacblksz, nil, nil);
 	md5(innerhash, MD5dlen, hash, digest);
 }
 
@@ -670,7 +936,7 @@
 	int r;
 
 	memset(hash, 0, MD5dlen);
-	seanq_hmac_md5(hash, t, tlen, (uchar*)ecb->ahstate, 16);
+	seanq_hmac_md5(hash, t, tlen, (uchar*)ecb->ahstate, BITS2BYTES(128));
 	r = memcmp(auth, hash, ecb->ahlen) == 0;
 	memmove(auth, hash, ecb->ahlen);
 	return r;
@@ -677,168 +943,102 @@
 }
 
 static void
-md5ahinit(Espcb *ecb, char *name, uchar *key, int klen)
+md5ahinit(Espcb *ecb, char *name, uchar *key, unsigned klen)
 {
 	if(klen != 128)
 		panic("md5ahinit: bad keylen");
-	klen >>= 3;	// convert to bytes
-
-
+	klen = BITS2BYTES(klen);
 	ecb->ahalg = name;
 	ecb->ahblklen = 1;
-	ecb->ahlen = 12;
+	ecb->ahlen = BITS2BYTES(96);
 	ecb->auth = md5auth;
-	ecb->ahstate = smalloc(klen);
+	ecb->ahstate = secalloc(klen);
 	memmove(ecb->ahstate, key, klen);
 }
 
+
+/*
+ * des, single and triple
+ */
+
 static int
 descipher(Espcb *ecb, uchar *p, int n)
 {
-	uchar tmp[8];
-	uchar *pp, *tp, *ip, *eip, *ep;
 	DESstate *ds = ecb->espstate;
 
-	ep = p + n;
 	if(ecb->incoming) {
-		memmove(ds->ivec, p, 8);
-		p += 8;
-		while(p < ep){
-			memmove(tmp, p, 8);
-			block_cipher(ds->expanded, p, 1);
-			tp = tmp;
-			ip = ds->ivec;
-			for(eip = ip+8; ip < eip; ){
-				*p++ ^= *ip;
-				*ip++ = *tp++;
-			}
-		}
+		memmove(ds->ivec, p, Desblk);
+		desCBCdecrypt(p + Desblk, n - Desblk, ds);
 	} else {
-		memmove(p, ds->ivec, 8);
-		for(p += 8; p < ep; p += 8){
-			pp = p;
-			ip = ds->ivec;
-			for(eip = ip+8; ip < eip; )
-				*pp++ ^= *ip++;
-			block_cipher(ds->expanded, p, 0);
-			memmove(ds->ivec, p, 8);
-		}
+		memmove(p, ds->ivec, Desblk);
+		desCBCencrypt(p + Desblk, n - Desblk, ds);
 	}
 	return 1;
 }
-	
+
+static int
+des3cipher(Espcb *ecb, uchar *p, int n)	/* triple-DES CBC over p[0..n); first Desblk bytes carry the IV; always returns 1 */
+{
+	DES3state *ds = ecb->espstate;
+
+	if(ecb->incoming) {
+		memmove(ds->ivec, p, Desblk);	/* take the IV from the front of the buffer */
+		des3CBCdecrypt(p + Desblk, n - Desblk, ds);	/* then decrypt what follows it */
+	} else {
+		memmove(p, ds->ivec, Desblk);	/* put the current ivec at the front of the buffer */
+		des3CBCencrypt(p + Desblk, n - Desblk, ds);	/* then encrypt what follows it */
+	}
+	return 1;
+}
+
 static void
-desespinit(Espcb *ecb, char *name, uchar *k, int n)
+desespinit(Espcb *ecb, char *name, uchar *k, unsigned n)
 {
-	uchar key[8];
-	uchar ivec[8];
-	int i;
-	
-	// bits to bytes
-	n = (n+7)>>3;
-	if(n > 8)
-		n = 8;
+	uchar key[Desblk], ivec[Desblk];
+
+	n = BITS2BYTES(n);
+	if(n > Desblk)
+		n = Desblk;
 	memset(key, 0, sizeof(key));
 	memmove(key, k, n);
-	for(i=0; i<8; i++)
-		ivec[i] = nrand(256);
+	prng(ivec, Desblk);
 	ecb->espalg = name;
-	ecb->espblklen = 8;
-	ecb->espivlen = 8;
+	ecb->espblklen = Desblk;
+	ecb->espivlen = Desblk;
+
 	ecb->cipher = descipher;
-	ecb->espstate = smalloc(sizeof(DESstate));
+	ecb->espstate = secalloc(sizeof(DESstate));
 	setupDESstate(ecb->espstate, key, ivec);
+	memset(ivec, 0, sizeof(ivec));
+	memset(key, 0, sizeof(key));
 }
 
-static int
-rc4cipher(Espcb *ecb, uchar *p, int n)
+static void
+des3espinit(Espcb *ecb, char *name, uchar *k, unsigned n)
 {
-	Esprc4 *esprc4;
-	RC4state tmpstate;
-	ulong seq;
-	long d, dd;
+	uchar key[3][Desblk], ivec[Desblk];
 
-	if(n < 4)
-		return 0;
+	n = BITS2BYTES(n);
+	if(n > Des3keysz)
+		n = Des3keysz;
+	memset(key, 0, sizeof(key));
+	memmove(key, k, n);
+	prng(ivec, Desblk);
+	ecb->espalg = name;
+	ecb->espblklen = Desblk;
+	ecb->espivlen = Desblk;
 
-	esprc4 = ecb->espstate;
-	if(ecb->incoming) {
-		seq = nhgetl(p);
-		p += 4;
-		n -= 4;
-		d = seq-esprc4->cseq;
-		if(d == 0) {
-			rc4(&esprc4->current, p, n);
-			esprc4->cseq += n;
-			if(esprc4->ovalid) {
-				dd = esprc4->cseq - esprc4->lgseq;
-				if(dd > RC4back)
-					esprc4->ovalid = 0;
-			}
-		} else if(d > 0) {
-print("missing packet: %uld %ld\n", seq, d);
-			// this link is hosed
-			if(d > RC4forward) {
-				strcpy(up->errstr, "rc4cipher: skipped too much");
-				return 0;
-			}
-			esprc4->lgseq = seq;
-			if(!esprc4->ovalid) {
-				esprc4->ovalid = 1;
-				esprc4->oseq = esprc4->cseq;
-				memmove(&esprc4->old, &esprc4->current, sizeof(RC4state));
-			}
-			rc4skip(&esprc4->current, d);
-			rc4(&esprc4->current, p, n);
-			esprc4->cseq = seq+n;
-		} else {
-print("reordered packet: %uld %ld\n", seq, d);
-			dd = seq - esprc4->oseq;
-			if(!esprc4->ovalid || -d > RC4back || dd < 0) {
-				strcpy(up->errstr, "rc4cipher: too far back");
-				return 0;
-			}
-			memmove(&tmpstate, &esprc4->old, sizeof(RC4state));
-			rc4skip(&tmpstate, dd);
-			rc4(&tmpstate, p, n);
-			return 1;
-		}
-
-		// move old state up
-		if(esprc4->ovalid) {
-			dd = esprc4->cseq - RC4back - esprc4->oseq;
-			if(dd > 0) {
-				rc4skip(&esprc4->old, dd);
-				esprc4->oseq += dd;
-			}
-		}
-	} else {
-		hnputl(p, esprc4->cseq);
-		p += 4;
-		n -= 4;
-		rc4(&esprc4->current, p, n);
-		esprc4->cseq += n;
-	}
-	return 1;
+	ecb->cipher = des3cipher;
+	ecb->espstate = secalloc(sizeof(DES3state));
+	setupDES3state(ecb->espstate, key, ivec);
+	memset(ivec, 0, sizeof(ivec));
+	memset(key, 0, sizeof(key));
 }
 
-static void
-rc4espinit(Espcb *ecb, char *name, uchar *k, int n)
-{	
-	Esprc4 *esprc4;
 
-	// bits to bytes
-	n = (n+7)>>3;
-	esprc4 = smalloc(sizeof(Esprc4));
-	memset(esprc4, 0, sizeof(Esprc4));
-	setupRC4state(&esprc4->current, k, n);
-	ecb->espalg = name;
-	ecb->espblklen = 4;
-	ecb->espivlen = 4;
-	ecb->cipher = rc4cipher;
-	ecb->espstate = esprc4;
-}
-	
+/*
+ * interfacing to devip
+ */
 void
 espinit(Fs *fs)
 {
diff -u a/os/ip//ethermedium.c b/os/ip//ethermedium.c
--- a/os/ip//ethermedium.c
+++ b/os/ip//ethermedium.c
@@ -5,9 +5,9 @@
 #include "fns.h"
 #include "../port/error.h"
 
+#include "../port/netif.h"
 #include "ip.h"
 #include "ipv6.h"
-#include "kernel.h"
 
 typedef struct Etherhdr Etherhdr;
 struct Etherhdr
@@ -18,10 +18,10 @@
 };
 
 static uchar ipbroadcast[IPaddrlen] = {
-	0xff,0xff,0xff,0xff,  
-	0xff,0xff,0xff,0xff,  
-	0xff,0xff,0xff,0xff,  
 	0xff,0xff,0xff,0xff,
+	0xff,0xff,0xff,0xff,
+	0xff,0xff,0xff,0xff,
+	0xff,0xff,0xff,0xff,
 };
 
 static uchar etherbroadcast[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
@@ -33,12 +33,12 @@
 static void	etherbwrite(Ipifc *ifc, Block *bp, int version, uchar *ip);
 static void	etheraddmulti(Ipifc *ifc, uchar *a, uchar *ia);
 static void	etherremmulti(Ipifc *ifc, uchar *a, uchar *ia);
+static void	etherareg(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *ip);
 static Block*	multicastarp(Fs *f, Arpent *a, Medium*, uchar *mac);
 static void	sendarp(Ipifc *ifc, Arpent *a);
-static void	sendgarp(Ipifc *ifc, uchar*);
+static void	sendndp(Ipifc *ifc, Arpent *a);
 static int	multicastea(uchar *ea, uchar *ip);
 static void	recvarpproc(void*);
-static void	resolveaddr6(Ipifc *ifc, Arpent *a);
 static void	etherpref2addr(uchar *pref, uchar *ea);
 
 Medium ethermedium =
@@ -53,8 +53,7 @@
 .bwrite=	etherbwrite,
 .addmulti=	etheraddmulti,
 .remmulti=	etherremmulti,
-.ares=		arpenter,
-.areg=		sendgarp,
+.areg=		etherareg,
 .pref2addr=	etherpref2addr,
 };
 
@@ -70,8 +69,7 @@
 .bwrite=	etherbwrite,
 .addmulti=	etheraddmulti,
 .remmulti=	etherremmulti,
-.ares=		arpenter,
-.areg=		sendgarp,
+.areg=		etherareg,
 .pref2addr=	etherpref2addr,
 };
 
@@ -94,9 +92,6 @@
  */
 enum
 {
-	ETARP		= 0x0806,
-	ETIP4		= 0x0800,
-	ETIP6		= 0x86DD,
 	ARPREQUEST	= 1,
 	ARPREPLY	= 2,
 };
@@ -127,128 +122,92 @@
 static void
 etherbind(Ipifc *ifc, int argc, char **argv)
 {
-	Chan *mchan4, *cchan4, *achan, *mchan6, *cchan6;
-	char addr[Maxpath];	//char addr[2*KNAMELEN];
-	char dir[Maxpath];	//char dir[2*KNAMELEN];
-	char *buf;
-	int fd, cfd, n;
-	char *ptr;
+	char addr[Maxpath], dir[Maxpath];
 	Etherrock *er;
+	Chan *c;
+	int n;
 
 	if(argc < 2)
 		error(Ebadarg);
 
-	mchan4 = cchan4 = achan = mchan6 = cchan6 = nil;
-	buf = nil;
+	/*
+	 *  get mac address
+	 */
+	snprint(addr, sizeof(addr), "%s/addr", argv[2]);
+	c = namec(addr, Aopen, OREAD, 0);
 	if(waserror()){
-		if(mchan4 != nil)
-			cclose(mchan4);
-		if(cchan4 != nil)
-			cclose(cchan4);
-		if(achan != nil)
-			cclose(achan);
-		if(mchan6 != nil)
-			cclose(mchan6);
-		if(cchan6 != nil)
-			cclose(cchan6);
-		if(buf != nil)
-			free(buf);
-		nexterror(); 
+		cclose(c);
+		nexterror();
 	}
+	n = devtab[c->type]->read(c, addr, sizeof(addr)-1, 0);
+	if(n < 0)
+		error(Eio);
+	addr[n] = 0;
+	if(parsemac(ifc->mac, addr, sizeof(ifc->mac)) != 6)
+		error("could not find mac address");
+	cclose(c);
+	poperror();
 
+	er = smalloc(sizeof(*er));
+	er->read4p = er->read6p = er->arpp = (void*)-1;
+	er->mchan4 = er->cchan4 = er->mchan6 = er->cchan6 = er->achan = nil;
+	er->f = ifc->conv->p->f;
+
+	if(waserror()){
+		if(er->mchan4 != nil)
+			cclose(er->mchan4);
+		if(er->cchan4 != nil)
+			cclose(er->cchan4);
+		if(er->mchan6 != nil)
+			cclose(er->mchan6);
+		if(er->cchan6 != nil)
+			cclose(er->cchan6);
+		if(er->achan != nil)
+			cclose(er->achan);
+		free(er);
+		nexterror();
+	}
+
 	/*
-	 *  open ip converstation
+	 *  open ipv4 conversation
 	 *
 	 *  the dial will fail if the type is already open on
 	 *  this device.
 	 */
-	snprint(addr, sizeof(addr), "%s!0x800", argv[2]);
-	fd = kdial(addr, nil, dir, &cfd);
-	if(fd < 0)
-		errorf("dial 0x800 failed: %s", up->env->errstr);
-	mchan4 = commonfdtochan(fd, ORDWR, 0, 1);
-	cchan4 = commonfdtochan(cfd, ORDWR, 0, 1);
-	kclose(fd);
-	kclose(cfd);
+	snprint(addr, sizeof(addr), "%s!0x800", argv[2]);	/* ETIP4 */
+	er->mchan4 = chandial(addr, nil, dir, &er->cchan4);
 
 	/*
 	 *  make it non-blocking
 	 */
-	devtab[cchan4->type]->write(cchan4, nbmsg, strlen(nbmsg), 0);
+	devtab[er->cchan4->type]->write(er->cchan4, nbmsg, strlen(nbmsg), 0);
 
 	/*
-	 *  get mac address and speed
-	 */
-	snprint(addr, sizeof(addr), "%s/stats", dir);
-	fd = kopen(addr, OREAD);
-	if(fd < 0)
-		errorf("can't open ether stats: %s", up->env->errstr);
-
-	buf = smalloc(512);
-	n = kread(fd, buf, 511);
-	kclose(fd);
-	if(n <= 0)
-		error(Eio);
-	buf[n] = 0;
-
-	ptr = strstr(buf, "addr: ");
-	if(!ptr)
-		error(Eio);
-	ptr += 6;
-	parsemac(ifc->mac, ptr, 6);
-
-	ptr = strstr(buf, "mbps: ");
-	if(ptr){
-		ptr += 6;
-		ifc->mbps = atoi(ptr);
-	} else
-		ifc->mbps = 100;
-
-	/*
- 	 *  open arp conversation
-	 */
-	snprint(addr, sizeof(addr), "%s!0x806", argv[2]);
-	fd = kdial(addr, nil, nil, nil);
-	if(fd < 0)
-		errorf("dial 0x806 failed: %s", up->env->errstr);
-	achan = commonfdtochan(fd, ORDWR, 0, 1);
-	kclose(fd);
-
-	/*
-	 *  open ip conversation
+	 *  open ipv6 conversation
 	 *
 	 *  the dial will fail if the type is already open on
 	 *  this device.
 	 */
-	snprint(addr, sizeof(addr), "%s!0x86DD", argv[2]);
-	fd = kdial(addr, nil, dir, &cfd);
-	if(fd < 0)
-		errorf("dial 0x86DD failed: %s", up->env->errstr);
-	mchan6 = commonfdtochan(fd, ORDWR, 0, 1);
-	cchan6 = commonfdtochan(cfd, ORDWR, 0, 1);
-	kclose(fd);
-	kclose(cfd);
+	snprint(addr, sizeof(addr), "%s!0x86DD", argv[2]);	/* ETIP6 */
+	er->mchan6 = chandial(addr, nil, dir, &er->cchan6);
 
 	/*
 	 *  make it non-blocking
 	 */
-	devtab[cchan6->type]->write(cchan6, nbmsg, strlen(nbmsg), 0);
+	devtab[er->cchan6->type]->write(er->cchan6, nbmsg, strlen(nbmsg), 0);
 
-	er = smalloc(sizeof(*er));
-	er->mchan4 = mchan4;
-	er->cchan4 = cchan4;
-	er->achan = achan;
-	er->mchan6 = mchan6;
-	er->cchan6 = cchan6;
-	er->f = ifc->conv->p->f;
-	ifc->arg = er;
-
-	free(buf);
+	/*
+ 	 *  open arp conversation
+	 */
+	snprint(addr, sizeof(addr), "%s!0x806", argv[2]);	/* ETARP */
+	er->achan = chandial(addr, nil, nil, nil);
 	poperror();
 
-	kproc("etherread4", etherread4, ifc, 0);
-	kproc("recvarpproc", recvarpproc, ifc, 0);
-	kproc("etherread6", etherread6, ifc, 0);
+	ifc->arg = er;
+
+	kproc("etherread4", etherread4, ifc);
+	kproc("etherread6", etherread6, ifc);
+	kproc("recvarpproc", recvarpproc, ifc);
 }
 
 /*
@@ -259,21 +218,35 @@
 {
 	Etherrock *er = ifc->arg;
 
-	if(er->read4p)
+	while(waserror())
+		;
+
+	/* wait for readers to start */
+	while(er->arpp == (void*)-1 || er->read4p == (void*)-1 || er->read6p == (void*)-1)
+		tsleep(&up->sleep, return0, 0, 300);
+
+	if(er->read4p != nil)
 		postnote(er->read4p, 1, "unbind", 0);
-	if(er->read6p)
+	if(er->read6p != nil)
 		postnote(er->read6p, 1, "unbind", 0);
-	if(er->arpp)
+	if(er->arpp != nil)
 		postnote(er->arpp, 1, "unbind", 0);
 
+	poperror();
+
+	wunlock(ifc);
+	while(waserror())
+		;
+
 	/* wait for readers to die */
-	while(er->arpp != 0 || er->read4p != 0 || er->read6p != 0)
+	while(er->arpp != nil || er->read4p != nil || er->read6p != nil)
 		tsleep(&up->sleep, return0, 0, 300);
 
+	poperror();
+	wlock(ifc);
+
 	if(er->mchan4 != nil)
 		cclose(er->mchan4);
-	if(er->achan != nil)
-		cclose(er->achan);
 	if(er->cchan4 != nil)
 		cclose(er->cchan4);
 	if(er->mchan6 != nil)
@@ -280,6 +253,8 @@
 		cclose(er->mchan6);
 	if(er->cchan6 != nil)
 		cclose(er->cchan6);
+	if(er->achan != nil)
+		cclose(er->achan);
 
 	free(er);
 }
@@ -297,16 +272,16 @@
 
 	/* get mac address of destination */
 	a = arpget(er->f->arp, bp, version, ifc, ip, mac);
-	if(a){
+	if(a != nil){
 		/* check for broadcast or multicast */
 		bp = multicastarp(er->f, a, ifc->m, mac);
-		if(bp==nil){
+		if(bp == nil){
 			switch(version){
 			case V4:
 				sendarp(ifc, a);
 				break;
-			case V6: 
-				resolveaddr6(ifc, a);
+			case V6:
+				sendndp(ifc, a);
 				break;
 			default:
 				panic("etherbwrite: version %d", version);
@@ -317,8 +292,6 @@
 
 	/* make it a single block with space for the ether header */
 	bp = padblock(bp, ifc->m->hsize);
-	if(bp->next)
-		bp = concatblock(bp);
 	if(BLEN(bp) < ifc->mintu)
 		bp = adjustblock(bp, ifc->mintu);
 	eh = (Etherhdr*)bp->rp;
@@ -358,29 +331,28 @@
 	ifc = a;
 	er = ifc->arg;
 	er->read4p = up;	/* hide identity under a rock for unbind */
-	if(waserror()){
-		er->read4p = 0;
-		pexit("hangup", 1);
-	}
+	if(!waserror())
 	for(;;){
 		bp = devtab[er->mchan4->type]->bread(er->mchan4, ifc->maxtu, 0);
-		if(!canrlock(ifc)){
-			freeb(bp);
-			continue;
-		}
+		if(bp == nil)
+			break;
+		rlock(ifc);
 		if(waserror()){
 			runlock(ifc);
 			nexterror();
 		}
 		ifc->in++;
-		bp->rp += ifc->m->hsize;
-		if(ifc->lifc == nil)
+		if(ifc->lifc == nil || BLEN(bp) <= ifc->m->hsize)
 			freeb(bp);
-		else
+		else {
+			bp->rp += ifc->m->hsize;
 			ipiput4(er->f, ifc, bp);
+		}
 		runlock(ifc);
 		poperror();
 	}
+	er->read4p = nil;
+	pexit("hangup", 1);
 }
 
 
@@ -397,29 +369,28 @@
 	ifc = a;
 	er = ifc->arg;
 	er->read6p = up;	/* hide identity under a rock for unbind */
-	if(waserror()){
-		er->read6p = 0;
-		pexit("hangup", 1);
-	}
+	if(!waserror())
 	for(;;){
 		bp = devtab[er->mchan6->type]->bread(er->mchan6, ifc->maxtu, 0);
-		if(!canrlock(ifc)){
-			freeb(bp);
-			continue;
-		}
+		if(bp == nil)
+			break;
+		rlock(ifc);
 		if(waserror()){
 			runlock(ifc);
 			nexterror();
 		}
 		ifc->in++;
-		bp->rp += ifc->m->hsize;
-		if(ifc->lifc == nil)
+		if(ifc->lifc == nil || BLEN(bp) <= ifc->m->hsize)
 			freeb(bp);
-		else
+		else {
+			bp->rp += ifc->m->hsize;
 			ipiput6(er->f, ifc, bp);
+		}
 		runlock(ifc);
 		poperror();
 	}
+	er->read6p = nil;
+	pexit("hangup", 1);
 }
 
 static void
@@ -477,6 +448,7 @@
 	Block *bp;
 	Etherarp *e;
 	Etherrock *er = ifc->arg;
+	uchar targ[IPv4addrlen], src[IPv4addrlen];
 
 	/* don't do anything if it's been less than a second since the last */
 	if(NOW - a->ctime < 1000){
@@ -484,6 +456,9 @@
 		return;
 	}
 
+	/* try to keep it around for a second more */
+	a->ctime = NOW;
+
 	/* remove all but the last message */
 	while((bp = a->hold) != nil){
 		if(bp == a->last)
@@ -492,18 +467,20 @@
 		freeblist(bp);
 	}
 
-	/* try to keep it around for a second more */
-	a->ctime = NOW;
+	memmove(targ, a->ip+IPv4off, IPv4addrlen);
 	arprelease(er->f->arp, a);
 
+	if(!ipv4local(ifc, src, 0, targ))
+		return;
+
 	n = sizeof(Etherarp);
-	if(n < a->type->mintu)
-		n = a->type->mintu;
+	if(n < ifc->m->mintu)
+		n = ifc->m->mintu;
 	bp = allocb(n);
 	memset(bp->rp, 0, n);
 	e = (Etherarp*)bp->rp;
-	memmove(e->tpa, a->ip+IPv4off, sizeof(e->tpa));
-	ipv4local(ifc, e->spa);
+	memmove(e->tpa, targ, sizeof(e->tpa));
+	memmove(e->spa, src, sizeof(e->spa));
 	memmove(e->sha, ifc->mac, sizeof(e->sha));
 	memset(e->d, 0xff, sizeof(e->d));		/* ethernet broadcast */
 	memmove(e->s, ifc->mac, sizeof(e->s));
@@ -516,18 +493,14 @@
 	hnputs(e->op, ARPREQUEST);
 	bp->wp += n;
 
-	n = devtab[er->achan->type]->bwrite(er->achan, bp, 0);
-	if(n < 0)
-		print("arp: send: %r\n");
+	devtab[er->achan->type]->bwrite(er->achan, bp, 0);
 }
 
 static void
-resolveaddr6(Ipifc *ifc, Arpent *a)
+sendndp(Ipifc *ifc, Arpent *a)
 {
-	int sflag;
 	Block *bp;
 	Etherrock *er = ifc->arg;
-	uchar ipsrc[IPaddrlen];
 
 	/* don't do anything if it's been less than a second since the last */
 	if(NOW - a->ctime < ReTransTimer){
@@ -543,19 +516,7 @@
 		freeblist(bp);
 	}
 
-	/* try to keep it around for a second more */
-	a->ctime = NOW;
-	a->rtime = NOW + ReTransTimer;
-	if(a->rxtsrem <= 0) {
-		arprelease(er->f->arp, a);
-		return;
-	}
-
-	a->rxtsrem--;
-	arprelease(er->f->arp, a);
-
-	if(sflag = ipv6anylocal(ifc, ipsrc)) 
-		icmpns(er->f, ipsrc, sflag, a->ip, TARG_MULTI, ifc->mac);
+	ndpsendsol(er->f, ifc, a);	/* unlocks arp */
 }
 
 /*
@@ -569,10 +530,6 @@
 	Etherarp *e;
 	Etherrock *er = ifc->arg;
 
-	/* don't arp for our initial non address */
-	if(ipcmp(ip, IPnoaddr) == 0)
-		return;
-
 	n = sizeof(Etherarp);
 	if(n < ifc->m->mintu)
 		n = ifc->m->mintu;
@@ -593,15 +550,13 @@
 	hnputs(e->op, ARPREQUEST);
 	bp->wp += n;
 
-	n = devtab[er->achan->type]->bwrite(er->achan, bp, 0);
-	if(n < 0)
-		print("garp: send: %r\n");
+	devtab[er->achan->type]->bwrite(er->achan, bp, 0);
 }
 
 static void
 recvarp(Ipifc *ifc)
 {
-	int n;
+	int n, forme;
 	Block *ebp, *rbp;
 	Etherarp *e, *r;
 	uchar ip[IPaddrlen];
@@ -609,11 +564,11 @@
 	Etherrock *er = ifc->arg;
 
 	ebp = devtab[er->achan->type]->bread(er->achan, ifc->maxtu, 0);
-	if(ebp == nil) {
-		print("arp: rcv: %r\n");
+	if(ebp == nil)
 		return;
-	}
 
+	rlock(ifc);
+
 	e = (Etherarp*)ebp->rp;
 	switch(nhgets(e->op)) {
 	default:
@@ -620,9 +575,13 @@
 		break;
 
 	case ARPREPLY:
+		/* make sure not to enter multi/broadcast address */
+		if(e->sha[0] & 1)
+			break;
+
 		/* check for machine using my ip address */
 		v4tov6(ip, e->spa);
-		if(iplocalonifc(ifc, ip) || ipproxyifc(er->f, ifc, ip)){
+		if(iplocalonifc(ifc, ip) != nil || ipproxyifc(er->f, ifc, ip)){
 			if(memcmp(e->sha, ifc->mac, sizeof(e->sha)) != 0){
 				print("arprep: 0x%E/0x%E also has ip addr %V\n",
 					e->s, e->sha, e->spa);
@@ -630,46 +589,47 @@
 			}
 		}
 
-		/* make sure we're not entering broadcast addresses */
-		if(ipcmp(ip, ipbroadcast) == 0 ||
-			!memcmp(e->sha, etherbroadcast, sizeof(e->sha))){
-			print("arprep: 0x%E/0x%E cannot register broadcast address %I\n",
-				e->s, e->sha, e->spa);
-			break;
-		}
-
-		arpenter(er->f, V4, e->spa, e->sha, sizeof(e->sha), 0);
+		/* refresh what we know about sender */
+		arpenter(er->f, V4, e->spa, e->sha, sizeof(e->sha), e->tpa, ifc, 1);
 		break;
 
 	case ARPREQUEST:
+		/* don't reply to multi/broadcast addresses */
+		if(e->sha[0] & 1)
+			break;
+
 		/* don't answer arps till we know who we are */
-		if(ifc->lifc == 0)
+		if(ifc->lifc == nil)
 			break;
 
 		/* check for machine using my ip or ether address */
 		v4tov6(ip, e->spa);
-		if(iplocalonifc(ifc, ip) || ipproxyifc(er->f, ifc, ip)){
+		if(iplocalonifc(ifc, ip) != nil || ipproxyifc(er->f, ifc, ip)){
 			if(memcmp(e->sha, ifc->mac, sizeof(e->sha)) != 0){
-				if (memcmp(eprinted, e->spa, sizeof(e->spa))){
+				if(memcmp(eprinted, e->spa, sizeof(e->spa)) != 0){
 					/* print only once */
-					print("arpreq: 0x%E also has ip addr %V\n", e->sha, e->spa);
+					print("arpreq: 0x%E also has ip addr %V\n",
+						e->sha, e->spa);
 					memmove(eprinted, e->spa, sizeof(e->spa));
 				}
+				break;
 			}
 		} else {
 			if(memcmp(e->sha, ifc->mac, sizeof(e->sha)) == 0){
-				print("arpreq: %V also has ether addr %E\n", e->spa, e->sha);
+				print("arpreq: %V also has ether addr %E\n",
+					e->spa, e->sha);
 				break;
 			}
 		}
 
-		/* refresh what we know about sender */
-		arpenter(er->f, V4, e->spa, e->sha, sizeof(e->sha), 1);
-
-		/* answer only requests for our address or systems we're proxying for */
+		/*
+		 * when request is for our address or systems we're proxying for,
+		 * enter sender's address into arp table and reply, otherwise just
+		 * refresh the sender's address.
+		 */
 		v4tov6(ip, e->tpa);
-		if(!iplocalonifc(ifc, ip))
-		if(!ipproxyifc(er->f, ifc, ip))
+		forme = iplocalonifc(ifc, ip) != nil || ipproxyifc(er->f, ifc, ip);
+		if(arpenter(er->f, V4, e->spa, e->sha, sizeof(e->sha), e->tpa, ifc, !forme) < 0 || !forme)
 			break;
 
 		n = sizeof(Etherarp);
@@ -692,10 +652,14 @@
 		memmove(r->s, ifc->mac, sizeof(r->s));
 		rbp->wp += n;
 
-		n = devtab[er->achan->type]->bwrite(er->achan, rbp, 0);
-		if(n < 0)
-			print("arp: write: %r\n");
+		runlock(ifc);
+		freeb(ebp);
+
+		devtab[er->achan->type]->bwrite(er->achan, rbp, 0);
+		return;
 	}
+
+	runlock(ifc);
 	freeb(ebp);
 }
 
@@ -707,7 +671,7 @@
 
 	er->arpp = up;
 	if(waserror()){
-		er->arpp = 0;
+		er->arpp = nil;
 		pexit("hangup", 1);
 	}
 	for(;;)
@@ -749,14 +713,9 @@
 multicastarp(Fs *f, Arpent *a, Medium *medium, uchar *mac)
 {
 	/* is it broadcast? */
-	switch(ipforme(f, a->ip)){
-	case Runi:
-		return nil;
-	case Rbcast:
-		memset(mac, 0xff, 6);
+	if(ipforme(f, a->ip) == Rbcast){
+		memset(mac, 0xff, medium->maclen);
 		return arpresolve(f->arp, a, medium, mac);
-	default:
-		break;
 	}
 
 	/* if multicast, fill in mac */
@@ -778,11 +737,11 @@
 }
 
 
-static void 
+static void
 etherpref2addr(uchar *pref, uchar *ea)
 {
-	pref[8]  = ea[0] | 0x2;
-	pref[9]  = ea[1];
+	pref[8] = ea[0] ^ 0x2;
+	pref[9] = ea[1];
 	pref[10] = ea[2];
 	pref[11] = 0xFF;
 	pref[12] = 0xFE;
@@ -789,4 +748,41 @@
 	pref[13] = ea[3];
 	pref[14] = ea[4];
 	pref[15] = ea[5];
+}
+
+static void
+etherareg(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *ip)	/* .areg hook: gratuitous arp for v4, icmpna or icmpns for v6 */
+{
+	static char tdad[] = "dad6";	/* tag passed to addroute/remroute for the temporary route */
+	uchar a[IPaddrlen];
+
+	if(ipcmp(ip, IPnoaddr) == 0 || ipcmp(ip, v4prefix) == 0)	/* nothing to register for these two addresses */
+		return;
+
+	if(isv4(ip)){
+		sendgarp(ifc, ip);
+		return;
+	}
+
+	if((lifc->type&Rv4) != 0)	/* ip is v6 here; skip when the lifc is flagged Rv4 */
+		return;
+
+	if(!lifc->tentative){
+		icmpna(f, lifc->local, v6allnodesL, ip, ifc->mac, 1<<5);	/* NOTE(review): 1<<5 is presumably the NA override flag -- confirm */
+		return;
+	}
+
+	if(ipcmp(lifc->local, ip) != 0)	/* tentative lifc: only continue for its own local address */
+		return;
+
+	/* temporarily add route for duplicate address detection */
+	ipv62smcast(a, ip);	/* a is derived from ip; presumably its solicited-node multicast address -- confirm */
+	addroute(f, a, IPallbits, v6Unspecified, IPallbits, ip, Rmulti, ifc, tdad);
+	if(waserror()){
+		remroute(f, a, IPallbits, v6Unspecified, IPallbits, ip, Rmulti, ifc, tdad);	/* don't leave the temporary route behind on error */
+		nexterror();
+	}
+	icmpns(f, 0, SRC_UNSPEC, ip, TARG_MULTI, ifc->mac);
+	poperror();
+	remroute(f, a, IPallbits, v6Unspecified, IPallbits, ip, Rmulti, ifc, tdad);
+}
diff -u a/os/ip//gre.c b/os/ip//gre.c
--- a/os/ip//gre.c
+++ b/os/ip//gre.c
@@ -1,3 +1,6 @@
+/*
+ * Generic Routing Encapsulation over IPv4, rfc1702
+ */
 #include "u.h"
 #include "../port/lib.h"
 #include "mem.h"
@@ -7,10 +10,7 @@
 
 #include "ip.h"
 
-#define DPRINT if(0)print
-
-enum
-{
+enum {
 	GRE_IPONLY	= 12,		/* size of ip header */
 	GRE_IPPLUSGRE	= 12,		/* minimum size of GRE header */
 	IP_GREPROTO	= 47,
@@ -18,10 +18,33 @@
 	GRErxms		= 200,
 	GREtickms	= 100,
 	GREmaxxmit	= 10,
+
+	K		= 1024,
+	GREqlen		= 256 * K,
+
+	GRE_cksum	= 0x8000,
+	GRE_routing	= 0x4000,
+	GRE_key		= 0x2000,
+	GRE_seq		= 0x1000,
+
+	Nring		= 1 << 10,	/* power of two, please */
+	Ringmask	= Nring - 1,
+
+	GREctlraw	= 0,
+	GREctlcooked,
+	GREctlretunnel,
+	GREctlreport,
+	GREctldlsuspend,
+	GREctlulsuspend,
+	GREctldlresume,
+	GREctlulresume,
+	GREctlforward,
+	GREctlulkey,
+	Ncmds,
 };
 
-typedef struct GREhdr
-{
+typedef struct GREhdr GREhdr;
+struct GREhdr{
 	/* ip header */
 	uchar	vihl;		/* Version and header length */
 	uchar	tos;		/* Type of service */
@@ -28,7 +51,7 @@
 	uchar	len[2];		/* packet length (including headers) */
 	uchar	id[2];		/* Identification */
 	uchar	frag[2];	/* Fragment information */
-	uchar	Unused;	
+	uchar	ttl;
 	uchar	proto;		/* Protocol */
 	uchar	cksum[2];	/* checksum */
 	uchar	src[4];		/* Ip source */
@@ -37,21 +60,115 @@
 	/* gre header */
 	uchar	flags[2];
 	uchar	eproto[2];	/* encapsulation protocol */
-} GREhdr;
+};
 
 typedef struct GREpriv GREpriv;
-struct GREpriv
-{
-	int		raw;			/* Raw GRE mode */
-
+struct GREpriv{
 	/* non-MIB stats */
-	ulong		csumerr;		/* checksum errors */
-	ulong		lenerr;			/* short packet */
+	uvlong	lenerr;			/* short packet */
 };
 
+typedef struct Bring	Bring;
+struct Bring{
+	Block	*ring[Nring];
+	long	produced;
+	long	consumed;
+};
+
+typedef struct GREconv	GREconv;
+struct GREconv{
+	int	raw;
+
+	/* Retunnelling information.  v4 only */
+	uchar	north[4];			/* HA */
+	uchar	south[4];			/* Base station */
+	uchar	hoa[4];				/* Home address */
+	uchar	coa[4];				/* Careof address */
+	ulong	seq;				/* Current sequence # */
+	int	dlsusp;				/* Downlink suspended? */
+	int	ulsusp;				/* Uplink suspended? */
+	ulong	ulkey;				/* GRE key */
+
+	QLock	lock;				/* Lock for rings */
+	Bring	dlpending;			/* Ring of pending packets */
+	Bring	dlbuffered;			/* Received while suspended */
+	Bring	ulbuffered;			/* Received while suspended */
+};
+
+typedef struct Metablock Metablock;
+struct Metablock{
+	uchar	*rp;
+	ulong	seq;
+};
+
+static char *grectlcooked(Conv *, int, char **);
+static char *grectldlresume(Conv *, int, char **);
+static char *grectldlsuspend(Conv *, int, char **);
+static char *grectlforward(Conv *, int, char **);
+static char *grectlraw(Conv *, int, char **);
+static char *grectlreport(Conv *, int, char **);
+static char *grectlretunnel(Conv *, int, char **);
+static char *grectlulkey(Conv *, int, char **);
+static char *grectlulresume(Conv *, int, char **);
+static char *grectlulsuspend(Conv *, int, char **);
+
+static struct{
+	char	*cmd;
+	int	argc;
+	char	*(*f)(Conv *, int, char **);
+} grectls[Ncmds] = {
+[GREctlraw]	=	{	"raw",		1,	grectlraw,	},
+[GREctlcooked]	=	{	"cooked",	1,	grectlcooked,	},
+[GREctlretunnel]=	{	"retunnel",	5,	grectlretunnel,	},
+[GREctlreport]	=	{	"report",	2,	grectlreport,	},
+[GREctldlsuspend]=	{	"dlsuspend",	1,	grectldlsuspend,},
+[GREctlulsuspend]=	{	"ulsuspend",	1,	grectlulsuspend,},
+[GREctldlresume]=	{	"dlresume",	1,	grectldlresume,	},
+[GREctlulresume]=	{	"ulresume",	1,	grectlulresume,	},
+[GREctlforward]	=	{	"forward",	2,	grectlforward,	},
+[GREctlulkey]	=	{	"ulkey",	2,	grectlulkey,	},
+};
+
+static uchar nulladdr[4];
+static char *sessend = "session end";
+
 static void grekick(void *x, Block *bp);
+static char *gresetup(Conv *, char *, char *, char *);
 
-static char*
+uvlong grepdin, grepdout, grebdin, grebdout;
+uvlong grepuin, grepuout, grebuin, grebuout;
+
+static Block *
+getring(Bring *r)	/* take the block at the consumed index out of the ring; nil when the ring is empty */
+{
+	Block *bp;
+
+	if(r->consumed == r->produced)	/* nothing outstanding */
+		return nil;
+
+	bp = r->ring[r->consumed & Ringmask];	/* counters run free; the mask maps them onto a slot */
+	r->ring[r->consumed & Ringmask] = nil;
+	r->consumed++;
+	return bp;
+}
+
+static void
+addring(Bring *r, Block *bp)	/* store bp at the produced index; when the ring is full the occupant of that slot is freed first */
+{
+	Block *tbp;
+
+	if(r->produced - r->consumed > Ringmask){
+		/* Full! */
+		tbp = r->ring[r->produced & Ringmask];	/* block sitting in the slot we are about to reuse */
+		assert(tbp);
+		freeb(tbp);
+		r->consumed++;	/* account for the block just dropped */
+	}
+	r->ring[r->produced & Ringmask] = bp;
+	r->produced++;
+}
+
+static char *
 greconnect(Conv *c, char **argv, int argc)
 {
 	Proto *p;
@@ -91,7 +208,7 @@
 static void
 grecreate(Conv *c)
 {
-	c->rq = qopen(64*1024, Qmsg, 0, c);
+	c->rq = qopen(GREqlen, Qmsg, 0, c);
 	c->wq = qbypass(grekick, c);
 }
 
@@ -98,44 +215,88 @@
 static int
 grestate(Conv *c, char *state, int n)
 {
-	USED(c);
-	return snprint(state, n, "%s", "Datagram");
+	GREconv *grec;
+	char *ep, *p;
+
+	grec = c->ptcl;
+	p    = state;
+	ep   = p + n;
+	p    = seprint(p, ep, "%s%s%s%shoa %V north %V south %V seq %ulx "
+	 "pending %uld  %uld buffered dl %uld %uld ul %uld %uld ulkey %.8ulx\n",
+			c->inuse? "Open ": "Closed ",
+			grec->raw? "raw ": "",
+			grec->dlsusp? "DL suspended ": "",
+			grec->ulsusp? "UL suspended ": "",
+			grec->hoa, grec->north, grec->south, grec->seq,
+			grec->dlpending.consumed, grec->dlpending.produced,
+			grec->dlbuffered.consumed, grec->dlbuffered.produced,
+			grec->ulbuffered.consumed, grec->ulbuffered.produced,
+			grec->ulkey);
+	return p - state;
 }
 
 static char*
 greannounce(Conv*, char**, int)
 {
-	return "pktifc does not support announce";
+	return "gre does not support announce";
 }
 
 static void
 greclose(Conv *c)
 {
-	qclose(c->rq);
-	qclose(c->wq);
-	qclose(c->eq);
+	GREconv *grec;
+	Block *bp;
+
+	grec = c->ptcl;
+
+	/* Make sure we don't forward any more packets */
+	memset(grec->hoa, 0, sizeof grec->hoa);
+	memset(grec->north, 0, sizeof grec->north);
+	memset(grec->south, 0, sizeof grec->south);
+
+	qlock(&grec->lock);
+	while((bp = getring(&grec->dlpending)) != nil)
+		freeb(bp);
+
+	while((bp = getring(&grec->dlbuffered)) != nil)
+		freeb(bp);
+
+	while((bp = getring(&grec->ulbuffered)) != nil)
+		freeb(bp);
+
+	grec->dlpending.produced = grec->dlpending.consumed = 0;
+	grec->dlbuffered.produced = grec->dlbuffered.consumed = 0;
+	grec->ulbuffered.produced = grec->ulbuffered.consumed = 0;
+	qunlock(&grec->lock);
+
+	grec->raw = 0;
+	grec->seq = 0;
+	grec->dlsusp = grec->ulsusp = 1;
+
+	qhangup(c->rq, sessend);
+	qhangup(c->wq, sessend);
+	qhangup(c->eq, sessend);
 	ipmove(c->laddr, IPnoaddr);
 	ipmove(c->raddr, IPnoaddr);
-	c->lport = 0;
-	c->rport = 0;
+	c->lport = c->rport = 0;
 }
 
-int drop;
-
 static void
 grekick(void *x, Block *bp)
 {
-	Conv *c = x;
-	GREhdr *ghp;
+	Conv *c;
+	GREconv *grec;
+	GREhdr *gre;
 	uchar laddr[IPaddrlen], raddr[IPaddrlen];
 
 	if(bp == nil)
 		return;
 
+	c    = x;
+	grec = c->ptcl;
+
 	/* Make space to fit ip header (gre header already there) */
 	bp = padblock(bp, GRE_IPONLY);
-	if(bp == nil)
-		return;
 
 	/* make sure the message has a GRE header */
 	bp = pullupblock(bp, GRE_IPONLY+GRE_IPPLUSGRE);
@@ -142,90 +303,344 @@
 	if(bp == nil)
 		return;
 
-	ghp = (GREhdr *)(bp->rp);
-	ghp->vihl = IP_VER4;
+	gre = (GREhdr *)bp->rp;
+	gre->vihl = IP_VER4;
 
-	if(!((GREpriv*)c->p->priv)->raw){
-		v4tov6(raddr, ghp->dst);
+	if(grec->raw == 0){
+		v4tov6(raddr, gre->dst);
 		if(ipcmp(raddr, v4prefix) == 0)
-			memmove(ghp->dst, c->raddr + IPv4off, IPv4addrlen);
-		v4tov6(laddr, ghp->src);
+			memmove(gre->dst, c->raddr + IPv4off, IPv4addrlen);
+		v4tov6(laddr, gre->src);
 		if(ipcmp(laddr, v4prefix) == 0){
 			if(ipcmp(c->laddr, IPnoaddr) == 0)
-				findlocalip(c->p->f, c->laddr, raddr); /* pick interface closest to dest */
-			memmove(ghp->src, c->laddr + IPv4off, IPv4addrlen);
+				/* pick interface closest to dest */
+				findlocalip(c->p->f, c->laddr, raddr);
+			memmove(gre->src, c->laddr + IPv4off, sizeof gre->src);
 		}
-		hnputs(ghp->eproto, c->rport);
+		hnputs(gre->eproto, c->rport);
 	}
 
-	ghp->proto = IP_GREPROTO;
-	ghp->frag[0] = 0;
-	ghp->frag[1] = 0;
+	gre->proto = IP_GREPROTO;
+	gre->frag[0] = gre->frag[1] = 0;
 
+	grepdout++;
+	grebdout += BLEN(bp);
 	ipoput4(c->p->f, bp, 0, c->ttl, c->tos, nil);
 }
 
 static void
-greiput(Proto *gre, Ipifc*, Block *bp)
+gredownlink(Conv *c, Block *bp)
 {
-	int len;
-	GREhdr *ghp;
-	Conv *c, **p;
-	ushort eproto;
+	Metablock *m;
+	GREconv *grec;
+	GREhdr *gre;
+	int hdrlen, suspended, extra;
+	ushort flags;
+	ulong seq;
+
+	gre = (GREhdr *)bp->rp;
+	if(gre->ttl == 1){
+		freeb(bp);
+		return;
+	}
+
+	/*
+	 * We've received a packet with a GRE header and we need to
+	 * re-adjust the packet header to strip all unwanted parts
+	 * but leave room for only a sequence number.
+	 */
+	grec   = c->ptcl;
+	flags  = nhgets(gre->flags);
+	hdrlen = 0;
+	if(flags & GRE_cksum)
+		hdrlen += 2;
+	if(flags & GRE_routing){
+		print("%V routing info present.  Discarding packet", gre->src);
+		freeb(bp);
+		return;
+	}
+	if(flags & (GRE_cksum|GRE_routing))
+		hdrlen += 2;			/* Offset field */
+	if(flags & GRE_key)
+		hdrlen += 4;
+	if(flags & GRE_seq)
+		hdrlen += 4;
+
+	/*
+	 * The outgoing packet only has the sequence number set.  Make room
+	 * for the sequence number.
+	 */
+	if(hdrlen != sizeof(ulong)){
+		extra = hdrlen - sizeof(ulong);
+		if(extra < 0 && bp->rp - bp->base < -extra){
+			print("gredownlink: cannot add sequence number\n");
+			freeb(bp);
+			return;
+		}
+		memmove(bp->rp + extra, bp->rp, sizeof(GREhdr));
+		bp->rp += extra;
+		assert(BLEN(bp) >= sizeof(GREhdr) + sizeof(ulong));
+		gre = (GREhdr *)bp->rp;
+	}
+	seq = grec->seq++;
+	hnputs(gre->flags, GRE_seq);
+	hnputl(bp->rp + sizeof(GREhdr), seq);
+
+	/*
+	 * Keep rp and seq at the base.  ipoput4 consumes rp for
+	 * refragmentation.
+	 */
+	assert(bp->rp - bp->base >= sizeof(Metablock));
+	m = (Metablock *)bp->base;
+	m->rp  = bp->rp;
+	m->seq = seq;
+
+	/*
+	 * Here we make a decision what we're doing with the packet.  We're
+	 * doing this w/o holding a lock which means that later on in the
+	 * process we may discover we've done the wrong thing.  I don't want
+	 * to call ipoput with the lock held.
+	 */
+restart:
+	suspended = grec->dlsusp;
+	if(suspended){
+		if(!canqlock(&grec->lock)){
+			/*
+			 * just give up.  too bad, we lose a packet.  this
+			 * is just too hard and my brain already hurts.
+			 */
+			freeb(bp);
+			return;
+		}
+
+		if(!grec->dlsusp){
+			/*
+			 * suspend race.  We thought we were suspended, but
+			 * we really weren't.
+			 */
+			qunlock(&grec->lock);
+			goto restart;
+		}
+
+		/* Still suspended: buffer the packet on the dlbuffered ring */
+		addring(&grec->dlbuffered, bp);
+		qunlock(&grec->lock);
+		return;
+	}
+
+	/*
+	 * When we get here, we're not suspended.  Proceed to send the
+	 * packet.
+	 */
+	memmove(gre->src, grec->coa, sizeof gre->dst);
+	memmove(gre->dst, grec->south, sizeof gre->dst);
+
+	ipoput4(c->p->f, copyblock(bp, BLEN(bp)), 0, gre->ttl - 1, gre->tos, nil);
+	grepdout++;
+	grebdout += BLEN(bp);
+
+	/*
+	 * Now make sure we didn't do the wrong thing.
+	 */
+	if(!canqlock(&grec->lock)){
+		freeb(bp);		/* The packet just goes away */
+		return;
+	}
+
+	/* We did the right thing */
+	addring(&grec->dlpending, bp);
+	qunlock(&grec->lock);
+}
+
+/* Send an uplink packet north, keyed if ulkey is set; buffer it while ulsusp.
+ * Takes ownership of bp: every path frees, queues, or transmits it. */
+static void
+greuplink(Conv *c, Block *bp)
+{
+	GREconv *grec;
+	GREhdr *gre;
+	ushort flags;
+
+	gre = (GREhdr *)bp->rp;
+	if(gre->ttl == 1){
+		freeb(bp);	/* drop: forwarding would send it with ttl 0 */
+		return;
+	}
+
+	grec = c->ptcl;
+	memmove(gre->src, grec->coa, sizeof gre->src);
+	memmove(gre->dst, grec->north, sizeof gre->dst);
+
+	/* Add a key, if needed. */
+	if(grec->ulkey){
+		flags = nhgets(gre->flags);
+		if(flags & (GRE_cksum|GRE_routing)){
+			print("%V routing info present.  Discarding packet\n",
+				gre->src);
+			freeb(bp);
+			return;
+		}
+
+		if((flags & GRE_key) == 0){
+			/* Make room for the key */
+			if(bp->rp - bp->base < sizeof(ulong)){
+				print("%V can't add key\n", gre->src);
+				freeb(bp);
+				return;
+			}
+			bp->rp -= 4;
+			memmove(bp->rp, bp->rp + 4, sizeof(GREhdr));
+			gre = (GREhdr *)bp->rp;
+			hnputs(gre->flags, flags | GRE_key);
+		}
+
+		/* Add the key */
+		hnputl(bp->rp + sizeof(GREhdr), grec->ulkey);
+	}
+
+	if(!canqlock(&grec->lock)){
+		freeb(bp);
+		return;
+	}
+	if(grec->ulsusp)
+		addring(&grec->ulbuffered, bp);
+	else{
+		/* count first: bp must not be read once handed to ipoput4 */
+		grepuout++;
+		grebuout += BLEN(bp);
+		ipoput4(c->p->f, bp, 0, gre->ttl - 1, gre->tos, nil);
+	}
+	qunlock(&grec->lock);
+}
+
+static void
+greiput(Proto *proto, Ipifc *, Block *bp)
+{
+	int len, hdrlen;
+	ushort eproto, flags;
 	uchar raddr[IPaddrlen];
+	Conv *c, **p;
+	GREconv *grec;
+	GREhdr *gre;
 	GREpriv *gpriv;
+	Ip4hdr *ip;
 
-	gpriv = gre->priv;
-	ghp = (GREhdr*)(bp->rp);
+	/*
+	 * We don't want to deal with block lists.  Ever.  The problem is
+	 * that when the block is forwarded, devether.c puts the block into
+	 * a queue that also uses ->next.  Just do not use ->next here!
+	 */
+	if(bp->next != nil)
+		bp = pullupblock(bp, blocklen(bp));
 
-	v4tov6(raddr, ghp->src);
-	eproto = nhgets(ghp->eproto);
-	qlock(gre);
+	gre = (GREhdr *)bp->rp;
+	if(BLEN(bp) < sizeof(GREhdr) || gre->proto != IP_GREPROTO){
+		freeb(bp);
+		return;
+	}
 
-	/* Look for a conversation structure for this port and address */
-	c = nil;
-	for(p = gre->conv; *p; p++) {
+	v4tov6(raddr, gre->src);
+	eproto = nhgets(gre->eproto);
+	flags  = nhgets(gre->flags);
+	hdrlen = sizeof(GREhdr);
+
+	if(flags & GRE_cksum)
+		hdrlen += 2;
+	if(flags & GRE_routing){
+		print("%I routing info present.  Discarding packet\n", raddr);
+		freeb(bp);
+		return;
+	}
+	if(flags & (GRE_cksum|GRE_routing))
+		hdrlen += 2;			/* Offset field */
+	if(flags & GRE_key)
+		hdrlen += 4;
+	if(flags & GRE_seq)
+		hdrlen += 4;
+
+	qlock(proto);
+
+	if(eproto != 0x880B && BLEN(bp) - hdrlen >= sizeof(Ip4hdr)){
+		ip = (Ip4hdr *)(bp->rp + hdrlen);
+
+		/*
+		 * Look for a conversation structure for this port and address, or
+		 * match the retunnel part, or match on the raw flag.
+		 */
+		for(p = proto->conv; *p; p++) {
+			c = *p;
+
+			if(c->inuse == 0)
+				continue;
+
+			/*
+			 * Do not stop this session - blocking here
+			 * implies that etherread is blocked.
+			 */
+			grec = c->ptcl;
+			if(memcmp(ip->dst, grec->hoa, sizeof ip->dst) == 0){
+				grepdin++;
+				grebdin += BLEN(bp);
+				gredownlink(c, bp);
+				qunlock(proto);
+				return;
+			}
+
+			if(memcmp(ip->src, grec->hoa, sizeof ip->src) == 0){
+				grepuin++;
+				grebuin += BLEN(bp);
+				greuplink(c, bp);
+				qunlock(proto);
+				return;
+			}
+		}
+	}
+
+
+	/*
+	 * when we get here, none of the forwarding tunnels matched.  now
+	 * try to match on raw and conversational sessions.
+	 */
+	for(c = nil, p = proto->conv; *p; p++) {
 		c = *p;
+
 		if(c->inuse == 0)
 			continue;
-		if(c->rport == eproto && 
-			(gpriv->raw || ipcmp(c->raddr, raddr) == 0))
+
+		/*
+		 * Do not stop this session - blocking here
+		 * implies that etherread is blocked.
+		 */
+		grec = c->ptcl;
+		if(c->rport == eproto &&
+		    (grec->raw || ipcmp(c->raddr, raddr) == 0))
 			break;
 	}
 
-	if(*p == nil) {
-		qunlock(gre);
-		freeblist(bp);
+	qunlock(proto);
+
+	if(*p == nil){
+		freeb(bp);
 		return;
 	}
 
-	qunlock(gre);
-
 	/*
 	 * Trim the packet down to data size
 	 */
-	len = nhgets(ghp->len) - GRE_IPONLY;
+	len = nhgets(gre->len) - GRE_IPONLY;
 	if(len < GRE_IPPLUSGRE){
-		freeblist(bp);
+		freeb(bp);
 		return;
 	}
+
 	bp = trimblock(bp, GRE_IPONLY, len);
 	if(bp == nil){
+		gpriv = proto->priv;
 		gpriv->lenerr++;
 		return;
 	}
 
-	/*
-	 *  Can't delimit packet so pull it all into one block.
-	 */
-	if(qlen(c->rq) > 64*1024)
-		freeblist(bp);
-	else{
-		bp = concatblock(bp);
-		if(bp == 0)
-			panic("greiput");
-		qpass(c->rq, bp);
-	}
+	qpass(c->rq, bp);
 }
 
 int
@@ -234,29 +649,258 @@
 	GREpriv *gpriv;
 
 	gpriv = gre->priv;
+	return snprint(buf, len,
+		"gre: %llud %llud %llud %llud %llud %llud %llud %llud, lenerrs %llud\n",
+		grepdin, grepdout, grepuin, grepuout,
+		grebdin, grebdout, grebuin, grebuout, gpriv->lenerr);
+}
 
-	return snprint(buf, len, "gre: len %lud\n", gpriv->lenerr);
+static char *
+grectlraw(Conv *c, int, char **)
+{
+	GREconv *grec;
+
+	grec = c->ptcl;
+	grec->raw = 1;
+	return nil;
 }
 
-char*
-grectl(Conv *c, char **f, int n)
+static char *
+grectlcooked(Conv *c, int, char **)
 {
-	GREpriv *gpriv;
+	GREconv *grec;
 
-	gpriv = c->p->priv;
-	if(n == 1){
-		if(strcmp(f[0], "raw") == 0){
-			gpriv->raw = 1;
-			return nil;
-		}
-		else if(strcmp(f[0], "cooked") == 0){
-			gpriv->raw = 0;
-			return nil;
-		}
+	grec = c->ptcl;
+	grec->raw = 0;
+	return nil;
+}
+
+static char *
+grectlretunnel(Conv *c, int, char **argv)
+{
+	GREconv *grec;
+	uchar ipaddr[4];
+
+	grec = c->ptcl;
+	if(memcmp(grec->hoa, nulladdr, sizeof grec->hoa))
+		return "tunnel already set up";
+
+	v4parseip(ipaddr, argv[1]);
+	if(memcmp(ipaddr, nulladdr, sizeof ipaddr) == 0)
+		return "bad hoa";
+	memmove(grec->hoa, ipaddr, sizeof grec->hoa);
+	v4parseip(ipaddr, argv[2]);
+	memmove(grec->north, ipaddr, sizeof grec->north);
+	v4parseip(ipaddr, argv[3]);
+	memmove(grec->south, ipaddr, sizeof grec->south);
+	v4parseip(ipaddr, argv[4]);
+	memmove(grec->coa, ipaddr, sizeof grec->coa);
+	grec->ulsusp = 1;
+	grec->dlsusp = 0;
+
+	return nil;
+}
+
+static char *
+grectlreport(Conv *c, int, char **argv)
+{
+	ulong seq;
+	Block *bp;
+	Bring *r;
+	GREconv *grec;
+	Metablock *m;
+
+	grec = c->ptcl;
+	seq  = strtoul(argv[1], nil, 0);
+
+	qlock(&grec->lock);
+	r = &grec->dlpending;
+	while(r->produced - r->consumed > 0){
+		bp = r->ring[r->consumed & Ringmask];
+
+		assert(bp && bp->rp - bp->base >= sizeof(Metablock));
+		m = (Metablock *)bp->base;
+		if((long)(seq - m->seq) <= 0)
+			break;
+
+		r->ring[r->consumed & Ringmask] = nil;
+		r->consumed++;
+
+		freeb(bp);
 	}
-	return "unknown control request";
+	qunlock(&grec->lock);
+	return nil;
 }
 
+static char *
+grectldlsuspend(Conv *c, int, char **)
+{
+	GREconv *grec;
+
+	grec = c->ptcl;
+	if(grec->dlsusp)
+		return "already suspended";
+
+	grec->dlsusp = 1;
+	return nil;
+}
+
+static char *
+grectlulsuspend(Conv *c, int, char **)
+{
+	GREconv *grec;
+
+	grec = c->ptcl;
+	if(grec->ulsusp)
+		return "already suspended";
+
+	grec->ulsusp = 1;
+	return nil;
+}
+
+static char *
+grectldlresume(Conv *c, int, char **)
+{
+	GREconv *grec;
+	GREhdr *gre;
+	Block *bp;
+
+	grec = c->ptcl;
+
+	qlock(&grec->lock);
+	if(!grec->dlsusp){
+		qunlock(&grec->lock);
+		return "not suspended";
+	}
+
+	while((bp = getring(&grec->dlbuffered)) != nil){
+		gre = (GREhdr *)bp->rp;
+		qunlock(&grec->lock);
+
+		ipoput4(c->p->f, copyblock(bp, BLEN(bp)), 0, gre->ttl - 1, gre->tos, nil);
+
+		qlock(&grec->lock);
+		addring(&grec->dlpending, bp);
+	}
+	grec->dlsusp = 0;
+	qunlock(&grec->lock);
+	return nil;
+}
+
+static char *
+grectlulresume(Conv *c, int, char **)
+{
+	GREconv *grec;
+	GREhdr *gre;
+	Block *bp;
+
+	grec = c->ptcl;
+
+	qlock(&grec->lock);
+	while((bp = getring(&grec->ulbuffered)) != nil){
+		gre = (GREhdr *)bp->rp;
+
+		qunlock(&grec->lock);
+		ipoput4(c->p->f, bp, 0, gre->ttl - 1, gre->tos, nil);
+		qlock(&grec->lock);
+	}
+	grec->ulsusp = 0;
+	qunlock(&grec->lock);
+	return nil;
+}
+
+static char *
+grectlforward(Conv *c, int, char **argv)
+{
+	Block *bp;
+	GREconv *grec;
+	GREhdr *gre;
+	Metablock *m;
+
+	grec = c->ptcl;
+
+	v4parseip(grec->south, argv[1]);
+	memmove(grec->north, grec->south, sizeof grec->north);
+
+	qlock(&grec->lock);
+	if(!grec->dlsusp){
+		qunlock(&grec->lock);
+		return "not suspended";
+	}
+	grec->dlsusp = 0;
+	grec->ulsusp = 0;
+
+	while((bp = getring(&grec->dlpending)) != nil){
+
+		assert(bp->rp - bp->base >= sizeof(Metablock));
+		m = (Metablock *)bp->base;
+		assert(m->rp >= bp->base && m->rp < bp->lim);
+
+		bp->rp = m->rp;
+
+		gre = (GREhdr *)bp->rp;
+		memmove(gre->src, grec->coa, sizeof gre->dst);
+		memmove(gre->dst, grec->south, sizeof gre->dst);
+
+		qunlock(&grec->lock);
+		ipoput4(c->p->f, bp, 0, gre->ttl - 1, gre->tos, nil);
+		qlock(&grec->lock);
+	}
+
+	while((bp = getring(&grec->dlbuffered)) != nil){
+		gre = (GREhdr *)bp->rp;
+		memmove(gre->src, grec->coa, sizeof gre->dst);
+		memmove(gre->dst, grec->south, sizeof gre->dst);
+
+		qunlock(&grec->lock);
+		ipoput4(c->p->f, bp, 0, gre->ttl - 1, gre->tos, nil);
+		qlock(&grec->lock);
+	}
+
+	while((bp = getring(&grec->ulbuffered)) != nil){
+		gre = (GREhdr *)bp->rp;
+
+		memmove(gre->src, grec->coa, sizeof gre->dst);
+		memmove(gre->dst, grec->south, sizeof gre->dst);
+
+		qunlock(&grec->lock);
+		ipoput4(c->p->f, bp, 0, gre->ttl - 1, gre->tos, nil);
+		qlock(&grec->lock);
+	}
+	qunlock(&grec->lock);
+	return nil;
+}
+
+static char *
+grectlulkey(Conv *c, int, char **argv)
+{
+	GREconv *grec;
+
+	grec = c->ptcl;
+	grec->ulkey = strtoul(argv[1], nil, 0);
+	return nil;
+}
+
+char *
+grectl(Conv *c, char **f, int n)
+{
+	int i;
+
+	if(n < 1)
+		return "too few arguments";
+
+	for(i = 0; i < Ncmds; i++)
+		if(strcmp(f[0], grectls[i].cmd) == 0)
+			break;
+
+	if(i == Ncmds)
+		return "no such command";
+	if(grectls[i].argc != 0 && grectls[i].argc != n)
+		return "incorrect number of arguments";
+
+	return grectls[i].f(c, n, f);
+}
+
 void
 greinit(Fs *fs)
 {
@@ -276,7 +920,7 @@
 	gre->stats = grestats;
 	gre->ipproto = IP_GREPROTO;
 	gre->nc = 64;
-	gre->ptclsize = 0;
+	gre->ptclsize = sizeof(GREconv);
 
 	Fsproto(fs, gre);
 }
diff -u a/os/ip//icmp.c b/os/ip//icmp.c
--- a/os/ip//icmp.c
+++ b/os/ip//icmp.c
@@ -44,11 +44,6 @@
 	Maxtype		= 18,
 };
 
-enum
-{
-	MinAdvise	= 24,	/* minimum needed for us to advise another protocol */ 
-};
-
 char *icmpnames[Maxtype+1] =
 {
 [EchoReply]		"EchoReply",
@@ -70,6 +65,8 @@
 	IP_ICMPPROTO	= 1,
 	ICMP_IPSIZE	= 20,
 	ICMP_HDRSIZE	= 8,
+
+	MinAdvise	= ICMP_IPSIZE+4,	/* minimum needed for us to advise another protocol */ 
 };
 
 enum
@@ -113,7 +110,7 @@
 	c->wq = qbypass(icmpkick, c);
 }
 
-extern char*
+char*
 icmpconnect(Conv *c, char **argv, int argc)
 {
 	char *e;
@@ -126,11 +123,11 @@
 	return nil;
 }
 
-extern int
+int
 icmpstate(Conv *c, char *state, int n)
 {
 	USED(c);
-	return snprint(state, n, "%s qin %d qout %d",
+	return snprint(state, n, "%s qin %d qout %d\n",
 		"Datagram",
 		c->rq ? qlen(c->rq) : 0,
 		c->wq ? qlen(c->wq) : 0
@@ -137,7 +134,7 @@
 	);
 }
 
-extern char*
+char*
 icmpannounce(Conv *c, char **argv, int argc)
 {
 	char *e;
@@ -150,7 +147,7 @@
 	return nil;
 }
 
-extern void
+void
 icmpclose(Conv *c)
 {
 	qclose(c->rq);
@@ -169,8 +166,7 @@
 
 	if(bp == nil)
 		return;
-
-	if(blocklen(bp) < ICMP_IPSIZE + ICMP_HDRSIZE){
+	if(BLEN(bp) < ICMP_IPSIZE + ICMP_HDRSIZE){
 		freeblist(bp);
 		return;
 	}
@@ -190,21 +186,50 @@
 	ipoput4(c->p->f, bp, 0, c->ttl, c->tos, nil);
 }
 
-extern void
-icmpttlexceeded(Fs *f, uchar *ia, Block *bp)
+static int
+ip4reply(Fs *f, uchar ip4[4])
 {
+	uchar addr[IPaddrlen];
+	int i;
+
+	v4tov6(addr, ip4);
+	if(ipismulticast(addr))
+		return 0;
+	i = ipforme(f, addr);
+	return i == 0 || i == Runi;
+}
+
+static int
+ip4me(Fs *f, uchar ip4[4])
+{
+	uchar addr[IPaddrlen];
+
+	v4tov6(addr, ip4);
+	if(ipismulticast(addr))
+		return 0;
+	return ipforme(f, addr) == Runi;
+}
+
+void
+icmpttlexceeded(Fs *f, Ipifc *ifc, Block *bp)
+{
 	Block	*nbp;
 	Icmp	*p, *np;
+	uchar	ia[IPv4addrlen];
 
 	p = (Icmp *)bp->rp;
+	if(!ip4reply(f, p->src) || !ipv4local(ifc, ia, 0, p->src))
+		return;
 
-	netlog(f, Logicmp, "sending icmpttlexceeded -> %V\n", p->src);
+	netlog(f, Logicmp, "sending icmpttlexceeded %V -> src %V dst %V\n",
+		ia, p->src, p->dst);
+
 	nbp = allocb(ICMP_IPSIZE + ICMP_HDRSIZE + ICMP_IPSIZE + 8);
 	nbp->wp += ICMP_IPSIZE + ICMP_HDRSIZE + ICMP_IPSIZE + 8;
 	np = (Icmp *)nbp->rp;
 	np->vihl = IP_VER4;
+	memmove(np->src, ia, sizeof(np->src));
 	memmove(np->dst, p->src, sizeof(np->dst));
-	v6tov4(np->src, ia);
 	memmove(np->data, bp->rp, ICMP_IPSIZE + 8);
 	np->type = TimeExceed;
 	np->code = 0;
@@ -214,7 +239,6 @@
 	memset(np->cksum, 0, sizeof(np->cksum));
 	hnputs(np->cksum, ptclcsum(nbp, ICMP_IPSIZE, blocklen(nbp) - ICMP_IPSIZE));
 	ipoput4(f, nbp, 0, MAXTTL, DFLTTOS, nil);
-
 }
 
 static void
@@ -222,20 +246,10 @@
 {
 	Block	*nbp;
 	Icmp	*p, *np;
-	int	i;
-	uchar	addr[IPaddrlen];
 
 	p = (Icmp *)bp->rp;
-
-	/* only do this for unicast sources and destinations */
-	v4tov6(addr, p->dst);
-	i = ipforme(f, addr);
-	if((i&Runi) == 0)
+	if(!ip4me(f, p->dst) || !ip4reply(f, p->src))
 		return;
-	v4tov6(addr, p->src);
-	i = ipforme(f, addr);
-	if(i != 0 && (i&Runi) == 0)
-		return;
 
 	netlog(f, Logicmp, "sending icmpnoconv -> %V\n", p->src);
 	nbp = allocb(ICMP_IPSIZE + ICMP_HDRSIZE + ICMP_IPSIZE + 8);
@@ -255,13 +269,13 @@
 	ipoput4(f, nbp, 0, MAXTTL, DFLTTOS, nil);
 }
 
-extern void
+void
 icmpnoconv(Fs *f, Block *bp)
 {
 	icmpunreachable(f, bp, 3, 0);
 }
 
-extern void
+void
 icmpcantfrag(Fs *f, Block *bp, int mtu)
 {
 	icmpunreachable(f, bp, 4, mtu);
@@ -270,35 +284,34 @@
 static void
 goticmpkt(Proto *icmp, Block *bp)
 {
+	ushort	recid;
+	uchar	dst[IPaddrlen], src[IPaddrlen];
 	Conv	**c, *s;
 	Icmp	*p;
-	uchar	dst[IPaddrlen];
-	ushort	recid;
 
 	p = (Icmp *) bp->rp;
-	v4tov6(dst, p->src);
+	v4tov6(dst, p->dst);
+	v4tov6(src, p->src);
 	recid = nhgets(p->icmpid);
 
-	for(c = icmp->conv; *c; c++) {
-		s = *c;
+	for(c = icmp->conv; (s = *c) != nil; c++){
 		if(s->lport == recid)
-		if(ipcmp(s->raddr, dst) == 0){
-			bp = concatblock(bp);
-			if(bp != nil)
-				qpass(s->rq, bp);
-			return;
-		}
+		if(ipcmp(s->laddr, dst) == 0 || ipcmp(s->raddr, src) == 0)
+			qpass(s->rq, copyblock(bp, blocklen(bp)));
 	}
 	freeblist(bp);
 }
 
 static Block *
-mkechoreply(Block *bp)
+mkechoreply(Block *bp, Fs *f)
 {
 	Icmp	*q;
 	uchar	ip[4];
 
 	q = (Icmp *)bp->rp;
+	if(!ip4me(f, q->dst) || !ip4reply(f, q->src))
+		return nil;
+
 	q->vihl = IP_VER4;
 	memmove(ip, q->src, sizeof(q->dst));
 	memmove(q->src, q->dst, sizeof(q->src));
@@ -318,12 +331,22 @@
 [3]	"port unreachable",
 [4]	"fragmentation needed and DF set",
 [5]	"source route failed",
+[6]	"destination network unknown",
+[7]	"destination host unknown",
+[8]	"source host isolated",
+[9]	"network administratively prohibited",
+[10]	"host administratively prohibited",
+[11]	"network unreachable for tos",
+[12]	"host unreachable for tos",
+[13]	"communication administratively prohibited",
+[14]	"host precedence violation",
+[15]	"precedence cutoff in effect",
 };
 
 static void
 icmpiput(Proto *icmp, Ipifc*, Block *bp)
 {
-	int	n, iplen;
+	int	n;
 	Icmp	*p;
 	Block	*r;
 	Proto	*pr;
@@ -332,12 +355,10 @@
 	Icmppriv *ipriv;
 
 	ipriv = icmp->priv;
-	
 	ipriv->stats[InMsgs]++;
 
-	p = (Icmp *)bp->rp;
-	netlog(icmp->f, Logicmp, "icmpiput %d %d\n", p->type, p->code);
-	n = blocklen(bp);
+	bp = concatblock(bp);
+	n = BLEN(bp);
 	if(n < ICMP_IPSIZE+ICMP_HDRSIZE){
 		ipriv->stats[InErrors]++;
 		ipriv->stats[HlenErrs]++;
@@ -344,69 +365,57 @@
 		netlog(icmp->f, Logicmp, "icmp hlen %d\n", n);
 		goto raise;
 	}
-	iplen = nhgets(p->length);
-	if(iplen > n || (iplen % 1)){
-		ipriv->stats[LenErrs]++;
+	if(ptclcsum(bp, ICMP_IPSIZE, n - ICMP_IPSIZE)){
 		ipriv->stats[InErrors]++;
-		netlog(icmp->f, Logicmp, "icmp length %d\n", iplen);
-		goto raise;
-	}
-	if(ptclcsum(bp, ICMP_IPSIZE, iplen - ICMP_IPSIZE)){
-		ipriv->stats[InErrors]++;
 		ipriv->stats[CsumErrs]++;
 		netlog(icmp->f, Logicmp, "icmp checksum error\n");
 		goto raise;
 	}
+	p = (Icmp *)bp->rp;
+	netlog(icmp->f, Logicmp, "icmpiput %s (%d) %d\n",
+		(p->type < nelem(icmpnames)? icmpnames[p->type]: ""),
+		p->type, p->code);
 	if(p->type <= Maxtype)
 		ipriv->in[p->type]++;
 
 	switch(p->type) {
 	case EchoRequest:
-		if (iplen < n)
-			bp = trimblock(bp, 0, iplen);
-		r = mkechoreply(bp);
+		r = mkechoreply(bp, icmp->f);
+		if(r == nil)
+			goto raise;
 		ipriv->out[EchoReply]++;
 		ipoput4(icmp->f, r, 0, MAXTTL, DFLTTOS, nil);
 		break;
 	case Unreachable:
-		if(p->code > 5)
-			msg = unreachcode[1];
-		else
+		if(p->code >= nelem(unreachcode)) {
+			snprint(m2, sizeof m2, "unreachable %V -> %V code %d",
+				p->src, p->dst, p->code);
+			msg = m2;
+		} else
 			msg = unreachcode[p->code];
 
+	Advise:
 		bp->rp += ICMP_IPSIZE+ICMP_HDRSIZE;
-		if(blocklen(bp) < MinAdvise){
+		if(BLEN(bp) < MinAdvise){
 			ipriv->stats[LenErrs]++;
 			goto raise;
 		}
 		p = (Icmp *)bp->rp;
-		pr = Fsrcvpcolx(icmp->f, p->proto);
-		if(pr != nil && pr->advise != nil) {
-			(*pr->advise)(pr, bp, msg);
-			return;
+		if((nhgets(p->frag) & IP_FO) == 0){
+			pr = Fsrcvpcolx(icmp->f, p->proto);
+			if(pr != nil && pr->advise != nil) {
+				(*pr->advise)(pr, bp, msg);
+				return;
+			}
 		}
-
 		bp->rp -= ICMP_IPSIZE+ICMP_HDRSIZE;
 		goticmpkt(icmp, bp);
 		break;
 	case TimeExceed:
 		if(p->code == 0){
-			sprint(m2, "ttl exceeded at %V", p->src);
-
-			bp->rp += ICMP_IPSIZE+ICMP_HDRSIZE;
-			if(blocklen(bp) < MinAdvise){
-				ipriv->stats[LenErrs]++;
-				goto raise;
-			}
-			p = (Icmp *)bp->rp;
-			pr = Fsrcvpcolx(icmp->f, p->proto);
-			if(pr != nil && pr->advise != nil) {
-				(*pr->advise)(pr, bp, m2);
-				return;
-			}
-			bp->rp -= ICMP_IPSIZE+ICMP_HDRSIZE;
+			snprint(msg = m2, sizeof m2, "ttl exceeded at %V", p->src);
+			goto Advise;
 		}
-
 		goticmpkt(icmp, bp);
 		break;
 	default:
@@ -419,22 +428,25 @@
 	freeblist(bp);
 }
 
-void
+static void
 icmpadvise(Proto *icmp, Block *bp, char *msg)
 {
+	ushort	recid;
+	uchar	dst[IPaddrlen], src[IPaddrlen];
 	Conv	**c, *s;
 	Icmp	*p;
-	uchar	dst[IPaddrlen];
-	ushort	recid;
 
 	p = (Icmp *) bp->rp;
 	v4tov6(dst, p->dst);
+	v4tov6(src, p->src);
 	recid = nhgets(p->icmpid);
 
-	for(c = icmp->conv; *c; c++) {
-		s = *c;
+	for(c = icmp->conv; (s = *c) != nil; c++){
 		if(s->lport == recid)
+		if(ipcmp(s->laddr, src) == 0)
 		if(ipcmp(s->raddr, dst) == 0){
+			if(s->ignoreadvice)
+				break;
 			qhangup(s->rq, msg);
 			qhangup(s->wq, msg);
 			break;
@@ -443,7 +455,7 @@
 	freeblist(bp);
 }
 
-int
+static int
 icmpstats(Proto *icmp, char *buf, int len)
 {
 	Icmppriv *priv;
@@ -456,7 +468,7 @@
 	for(i = 0; i < Nstats; i++)
 		p = seprint(p, e, "%s: %lud\n", statnames[i], priv->stats[i]);
 	for(i = 0; i <= Maxtype; i++){
-		if(icmpnames[i])
+		if(icmpnames[i] != nil)
 			p = seprint(p, e, "%s: %lud %lud\n", icmpnames[i], priv->in[i], priv->out[i]);
 		else
 			p = seprint(p, e, "%d: %lud %lud\n", i, priv->in[i], priv->out[i]);
@@ -463,13 +475,7 @@
 	}
 	return p - buf;
 }
-
-int
-icmpgc(Proto *icmp)
-{
-	return natgc(icmp->ipproto);
-}
-
+	
 void
 icmpinit(Fs *fs)
 {
@@ -487,7 +493,7 @@
 	icmp->stats = icmpstats;
 	icmp->ctl = nil;
 	icmp->advise = icmpadvise;
-	icmp->gc = icmpgc;
+	icmp->gc = nil;
 	icmp->ipproto = IP_ICMPPROTO;
 	icmp->nc = 128;
 	icmp->ptclsize = 0;
diff -u a/os/ip//icmp6.c b/os/ip//icmp6.c
--- a/os/ip//icmp6.c
+++ b/os/ip//icmp6.c
@@ -1,3 +1,6 @@
+/*
+ * Internet Control Message Protocol for IPv6
+ */
 #include "u.h"
 #include "../port/lib.h"
 #include "mem.h"
@@ -7,41 +10,36 @@
 #include "ip.h"
 #include "ipv6.h"
 
-typedef struct ICMPpkt ICMPpkt;
-typedef struct IPICMP IPICMP;
-typedef struct Ndpkt Ndpkt;
-typedef struct NdiscC NdiscC;
+enum
+{
+	InMsgs6,
+	InErrors6,
+	OutMsgs6,
+	CsumErrs6,
+	LenErrs6,
+	HlenErrs6,
+	HoplimErrs6,
+	IcmpCodeErrs6,
+	TargetErrs6,
+	OptlenErrs6,
+	AddrmxpErrs6,
+	RouterAddrErrs6,
 
-struct ICMPpkt {
-	uchar	type;
-	uchar	code;
-	uchar	cksum[2];
-	uchar	icmpid[2];
-	uchar	seq[2];
+	Nstats6,
 };
 
-struct IPICMP {
-	Ip6hdr;
-	ICMPpkt;
+enum {
+	ICMP_USEAD6	= 40,
 };
 
-struct NdiscC
-{
-	IPICMP;
-	uchar target[IPaddrlen];
+enum {
+	Oflag	= 1<<5,
+	Sflag	= 1<<6,
+	Rflag	= 1<<7,
 };
 
-struct Ndpkt
-{
-	NdiscC;
-	uchar otype;
-	uchar olen;	// length in units of 8 octets(incl type, code),
-				// 1 for IEEE 802 addresses
-	uchar lnaddr[6];	// link-layer address
-};
-
-enum {	
-	// ICMPv6 types
+enum {
+	/* ICMPv6 types */
 	EchoReply	= 0,
 	UnreachableV6	= 1,
 	PacketTooBigV6	= 2,
@@ -69,6 +67,65 @@
 	Maxtype6	= 137,
 };
 
+enum {
+	MinAdvise	= IP6HDR+4,	/* minimum needed for us to advise another protocol */ 
+};
+
+/* on-the-wire packet formats */
+typedef struct IPICMP IPICMP;
+typedef struct Ndpkt Ndpkt;
+typedef struct NdiscC NdiscC;
+
+/* we do this to avoid possible struct padding  */
+#define ICMPHDR \
+	IPV6HDR; \
+	uchar	type; \
+	uchar	code; \
+	uchar	cksum[2]; \
+	uchar	icmpid[2]; \
+	uchar	seq[2]
+
+struct IPICMP {
+	ICMPHDR;
+	uchar	payload[];
+};
+
+#define IPICMPSZ offsetof(IPICMP, payload[0])
+
+struct NdiscC {
+	ICMPHDR;
+	uchar	target[IPaddrlen];
+	uchar	payload[];
+};
+
+#define NDISCSZ offsetof(NdiscC, payload[0])
+
+struct Ndpkt {
+	ICMPHDR;
+	uchar	target[IPaddrlen];
+	uchar	otype;
+	uchar	olen;		/* length in units of 8 octets(incl type, code),
+				 * 1 for IEEE 802 addresses */
+	uchar	lnaddr[6];	/* link-layer address */
+	uchar	payload[];
+};
+
+#define NDPKTSZ offsetof(Ndpkt, payload[0])
+
+typedef struct Icmppriv6
+{
+	ulong	stats[Nstats6];
+
+	/* message counts */
+	ulong	in[Maxtype6+1];
+	ulong	out[Maxtype6+1];
+} Icmppriv6;
+
+typedef struct Icmpcb6
+{
+	uchar	headers;
+} Icmpcb6;
+
 char *icmpnames6[Maxtype6+1] =
 {
 [EchoReply]		"EchoReply",
@@ -95,24 +152,6 @@
 [RedirectV6]		"RedirectV6",
 };
 
-enum
-{
-	InMsgs6,
-	InErrors6,
-	OutMsgs6,
-	CsumErrs6,
-	LenErrs6,
-	HlenErrs6,
-	HoplimErrs6,
-	IcmpCodeErrs6,
-	TargetErrs6,
-	OptlenErrs6,
-	AddrmxpErrs6,
-	RouterAddrErrs6,
-
-	Nstats6,
-};
-
 static char *statnames6[Nstats6] =
 {
 [InMsgs6]	"InMsgs",
@@ -129,49 +168,18 @@
 [RouterAddrErrs6]	"RouterAddrErrs",
 };
 
-typedef struct Icmppriv6
-{
-	ulong	stats[Nstats6];
-
-	/* message counts */
-	ulong	in[Maxtype6+1];
-	ulong	out[Maxtype6+1];
-} Icmppriv6;
-
-typedef struct Icmpcb6 
-{
-	QLock;
-	uchar headers;
-} Icmpcb6;
-
 static char *unreachcode[] =
 {
-[icmp6_no_route]	"no route to destination",
-[icmp6_ad_prohib]	"comm with destination administratively prohibited",
-[icmp6_unassigned]	"icmp unreachable: unassigned error code (2)",
-[icmp6_adr_unreach]	"address unreachable",
-[icmp6_port_unreach]	"port unreachable",
-[icmp6_unkn_code]	"icmp unreachable: unknown code",
+[Icmp6_no_route]	"no route to destination",
+[Icmp6_ad_prohib]	"comm with destination administratively prohibited",
+[Icmp6_out_src_scope]	"beyond scope of source address",
+[Icmp6_adr_unreach]	"address unreachable",
+[Icmp6_port_unreach]	"port unreachable",
+[Icmp6_gress_src_fail]	"source address failed ingress/egress policy",
+[Icmp6_rej_route]	"reject route to destination",
+[Icmp6_unknown]		"icmp unreachable: unknown code",
 };
 
-enum {
-	ICMP_USEAD6	= 40,
-};
-
-enum {
-	Oflag	= 1<<5,
-	Sflag	= 1<<6,
-	Rflag	= 1<<7,
-};
-
-enum {
-	slladd	= 1,
-	tlladd	= 2,
-	prfinfo	= 3,
-	redhdr	= 4,
-	mtuopt	= 5,
-};
-
 static void icmpkick6(void *x, Block *bp);
 
 static void
@@ -185,13 +193,14 @@
 set_cksum(Block *bp)
 {
 	IPICMP *p = (IPICMP *)(bp->rp);
+	int n = blocklen(bp);
 
-	hnputl(p->vcf, 0);  // borrow IP header as pseudoheader
-	hnputs(p->ploadlen, blocklen(bp)-IPV6HDR_LEN);
+	hnputl(p->vcf, 0);  	/* borrow IP header as pseudoheader */
+	hnputs(p->ploadlen, n - IP6HDR);
 	p->proto = 0;
-	p->ttl = ICMPv6;	// ttl gets set later
+	p->ttl = ICMPv6;	/* ttl gets set later */
 	hnputs(p->cksum, 0);
-	hnputs(p->cksum, ptclcsum(bp, 0, blocklen(bp)));
+	hnputs(p->cksum, ptclcsum(bp, 0, n));
 	p->proto = ICMPv6;
 }
 
@@ -198,7 +207,8 @@
 static Block *
 newIPICMP(int packetlen)
 {
-	Block	*nbp;
+	Block *nbp;
+
 	nbp = allocb(packetlen);
 	nbp->wp += packetlen;
 	memset(nbp->rp, 0, packetlen);
@@ -205,20 +215,22 @@
 	return nbp;
 }
 
-void
+static void
 icmpadvise6(Proto *icmp, Block *bp, char *msg)
 {
-	Conv	**c, *s;
-	IPICMP	*p;
-	ushort	recid;
+	ushort recid;
+	Conv **c, *s;
+	IPICMP *p;
 
-	p = (IPICMP *) bp->rp;
+	p = (IPICMP *)bp->rp;
 	recid = nhgets(p->icmpid);
 
-	for(c = icmp->conv; *c; c++) {
-		s = *c;
+	for(c = icmp->conv; (s = *c) != nil; c++){
 		if(s->lport == recid)
+		if(ipcmp(s->laddr, p->src) == 0)
 		if(ipcmp(s->raddr, p->dst) == 0){
+			if(s->ignoreadvice)
+				break;
 			qhangup(s->rq, msg);
 			qhangup(s->wq, msg);
 			break;
@@ -230,9 +242,9 @@
 static void
 icmpkick6(void *x, Block *bp)
 {
+	uchar laddr[IPaddrlen], raddr[IPaddrlen];
 	Conv *c = x;
 	IPICMP *p;
-	uchar laddr[IPaddrlen], raddr[IPaddrlen];
 	Icmppriv6 *ipriv = c->p->priv;
 	Icmpcb6 *icb = (Icmpcb6*)c->ptcl;
 
@@ -249,10 +261,10 @@
 		bp->rp += IPaddrlen;
 		ipmove(raddr, bp->rp);
 		bp->rp += IPaddrlen;
-		bp = padblock(bp, sizeof(Ip6hdr));
+		bp = padblock(bp, IP6HDR);
 	}
 
-	if(blocklen(bp) < sizeof(IPICMP)){
+	if(BLEN(bp) < IPICMPSZ){
 		freeblist(bp);
 		return;
 	}
@@ -268,23 +280,20 @@
 
 	set_cksum(bp);
 	p->vcf[0] = 0x06 << 4;
-	if(p->type <= Maxtype6)	
+	if(p->type <= Maxtype6)
 		ipriv->out[p->type]++;
 	ipoput6(c->p->f, bp, 0, c->ttl, c->tos, nil);
 }
 
-char*
+static char*
 icmpctl6(Conv *c, char **argv, int argc)
 {
 	Icmpcb6 *icb;
 
 	icb = (Icmpcb6*) c->ptcl;
-
-	if(argc==1) {
-		if(strcmp(argv[0], "headers")==0) {
-			icb->headers = 6;
-			return nil;
-		}
+	if(argc==1 && strcmp(argv[0], "headers")==0) {
+		icb->headers = 6;
+		return nil;
 	}
 	return "unknown control request";
 }
@@ -292,41 +301,39 @@
 static void
 goticmpkt6(Proto *icmp, Block *bp, int muxkey)
 {
-	Conv	**c, *s;
-	IPICMP	*p = (IPICMP *)bp->rp;
-	ushort	recid; 
-	uchar 	*addr;
+	ushort recid;
+	uchar *addr;
+	Conv **c, *s;
+	IPICMP *p = (IPICMP *)bp->rp;
 
 	if(muxkey == 0) {
 		recid = nhgets(p->icmpid);
 		addr = p->src;
-	}
-	else {
+	} else {
 		recid = muxkey;
 		addr = p->dst;
 	}
-
-	for(c = icmp->conv; *c; c++){
-		s = *c;
-		if(s->lport == recid && ipcmp(s->raddr, addr) == 0){
-			bp = concatblock(bp);
-			if(bp != nil)
-				qpass(s->rq, bp);
-			return;
-		}
+	for(c = icmp->conv; (s = *c) != nil; c++){
+		if(s->lport == recid)
+		if(ipcmp(s->laddr, p->dst) == 0 || ipcmp(s->raddr, addr) == 0)
+			qpass(s->rq, copyblock(bp, blocklen(bp)));
 	}
-
 	freeblist(bp);
 }
 
 static Block *
-mkechoreply6(Block *bp)
+mkechoreply6(Block *bp, Ipifc *ifc)
 {
+	uchar addr[IPaddrlen];
 	IPICMP *p = (IPICMP *)(bp->rp);
-	uchar	addr[IPaddrlen];
 
+	if(isv6mcast(p->src))
+		return nil;
 	ipmove(addr, p->src);
-	ipmove(p->src, p->dst);
+	if(!isv6mcast(p->dst))
+		ipmove(p->src, p->dst);
+	else if (!ipv6local(ifc, p->src, 0, addr))
+		return nil;
 	ipmove(p->dst, addr);
 	p->type = EchoReplyV6;
 	set_cksum(bp);
@@ -335,49 +342,43 @@
 
 /*
  * sends out an ICMPv6 neighbor solicitation
- * 	suni == SRC_UNSPEC or SRC_UNI, 
+ * 	suni == SRC_UNSPEC or SRC_UNI,
  *	tuni == TARG_MULTI => multicast for address resolution,
  * 	and tuni == TARG_UNI => neighbor reachability.
  */
-
-extern void
+void
 icmpns(Fs *f, uchar* src, int suni, uchar* targ, int tuni, uchar* mac)
 {
-	Block	*nbp;
+	Block *nbp;
 	Ndpkt *np;
 	Proto *icmp = f->t2p[ICMPv6];
 	Icmppriv6 *ipriv = icmp->priv;
 
-
-	nbp = newIPICMP(sizeof(Ndpkt));
+	nbp = newIPICMP(NDPKTSZ);
 	np = (Ndpkt*) nbp->rp;
 
+	if(suni == SRC_UNSPEC)
+		ipmove(np->src, v6Unspecified);
+	else
+		ipmove(np->src, src);
 
-	if(suni == SRC_UNSPEC) 
-		memmove(np->src, v6Unspecified, IPaddrlen);
-	else 
-		memmove(np->src, src, IPaddrlen);
-
 	if(tuni == TARG_UNI)
-		memmove(np->dst, targ, IPaddrlen);
+		ipmove(np->dst, targ);
 	else
 		ipv62smcast(np->dst, targ);
 
 	np->type = NbrSolicit;
 	np->code = 0;
-	memmove(np->target, targ, IPaddrlen);
+	ipmove(np->target, targ);
 	if(suni != SRC_UNSPEC) {
-		np->otype = SRC_LLADDRESS;
-		np->olen = 1;	/* 1+1+6 = 8 = 1 8-octet */
+		np->otype = SRC_LLADDR;
+		np->olen = 1;		/* 1+1+6 = 8 = 1 8-octet */
 		memmove(np->lnaddr, mac, sizeof(np->lnaddr));
-	}
-	else {
-		int r = sizeof(Ndpkt)-sizeof(NdiscC);
-		nbp->wp -= r;
-	}
+	} else
+		nbp->wp -= NDPKTSZ - NDISCSZ;
 
 	set_cksum(nbp);
-	np = (Ndpkt*) nbp->rp;
+	np = (Ndpkt*)nbp->rp;
 	np->ttl = HOP_LIMIT;
 	np->vcf[0] = 0x06 << 4;
 	ipriv->out[NbrSolicit]++;
@@ -388,122 +389,101 @@
 /*
  * sends out an ICMPv6 neighbor advertisement. pktflags == RSO flags.
  */
-extern void
+void
 icmpna(Fs *f, uchar* src, uchar* dst, uchar* targ, uchar* mac, uchar flags)
 {
-	Block	*nbp;
+	Block *nbp;
 	Ndpkt *np;
 	Proto *icmp = f->t2p[ICMPv6];
 	Icmppriv6 *ipriv = icmp->priv;
 
-	nbp = newIPICMP(sizeof(Ndpkt));
-	np = (Ndpkt*) nbp->rp;
+	nbp = newIPICMP(NDPKTSZ);
+	np = (Ndpkt*)nbp->rp;
 
-	memmove(np->src, src, IPaddrlen);
-	memmove(np->dst, dst, IPaddrlen);
+	ipmove(np->src, src);
+	ipmove(np->dst, dst);
 
 	np->type = NbrAdvert;
 	np->code = 0;
 	np->icmpid[0] = flags;
-	memmove(np->target, targ, IPaddrlen);
+	ipmove(np->target, targ);
 
-	np->otype = TARGET_LLADDRESS;
-	np->olen = 1;	
+	np->otype = TARGET_LLADDR;
+	np->olen = 1;
 	memmove(np->lnaddr, mac, sizeof(np->lnaddr));
 
 	set_cksum(nbp);
-	np = (Ndpkt*) nbp->rp;
+	np = (Ndpkt*)nbp->rp;
 	np->ttl = HOP_LIMIT;
 	np->vcf[0] = 0x06 << 4;
 	ipriv->out[NbrAdvert]++;
-	netlog(f, Logicmp, "sending neighbor advertisement %I\n", src);
+	netlog(f, Logicmp, "sending neighbor advertisement %I\n", targ);
 	ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil);
 }
 
-extern void
-icmphostunr(Fs *f, Ipifc *ifc, Block *bp, int code, int free)
+void
+icmphostunr6(Fs *f, Ipifc *ifc, Block *bp, int code, int tome)
 {
+	int osz = BLEN(bp);
+	int sz = MIN(IPICMPSZ + osz, v6MINTU);
 	Block *nbp;
 	IPICMP *np;
-	Ip6hdr	*p;
-	int osz = BLEN(bp);
-	int sz = MIN(sizeof(IPICMP) + osz, v6MINTU);
-	Proto	*icmp = f->t2p[ICMPv6];
+	Ip6hdr *p;
+	Proto *icmp = f->t2p[ICMPv6];
 	Icmppriv6 *ipriv = icmp->priv;
+	uchar ia[IPaddrlen];
 
-	p = (Ip6hdr *) bp->rp;
+	p = (Ip6hdr *)bp->rp;
+	if(isv6mcast(p->dst) || isv6mcast(p->src) || !ipv6local(ifc, ia, 0, p->src))
+		return;
 
-	if(isv6mcast(p->src)) 
-		goto clean;
+	netlog(f, Logicmp, "send icmphostunr %I -> src %I dst %I\n",
+		ia, p->src, p->dst);
 
 	nbp = newIPICMP(sz);
-	np = (IPICMP *) nbp->rp;
-
-	rlock(ifc);
-	if(ipv6anylocal(ifc, np->src)) {
-		netlog(f, Logicmp, "send icmphostunr -> s%I d%I\n", p->src, p->dst);
-	}
-	else {
-		netlog(f, Logicmp, "icmphostunr fail -> s%I d%I\n", p->src, p->dst);
-		freeblist(nbp);
-		if(free) 
-			goto clean;
-		else
-			return;
-	}
-
-	memmove(np->dst, p->src, IPaddrlen);
+	np = (IPICMP *)nbp->rp;
+	ipmove(np->src, ia);
+	ipmove(np->dst, p->src);
 	np->type = UnreachableV6;
 	np->code = code;
-	memmove(nbp->rp + sizeof(IPICMP), bp->rp, sz - sizeof(IPICMP));
+	memmove(nbp->rp + IPICMPSZ, bp->rp, sz - IPICMPSZ);
 	set_cksum(nbp);
 	np->ttl = HOP_LIMIT;
 	np->vcf[0] = 0x06 << 4;
 	ipriv->out[UnreachableV6]++;
 
-	if(free)
+	if(tome)
 		ipiput6(f, ifc, nbp);
-	else {
+	else 
 		ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil);
-		return;
-	}
-
-clean:
-	runlock(ifc);
-	freeblist(bp);
 }
 
-extern void
+void
 icmpttlexceeded6(Fs *f, Ipifc *ifc, Block *bp)
 {
+	int osz = BLEN(bp);
+	int sz = MIN(IPICMPSZ + osz, v6MINTU);
 	Block *nbp;
 	IPICMP *np;
-	Ip6hdr	*p;
-	int osz = BLEN(bp);
-	int sz = MIN(sizeof(IPICMP) + osz, v6MINTU);
-	Proto	*icmp = f->t2p[ICMPv6];
+	Ip6hdr *p;
+	Proto *icmp = f->t2p[ICMPv6];
 	Icmppriv6 *ipriv = icmp->priv;
+	uchar ia[IPaddrlen];
 
-	p = (Ip6hdr *) bp->rp;
-
-	if(isv6mcast(p->src)) 
+	p = (Ip6hdr *)bp->rp;
+	if(isv6mcast(p->dst) || isv6mcast(p->src) || !ipv6local(ifc, ia, 0, p->src))
 		return;
 
+	netlog(f, Logicmp, "send icmpttlexceeded6 %I -> src %I dst %I\n",
+		ia, p->src, p->dst);
+
 	nbp = newIPICMP(sz);
 	np = (IPICMP *) nbp->rp;
-
-	if(ipv6anylocal(ifc, np->src)) {
-		netlog(f, Logicmp, "send icmpttlexceeded6 -> s%I d%I\n", p->src, p->dst);
-	}
-	else {
-		netlog(f, Logicmp, "icmpttlexceeded6 fail -> s%I d%I\n", p->src, p->dst);
-		return;
-	}
-
-	memmove(np->dst, p->src, IPaddrlen);
+	ipmove(np->src, ia);
+	ipmove(np->dst, p->src);
 	np->type = TimeExceedV6;
 	np->code = 0;
-	memmove(nbp->rp + sizeof(IPICMP), bp->rp, sz - sizeof(IPICMP));
+	memmove(nbp->rp + IPICMPSZ, bp->rp, sz - IPICMPSZ);
 	set_cksum(nbp);
 	np->ttl = HOP_LIMIT;
 	np->vcf[0] = 0x06 << 4;
@@ -511,38 +491,33 @@
 	ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil);
 }
 
-extern void
+void
 icmppkttoobig6(Fs *f, Ipifc *ifc, Block *bp)
 {
+	int osz = BLEN(bp);
+	int sz = MIN(IPICMPSZ + osz, v6MINTU);
 	Block *nbp;
 	IPICMP *np;
-	Ip6hdr	*p;
-	int osz = BLEN(bp);
-	int sz = MIN(sizeof(IPICMP) + osz, v6MINTU);
-	Proto	*icmp = f->t2p[ICMPv6];
+	Ip6hdr *p;
+	Proto *icmp = f->t2p[ICMPv6];
 	Icmppriv6 *ipriv = icmp->priv;
+	uchar ia[IPaddrlen];
 
-	p = (Ip6hdr *) bp->rp;
-
-	if(isv6mcast(p->src)) 
+	p = (Ip6hdr *)bp->rp;
+	if(isv6mcast(p->dst) || isv6mcast(p->src) || !ipv6local(ifc, ia, 0, p->src))
 		return;
 
-	nbp = newIPICMP(sz);
-	np = (IPICMP *) nbp->rp;
+	netlog(f, Logicmp, "send icmppkttoobig6 %I -> src %I dst %I\n",
+		ia, p->src, p->dst);
 
-	if(ipv6anylocal(ifc, np->src)) {
-		netlog(f, Logicmp, "send icmppkttoobig6 -> s%I d%I\n", p->src, p->dst);
-	}
-	else {
-		netlog(f, Logicmp, "icmppkttoobig6 fail -> s%I d%I\n", p->src, p->dst);
-		return;
-	}
-
-	memmove(np->dst, p->src, IPaddrlen);
+	nbp = newIPICMP(sz);
+	np = (IPICMP *)nbp->rp;
+	ipmove(np->src, ia);
+	ipmove(np->dst, p->src);
 	np->type = PacketTooBigV6;
 	np->code = 0;
 	hnputl(np->icmpid, ifc->maxtu - ifc->m->hsize);
-	memmove(nbp->rp + sizeof(IPICMP), bp->rp, sz - sizeof(IPICMP));
+	memmove(nbp->rp + IPICMPSZ, bp->rp, sz - IPICMPSZ);
 	set_cksum(nbp);
 	np->ttl = HOP_LIMIT;
 	np->vcf[0] = 0x06 << 4;
@@ -554,31 +529,23 @@
  * RFC 2461, pages 39-40, pages 57-58.
  */
 static int
-valid(Proto *icmp, Ipifc *ifc, Block *bp, Icmppriv6 *ipriv) {
-	int 	sz, osz, unsp, n, ttl, iplen;
-	int 	pktsz = BLEN(bp);
-	uchar	*packet = bp->rp;
-	IPICMP	*p = (IPICMP *) packet;
-	Ndpkt	*np;
+valid(Proto *icmp, Ipifc *, Block *bp, Icmppriv6 *ipriv)
+{
+	int sz, osz, unsp, ttl;
+	int pktsz = BLEN(bp);
+	uchar *packet = bp->rp;
+	IPICMP *p = (IPICMP *) packet;
+	Ndpkt *np;
 
-	USED(ifc);
-	n = blocklen(bp);
-	if(n < sizeof(IPICMP)) {
+	if(pktsz < IPICMPSZ) {
 		ipriv->stats[HlenErrs6]++;
-		netlog(icmp->f, Logicmp, "icmp hlen %d\n", n);
+		netlog(icmp->f, Logicmp, "icmp hlen %d\n", pktsz);
 		goto err;
 	}
 
-	iplen = nhgets(p->ploadlen);
-	if(iplen > n-IPV6HDR_LEN || (iplen % 1)) {
-		ipriv->stats[LenErrs6]++;
-		netlog(icmp->f, Logicmp, "icmp length %d\n", iplen);
-		goto err;
-	}
-
-	// Rather than construct explicit pseudoheader, overwrite IPv6 header
+	/* Rather than construct explicit pseudoheader, overwrite IPv6 header */
 	if(p->proto != ICMPv6) {
-		// This code assumes no extension headers!!!
+		/* This code assumes no extension headers!!! */
 		netlog(icmp->f, Logicmp, "icmp error: extension header\n");
 		goto err;
 	}
@@ -586,7 +553,7 @@
 	ttl = p->ttl;
 	p->ttl = p->proto;
 	p->proto = 0;
-	if(ptclcsum(bp, 0, iplen + IPV6HDR_LEN)) {
+	if(ptclcsum(bp, 0, pktsz)) {
 		ipriv->stats[CsumErrs6]++;
 		netlog(icmp->f, Logicmp, "icmp checksum error\n");
 		goto err;
@@ -595,19 +562,16 @@
 	p->ttl = ttl;
 
 	/* additional tests for some pkt types */
-	if( (p->type == NbrSolicit) ||
-		(p->type == NbrAdvert) ||
-		(p->type == RouterAdvert) ||
-		(p->type == RouterSolicit) ||
-		(p->type == RedirectV6) ) {
-
+	if (p->type == NbrSolicit   || p->type == NbrAdvert ||
+	    p->type == RouterAdvert || p->type == RouterSolicit ||
+	    p->type == RedirectV6) {
 		if(p->ttl != HOP_LIMIT) {
-			ipriv->stats[HoplimErrs6]++; 
-			goto err; 
+			ipriv->stats[HoplimErrs6]++;
+			goto err;
 		}
 		if(p->code != 0) {
-			ipriv->stats[IcmpCodeErrs6]++; 
-			goto err; 
+			ipriv->stats[IcmpCodeErrs6]++;
+			goto err;
 		}
 
 		switch (p->type) {
@@ -615,82 +579,78 @@
 		case NbrAdvert:
 			np = (Ndpkt*) p;
 			if(isv6mcast(np->target)) {
-				ipriv->stats[TargetErrs6]++; 
-				goto err; 
+				ipriv->stats[TargetErrs6]++;
+				goto err;
 			}
-			if(optexsts(np) && (np->olen == 0)) {
-				ipriv->stats[OptlenErrs6]++; 
-				goto err; 
+			if(optexsts(np) && np->olen == 0) {
+				ipriv->stats[OptlenErrs6]++;
+				goto err;
 			}
-		
-			if(p->type == NbrSolicit) {
-				if(ipcmp(np->src, v6Unspecified) == 0) { 
-					if(!issmcast(np->dst) || optexsts(np))  {
-						ipriv->stats[AddrmxpErrs6]++; 
-						goto err;
-					}
+
+			if (p->type == NbrSolicit &&
+			    ipcmp(np->src, v6Unspecified) == 0)
+				if(!issmcast(np->dst) || optexsts(np)) {
+					ipriv->stats[AddrmxpErrs6]++;
+					goto err;
 				}
-			}
-		
-			if(p->type == NbrAdvert) {
-				if((isv6mcast(np->dst))&&(nhgets(np->icmpid) & Sflag)){
-					ipriv->stats[AddrmxpErrs6]++; 
-					goto err; 
+
+			if(p->type == NbrAdvert)
+				if(isv6mcast(np->dst) &&
+				    (nhgets(np->icmpid) & Sflag)){
+					ipriv->stats[AddrmxpErrs6]++;
+					goto err;
 				}
-			}
 			break;
-	
+
 		case RouterAdvert:
-			if(pktsz - sizeof(Ip6hdr) < 16) {
-				ipriv->stats[HlenErrs6]++; 
-				goto err; 
+			if(pktsz - IP6HDR < 16) {
+				ipriv->stats[HlenErrs6]++;
+				goto err;
 			}
 			if(!islinklocal(p->src)) {
-				ipriv->stats[RouterAddrErrs6]++; 
-				goto err; 
+				ipriv->stats[RouterAddrErrs6]++;
+				goto err;
 			}
-			sz = sizeof(IPICMP) + 8;
-			while ((sz+1) < pktsz) {
-				osz = *(packet+sz+1);
+			sz = IPICMPSZ + 8;
+			while (sz+8 <= pktsz) {
+				osz = packet[sz+1];
 				if(osz <= 0) {
-					ipriv->stats[OptlenErrs6]++; 
-					goto err; 
-				}	
+					ipriv->stats[OptlenErrs6]++;
+					goto err;
+				}
 				sz += 8*osz;
 			}
 			break;
-	
+
 		case RouterSolicit:
-			if(pktsz - sizeof(Ip6hdr) < 8) {
-				ipriv->stats[HlenErrs6]++; 
-				goto err; 
+			if(pktsz - IP6HDR < 8) {
+				ipriv->stats[HlenErrs6]++;
+				goto err;
 			}
 			unsp = (ipcmp(p->src, v6Unspecified) == 0);
-			sz = sizeof(IPICMP) + 8;
-			while ((sz+1) < pktsz) {
-				osz = *(packet+sz+1);
-				if((osz <= 0) ||
-					(unsp && (*(packet+sz) == slladd)) ) {
-					ipriv->stats[OptlenErrs6]++; 
-					goto err; 
+			sz = IPICMPSZ + 8;
+			while (sz+8 <= pktsz) {
+				osz = packet[sz+1];
+				if(osz <= 0 ||
+				    (unsp && packet[sz] == SRC_LLADDR)) {
+					ipriv->stats[OptlenErrs6]++;
+					goto err;
 				}
 				sz += 8*osz;
 			}
 			break;
-	
+
 		case RedirectV6:
-			//to be filled in
+			/* to be filled in */
 			break;
-	
+
 		default:
 			goto err;
 		}
 	}
-
 	return 1;
-
 err:
-	ipriv->stats[InErrors6]++; 
+	ipriv->stats[InErrors6]++;
 	return 0;
 }
 
@@ -700,169 +660,162 @@
 	Iplifc *lifc;
 	int t;
 
-	rlock(ifc);
-	if(ipproxyifc(f, ifc, target)) {
-		runlock(ifc);
-		return t_uniproxy;
-	}
-
-	for(lifc = ifc->lifc; lifc; lifc = lifc->next) {
-		if(ipcmp(lifc->local, target) == 0) {
-			t = (lifc->tentative) ? t_unitent : t_unirany; 
-			runlock(ifc);
-			return t;
-		}
-	}
-
-	runlock(ifc);
-	return 0;
+	if((lifc = iplocalonifc(ifc, target)) != nil)
+		t = lifc->tentative? Tunitent: Tunirany;
+	else if(ipproxyifc(f, ifc, target))
+		t = Tuniproxy;
+	else
+		t = 0;
+	return t;
 }
 
 static void
-icmpiput6(Proto *icmp, Ipifc *ipifc, Block *bp)
+icmpiput6(Proto *icmp, Ipifc *ifc, Block *bp)
 {
-	uchar	*packet = bp->rp;
-	IPICMP	*p = (IPICMP *)packet;
-	Icmppriv6 *ipriv = icmp->priv;
-	Block	*r;
-	Proto	*pr;
-	char	*msg, m2[128];
-	Ndpkt* np;
+	char *msg, m2[128];
 	uchar pktflags;
-	uchar lsrc[IPaddrlen];
-	int refresh = 1;
+	uchar ia[IPaddrlen];
+	Block *r;
+	IPICMP *p;
+	Icmppriv6 *ipriv = icmp->priv;
 	Iplifc *lifc;
+	Ndpkt* np;
+	Proto *pr;
 
-	if(!valid(icmp, ipifc, bp, ipriv)) 
-		goto raise;
+	bp = concatblock(bp);
+	p = (IPICMP*)bp->rp;
 
-	if(p->type <= Maxtype6)
-		ipriv->in[p->type]++;
-	else
+	if(!valid(icmp, ifc, bp, ipriv) || p->type > Maxtype6)
 		goto raise;
 
+	ipriv->in[p->type]++;
+
 	switch(p->type) {
 	case EchoRequestV6:
-		r = mkechoreply6(bp);
+		r = mkechoreply6(bp, ifc);
+		if(r == nil)
+			goto raise;
 		ipriv->out[EchoReply]++;
 		ipoput6(icmp->f, r, 0, MAXTTL, DFLTTOS, nil);
 		break;
 
 	case UnreachableV6:
-		if(p->code > 4)
-			msg = unreachcode[icmp6_unkn_code];
+		if(p->code >= nelem(unreachcode))
+			msg = unreachcode[Icmp6_unknown];
 		else
 			msg = unreachcode[p->code];
-
-		bp->rp += sizeof(IPICMP);
-		if(blocklen(bp) < 8){
+	Advise:
+		bp->rp += IPICMPSZ;
+		if(BLEN(bp) < MinAdvise){
 			ipriv->stats[LenErrs6]++;
 			goto raise;
 		}
 		p = (IPICMP *)bp->rp;
-		pr = Fsrcvpcolx(icmp->f, p->proto);
-		if(pr != nil && pr->advise != nil) {
-			(*pr->advise)(pr, bp, msg);
-			return;
-		}
 
-		bp->rp -= sizeof(IPICMP);
-		goticmpkt6(icmp, bp, 0);
-		break;
-
-	case TimeExceedV6:
-		if(p->code == 0){
-			sprint(m2, "ttl exceeded at %I", p->src);
-
-			bp->rp += sizeof(IPICMP);
-			if(blocklen(bp) < 8){
-				ipriv->stats[LenErrs6]++;
-				goto raise;
+		/* get rid of fragment header if this is the first fragment */
+		if(p->proto == FH && BLEN(bp) >= MinAdvise+IP6FHDR && MinAdvise > IP6HDR){
+			Fraghdr6 *fh = (Fraghdr6*)(bp->rp + IP6HDR);
+			if((nhgets(fh->offsetRM) & ~7) == 0){	/* first fragment */
+				p->proto = fh->nexthdr;
+				/* copy down payload over fragment header */
+				bp->rp += IP6HDR;
+				bp->wp -= IP6FHDR;
+				memmove(bp->rp, bp->rp+IP6FHDR, BLEN(bp));
+				hnputs(p->ploadlen, BLEN(bp));
+				bp->rp -= IP6HDR;
 			}
-			p = (IPICMP *)bp->rp;
+		}
+		if(p->proto != FH){
 			pr = Fsrcvpcolx(icmp->f, p->proto);
 			if(pr != nil && pr->advise != nil) {
-				(*pr->advise)(pr, bp, m2);
+				(*pr->advise)(pr, bp, msg);
 				return;
 			}
-			bp->rp -= sizeof(IPICMP);
 		}
+		bp->rp -= IPICMPSZ;
+		goticmpkt6(icmp, bp, 0);
+		break;
 
+	case TimeExceedV6:
+		if(p->code == 0){
+			snprint(msg = m2, sizeof m2, "ttl exceeded at %I", p->src);
+			goto Advise;
+		}
+		if(p->code == 1){
+			snprint(msg = m2, sizeof m2, "frag time exceeded at %I", p->src);
+			goto Advise;
+		}
 		goticmpkt6(icmp, bp, 0);
 		break;
 
+	case PacketTooBigV6:
+		snprint(msg = m2, sizeof(m2), "packet too big for %lud mtu at %I",
+			(ulong)nhgetl(p->icmpid), p->src);
+		goto Advise;
+
 	case RouterAdvert:
 	case RouterSolicit:
-		/* using lsrc as a temp, munge hdr for goticmp6 
-		memmove(lsrc, p->src, IPaddrlen);
-		memmove(p->src, p->dst, IPaddrlen);
-		memmove(p->dst, lsrc, IPaddrlen); */
-
 		goticmpkt6(icmp, bp, p->type);
 		break;
 
 	case NbrSolicit:
-		np = (Ndpkt*) p;
+		np = (Ndpkt*)p;
 		pktflags = 0;
-		switch (targettype(icmp->f, ipifc, np->target)) {
-		case t_unirany:
+		if(ifc->sendra6)
+			pktflags |= Rflag;
+		switch (targettype(icmp->f, ifc, np->target)) {
+		case Tunirany:
 			pktflags |= Oflag;
 			/* fall through */
 
-		case t_uniproxy: 
-			if(ipcmp(np->src, v6Unspecified) != 0) {
-				arpenter(icmp->f, V6, np->src, np->lnaddr, 8*np->olen-2, 0);
+		case Tuniproxy:
+			if(ipv6local(ifc, ia, 0, np->src)) {
+				if(arpenter(icmp->f, V6, np->src, np->lnaddr, 8*np->olen-2, ia, ifc, 0) < 0)
+					break;
 				pktflags |= Sflag;
-			}
-			if(ipv6local(ipifc, lsrc)) {
-				icmpna(icmp->f, lsrc, 
-				   (ipcmp(np->src, v6Unspecified)==0)?v6allnodesL:np->src,
-				   np->target, ipifc->mac, pktflags); 
-			}
-			else
-				freeblist(bp);
+			} else
+				ipmove(ia, np->target);
+			icmpna(icmp->f, ia, (pktflags & Sflag)? np->src: v6allnodesL,
+				np->target, ifc->mac, pktflags);
 			break;
-
-		case t_unitent:
-			/* not clear what needs to be done. send up
-			 * an icmp mesg saying don't use this address? */
-
-		default:
-			freeblist(bp);
+		case Tunitent:
+			/*
+			 * not clear what needs to be done. send up
+			 * an icmp mesg saying don't use this address?
+			 */
+			break;
 		}
-
+		freeblist(bp);
 		break;
 
 	case NbrAdvert:
-		np = (Ndpkt*) p;
+		np = (Ndpkt*)p;
 
-		/* if the target address matches one of the local interface 
-		 * address and the local interface address has tentative bit set, 
-		 * then insert into ARP table. this is so the duplication address 
-		 * detection part of ipconfig can discover duplication through 
-		 * the arp table
+		/*
+		 * if the target address matches one of the local interface
+		 * addresses and the local interface address has tentative bit
+		 * set, insert into ARP table. this is so the duplicate address
+		 * detection part of ipconfig can discover duplication through
+		 * the arp table.
 		 */
-		lifc = iplocalonifc(ipifc, np->target);
-		if(lifc && lifc->tentative)
-			refresh = 0;
-		arpenter(icmp->f, V6, np->target, np->lnaddr, 8*np->olen-2, refresh);
+		lifc = iplocalonifc(ifc, np->target);
+		if(lifc != nil && lifc->tentative)
+			arpenter(icmp->f, V6, np->target, np->lnaddr, 8*np->olen-2, np->target, ifc, 0);
+		else if(ipv6local(ifc, ia, 0, np->target))
+			arpenter(icmp->f, V6, np->target, np->lnaddr, 8*np->olen-2, ia, ifc, 1);
 		freeblist(bp);
 		break;
 
-	case PacketTooBigV6:
-
 	default:
 		goticmpkt6(icmp, bp, 0);
 		break;
 	}
 	return;
-
 raise:
 	freeblist(bp);
-
 }
 
-int
+static int
 icmpstats6(Proto *icmp6, char *buf, int len)
 {
 	Icmppriv6 *priv;
@@ -874,23 +827,28 @@
 	e = p+len;
 	for(i = 0; i < Nstats6; i++)
 		p = seprint(p, e, "%s: %lud\n", statnames6[i], priv->stats[i]);
-	for(i = 0; i <= Maxtype6; i++){
+	for(i = 0; i <= Maxtype6; i++)
 		if(icmpnames6[i])
-			p = seprint(p, e, "%s: %lud %lud\n", icmpnames6[i], priv->in[i], priv->out[i]);
-/*		else
-			p = seprint(p, e, "%d: %lud %lud\n", i, priv->in[i], priv->out[i]);
-*/
-	}
+			p = seprint(p, e, "%s: %lud %lud\n", icmpnames6[i],
+				priv->in[i], priv->out[i]);
 	return p - buf;
 }
 
 
-// need to import from icmp.c
+/* import from icmp.c */
 extern int	icmpstate(Conv *c, char *state, int n);
 extern char*	icmpannounce(Conv *c, char **argv, int argc);
 extern char*	icmpconnect(Conv *c, char **argv, int argc);
 extern void	icmpclose(Conv *c);
 
+static void
+icmpclose6(Conv *c)
+{
+	Icmpcb6 *icb = (Icmpcb6*)c->ptcl;
+	icb->headers = 0;
+	icmpclose(c);
+}
+
 void
 icmp6init(Fs *fs)
 {
@@ -902,7 +860,7 @@
 	icmp6->announce = icmpannounce;
 	icmp6->state = icmpstate;
 	icmp6->create = icmpcreate6;
-	icmp6->close = icmpclose;
+	icmp6->close = icmpclose6;
 	icmp6->rcv = icmpiput6;
 	icmp6->stats = icmpstats6;
 	icmp6->ctl = icmpctl6;
@@ -914,4 +872,3 @@
 
 	Fsproto(fs, icmp6);
 }
-
diff -u a/os/ip//igmp.c b/os/ip//igmp.c
--- a/os/ip//igmp.c
+++ b/os/ip//igmp.c
@@ -1,3 +1,7 @@
+/*
+ * igmp - internet group management protocol
+ * unfinished.
+ */
 #include "u.h"
 #include "../port/lib.h"
 #include "mem.h"
@@ -40,8 +44,12 @@
 	uchar	unused;
 	uchar	igmpcksum[2];		/* checksum of igmp portion */
 	uchar	group[IPaddrlen];	/* multicast group */
+
+	uchar	payload[];
 };
 
+#define IGMPPKTSZ offsetof(IGMPpkt, payload[0])
+
 /*
  *  lists for group reports
  */
@@ -49,7 +57,7 @@
 struct IGMPrep
 {
 	IGMPrep		*next;
-	Media		*m;
+	Medium		*m;
 	int		ticks;
 	Multicast	*multi;
 };
@@ -76,19 +84,17 @@
 } stats;
 
 void
-igmpsendreport(Media *m, uchar *addr)
+igmpsendreport(Medium *m, uchar *addr)
 {
 	IGMPpkt *p;
 	Block *bp;
 
 	bp = allocb(sizeof(IGMPpkt));
-	if(bp == nil)
-		return;
 	p = (IGMPpkt*)bp->wp;
 	p->vihl = IP_VER4;
-	bp->wp += sizeof(IGMPpkt);
-	memset(bp->rp, 0, sizeof(IGMPpkt));
-	hnputl(p->src, Mediagetaddr(m));
+	bp->wp += IGMPPKTSZ;
+	memset(bp->rp, 0, IGMPPKTSZ);
+	hnputl(p->src, Mediumgetaddr(m));
 	hnputl(p->dst, Ipallsys);
 	p->vertype = (1<<4) | IGMPreport;
 	p->proto = IP_IGMPPROTO;
@@ -166,7 +172,7 @@
 }
 
 void
-igmpiput(Media *m, Ipifc *, Block *bp)
+igmpiput(Medium *m, Ipifc *, Block *bp)
 {
 	int n;
 	IGMPpkt *ghp;
@@ -206,7 +212,7 @@
 		if(rp != nil)
 			break;	/* already reporting */
 
-		mp = Mediacopymulti(m);
+		mp = Mediumcopymulti(m);
 		if(mp == nil)
 			break;
 
@@ -285,7 +291,7 @@
 	igmp.ptclsize = 0;
 
 	igmpreportfn = igmpsendreport;
-	kproc("igmpproc", igmpproc, 0, 0);
+	kproc("igmpproc", igmpproc, 0);
 
 	Fsproto(fs, &igmp);
 }
diff -u a/os/ip//il.c b/os/ip//il.c
--- a/os/ip//il.c
+++ b/os/ip//il.c
@@ -189,7 +189,7 @@
 {
 	Ipht	ht;
 
-	ulong	stats[Nstats];
+	uvlong	stats[Nstats];
 
 	ulong	csumerr;		/* checksum errors */
 	ulong	hlenerr;		/* header length error */
@@ -208,7 +208,7 @@
 
 
 void	ilrcvmsg(Conv*, Block*);
-void	ilsendctl(Conv*, Ilhdr*, int, ulong, ulong, int);
+int	ilsendctl(Conv*, Ilhdr*, int, ulong, ulong, int);
 void	ilackq(Ilcb*, Block*);
 void	ilprocess(Conv*, Ilhdr*, Block*);
 void	ilpullup(Conv*);
@@ -251,6 +251,9 @@
 	e = Fsstdconnect(c, argv, argc);
 	if(e != nil)
 		return e;
+	if(c->ipversion != V4)
+		return "only IP version 4 supported";
+		
 	return ilstart(c, IL_CONNECT, fast);
 }
 
@@ -260,7 +263,7 @@
 	Ilcb *ic;
 
 	ic = (Ilcb*)(c->ptcl);
-	return snprint(state, n, "%s qin %d qout %d del %5.5d Br %5.5d md %5.5d una %5.5lud rex %5.5d rxq %5.5d max %5.5d",
+	return snprint(state, n, "%s qin %d qout %d del %5.5d Br %5.5d md %5.5d una %5.5lud rex %5.5d rxq %5.5d max %5.5d\n",
 		ilstates[ic->state],
 		c->rq ? qlen(c->rq) : 0,
 		c->wq ? qlen(c->wq) : 0,
@@ -434,7 +437,7 @@
 	p = buf;
 	e = p+len;
 	for(i = 0; i < Nstats; i++)
-		p = seprint(p, e, "%s: %lud\n", statnames[i], priv->stats[i]);
+		p = seprint(p, e, "%s: %llud\n", statnames[i], priv->stats[i]);
 	return p - buf;
 }
 
@@ -548,6 +551,9 @@
 
 	ih = (Ilhdr *)bp->rp;
 	plen = blocklen(bp);
+	if(plen > 0 && (ih->vihl&0xF0)!=IP_VER4)
+		goto raise;	/* ignore non V4 packets */
+
 	if(plen < IL_IPSIZE+IL_HDRSIZE){
 		netlog(il->f, Logil, "il: hlenerr\n");
 		ipriv->stats[HlenErrs]++;
@@ -572,7 +578,7 @@
 		else
 			st = iltype[ih->iltype];
 		ipriv->stats[CsumErrs]++;
-		netlog(il->f, Logil, "il: cksum %ux %ux, pkt(%s id %lud ack %lud %I/%d->%d)\n",
+		netlog(il->f, Logil, "il: cksum %ux %s, pkt(%ux id %ud ack %I/%d->%d)\n",
 			csum, st, nhgetl(ih->ilid), nhgetl(ih->ilack), raddr, sp, dp);
 		goto raise;
 	}
@@ -595,7 +601,7 @@
 			else
 				st = iltype[ih->iltype];
 			ilreject(il->f, ih);		/* no channel and not sync */
-			netlog(il->f, Logil, "il: no channel, pkt(%s id %lud ack %lud %I/%ud->%ud)\n",
+			netlog(il->f, Logil, "il: no channel, pkt(%s id %ud ack %ud %I/%ud->%ud)\n",
 				st, nhgetl(ih->ilid), nhgetl(ih->ilack), raddr, sp, dp); 
 			goto raise;
 		}
@@ -829,7 +835,7 @@
 
 	c = ic->conv;
 	id = nhgetl(h->ilid);
-	netlog(c->p->f, Logil, "il: rexmit %d %ud: %d %d: %i %d/%d\n", id, ic->recvd,
+	netlog(c->p->f, Logil, "il: rexmit %lud %lud: %d %lud: %I %d/%d\n", id, ic->recvd,
 		ic->rexmit, ic->timeout,
 		c->raddr, c->lport, c->rport);
 
@@ -852,7 +858,7 @@
 	ic = (Ilcb*)s->ptcl;
 
 	USED(ic);
-	netlog(s->p->f, Logilmsg, "%11s rcv %d/%d snt %d/%d pkt(%s id %d ack %d %d->%d) ",
+	netlog(s->p->f, Logilmsg, "%11s rcv %lud/%lud snt %lud/%lud pkt(%s id %d ack %ud %ud->%ud) ",
 		ilstates[ic->state],  ic->rstart, ic->recvd, ic->start, 
 		ic->next, iltype[h->iltype], nhgetl(h->ilid), 
 		nhgetl(h->ilack), nhgets(h->ilsrc), nhgets(h->ildst));
@@ -859,7 +865,7 @@
 
 	_ilprocess(s, h, bp);
 
-	netlog(s->p->f, Logilmsg, "%11s rcv %d snt %d\n", ilstates[ic->state], ic->recvd, ic->next);
+	netlog(s->p->f, Logilmsg, "%11s rcv %lud snt %lud\n", ilstates[ic->state], ic->recvd, ic->next);
 }
 
 void
@@ -917,17 +923,12 @@
 		bp->list = nil;
 		dlen = nhgets(oh->illen)-IL_HDRSIZE;
 		bp = trimblock(bp, IL_IPSIZE+IL_HDRSIZE, dlen);
+			
 		/*
 		 * Upper levels don't know about multiple-block
 		 * messages so copy all into one (yick).
 		 */
-		bp = concatblock(bp);
-		if(bp == 0)
-			panic("ilpullup");
-		bp = packblock(bp);
-		if(bp == 0)
-			panic("ilpullup2");
-		qpass(s->rq, bp);
+		qpass(s->rq, packblock(concatblock(bp)));
 	}
 	qunlock(&ic->outo);
 }
@@ -948,7 +949,7 @@
 	id = nhgetl(h->ilid);
 	/* Window checks */
 	if(id <= ic->recvd || id > ic->recvd+ic->window) {
-		netlog(s->p->f, Logil, "il: message outside window %ud <%ud-%ud>: %i %d/%d\n",
+		netlog(s->p->f, Logil, "il: message outside window %lud <%lud-%lud>: %I %d/%d\n",
 			id, ic->recvd, ic->recvd+ic->window, s->raddr, s->lport, s->rport);
 		freeblist(bp);
 		return;
@@ -983,7 +984,7 @@
 	qunlock(&ic->outo);
 }
 
-void
+int
 ilsendctl(Conv *ipc, Ilhdr *inih, int type, ulong id, ulong ack, int ilspec)
 {
 	Ilhdr *ih;
@@ -1034,7 +1035,7 @@
 		hnputs(ih->ilsum, ptclcsum(bp, IL_IPSIZE, IL_HDRSIZE));
 
 if(ipc==nil)
-	panic("ipc is nil caller is %.8lux", getcallerpc(&ipc));
+	panic("ipc is nil caller is %#p", getcallerpc(&ipc));
 if(ipc->p==nil)
 	panic("ipc->p is nil");
 
@@ -1042,7 +1043,7 @@
 		iltype[ih->iltype], nhgetl(ih->ilid), nhgetl(ih->ilack), 
 		nhgets(ih->ilsrc), nhgets(ih->ildst));
 
-	ipoput4(ipc->p->f, bp, 0, ttl, tos, ipc);
+	return ipoput4(ipc->p->f, bp, 0, ttl, tos, ipc);
 }
 
 void
@@ -1145,6 +1146,8 @@
 
 	il = x;
 
+	while(waserror())
+		;
 loop:
 	tsleep(&up->sleep, return0, 0, Iltickms);
 	for(s = il->conv; s && *s; s++) {
@@ -1248,7 +1251,7 @@
 		qlock(&ipriv->apl);
 		if(ipriv->ackprocstarted == 0){
 			sprint(kpname, "#I%dilack", c->p->f->dev);
-			kproc(kpname, ilackproc, c->p, 0);
+			kproc(kpname, ilackproc, c->p);
 			ipriv->ackprocstarted = 1;
 		}
 		qunlock(&ipriv->apl);
@@ -1280,7 +1283,8 @@
 	case IL_CONNECT:
 		ic->state = Ilsyncer;
 		iphtadd(&ipriv->ht, c);
-		ilsendctl(c, nil, Ilsync, ic->start, ic->recvd, 0);
+		if(ilsendctl(c, nil, Ilsync, ic->start, ic->recvd, 0) < 0)
+			ilhangup(c, "no route");
 		break;
 	}
 
@@ -1332,6 +1336,8 @@
 		if(s->lport == psource)
 		if(ipcmp(s->laddr, source) == 0)
 		if(ipcmp(s->raddr, dest) == 0){
+			if(s->ignoreadvice)
+				break;
 			qunlock(il);
 			ic = (Ilcb*)s->ptcl;
 			switch(ic->state){
@@ -1380,12 +1386,6 @@
 	}
 }
 
-int
-ilgc(Proto *il)
-{
-	return natgc(il->ipproto);
-}
-
 void
 ilinit(Fs *f)
 {
@@ -1406,7 +1406,7 @@
 	il->advise = iladvise;
 	il->stats = ilxstats;
 	il->inuse = ilinuse;
-	il->gc = ilgc;
+	il->gc = nil;
 	il->ipproto = IP_ILPROTO;
 	il->nc = scalednconv();
 	il->ptclsize = sizeof(Ilcb);
diff -u a/os/ip//ip.c b/os/ip//ip.c
--- a/os/ip//ip.c
+++ b/os/ip//ip.c
@@ -7,94 +7,6 @@
 
 #include	"ip.h"
 
-typedef struct IP		IP;
-typedef struct Fragment4	Fragment4;
-typedef struct Fragment6	Fragment6;
-typedef struct Ipfrag		Ipfrag;
-
-enum
-{
-	IP4HDR		= 20,		/* sizeof(Ip4hdr) */
-	IP6HDR		= 40,		/* sizeof(Ip6hdr) */
-	IP_HLEN4	= 0x05,		/* Header length in words */
-	IP_DF		= 0x4000,	/* Don't fragment */
-	IP_MF		= 0x2000,	/* More fragments */
-	IP6FHDR		= 8, 		/* sizeof(Fraghdr6) */
-	IP_MAX		= 64*1024,	/* Maximum Internet packet size */
-};
-
-#define BLKIPVER(xp)	(((Ip4hdr*)((xp)->rp))->vihl&0xF0)
-
-/* MIB II counters */
-enum
-{
-	Forwarding,
-	DefaultTTL,
-	InReceives,
-	InHdrErrors,
-	InAddrErrors,
-	ForwDatagrams,
-	InUnknownProtos,
-	InDiscards,
-	InDelivers,
-	OutRequests,
-	OutDiscards,
-	OutNoRoutes,
-	ReasmTimeout,
-	ReasmReqds,
-	ReasmOKs,
-	ReasmFails,
-	FragOKs,
-	FragFails,
-	FragCreates,
-
-	Nstats,
-};
-
-struct Fragment4
-{
-	Block*	blist;
-	Fragment4*	next;
-	ulong 	src;
-	ulong 	dst;
-	ushort	id;
-	ulong 	age;
-};
-
-struct Fragment6
-{
-	Block*	blist;
-	Fragment6*	next;
-	uchar 	src[IPaddrlen];
-	uchar 	dst[IPaddrlen];
-	uint	id;
-	ulong 	age;
-};
-
-struct Ipfrag
-{
-	ushort	foff;
-	ushort	flen;
-};
-
-/* an instance of IP */
-struct IP
-{
-	ulong		stats[Nstats];
-
-	QLock		fraglock4;
-	Fragment4*	flisthead4;
-	Fragment4*	fragfree4;
-	Ref		id4;
-
-	QLock		fraglock6;
-	Fragment6*	flisthead6;
-	Fragment6*	fragfree6;
-	Ref		id6;
-
-	int		iprouting;	/* true if we route like a gateway */
-};
-
 static char *statnames[] =
 {
 [Forwarding]	"Forwarding",
@@ -118,45 +30,11 @@
 [FragCreates]	"FragCreates",
 };
 
-#define BLKIP(xp)	((Ip4hdr*)((xp)->rp))
-/*
- * This sleazy macro relies on the media header size being
- * larger than sizeof(Ipfrag). ipreassemble checks this is true
- */
-#define BKFG(xp)	((Ipfrag*)((xp)->base))
+static Block*		ip4reassemble(IP*, int, Block*);
+static void		ipfragfree4(IP*, Fragment4*);
+static Fragment4*	ipfragallo4(IP*);
 
-ushort		ipcsum(uchar*);
-Block*		ip4reassemble(IP*, int, Block*, Ip4hdr*);
-void		ipfragfree4(IP*, Fragment4*);
-Fragment4*	ipfragallo4(IP*);
-
-
-void
-ip_init_6(Fs *f)
-{
-	V6params *v6p;
-
-	v6p = smalloc(sizeof(V6params));
-	
-	v6p->rp.mflag		= 0;		// default not managed
-	v6p->rp.oflag		= 0;
-	v6p->rp.maxraint	= 600000;	// millisecs
-	v6p->rp.minraint	= 200000;
-	v6p->rp.linkmtu		= 0;		// no mtu sent
-	v6p->rp.reachtime	= 0;
-	v6p->rp.rxmitra		= 0;
-	v6p->rp.ttl		= MAXTTL;
-	v6p->rp.routerlt	= 3*(v6p->rp.maxraint);	
-
-	v6p->hp.rxmithost	= 1000;		// v6 RETRANS_TIMER
-
-	v6p->cdrouter 		= -1;
-
-	f->v6p			= v6p;
-
-}
-
-void
+static void
 initfrag(IP *ip, int size)
 {
 	Fragment4 *fq4, *eq4;
@@ -189,6 +67,7 @@
 	IP *ip;
 
 	ip = smalloc(sizeof(IP));
+	ip->stats[DefaultTTL] = MAXTTL;
 	initfrag(ip, 100);
 	f->ip = ip;
 
@@ -202,11 +81,11 @@
 	if(f->ip->iprouting==0)
 		f->ip->stats[Forwarding] = 2;
 	else
-		f->ip->stats[Forwarding] = 1;	
+		f->ip->stats[Forwarding] = 1;
 }
 
 int
-ipoput4(Fs *f, Block *bp, int gating, int ttl, int tos, Conv *c)
+ipoput4(Fs *f, Block *bp, int gating, int ttl, int tos, Routehint *rh)
 {
 	Ipifc *ifc;
 	uchar *gate;
@@ -213,66 +92,41 @@
 	ulong fragoff;
 	Block *xp, *nb;
 	Ip4hdr *eh, *feh;
-	int lid, len, seglen, chunk, dlen, blklen, offset, medialen;
-	Route *r, *sr;
+	int lid, len, seglen, chunk, hlen, dlen, blklen, offset, medialen;
+	Route *r;
 	IP *ip;
 	int rv = 0;
 
 	ip = f->ip;
-
-	/* Fill out the ip header */
-	eh = (Ip4hdr*)(bp->rp);
-
 	ip->stats[OutRequests]++;
 
-	/* Number of uchars in data and ip header to write */
+	/* Fill out the ip header */
+	eh = (Ip4hdr*)bp->rp;
+	assert(BLEN(bp) >= IP4HDR);
 	len = blocklen(bp);
-
-	if(gating){
-		chunk = nhgets(eh->length);
-		if(chunk > len){
-			ip->stats[OutDiscards]++;
-			netlog(f, Logip, "short gated packet\n");
-			goto free;
-		}
-		if(chunk < len)
-			len = chunk;
-	}
 	if(len >= IP_MAX){
 		ip->stats[OutDiscards]++;
-		netlog(f, Logip, "exceeded ip max size %V\n", eh->dst);
+		netlog(f, Logip, "%V -> %V: exceeded ip max size: %d\n", eh->src, eh->dst, len);
 		goto free;
 	}
 
-	r = v4lookup(f, eh->dst, c);
-	if(r == nil){
+	r = v4lookup(f, eh->dst, eh->src, rh);
+	if(r == nil || (ifc = r->ifc) == nil){
 		ip->stats[OutNoRoutes]++;
-		netlog(f, Logip, "no interface %V\n", eh->dst);
+		netlog(f, Logip, "%V -> %V: no interface\n", eh->src, eh->dst);
 		rv = -1;
 		goto free;
 	}
 
-	ifc = r->ifc;
-	if(r->type & (Rifc|Runi))
+	if(r->type & (Rifc|Runi|Rbcast|Rmulti))
 		gate = eh->dst;
 	else
-	if(r->type & (Rbcast|Rmulti)) {
-		gate = eh->dst;
-		sr = v4lookup(f, eh->src, nil);
-		if(sr != nil && (sr->type & Runi))
-			ifc = sr->ifc;
-	}
-	else
 		gate = r->v4.gate;
 
-	if(!gating)
-		eh->vihl = IP_VER4|IP_HLEN4;
-	eh->ttl = ttl;
-	if(!gating)
-		eh->tos = tos;
-
-	if(!canrlock(ifc))
+	if(!canrlock(ifc)){
+		ip->stats[OutDiscards]++;
 		goto free;
+	}
 	if(waserror()){
 		runlock(ifc);
 		nexterror();
@@ -280,17 +134,18 @@
 	if(ifc->m == nil)
 		goto raise;
 
-	/* Output NAT */
-	if(nato(bp, ifc, f) != 0)
-		goto raise;
+	if(!gating){
+		eh->vihl = IP_VER4|IP_HLEN4;
+		eh->tos = tos;
+	}
+	eh->ttl = ttl;
 
 	/* If we dont need to fragment just send it */
 	medialen = ifc->maxtu - ifc->m->hsize;
 	if(len <= medialen) {
-		if(!gating)
-			hnputs(eh->id, incref(&ip->id4));
 		hnputs(eh->length, len);
 		if(!gating){
+			hnputs(eh->id, incref(&ip->id4));
 			eh->frag[0] = 0;
 			eh->frag[1] = 0;
 		}
@@ -297,31 +152,31 @@
 		eh->cksum[0] = 0;
 		eh->cksum[1] = 0;
 		hnputs(eh->cksum, ipcsum(&eh->vihl));
-		ifc->m->bwrite(ifc, bp, V4, gate);
+
+		ipifcoput(ifc, bp, V4, gate);
 		runlock(ifc);
 		poperror();
 		return 0;
 	}
 
-if((eh->frag[0] & (IP_DF>>8)) && !gating) print("%V: DF set\n", eh->dst);
-
 	if(eh->frag[0] & (IP_DF>>8)){
 		ip->stats[FragFails]++;
 		ip->stats[OutDiscards]++;
 		icmpcantfrag(f, bp, medialen);
-		netlog(f, Logip, "%V: eh->frag[0] & (IP_DF>>8)\n", eh->dst);
+		netlog(f, Logip, "%V -> %V: can't fragment with DF flag set\n", eh->src, eh->dst);
 		goto raise;
 	}
 
-	seglen = (medialen - IP4HDR) & ~7;
+	hlen = (eh->vihl & 0xF)<<2;
+	seglen = (medialen - hlen) & ~7;
 	if(seglen < 8){
 		ip->stats[FragFails]++;
 		ip->stats[OutDiscards]++;
-		netlog(f, Logip, "%V seglen < 8\n", eh->dst);
+		netlog(f, Logip, "%V -> %V: can't fragment with seglen < 8\n", eh->src, eh->dst);
 		goto raise;
 	}
 
-	dlen = len - IP4HDR;
+	dlen = len - hlen;
 	xp = bp;
 	if(gating)
 		lid = nhgets(eh->id);
@@ -328,8 +183,8 @@
 	else
 		lid = incref(&ip->id4);
 
-	offset = IP4HDR;
-	while(xp != nil && offset && offset >= BLEN(xp)) {
+	offset = hlen;
+	while(offset && offset >= BLEN(xp)) {
 		offset -= BLEN(xp);
 		xp = xp->next;
 	}
@@ -341,30 +196,30 @@
 		fragoff = 0;
 	dlen += fragoff;
 	for(; fragoff < dlen; fragoff += seglen) {
-		nb = allocb(IP4HDR+seglen);
-		feh = (Ip4hdr*)(nb->rp);
+		nb = allocb(hlen+seglen);
+		feh = (Ip4hdr*)nb->rp;
 
-		memmove(nb->wp, eh, IP4HDR);
-		nb->wp += IP4HDR;
+		memmove(nb->wp, eh, hlen);
+		nb->wp += hlen;
 
 		if((fragoff + seglen) >= dlen) {
 			seglen = dlen - fragoff;
 			hnputs(feh->frag, fragoff>>3);
 		}
-		else	
+		else
 			hnputs(feh->frag, (fragoff>>3)|IP_MF);
 
-		hnputs(feh->length, seglen + IP4HDR);
+		hnputs(feh->length, seglen + hlen);
 		hnputs(feh->id, lid);
 
 		/* Copy up the data area */
 		chunk = seglen;
 		while(chunk) {
-			if(!xp) {
+			if(xp == nil) {
 				ip->stats[OutDiscards]++;
 				ip->stats[FragFails]++;
 				freeblist(nb);
-				netlog(f, Logip, "!xp: chunk %d\n", chunk);
+				netlog(f, Logip, "xp == nil: chunk %d\n", chunk);
 				goto raise;
 			}
 			blklen = chunk;
@@ -376,12 +231,13 @@
 			chunk -= blklen;
 			if(xp->rp == xp->wp)
 				xp = xp->next;
-		} 
+		}
 
 		feh->cksum[0] = 0;
 		feh->cksum[1] = 0;
 		hnputs(feh->cksum, ipcsum(&feh->vihl));
-		ifc->m->bwrite(ifc, nb, V4, gate);
+
+		ipifcoput(ifc, nb, V4, gate);
 		ip->stats[FragCreates]++;
 	}
 	ip->stats[FragOKs]++;
@@ -396,17 +252,14 @@
 void
 ipiput4(Fs *f, Ipifc *ifc, Block *bp)
 {
-	int hl;
-	int hop, tos, proto, olen;
+	int hl, len, hop, tos;
+	uchar v6dst[IPaddrlen];
+	ushort frag;
 	Ip4hdr *h;
 	Proto *p;
-	ushort frag;
-	int notforme;
-	uchar *dp, v6dst[IPaddrlen];
 	IP *ip;
-	Route *r;
 
-	if(BLKIPVER(bp) != IP_VER4) {
+	if((bp->rp[0]&0xF0) != IP_VER4) {
 		ipiput6(f, ifc, bp);
 		return;
 	}
@@ -430,58 +283,45 @@
 			return;
 	}
 
-	h = (Ip4hdr*)(bp->rp);
-
-	/* Input NAT */
-	nati(bp, ifc);
-
-	/* dump anything that whose header doesn't checksum */
+	h = (Ip4hdr*)bp->rp;
+	hl = (h->vihl & 0xF)<<2;
+	if(hl < IP4HDR || hl > BLEN(bp)) {
+		ip->stats[InHdrErrors]++;
+		netlog(f, Logip, "%V -> %V: bad ip header length: %d\n", h->src, h->dst, hl);
+		goto drop;
+	}
 	if((bp->flag & Bipck) == 0 && ipcsum(&h->vihl)) {
 		ip->stats[InHdrErrors]++;
-		netlog(f, Logip, "ip: checksum error %V\n", h->src);
-		freeblist(bp);
+		netlog(f, Logip, "%V -> %V: bad ip header checksum\n", h->src, h->dst);
+		goto drop;
+	}
+	len = nhgets(h->length);
+	if(len < hl || (bp = trimblock(bp, 0, len)) == nil){
+		ip->stats[InHdrErrors]++;
+		netlog(f, Logip, "%V -> %V: bogus packet length: %d\n", h->src, h->dst, len);
+		if(bp != nil)
+			goto drop;
 		return;
 	}
-	v4tov6(v6dst, h->dst);
-	notforme = ipforme(f, v6dst) == 0;
+	h = (Ip4hdr*)bp->rp;
 
-	/* Check header length and version */
-	if((h->vihl&0x0F) != IP_HLEN4) {
-		hl = (h->vihl&0xF)<<2;
-		if(hl < (IP_HLEN4<<2)) {
-			ip->stats[InHdrErrors]++;
-			netlog(f, Logip, "ip: %V bad hivl %ux\n", h->src, h->vihl);
-			freeblist(bp);
-			return;
-		}
-	  /* If this is not routed strip off the options */
-		if(notforme == 0) {
-			olen = nhgets(h->length);
-			dp = bp->rp + (hl - (IP_HLEN4<<2));
-			memmove(dp, h, IP_HLEN4<<2);
-			bp->rp = dp;
-			h = (Ip4hdr*)(bp->rp);
-			h->vihl = (IP_VER4|IP_HLEN4);
-			hnputs(h->length, olen-hl+(IP_HLEN4<<2));
-		}
-	}
-
 	/* route */
-	if(notforme) {
-		Conv conv;
+	v4tov6(v6dst, h->dst);
+	if(!ipforme(f, v6dst)) {
+		Route *r;
+		Routehint rh;
+		Ipifc *nifc;
 
-		if(!ip->iprouting){
-			freeb(bp);
-			return;
-		}
+		if(!ip->iprouting)
+			goto drop;
 
 		/* don't forward to source's network */
-		conv.r = nil;
-		r = v4lookup(f, h->dst, &conv);
-		if(r == nil || r->ifc == ifc){
+		rh.r = nil;
+		r = v4lookup(f, h->dst, h->src, &rh);
+		if(r == nil || (nifc = r->ifc) == nil
+		|| (nifc == ifc && !ifc->reflect)){
 			ip->stats[OutDiscards]++;
-			freeblist(bp);
-			return;
+			goto drop;
 		}
 
 		/* don't forward if packet has timed out */
@@ -488,23 +328,18 @@
 		hop = h->ttl;
 		if(hop < 1) {
 			ip->stats[InHdrErrors]++;
-			icmpttlexceeded(f, ifc->lifc->local, bp);
-			freeblist(bp);
-			return;
+			icmpttlexceeded(f, ifc, bp);
+			goto drop;
 		}
 
 		/* reassemble if the interface expects it */
-if(r->ifc == nil) panic("nil route rfc");
-		if(r->ifc->reassemble){
+		if(nifc->reassemble){
 			frag = nhgets(h->frag);
-			if(frag) {
-				h->tos = 0;
-				if(frag & IP_MF)
-					h->tos = 1;
-				bp = ip4reassemble(ip, frag, bp, h);
+			if(frag & (IP_MF|IP_FO)) {
+				bp = ip4reassemble(ip, frag, bp);
 				if(bp == nil)
 					return;
-				h = (Ip4hdr*)(bp->rp);
+				h = (Ip4hdr*)bp->rp;
 			}
 		}
 
@@ -511,27 +346,30 @@
 		ip->stats[ForwDatagrams]++;
 		tos = h->tos;
 		hop = h->ttl;
-		ipoput4(f, bp, 1, hop - 1, tos, &conv);
+		ipoput4(f, bp, 1, hop - 1, tos, &rh);
 		return;
 	}
 
+	/* If this is not routed strip off the options */
+	if(hl > IP4HDR) {
+		hl -= IP4HDR;
+		len -= hl;
+		bp->rp += hl;
+		memmove(bp->rp, h, IP4HDR);
+		h = (Ip4hdr*)bp->rp;
+		h->vihl = IP_VER4|IP_HLEN4;
+		hnputs(h->length, len);
+	}
+
 	frag = nhgets(h->frag);
-	if(frag) {
-		h->tos = 0;
-		if(frag & IP_MF)
-			h->tos = 1;
-		bp = ip4reassemble(ip, frag, bp, h);
+	if(frag & (IP_MF|IP_FO)) {
+		bp = ip4reassemble(ip, frag, bp);
 		if(bp == nil)
 			return;
-		h = (Ip4hdr*)(bp->rp);
+		h = (Ip4hdr*)bp->rp;
 	}
 
-	/* don't let any frag info go up the stack */
-	h->frag[0] = 0;
-	h->frag[1] = 0;
-
-	proto = h->proto;
-	p = Fsrcvpcol(f, proto);
+	p = Fsrcvpcol(f, h->proto);
 	if(p != nil && p->rcv != nil) {
 		ip->stats[InDelivers]++;
 		(*p->rcv)(p, ifc, bp);
@@ -539,6 +377,7 @@
 	}
 	ip->stats[InDiscards]++;
 	ip->stats[InUnknownProtos]++;
+drop:
 	freeblist(bp);
 }
 
@@ -550,45 +389,43 @@
 	int i;
 
 	ip = f->ip;
-	ip->stats[DefaultTTL] = MAXTTL;
-
 	p = buf;
 	e = p+len;
-	for(i = 0; i < Nstats; i++)
-		p = seprint(p, e, "%s: %lud\n", statnames[i], ip->stats[i]);
+	for(i = 0; i < Nipstats; i++)
+		p = seprint(p, e, "%s: %llud\n", statnames[i], ip->stats[i]);
 	return p - buf;
 }
 
-Block*
-ip4reassemble(IP *ip, int offset, Block *bp, Ip4hdr *ih)
+static Block*
+ip4reassemble(IP *ip, int offset, Block *bp)
 {
-	int fend;
+	int ovlap, fragsize, len;
+	ulong src, dst;
 	ushort id;
+	Block *bl, **l, *prev;
 	Fragment4 *f, *fnext;
-	ulong src, dst;
-	Block *bl, **l, *last, *prev;
-	int ovlap, len, fragsize, pktposn;
+	Ipfrag *fp, *fq;
+	Ip4hdr *ih;
 
+	/*
+	 *  block lists are too hard, concatblock into a single block
+	 */
+	bp = concatblock(bp);
+
+	ih = (Ip4hdr*)bp->rp;
 	src = nhgetl(ih->src);
 	dst = nhgetl(ih->dst);
 	id = nhgets(ih->id);
+	fragsize = BLEN(bp) - ((ih->vihl&0xF)<<2);
 
-	/*
-	 *  block lists are too hard, pullupblock into a single block
-	 */
-	if(bp->next){
-		bp = pullupblock(bp, blocklen(bp));
-		ih = (Ip4hdr*)(bp->rp);
-	}
-
 	qlock(&ip->fraglock4);
 
 	/*
 	 *  find a reassembly queue for this fragment
 	 */
-	for(f = ip->flisthead4; f; f = fnext){
+	for(f = ip->flisthead4; f != nil; f = fnext){
 		fnext = f->next;	/* because ipfragfree4 changes the list */
-		if(f->src == src && f->dst == dst && f->id == id)
+		if(f->id == id && f->src == src && f->dst == dst)
 			break;
 		if(f->age < NOW){
 			ip->stats[ReasmTimeout]++;
@@ -601,22 +438,23 @@
 	 *  and get rid of any fragments that might go
 	 *  with it.
 	 */
-	if(!ih->tos && (offset & ~(IP_MF|IP_DF)) == 0) {
+	if((offset & (IP_MF|IP_FO)) == 0) {
 		if(f != nil) {
-			ipfragfree4(ip, f);
 			ip->stats[ReasmFails]++;
+			ipfragfree4(ip, f);
 		}
 		qunlock(&ip->fraglock4);
 		return bp;
 	}
 
-	if(bp->base+sizeof(Ipfrag) >= bp->rp){
-		bp = padblock(bp, sizeof(Ipfrag));
-		bp->rp += sizeof(Ipfrag);
+	if(bp->base+IPFRAGSZ > bp->rp){
+		bp = padblock(bp, IPFRAGSZ);
+		bp->rp += IPFRAGSZ;
 	}
 
-	BKFG(bp)->foff = offset<<3;
-	BKFG(bp)->flen = nhgets(ih->length)-IP4HDR;
+	fp = (Ipfrag*)bp->base;
+	fp->foff = (offset & IP_FO)<<3;
+	fp->flen = fragsize;
 
 	/* First fragment allocates a reassembly queue */
 	if(f == nil) {
@@ -627,8 +465,9 @@
 
 		f->blist = bp;
 
-		qunlock(&ip->fraglock4);
 		ip->stats[ReasmReqds]++;
+		qunlock(&ip->fraglock4);
+
 		return nil;
 	}
 
@@ -638,7 +477,7 @@
 	prev = nil;
 	l = &f->blist;
 	bl = f->blist;
-	while(bl != nil && BKFG(bp)->foff > BKFG(bl)->foff) {
+	while(bl != nil && fp->foff > ((Ipfrag*)bl->base)->foff) {
 		prev = bl;
 		l = &bl->next;
 		bl = bl->next;
@@ -645,15 +484,16 @@
 	}
 
 	/* Check overlap of a previous fragment - trim away as necessary */
-	if(prev) {
-		ovlap = BKFG(prev)->foff + BKFG(prev)->flen - BKFG(bp)->foff;
+	if(prev != nil) {
+		fq = (Ipfrag*)prev->base;
+		ovlap = fq->foff + fq->flen - fp->foff;
 		if(ovlap > 0) {
-			if(ovlap >= BKFG(bp)->flen) {
-				freeblist(bp);
+			if(ovlap >= fp->flen) {
 				qunlock(&ip->fraglock4);
+				freeb(bp);
 				return nil;
 			}
-			BKFG(prev)->flen -= ovlap;
+			fq->flen -= ovlap;
 		}
 	}
 
@@ -662,26 +502,26 @@
 	*l = bp;
 
 	/* Check to see if succeeding segments overlap */
-	if(bp->next) {
+	if(bp->next != nil) {
 		l = &bp->next;
-		fend = BKFG(bp)->foff + BKFG(bp)->flen;
+		offset = fp->foff + fp->flen;
 		/* Take completely covered segments out */
-		while(*l) {
-			ovlap = fend - BKFG(*l)->foff;
+		while((bl = *l) != nil) {
+			fq = (Ipfrag*)bl->base;
+			ovlap = offset - fq->foff;
 			if(ovlap <= 0)
 				break;
-			if(ovlap < BKFG(*l)->flen) {
-				BKFG(*l)->flen -= ovlap;
-				BKFG(*l)->foff += ovlap;
-				/* move up ih hdrs */
-				memmove((*l)->rp + ovlap, (*l)->rp, IP4HDR);
-				(*l)->rp += ovlap;
+			if(ovlap < fq->flen) {
+				/* move up ip header */
+				memmove(bl->rp + ovlap, bl->rp, BLEN(bl) - fq->flen);
+				bl->rp += ovlap;
+				fq->flen -= ovlap;
+				fq->foff += ovlap;
 				break;
 			}
-			last = (*l)->next;
-			(*l)->next = nil;
-			freeblist(*l);
-			*l = last;
+			*l = bl->next;
+			bl->next = nil;
+			freeb(bl);
 		}
 	}
 
@@ -689,35 +529,50 @@
 	 *  look for a complete packet.  if we get to a fragment
 	 *  without IP_MF set, we're done.
 	 */
-	pktposn = 0;
-	for(bl = f->blist; bl; bl = bl->next) {
-		if(BKFG(bl)->foff != pktposn)
+	offset = 0;
+	for(bl = f->blist; bl != nil; bl = bl->next, offset += fp->flen) {
+		fp = (Ipfrag*)bl->base;
+		if(fp->foff != offset)
 			break;
-		if((BLKIP(bl)->frag[0]&(IP_MF>>8)) == 0) {
-			bl = f->blist;
-			len = nhgets(BLKIP(bl)->length);
-			bl->wp = bl->rp + len;
 
-			/* Pullup all the fragment headers and
-			 * return a complete packet
-			 */
-			for(bl = bl->next; bl; bl = bl->next) {
-				fragsize = BKFG(bl)->flen;
-				len += fragsize;
-				bl->rp += IP4HDR;
-				bl->wp = bl->rp + fragsize;
-			}
+		ih = (Ip4hdr*)bl->rp;
+		if(ih->frag[0]&(IP_MF>>8))
+			continue;
 
-			bl = f->blist;
-			f->blist = nil;
+		bl = f->blist;
+		len = BLEN(bl);
+
+		/*
+		 * Pullup all the fragment headers and
+		 * return a complete packet
+		 */
+		for(bl = bl->next; bl != nil && len < IP_MAX; bl = bl->next) {
+			fq = (Ipfrag*)bl->base;
+			fragsize = fq->flen;
+			bl->rp = bl->wp - fragsize;
+			len += fragsize;
+		}
+
+		if(len >= IP_MAX){
 			ipfragfree4(ip, f);
-			ih = BLKIP(bl);
-			hnputs(ih->length, len);
+			ip->stats[ReasmFails]++;
 			qunlock(&ip->fraglock4);
-			ip->stats[ReasmOKs]++;
-			return bl;		
+			return nil;
 		}
-		pktposn += BKFG(bl)->flen;
+
+		bl = f->blist;
+		f->blist = nil;
+		ipfragfree4(ip, f);
+
+		ih = (Ip4hdr*)bl->rp;
+		ih->frag[0] = 0;
+		ih->frag[1] = 0;
+		hnputs(ih->length, len);
+
+		ip->stats[ReasmOKs]++;
+		qunlock(&ip->fraglock4);
+
+		return bl;
 	}
 	qunlock(&ip->fraglock4);
 	return nil;
@@ -726,20 +581,20 @@
 /*
  * ipfragfree4 - Free a list of fragments - assume hold fraglock4
  */
-void
+static void
 ipfragfree4(IP *ip, Fragment4 *frag)
 {
 	Fragment4 *fl, **l;
 
-	if(frag->blist)
+	if(frag->blist != nil)
 		freeblist(frag->blist);
-
-	frag->src = 0;
-	frag->id = 0;
 	frag->blist = nil;
+	frag->id = 0;
+	frag->src = 0;
+	frag->dst = 0;
 
 	l = &ip->flisthead4;
-	for(fl = *l; fl; fl = fl->next) {
+	for(fl = *l; fl != nil; fl = fl->next) {
 		if(fl == frag) {
 			*l = frag->next;
 			break;
@@ -755,7 +610,7 @@
 /*
  * ipfragallo4 - allocate a reassembly queue - assume hold fraglock4
  */
-Fragment4 *
+static Fragment4*
 ipfragallo4(IP *ip)
 {
 	Fragment4 *f;
@@ -762,7 +617,7 @@
 
 	while(ip->fragfree4 == nil) {
 		/* free last entry on fraglist */
-		for(f = ip->flisthead4; f->next; f = f->next)
+		for(f = ip->flisthead4; f->next != nil; f = f->next)
 			;
 		ipfragfree4(ip, f);
 	}
diff -u a/os/ip//ip.h b/os/ip//ip.h
--- a/os/ip//ip.h
+++ b/os/ip//ip.h
@@ -1,35 +1,33 @@
 typedef struct	Conv	Conv;
+typedef struct	Fragment4 Fragment4;
+typedef struct	Fragment6 Fragment6;
 typedef struct	Fs	Fs;
 typedef union	Hwaddr	Hwaddr;
 typedef struct	IP	IP;
 typedef struct	IPaux	IPaux;
+typedef struct	Ip4hdr	Ip4hdr;
+typedef struct	Ipfrag	Ipfrag;
 typedef struct	Ipself	Ipself;
 typedef struct	Ipselftab	Ipselftab;
 typedef struct	Iplink	Iplink;
 typedef struct	Iplifc	Iplifc;
 typedef struct	Ipmulti	Ipmulti;
-typedef struct	IProuter IProuter;
 typedef struct	Ipifc	Ipifc;
 typedef struct	Iphash	Iphash;
 typedef struct	Ipht	Ipht;
 typedef struct	Netlog	Netlog;
-typedef struct	Ifclog	Ifclog;
 typedef struct	Medium	Medium;
 typedef struct	Proto	Proto;
 typedef struct	Arpent	Arpent;
 typedef struct	Arp Arp;
 typedef struct	Route	Route;
+typedef struct	Routehint Routehint;
 
 typedef struct	Routerparams	Routerparams;
 typedef struct 	Hostparams	Hostparams;
-typedef struct 	V6router	V6router;
-typedef struct	V6params	V6params;
+typedef struct	v6params	v6params;
 
-typedef struct Ip4hdr     Ip4hdr;
-typedef struct Nat	Nat;
-
 #pragma incomplete Arp
-#pragma	incomplete Ifclog
 #pragma incomplete Ipself
 #pragma incomplete Ipselftab
 #pragma incomplete IP
@@ -39,10 +37,9 @@
 {
 	Addrlen=	64,
 	Maxproto=	20,
-	Nhash=		64,
-	Maxincall=	5,
-	Nchans=		16383,
-	MAClen=		16,		/* longest mac address */
+	Maxincall=	10,
+	Nchans=		1024,
+	MAClen=		8,		/* longest mac address */
 
 	MAXTTL=		255,
 	DFLTTOS=	0,
@@ -57,6 +54,12 @@
 	V6=		6,
 	IP_VER4= 	0x40,
 	IP_VER6=	0x60,
+	IP_HLEN4=	5,		/* v4: Header length in words */
+	IP_DF=		0x4000,		/* v4: Don't fragment */
+	IP_MF=		0x2000,		/* v4: More fragments */
+	IP_FO=		0x1fff,		/* v4: Fragment offset */
+	IP4HDR=		IP_HLEN4<<2,	/* sizeof(Ip4hdr) */
+	IP_MAX=		64*1024,	/* Max. Internet packet size, v4 & v6 */
 
 	/* 2^Lroot trees in the root table */
 	Lroot=		10,
@@ -73,6 +76,79 @@
 	Connected=	4,
 };
 
+/* MIB II counters */
+enum
+{
+	Forwarding,
+	DefaultTTL,
+	InReceives,
+	InHdrErrors,
+	InAddrErrors,
+	ForwDatagrams,
+	InUnknownProtos,
+	InDiscards,
+	InDelivers,
+	OutRequests,
+	OutDiscards,
+	OutNoRoutes,
+	ReasmTimeout,
+	ReasmReqds,
+	ReasmOKs,
+	ReasmFails,
+	FragOKs,
+	FragFails,
+	FragCreates,
+
+	Nipstats,
+};
+
+struct Fragment4
+{
+	Block*	blist;
+	Fragment4*	next;
+	ulong 	src;
+	ulong 	dst;
+	ushort	id;
+	ulong 	age;
+};
+
+struct Fragment6
+{
+	Block*	blist;
+	Fragment6*	next;
+	uchar 	src[IPaddrlen];
+	uchar 	dst[IPaddrlen];
+	uint	id;
+	ulong 	age;
+};
+
+struct Ipfrag
+{
+	ushort	foff;
+	ushort	flen;
+	uchar	payload[];
+};
+
+#define IPFRAGSZ offsetof(Ipfrag, payload[0])
+
+/* an instance of IP */
+struct IP
+{
+	uvlong		stats[Nipstats];
+
+	QLock		fraglock4;
+	Fragment4*	flisthead4;
+	Fragment4*	fragfree4;
+	Ref		id4;
+
+	QLock		fraglock6;
+	Fragment6*	flisthead6;
+	Fragment6*	fragfree6;
+	Ref		id6;
+
+	int		iprouting;	/* true if we route like a gateway */
+};
+
 /* on the wire packet header */
 struct Ip4hdr
 {
@@ -86,9 +162,14 @@
 	uchar	cksum[2];	/* Header checksum */
 	uchar	src[4];		/* IP source */
 	uchar	dst[4];		/* IP destination */
-	uchar	data[1];	/* start of data */
 };
 
+struct Routehint
+{
+	Route	*r;			/* last route used */
+	ulong	rgen;			/* routetable generation for *r */
+};
+
 /*
  *  one per conversation directory
  */
@@ -100,9 +181,9 @@
 	Proto*	p;
 
 	int	restricted;		/* remote port is restricted */
+	int	ignoreadvice;		/* don't terminate connection on icmp errors */
 	uint	ttl;			/* max time to live */
 	uint	tos;			/* type of service */
-	int	ignoreadvice;		/* don't terminate connection on icmp errors */
 
 	uchar	ipversion;
 	uchar	laddr[IPaddrlen];	/* local IP address */
@@ -139,8 +220,7 @@
 
 	void*	ptcl;			/* protocol specific stuff */
 
-	Route	*r;			/* last route used */
-	ulong	rgen;			/* routetable generation for *r */
+	Routehint;
 };
 
 struct Medium
@@ -161,18 +241,8 @@
 	/* process packets written to 'data' */
 	void	(*pktin)(Fs *f, Ipifc *ifc, Block *bp);
 
-	/* routes for router boards */
-	void	(*addroute)(Ipifc *ifc, int, uchar*, uchar*, uchar*, int);
-	void	(*remroute)(Ipifc *ifc, int, uchar*, uchar*);
-	void	(*flushroutes)(Ipifc *ifc);
-
-	/* for routing multicast groups */
-	void	(*joinmulti)(Ipifc *ifc, uchar *a, uchar *ia);
-	void	(*leavemulti)(Ipifc *ifc, uchar *a, uchar *ia);
-
 	/* address resolution */
-	void	(*ares)(Fs*, int, uchar*, uchar*, int, int);	/* resolve */
-	void	(*areg)(Ipifc*, uchar*);			/* register */
+	void	(*areg)(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *ip);
 
 	/* v6 address generation */
 	void	(*pref2addr)(uchar *pref, uchar *ea);
@@ -187,12 +257,13 @@
 	uchar	mask[IPaddrlen];
 	uchar	remote[IPaddrlen];
 	uchar	net[IPaddrlen];
+	uchar	type;		/* route type */
 	uchar	tentative;	/* =1 => v6 dup disc on, =0 => confirmed unique */
 	uchar	onlink;		/* =1 => onlink, =0 offlink. */
 	uchar	autoflag;	/* v6 autonomous flag */
-	long 	validlt;	/* v6 valid lifetime */
-	long 	preflt;		/* v6 preferred lifetime */
-	long	origint;	/* time when addr was added */
+	ulong 	validlt;	/* v6 valid lifetime */
+	ulong 	preflt;		/* v6 preferred lifetime */
+	ulong	origint;	/* time when addr was added */
 	Iplink	*link;		/* addresses linked to this lifc */
 	Iplifc	*next;
 };
@@ -203,25 +274,25 @@
 	Ipself	*self;
 	Iplifc	*lifc;
 	Iplink	*selflink;	/* next link for this local address */
-	Iplink	*lifclink;	/* next link for this ifc */
-	ulong	expire;
+	Iplink	*lifclink;	/* next link for this lifc */
 	Iplink	*next;		/* free list */
+	ulong	expire;
 	int	ref;
 };
 
-/* rfc 2461, pp.40--43. */
+/* rfc 2461, pp.40—43. */
 
 /* default values, one per stack */
 struct Routerparams {
-	int	mflag;
-	int	oflag;
-	int 	maxraint;
-	int	minraint;
-	int	linkmtu;
-	int	reachtime;
-	int	rxmitra;
-	int	ttl;
-	int	routerlt;	
+	int	mflag;		/* flag: managed address configuration */
+	int	oflag;		/* flag: other stateful configuration */
+	int 	maxraint;	/* max. router adv interval (ms) */
+	int	minraint;	/* min. router adv interval (ms) */
+	int	linkmtu;	/* mtu options */
+	int	reachtime;	/* reachable time */
+	int	rxmitra;	/* retransmit interval */
+	int	ttl;		/* cur hop count limit */
+	int	routerlt;	/* router lifetime */
 };
 
 struct Hostparams {
@@ -231,22 +302,18 @@
 struct Ipifc
 {
 	RWlock;
-	
+
 	Conv	*conv;		/* link to its conversation structure */
 	char	dev[64];	/* device we're attached to */
 	Medium	*m;		/* Media pointer */
 	int	maxtu;		/* Maximum transfer unit */
 	int	mintu;		/* Minumum tranfer unit */
-	int	mbps;		/* megabits per second */
 	void	*arg;		/* medium specific */
-	int	reassemble;	/* reassemble IP packets before forwarding */
 
-	/* these are used so that we can unbind on the fly */
-	Lock	idlock;
+	uchar	reflect;	/* allow forwarded packets to go out the same interface */
+	uchar	reassemble;	/* reassemble IP packets before forwarding to this interface */
+	
 	uchar	ifcid;		/* incremented each 'bind/unbind/add/remove' */
-	int	ref;		/* number of proc's using this ipifc */
-	Rendez	wait;		/* where unbinder waits for ref == 0 */
-	int	unbinding;
 
 	uchar	mac[MAClen];	/* MAC address */
 
@@ -255,10 +322,16 @@
 	ulong	in, out;	/* message statistics */
 	ulong	inerr, outerr;	/* ... */
 
-	uchar	sendra6;	/* == 1 => send router advs on this ifc	*/
-	uchar	recvra6;	/* == 1 => recv router advs on this ifc */
-	Routerparams rp;	/* router parameters as in RFC 2461, pp.40--43. 
+	uchar	sendra6;	/* flag: send router advs on this ifc */
+	uchar	recvra6;	/* flag: recv router advs on this ifc */
+	Routerparams rp;	/* router parameters as in RFC 2461, pp.40—43.
 					used only if node is router */
+
+	int	speed;		/* link speed in bits per second */
+	int	delay;		/* burst delay in ms */
+	int	burst;		/* burst delay in bytes */
+	int	load;		/* bytes in flight */
+	ulong	ticks;
 };
 
 /*
@@ -330,20 +403,11 @@
 	int		nc;		/* number of conversations */
 	int		ac;
 	Qid		qid;		/* qid for protocol directory */
-	ushort		nextport;
 	ushort		nextrport;
 
 	void		*priv;
 };
 
-/*
- *  Stream for sending packets to user level
- */
-struct IProuter {
-	QLock;
-	int	opens;
-	Queue	*q;
-};
 
 /*
  *  one per IP protocol stack
@@ -362,8 +426,7 @@
 	IP	*ip;
 	Ipselftab	*self;
 	Arp	*arp;
-	V6params	*v6p;
-	IProuter iprouter;
+	v6params	*v6p;
 
 	Route	*v4root[1<<Lroot];	/* v4 routing forest */
 	Route	*v6root[1<<Lroot];	/* v6 routing forest */
@@ -370,7 +433,6 @@
 	Route	*queue;			/* used as temp when reinjecting routes */
 
 	Netlog	*alog;
-	Ifclog	*ilog;
 
 	char	ndb[1024];		/* an ndb entry for this interface */
 	int	ndbvers;
@@ -377,23 +439,10 @@
 	long	ndbmtime;
 };
 
-/* one per default router known to host */
-struct V6router {
-	uchar	inuse;
-	Ipifc	*ifc;
-	int	ifcid;
-	uchar	routeraddr[IPaddrlen];
-	long	ltorigin;
-	Routerparams	rp;
-};
-
-struct V6params
+struct v6params
 {
 	Routerparams	rp;		/* v6 params, one copy per node now */
 	Hostparams	hp;
-	V6router	v6rlist[3];	/* max 3 default routers, currently */
-	int		cdrouter;	/* uses only v6rlist[cdrouter] if   */ 
-					/* cdrouter >= 0. */
 };
 
 
@@ -410,8 +459,7 @@
 char*	Fsstdbind(Conv*, char**, int);
 ulong	scalednconv(void);
 void	closeconv(Conv*);
-
-/* 
+/*
  *  logging
  */
 enum
@@ -434,7 +482,6 @@
 	Logrudpmsg=	1<<16,
 	Logesp=		1<<17,
 	Logtcpwin=	1<<18,
-	Lognat=		1<<19,
 };
 
 void	netloginit(Fs*);
@@ -449,17 +496,17 @@
 void	ifclogopen(Fs*, Chan*);
 void	ifclogclose(Fs*, Chan*);
 
+#pragma varargck argpos netlog	3
+
 /*
  *  iproute.c
  */
 typedef	struct RouteTree RouteTree;
-typedef struct Routewalk Routewalk;
 typedef struct V4route V4route;
 typedef struct V6route V6route;
 
 enum
 {
-
 	/* type bits */
 	Rv4=		(1<<0),		/* this is a version 4 route */
 	Rifc=		(1<<1),		/* this route is a directly connected interface */
@@ -468,27 +515,18 @@
 	Rbcast=		(1<<4),		/* a broadcast self address */
 	Rmulti=		(1<<5),		/* a multicast self address */
 	Rproxy=		(1<<6),		/* this route should be proxied */
+	Rsrc=		(1<<7),		/* source specific route */
 };
 
-struct Routewalk
-{
-	int	o;
-	int	h;
-	char*	p;
-	char*	e;
-	void*	state;
-	void	(*walk)(Route*, Routewalk*);
-};
-
 struct	RouteTree
 {
-	Route*	right;
-	Route*	left;
-	Route*	mid;
+	Route	*mid;
+	Route	*left;
+	Route	*right;
+	Ipifc	*ifc;
+	uchar	ifcid;		/* must match ifc->ifcid */
 	uchar	depth;
 	uchar	type;
-	uchar	ifcid;		/* must match ifc->id */
-	Ipifc	*ifc;
 	char	tag[4];
 	int	ref;
 };
@@ -497,6 +535,10 @@
 {
 	ulong	address;
 	ulong	endaddress;
+
+	ulong	source;
+	ulong	endsource;
+
 	uchar	gate[IPv4addrlen];
 };
 
@@ -504,6 +546,10 @@
 {
 	ulong	address[IPllen];
 	ulong	endaddress[IPllen];
+
+	ulong	source[IPllen];
+	ulong	endsource[IPllen];
+
 	uchar	gate[IPaddrlen];
 };
 
@@ -516,17 +562,16 @@
 		V4route v4;
 	};
 };
-extern void	v4addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type);
-extern void	v6addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type);
-extern void	v4delroute(Fs *f, uchar *a, uchar *mask, int dolock);
-extern void	v6delroute(Fs *f, uchar *a, uchar *mask, int dolock);
-extern Route*	v4lookup(Fs *f, uchar *a, Conv *c);
-extern Route*	v6lookup(Fs *f, uchar *a, Conv *c);
+
+extern void	addroute(Fs *f, uchar *a, uchar *mask, uchar *s, uchar *smask, uchar *gate, int type, Ipifc *ifc, char *tag);
+extern void	remroute(Fs *f, uchar *a, uchar *mask, uchar *s, uchar *smask, uchar *gate, int type, Ipifc *ifc, char *tag);
+extern Route*	v4lookup(Fs *f, uchar *a, uchar *s, Routehint *h);
+extern Route*	v6lookup(Fs *f, uchar *a, uchar *s, Routehint *h);
+extern Route*	v4source(Fs *f, uchar *a, uchar *s);
+extern Route*	v6source(Fs *f, uchar *a, uchar *s);
 extern long	routeread(Fs *f, char*, ulong, int);
 extern long	routewrite(Fs *f, Chan*, char*, int);
-extern void	routetype(int, char*);
-extern void	ipwalkroutes(Fs*, Routewalk*);
-extern void	convroute(Route*, uchar*, uchar*, uchar*, char*, int*);
+extern void	routetype(int type, char p[8]);
 
 /*
  *  devip.c
@@ -543,7 +588,6 @@
 };
 
 extern IPaux*	newipaux(char*, char*);
-extern void	setlport(Conv*);
 
 /*
  *  arp.c
@@ -552,18 +596,16 @@
 {
 	uchar	ip[IPaddrlen];
 	uchar	mac[MAClen];
-	Medium	*type;			/* media type */
-	Arpent*	hash;
-	Block*	hold;
-	Block*	last;
-	uint	ctime;			/* time entry was created or refreshed */
-	uint	utime;			/* time entry was last used */
-	uchar	state;
+	Arpent	*hash;
 	Arpent	*nextrxt;		/* re-transmit chain */
-	uint	rtime;			/* time for next retransmission */
-	uchar	rxtsrem;
+	Block	*hold;
+	Block	*last;
 	Ipifc	*ifc;
 	uchar	ifcid;			/* must match ifc->id */
+	uchar	state;
+	uchar	rxtsrem;		/* re-transmissions remaining */
+	ulong	ctime;			/* time entry was created or refreshed */
+	ulong	utime;			/* time entry was last used */
 };
 
 extern void	arpinit(Fs*);
@@ -572,15 +614,17 @@
 extern Arpent*	arpget(Arp*, Block *bp, int version, Ipifc *ifc, uchar *ip, uchar *h);
 extern void	arprelease(Arp*, Arpent *a);
 extern Block*	arpresolve(Arp*, Arpent *a, Medium *type, uchar *mac);
-extern void	arpenter(Fs*, int version, uchar *ip, uchar *mac, int len, int norefresh);
+extern int	arpenter(Fs*, int version, uchar *ip, uchar *mac, int n, uchar *ia, Ipifc *ifc, int refresh);
+extern void	ndpsendsol(Fs*, Ipifc*, Arpent*);
 
 /*
  * ipaux.c
  */
 
-extern int	myetheraddr(uchar*, char*);
-extern ulong	parseip(uchar*, char*);
-extern ulong	parseipmask(uchar*, char*);
+extern int	parseether(uchar*, char*);
+extern vlong	parseip(uchar*, char*);
+extern vlong	parseipmask(uchar*, char*, int);
+extern vlong	parseipandmask(uchar*, uchar*, char*, char*);
 extern char*	v4parseip(uchar*, char*);
 extern void	maskip(uchar *from, uchar *mask, uchar *to);
 extern int	parsemac(uchar *to, char *from, int len);
@@ -589,12 +633,10 @@
 extern void	v4tov6(uchar *v6, uchar *v4);
 extern int	v6tov4(uchar *v4, uchar *v6);
 extern int	eipfmt(Fmt*);
+extern int	convipvers(Conv *c);
 
 #define	ipmove(x, y) memmove(x, y, IPaddrlen)
 #define	ipcmp(x, y) ( (x)[IPaddrlen-1] != (y)[IPaddrlen-1] || memcmp(x, y, IPaddrlen) )
- 
-#define	ip4move(x, y) memmove(x, y, IPv4addrlen)
-#define	ip4cmp(x, y) ( (x)[IPv4addrlen-1] != (y)[IPv4addrlen-1] || memcmp(x, y, IPv4addrlen) )
 
 extern uchar IPv4bcast[IPaddrlen];
 extern uchar IPv4bcastobs[IPaddrlen];
@@ -612,7 +654,6 @@
 extern Medium	ethermedium;
 extern Medium	nullmedium;
 extern Medium	pktmedium;
-extern Medium	tripmedium;
 
 /*
  *  ipifc.c
@@ -619,33 +660,24 @@
  */
 extern Medium*	ipfindmedium(char *name);
 extern void	addipmedium(Medium *med);
+extern void	ipifcoput(Ipifc *ifc, Block *bp, int version, uchar *ip);
 extern int	ipforme(Fs*, uchar *addr);
-extern int	iptentative(Fs*, uchar *addr);
-extern int	ipisbm(uchar *);
-extern int	ipismulticast(uchar *);
-extern Ipifc*	findipifc(Fs*, uchar *remote, int type);
-extern void	findprimaryip(Fs*, uchar*);
+extern int	ipismulticast(uchar *ip);
+extern Ipifc*	findipifc(Fs*, uchar *local, uchar *remote, int type);
+extern Ipifc*	findipifcstr(Fs *f, char *s);
 extern void	findlocalip(Fs*, uchar *local, uchar *remote);
-extern int	ipv4local(Ipifc *ifc, uchar *addr);
-extern int	ipv6local(Ipifc *ifc, uchar *addr);
-extern int	ipv6anylocal(Ipifc *ifc, uchar *addr);
+extern int	ipv4local(Ipifc *ifc, uchar *local, int prefixlen, uchar *remote);
+extern int	ipv6local(Ipifc *ifc, uchar *local, int prefixlen, uchar *remote);
 extern Iplifc*	iplocalonifc(Ipifc *ifc, uchar *ip);
+extern Iplifc*	ipremoteonifc(Ipifc *ifc, uchar *ip);
 extern int	ipproxyifc(Fs *f, Ipifc *ifc, uchar *ip);
-extern int	ipismulticast(uchar *ip);
-extern int	ipisbooting(void);
-extern int	ipifccheckin(Ipifc *ifc, Medium *med);
-extern void	ipifccheckout(Ipifc *ifc);
-extern int	ipifcgrab(Ipifc *ifc);
-extern void	ipifcaddroute(Fs*, int, uchar*, uchar*, uchar*, int);
-extern void	ipifcremroute(Fs*, int, uchar*, uchar*);
 extern void	ipifcremmulti(Conv *c, uchar *ma, uchar *ia);
 extern void	ipifcaddmulti(Conv *c, uchar *ma, uchar *ia);
 extern char*	ipifcrem(Ipifc *ifc, char **argv, int argc);
 extern char*	ipifcadd(Ipifc *ifc, char **argv, int argc, int tentative, Iplifc *lifcp);
 extern long	ipselftabread(Fs*, char *a, ulong offset, int n);
-extern char*	ipifcaddpref6(Ipifc *ifc, char**argv, int argc);
-extern void	ipsendra6(Fs *f, int on);
-
+extern char*	ipifcadd6(Ipifc *ifc, char**argv, int argc);
+extern char*	ipifcremove6(Ipifc *ifc, char**argv, int argc);
 /*
  *  ip.c
  */
@@ -652,37 +684,26 @@
 extern void	iprouting(Fs*, int);
 extern void	icmpnoconv(Fs*, Block*);
 extern void	icmpcantfrag(Fs*, Block*, int);
-extern void	icmpttlexceeded(Fs*, uchar*, Block*);
+extern void	icmpttlexceeded(Fs*, Ipifc*, Block*);
 extern ushort	ipcsum(uchar*);
 extern void	ipiput4(Fs*, Ipifc*, Block*);
 extern void	ipiput6(Fs*, Ipifc*, Block*);
-extern int	ipoput4(Fs*, Block*, int, int, int, Conv*);
-extern int	ipoput6(Fs*, Block*, int, int, int, Conv*);
+extern int	ipoput4(Fs*, Block*, int, int, int, Routehint*);
+extern int	ipoput6(Fs*, Block*, int, int, int, Routehint*);
 extern int	ipstats(Fs*, char*, int);
 extern ushort	ptclbsum(uchar*, int);
 extern ushort	ptclcsum(Block*, int, int);
 extern void	ip_init(Fs*);
-extern void	update_mtucache(uchar*, ulong);
-extern ulong	restrict_mtu(uchar*, ulong);
+extern void	ip_init_6(Fs*);
 
 /*
  * bootp.c
  */
-char*	(*bootp)(Ipifc*);
-int	(*bootpread)(char*, ulong, int);
+extern int	bootpread(char*, ulong, int);
 
 /*
- *  iprouter.c
- */
-void	useriprouter(Fs*, Ipifc*, Block*);
-void	iprouteropen(Fs*);
-void	iprouterclose(Fs*);
-long	iprouterread(Fs*, void*, int);
-
-/*
  *  resolving inferno/plan9 differences
  */
-Chan*		commonfdtochan(int, int, int, int);
 char*		commonuser(void);
 char*		commonerror(void);
 
@@ -695,15 +716,3 @@
  *  global to all of the stack
  */
 extern void	(*igmpreportfn)(Ipifc*, uchar*);
-
-/*
- * nat.c
- */
-extern int	nato(Block*, Ipifc*, Fs*);
-extern void	nati(Block*, Ipifc*);
-extern int	natgc(uchar);
-
-extern int	addnataddr(uchar*, uchar*, Iplifc*);
-extern int	removenataddr(uchar*, uchar*, Iplifc*);
-extern void	shownataddr(void);
-extern void flushnataddr(void);
diff -u a/os/ip//ipaux.c b/os/ip//ipaux.c
--- a/os/ip//ipaux.c
+++ b/os/ip//ipaux.c
@@ -5,49 +5,8 @@
 #include	"fns.h"
 #include	"../port/error.h"
 #include	"ip.h"
-#include  "ipv6.h"
+#include	"ipv6.h"
 
-/*
- *  well known IP addresses
- */
-uchar IPv4bcast[IPaddrlen] = {
-	0, 0, 0, 0,
-	0, 0, 0, 0,
-	0, 0, 0xff, 0xff,
-	0xff, 0xff, 0xff, 0xff
-};
-uchar IPv4allsys[IPaddrlen] = {
-	0, 0, 0, 0,
-	0, 0, 0, 0,
-	0, 0, 0xff, 0xff,
-	0xe0, 0, 0, 0x01
-};
-uchar IPv4allrouter[IPaddrlen] = {
-	0, 0, 0, 0,
-	0, 0, 0, 0,
-	0, 0, 0xff, 0xff,
-	0xe0, 0, 0, 0x02
-};
-uchar IPallbits[IPaddrlen] = {
-	0xff, 0xff, 0xff, 0xff,
-	0xff, 0xff, 0xff, 0xff,
-	0xff, 0xff, 0xff, 0xff,
-	0xff, 0xff, 0xff, 0xff
-};
-
-uchar IPnoaddr[IPaddrlen];
-
-/*
- *  prefix of all v4 addresses
- */
-uchar v4prefix[IPaddrlen] = {
-	0, 0, 0, 0,
-	0, 0, 0, 0,
-	0, 0, 0xff, 0xff,
-	0, 0, 0, 0
-};
-
-
 char *v6hdrtypes[Maxhdrtype] =
 {
 	[HBH]		"HopbyHop",
@@ -54,7 +13,7 @@
 	[ICMP]		"ICMP",
 	[IGMP]		"IGMP",
 	[GGP]		"GGP",
-	[IPINIP]		"IP",
+	[IPINIP]	"IP",
 	[ST]		"ST",
 	[TCP]		"TCP",
 	[UDP]		"UDP",
@@ -87,6 +46,7 @@
 	0, 0, 0, 0,
 	0, 0, 0, 0x01
 };
+
 uchar v6linklocal[IPaddrlen] = {
 	0xfe, 0x80, 0, 0,
 	0, 0, 0, 0,
@@ -99,26 +59,8 @@
 	0, 0, 0, 0,
 	0, 0, 0, 0
 };
-int v6llpreflen = 8;	// link-local prefix length
-uchar v6sitelocal[IPaddrlen] = {
-	0xfe, 0xc0, 0, 0,
-	0, 0, 0, 0,
-	0, 0, 0, 0,
-	0, 0, 0, 0
-};
-uchar v6sitelocalmask[IPaddrlen] = {
-	0xff, 0xff, 0xff, 0xff,
-	0xff, 0xff, 0xff, 0xff,
-	0, 0, 0, 0,
-	0, 0, 0, 0
-};
-int v6slpreflen = 6;	// site-local prefix length
-uchar v6glunicast[IPaddrlen] = {
-	0x08, 0, 0, 0,
-	0, 0, 0, 0,
-	0, 0, 0, 0,
-	0, 0, 0, 0
-};
+int v6llpreflen = 8;	/* link-local prefix length in bytes */
+
 uchar v6multicast[IPaddrlen] = {
 	0xff, 0, 0, 0,
 	0, 0, 0, 0,
@@ -131,7 +73,8 @@
 	0, 0, 0, 0,
 	0, 0, 0, 0
 };
-int v6mcpreflen = 1;	// multicast prefix length
+int v6mcpreflen = 1;	/* multicast prefix length */
+
 uchar v6allnodesN[IPaddrlen] = {
 	0xff, 0x01, 0, 0,
 	0, 0, 0, 0,
@@ -138,6 +81,12 @@
 	0, 0, 0, 0,
 	0, 0, 0, 0x01
 };
+uchar v6allroutersN[IPaddrlen] = {
+	0xff, 0x01, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0, 0x02
+};
 uchar v6allnodesNmask[IPaddrlen] = {
 	0xff, 0xff, 0, 0,
 	0, 0, 0, 0,
@@ -144,7 +93,8 @@
 	0, 0, 0, 0,
 	0, 0, 0, 0
 };
-int v6aNpreflen = 2;	// all nodes (N) prefix
+int v6aNpreflen = 2;	/* all nodes (N) prefix */
+
 uchar v6allnodesL[IPaddrlen] = {
 	0xff, 0x02, 0, 0,
 	0, 0, 0, 0,
@@ -151,19 +101,6 @@
 	0, 0, 0, 0,
 	0, 0, 0, 0x01
 };
-uchar v6allnodesLmask[IPaddrlen] = {
-	0xff, 0xff, 0, 0,
-	0, 0, 0, 0,
-	0, 0, 0, 0,
-	0, 0, 0, 0
-};
-int v6aLpreflen = 2;	// all nodes (L) prefix
-uchar v6allroutersN[IPaddrlen] = {
-	0xff, 0x01, 0, 0,
-	0, 0, 0, 0,
-	0, 0, 0, 0,
-	0, 0, 0, 0x02
-};
 uchar v6allroutersL[IPaddrlen] = {
 	0xff, 0x02, 0, 0,
 	0, 0, 0, 0,
@@ -170,12 +107,14 @@
 	0, 0, 0, 0,
 	0, 0, 0, 0x02
 };
-uchar v6allroutersS[IPaddrlen] = {
-	0xff, 0x05, 0, 0,
+uchar v6allnodesLmask[IPaddrlen] = {
+	0xff, 0xff, 0, 0,
 	0, 0, 0, 0,
 	0, 0, 0, 0,
-	0, 0, 0, 0x02
+	0, 0, 0, 0
 };
+int v6aLpreflen = 2;	/* all nodes (L) prefix */
+
 uchar v6solicitednode[IPaddrlen] = {
 	0xff, 0x02, 0, 0,
 	0, 0, 0, 0,
@@ -190,9 +129,6 @@
 };
 int v6snpreflen = 13;
 
-
-
-
 ushort
 ptclcsum(Block *bp, int offset, int len)
 {
@@ -215,7 +151,7 @@
 	if(bp->next == nil) {
 		if(blocklen < len)
 			len = blocklen;
-		return ~ptclbsum(addr, len) & 0xffff;
+		return ptclbsum(addr, len) ^ 0xffff;
 	}
 
 	losum = 0;
@@ -247,7 +183,7 @@
 	while((csum = losum>>16) != 0)
 		losum = csum + (losum & 0xffff);
 
-	return ~losum & 0xffff;
+	return losum ^ 0xffff;
 }
 
 enum
@@ -255,306 +191,9 @@
 	Isprefix= 16,
 };
 
-static uchar prefixvals[256] =
-{
-[0x00] 0 | Isprefix,
-[0x80] 1 | Isprefix,
-[0xC0] 2 | Isprefix,
-[0xE0] 3 | Isprefix,
-[0xF0] 4 | Isprefix,
-[0xF8] 5 | Isprefix,
-[0xFC] 6 | Isprefix,
-[0xFE] 7 | Isprefix,
-[0xFF] 8 | Isprefix,
-};
-
-int
-eipfmt(Fmt *f)
-{
-	char buf[5*8];
-	static char *efmt = "%.2lux%.2lux%.2lux%.2lux%.2lux%.2lux";
-	static char *ifmt = "%d.%d.%d.%d";
-	uchar *p, ip[16];
-	ulong *lp;
-	ushort s;
-	int i, j, n, eln, eli;
-
-	switch(f->r) {
-	case 'E':		/* Ethernet address */
-		p = va_arg(f->args, uchar*);
-		return fmtprint(f, efmt, p[0], p[1], p[2], p[3], p[4], p[5]);
-
-	case 'I':		/* Ip address */
-		p = va_arg(f->args, uchar*);
-common:
-		if(memcmp(p, v4prefix, 12) == 0)
-			return fmtprint(f, ifmt, p[12], p[13], p[14], p[15]);
-
-		/* find longest elision */
-		eln = eli = -1;
-		for(i = 0; i < 16; i += 2){
-			for(j = i; j < 16; j += 2)
-				if(p[j] != 0 || p[j+1] != 0)
-					break;
-			if(j > i && j - i > eln){
-				eli = i;
-				eln = j - i;
-			}
-		}
-
-		/* print with possible elision */
-		n = 0;
-		for(i = 0; i < 16; i += 2){
-			if(i == eli){
-				n += sprint(buf+n, "::");
-				i += eln;
-				if(i >= 16)
-					break;
-			} else if(i != 0)
-				n += sprint(buf+n, ":");
-			s = (p[i]<<8) + p[i+1];
-			n += sprint(buf+n, "%ux", s);
-		}
-		return fmtstrcpy(f, buf);
-
-	case 'i':		/* v6 address as 4 longs */
-		lp = va_arg(f->args, ulong*);
-		for(i = 0; i < 4; i++)
-			hnputl(ip+4*i, *lp++);
-		p = ip;
-		goto common;
-
-	case 'V':		/* v4 ip address */
-		p = va_arg(f->args, uchar*);
-		return fmtprint(f, ifmt, p[0], p[1], p[2], p[3]);
-
-	case 'M':		/* ip mask */
-		p = va_arg(f->args, uchar*);
-
-		/* look for a prefix mask */
-		for(i = 0; i < 16; i++)
-			if(p[i] != 0xff)
-				break;
-		if(i < 16){
-			if((prefixvals[p[i]] & Isprefix) == 0)
-				goto common;
-			for(j = i+1; j < 16; j++)
-				if(p[j] != 0)
-					goto common;
-			n = 8*i + (prefixvals[p[i]] & ~Isprefix);
-		} else
-			n = 8*16;
-
-		/* got one, use /xx format */
-		return fmtprint(f, "/%d", n);
-	}
-	return fmtstrcpy(f, "(eipfmt)");
-}
-
 #define CLASS(p) ((*(uchar*)(p))>>6)
 
-extern char*
-v4parseip(uchar *to, char *from)
-{
-	int i;
-	char *p;
-
-	p = from;
-	for(i = 0; i < 4 && *p; i++){
-		to[i] = strtoul(p, &p, 0);
-		if(*p == '.')
-			p++;
-	}
-	switch(CLASS(to)){
-	case 0:	/* class A - 1 uchar net */
-	case 1:
-		if(i == 3){
-			to[3] = to[2];
-			to[2] = to[1];
-			to[1] = 0;
-		} else if(i == 2){
-			to[3] = to[1];
-			to[1] = 0;
-		}
-		break;
-	case 2:	/* class B - 2 uchar net */
-		if(i == 3){
-			to[3] = to[2];
-			to[2] = 0;
-		}
-		break;
-	}
-	return p;
-}
-
-int
-isv4(uchar *ip)
-{
-	return memcmp(ip, v4prefix, IPv4off) == 0;
-}
-
-
-/*
- *  the following routines are unrolled with no memset's to speed
- *  up the usual case
- */
 void
-v4tov6(uchar *v6, uchar *v4)
-{
-	v6[0] = 0;
-	v6[1] = 0;
-	v6[2] = 0;
-	v6[3] = 0;
-	v6[4] = 0;
-	v6[5] = 0;
-	v6[6] = 0;
-	v6[7] = 0;
-	v6[8] = 0;
-	v6[9] = 0;
-	v6[10] = 0xff;
-	v6[11] = 0xff;
-	v6[12] = v4[0];
-	v6[13] = v4[1];
-	v6[14] = v4[2];
-	v6[15] = v4[3];
-}
-
-int
-v6tov4(uchar *v4, uchar *v6)
-{
-	if(v6[0] == 0
-	&& v6[1] == 0
-	&& v6[2] == 0
-	&& v6[3] == 0
-	&& v6[4] == 0
-	&& v6[5] == 0
-	&& v6[6] == 0
-	&& v6[7] == 0
-	&& v6[8] == 0
-	&& v6[9] == 0
-	&& v6[10] == 0xff
-	&& v6[11] == 0xff)
-	{
-		v4[0] = v6[12];
-		v4[1] = v6[13];
-		v4[2] = v6[14];
-		v4[3] = v6[15];
-		return 0;
-	} else {
-		memset(v4, 0, 4);
-		return -1;
-	}
-}
-
-ulong
-parseip(uchar *to, char *from)
-{
-	int i, elipsis = 0, v4 = 1;
-	ulong x;
-	char *p, *op;
-
-	memset(to, 0, IPaddrlen);
-	p = from;
-	for(i = 0; i < 16 && *p; i+=2){
-		op = p;
-		x = strtoul(p, &p, 16);
-		if(*p == '.' || (*p == 0 && i == 0)){
-			p = v4parseip(to+i, op);
-			i += 4;
-			break;
-		} else {
-			to[i] = x>>8;
-			to[i+1] = x;
-		}
-		if(*p == ':'){
-			v4 = 0;
-			if(*++p == ':'){
-				elipsis = i+2;
-				p++;
-			}
-		}
-	}
-	if(i < 16){
-		memmove(&to[elipsis+16-i], &to[elipsis], i-elipsis);
-		memset(&to[elipsis], 0, 16-i);
-	}
-	if(v4){
-		to[10] = to[11] = 0xff;
-		return nhgetl(to+12);
-	} else
-		return 6;
-}
-
-/*
- *  hack to allow ip v4 masks to be entered in the old
- *  style
- */
-ulong
-parseipmask(uchar *to, char *from)
-{
-	ulong x;
-	int i;
-	uchar *p;
-
-	if(*from == '/'){
-		/* as a number of prefix bits */
-		i = atoi(from+1);
-		if(i < 0)
-			i = 0;
-		if(i > 128)
-			i = 128;
-		memset(to, 0, IPaddrlen);
-		for(p = to; i >= 8; i -= 8)
-			*p++ = 0xff;
-		if(i > 0)
-			*p = ~((1<<(8-i))-1);
-		x = nhgetl(to+IPv4off);
-	} else {
-		/* as a straight bit mask */
-		x = parseip(to, from);
-		if(memcmp(to, v4prefix, IPv4off) == 0)
-			memset(to, 0xff, IPv4off);
-	}
-	return x;
-}
-
-void
-maskip(uchar *from, uchar *mask, uchar *to)
-{
-	int i;
-
-	for(i = 0; i < IPaddrlen; i++)
-		to[i] = from[i] & mask[i];
-}
-
-uchar classmask[4][16] = {
-	0xff,0xff,0xff,0xff,  0xff,0xff,0xff,0xff,  0xff,0xff,0xff,0xff,  0xff,0x00,0x00,0x00,
-	0xff,0xff,0xff,0xff,  0xff,0xff,0xff,0xff,  0xff,0xff,0xff,0xff,  0xff,0x00,0x00,0x00,
-	0xff,0xff,0xff,0xff,  0xff,0xff,0xff,0xff,  0xff,0xff,0xff,0xff,  0xff,0xff,0x00,0x00,
-	0xff,0xff,0xff,0xff,  0xff,0xff,0xff,0xff,  0xff,0xff,0xff,0xff,  0xff,0xff,0xff,0x00,
-};
-
-uchar*
-defmask(uchar *ip)
-{
-	if(isv4(ip))
-		return classmask[ip[IPv4off]>>6];
-	else {
-		if(ipcmp(ip, v6loopback) == 0)
-			return IPallbits;
-		else if(memcmp(ip, v6linklocal, v6llpreflen) == 0)
-			return v6linklocalmask;
-		else if(memcmp(ip, v6sitelocal, v6slpreflen) == 0)
-			return v6sitelocalmask;
-		else if(memcmp(ip, v6solicitednode, v6snpreflen) == 0)
-			return v6solicitednodemask;
-		else if(memcmp(ip, v6multicast, v6mcpreflen) == 0)
-			return v6multicastmask;
-		return IPallbits;
-	}
-}
-
-void
 ipv62smcast(uchar *smcast, uchar *a)
 {
 	assert(IPaddrlen == 16);
@@ -599,7 +238,7 @@
 ulong
 iphash(uchar *sa, ushort sp, uchar *da, ushort dp)
 {
-	return ((sa[IPaddrlen-1]<<24) ^ (sp << 16) ^ (da[IPaddrlen-1]<<8) ^ dp ) % Nhash;
+	return ((sa[IPaddrlen-1]<<24) ^ (sp << 16) ^ (da[IPaddrlen-1]<<8) ^ dp ) % Nipht;
 }
 
 void
@@ -678,7 +317,7 @@
 			return c;
 		}
 	}
-	
+
 	/* match local address and port */
 	hv = iphash(IPnoaddr, 0, da, dp);
 	for(h = ht->tab[hv]; h != nil; h = h->next){
@@ -690,7 +329,7 @@
 			return c;
 		}
 	}
-	
+
 	/* match just port */
 	hv = iphash(IPnoaddr, 0, IPnoaddr, dp);
 	for(h = ht->tab[hv]; h != nil; h = h->next){
@@ -702,7 +341,7 @@
 			return c;
 		}
 	}
-	
+
 	/* match local address */
 	hv = iphash(IPnoaddr, 0, da, 0);
 	for(h = ht->tab[hv]; h != nil; h = h->next){
@@ -714,7 +353,7 @@
 			return c;
 		}
 	}
-	
+
 	/* look for something that matches anything */
 	hv = iphash(IPnoaddr, 0, IPnoaddr, 0);
 	for(h = ht->tab[hv]; h != nil; h = h->next){
@@ -726,4 +365,13 @@
 	}
 	unlock(ht);
 	return nil;
+}
+
+int
+convipvers(Conv *c)	/* choose the IP version (V4 or V6) for conversation c */
+{
+	if(isv4(c->raddr) && isv4(c->laddr) || ipcmp(c->raddr, IPnoaddr) == 0)	/* both ends v4, or remote equals IPnoaddr */
+		return V4;
+	else
+		return V6;	/* anything else is treated as v6 */
 }
diff -u a/os/ip//ipifc.c b/os/ip//ipifc.c
--- a/os/ip//ipifc.c
+++ b/os/ip//ipifc.c
@@ -11,17 +11,14 @@
 #define DPRINT if(0)print
 
 enum {
-	Maxmedia = 32,
-	Nself = Maxmedia*5,
-	NHASH = (1<<6),
-	NCACHE = 256,
-	QMAX = 64*1024-1,
+	Maxmedia	= 32,
+	Nself		= Maxmedia*5,
+	NHASH		= 1<<6,
+	NCACHE		= 256,
+	QMAX		= 192*1024-1,
 };
 
-Medium *media[Maxmedia] =
-{
-	0
-};
+Medium *media[Maxmedia] = { 0 };
 
 /*
  *  cache of local addresses (addresses we answer to)
@@ -29,12 +26,10 @@
 struct Ipself
 {
 	uchar	a[IPaddrlen];
-	Ipself	*hnext;		/* next address in the hash table */
+	Ipself	*next;		/* next address in the hash table */
 	Iplink	*link;		/* binding twixt Ipself and Ipifc */
 	ulong	expire;
 	uchar	type;		/* type of address */
-	int	ref;
-	Ipself	*next;		/* free list */
 };
 
 struct Ipselftab
@@ -64,11 +59,47 @@
 
 static void	addselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a, int type);
 static void	remselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a);
-static char*	ipifcjoinmulti(Ipifc *ifc, char **argv, int argc);
-static char*	ipifcleavemulti(Ipifc *ifc, char **argv, int argc);
-static void	ipifcregisterproxy(Fs*, Ipifc*, uchar*);
-static char*	ipifcremlifc(Ipifc*, Iplifc*);
+static void	ipifcregisteraddr(Fs*, Ipifc*, Iplifc*, uchar*);
+static void	ipifcregisterproxy(Fs*, Ipifc*, uchar*, int);
+static char*	ipifcremlifc(Ipifc*, Iplifc**);
 
+static char Ebound[] = "interface already bound";
+static char Eunbound[] = "interface not bound";
+
+enum {
+	unknownv6,		/* UGH */
+	unspecifiedv6,
+	linklocalv6,
+	globalv6,
+};
+
+static int
+v6addrtype(uchar *addr)	/* classify addr as unknownv6, linklocalv6 or globalv6 (unspecifiedv6 is never returned here) */
+{
+	if(isv4(addr) || ipcmp(addr, IPnoaddr) == 0)	/* v4 address or IPnoaddr: not classified as v6 */
+		return unknownv6;
+	else if(islinklocal(addr) || ipcmp(addr, v6loopback) == 0 ||	/* loopback is counted as link-local */
+	    isv6mcast(addr) && (addr[1] & 0xF) <= Link_local_scop)	/* low nibble of byte 1 is presumably the multicast scope - confirm */
+		return linklocalv6;
+	else
+		return globalv6;
+}
+
+static int
+comprefixlen(uchar *a, uchar *b, int n)	/* number of leading bits a and b have in common, looking at the first n bytes */
+{
+	int i, c;
+
+	for(i = 0; i < n; i++){
+		if((c = a[i] ^ b[i]) == 0)	/* bytes equal: all 8 bits match */
+			continue;
+		for(i <<= 3; (c & 0x80) == 0; i++)	/* i becomes a bit count; add matching high bits of the first differing byte */
+			c <<= 1;
+		return i;
+	}
+	return i << 3;	/* no difference found: all n*8 bits match */
+}
+
 /*
  *  link in a new medium
  */
@@ -121,7 +152,7 @@
 	wlock(ifc);
 	if(ifc->m != nil){
 		wunlock(ifc);
-		return "interface already bound";	
+		return Ebound;
 	}
 	if(waserror()){
 		wunlock(ifc);
@@ -142,18 +173,14 @@
 	ifc->m = m;
 	ifc->mintu = ifc->m->mintu;
 	ifc->maxtu = ifc->m->maxtu;
+	ifc->delay = 40;
+	ifc->speed = 0;
 	if(ifc->m->unbindonclose == 0)
 		ifc->conv->inuse++;
-	ifc->rp.mflag = 0;		// default not managed
-	ifc->rp.oflag = 0;
-	ifc->rp.maxraint = 600000;	// millisecs
-	ifc->rp.minraint = 200000;
-	ifc->rp.linkmtu = 0;		// no mtu sent
-	ifc->rp.reachtime = 0;
-	ifc->rp.rxmitra = 0;
-	ifc->rp.ttl = MAXTTL;
-	ifc->rp.routerlt = 3*(ifc->rp.maxraint);
 
+	/* default router parameters */
+	ifc->rp = c->p->f->v6p->rp;
+
 	/* any ancillary structures (like routes) no longer pertain */
 	ifc->ifcid++;
 
@@ -170,29 +197,44 @@
 
 /*
  *  detach a device from an interface, close the interface
- *  called with ifc->conv closed
  */
 static char*
 ipifcunbind(Ipifc *ifc)
 {
-	char *err;
+	Medium *m;
 
-	if(waserror()){
+	wlock(ifc);
+	m = ifc->m;
+	if(m == nil){
 		wunlock(ifc);
-		nexterror();
+		return Eunbound;
 	}
-	wlock(ifc);
 
-	/* dissociate routes */
-	if(ifc->m != nil && ifc->m->unbindonclose == 0)
-		ifc->conv->inuse--;
-	ifc->ifcid++;
+	/* disassociate logical interfaces (before zeroing ifc->arg) */
+	while(ifc->lifc != nil)
+		ipifcremlifc(ifc, &ifc->lifc);
 
 	/* disassociate device */
-	if(ifc->m != nil && ifc->m->unbind)
-		(*ifc->m->unbind)(ifc);
+	if(m->unbind != nil){
+		extern Medium nullmedium;
+
+		/*
+		 * unbind() might unlock the ifc, so change the medium
+		 * to the nullmedium to prevent packets from getting
+		 * sent while the medium is shutting down.
+		 */
+		ifc->m = &nullmedium;
+
+		if(!waserror()){
+			(*m->unbind)(ifc);
+			poperror();
+		}
+	}
+
 	memset(ifc->dev, 0, sizeof(ifc->dev));
 	ifc->arg = nil;
+
+	ifc->reflect = 0;
 	ifc->reassemble = 0;
 
 	/* close queues to stop queuing of packets */
@@ -200,26 +242,22 @@
 	qclose(ifc->conv->wq);
 	qclose(ifc->conv->sq);
 
-	/* disassociate logical interfaces */
-	while(ifc->lifc){
-		err = ipifcremlifc(ifc, ifc->lifc);
-		if(err)
-			error(err);
-	}
-
+	/* dissociate routes */
+	ifc->ifcid++;
+	if(m->unbindonclose == 0)
+		ifc->conv->inuse--;
 	ifc->m = nil;
 	wunlock(ifc);
-	poperror();
+
 	return nil;
 }
 
+char sfixedformat[] = "device %s maxtu %d sendra %d recvra %d mflag %d oflag %d"
+" maxraint %d minraint %d linkmtu %d reachtime %d rxmitra %d ttl %d routerlt %d"
+" pktin %lud pktout %lud errin %lud errout %lud speed %d delay %d\n";
 
-
-char sfixedformat[] = "device %s maxtu %d sendra %d recvra %d mflag %d oflag %d maxraint %d minraint %d linkmtu %d reachtime %d rxmitra %d ttl %d routerlt %d pktin %lud pktout %lud errin %lud errout %lud\n";
-
 char slineformat[] = "	%-40I %-10M %-40I %-12lud %-12lud\n";
 
-
 static int
 ipifcstate(Conv *c, char *state, int n)
 {
@@ -228,19 +266,18 @@
 	int m;
 
 	ifc = (Ipifc*)c->ptcl;
-
 	m = snprint(state, n, sfixedformat,
 		ifc->dev, ifc->maxtu, ifc->sendra6, ifc->recvra6,
 		ifc->rp.mflag, ifc->rp.oflag, ifc->rp.maxraint,
 		ifc->rp.minraint, ifc->rp.linkmtu, ifc->rp.reachtime,
 		ifc->rp.rxmitra, ifc->rp.ttl, ifc->rp.routerlt,
-		ifc->in, ifc->out, ifc->inerr, ifc->outerr);
+		ifc->in, ifc->out, ifc->inerr, ifc->outerr,
+		ifc->speed, ifc->delay);
 
 	rlock(ifc);
-	for(lifc = ifc->lifc; lifc && n > m; lifc = lifc->next)
-		m += snprint(state+m, n - m, slineformat,
-			lifc->local, lifc->mask, lifc->remote,
-			lifc->validlt, lifc->preflt);
+	for(lifc = ifc->lifc; lifc != nil && n > m; lifc = lifc->next)
+		m += snprint(state+m, n - m, slineformat, lifc->local,
+			lifc->mask, lifc->remote, lifc->validlt, lifc->preflt);
 	if(ifc->lifc == nil)
 		m += snprint(state+m, n - m, "\n");
 	runlock(ifc);
@@ -256,13 +293,11 @@
 	int m;
 
 	ifc = (Ipifc*)c->ptcl;
-
-	m = 0;
-
 	rlock(ifc);
-	for(lifc = ifc->lifc; lifc; lifc = lifc->next){
+	m = 0;
+	for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next){
 		m += snprint(state+m, n - m, "%-40.40I ->", lifc->local);
-		for(link = lifc->link; link; link = link->lifclink)
+		for(link = lifc->link; link != nil; link = link->lifclink)
 			m += snprint(state+m, n - m, " %-40.40I", link->self->a);
 		m += snprint(state+m, n - m, "\n");
 	}
@@ -279,6 +314,59 @@
 	return ifc->m != nil;
 }
 
+static void
+ipifcadjustburst(Ipifc *ifc)	/* recompute ifc->burst from ifc->delay, ifc->speed and ifc->maxtu */
+{
+	int burst;
+
+	burst = ((vlong)ifc->delay * ifc->speed) / 8000;	/* presumably ms * bits/s -> bytes; vlong avoids int overflow in the product */
+	if(burst < ifc->maxtu)	/* never let the burst drop below one maxtu */
+		burst = ifc->maxtu;
+	ifc->burst = burst;
+}
+
+static void
+ipifcsetdelay(Ipifc *ifc, int delay)	/* set ifc->delay, clamped to 0..1000, and refresh the burst limit */
+{
+	if(delay < 0)
+		delay = 0;
+	else if(delay > 1000)
+		delay = 1000;
+	ifc->delay = delay;
+	ipifcadjustburst(ifc);	/* burst depends on delay */
+}
+
+static void
+ipifcsetspeed(Ipifc *ifc, int speed)	/* set ifc->speed (negative becomes 0), reset the load and refresh the burst limit */
+{
+	if(speed < 0)
+		speed = 0;	/* ipifcoput skips its load check when speed is 0 */
+	ifc->speed = speed;
+	ifc->load = 0;	/* start load accounting afresh */
+	ipifcadjustburst(ifc);
+}
+
+void
+ipifcoput(Ipifc *ifc, Block *bp, int version, uchar *ip)	/* send bp via the medium's bwrite, dropping it when the load exceeds ifc->burst */
+{
+	if(ifc->speed){	/* load accounting only when a speed is set */
+		ulong now = MACHP(0)->ticks;
+		int dt = TK2MS(now - ifc->ticks);	/* time since the previous call, converted by TK2MS */
+		ifc->ticks = now;
+		ifc->load -= ((vlong)dt * ifc->speed) / 8000;	/* age the load by what could have drained in dt */
+		if(ifc->load < 0 || dt < 0 || dt > 1000)	/* drained, or dt outside 0..1000: restart from zero */
+			ifc->load = 0;
+		else if(ifc->load > ifc->burst){	/* over the burst limit: drop the packet */
+			freeblist(bp);
+			return;
+		}
+	}
+	bp = concatblock(bp);	/* presumably merges the block chain into one block - confirm */
+	ifc->load += BLEN(bp);	/* charge this packet's length to the load */
+	ifc->m->bwrite(ifc, bp, version, ip);	/* NOTE(review): ifc->m is not checked for nil here; presumably the caller guarantees a bound medium - confirm */
+}
+
+
 /*
  *  called when a process writes to an interface's 'data'
  */
@@ -294,18 +382,15 @@
 		return;
 
 	ifc = (Ipifc*)c->ptcl;
-	if(!canrlock(ifc)){
-		freeb(bp);
-		return;
-	}
+	rlock(ifc);
 	if(waserror()){
 		runlock(ifc);
 		nexterror();
 	}
-	if(ifc->m == nil || ifc->m->pktin == nil)
-		freeb(bp);
-	else
+	if(ifc->m != nil && ifc->m->pktin != nil)
 		(*ifc->m->pktin)(c->p->f, ifc, bp);
+	else
+		freeb(bp);
 	runlock(ifc);
 	poperror();
 }
@@ -319,27 +404,26 @@
 	Ipifc *ifc;
 
 	c->rq = qopen(QMAX, 0, 0, 0);
-	c->sq = qopen(2*QMAX, 0, 0, 0);
 	c->wq = qopen(QMAX, Qkick, ipifckick, c);
+	c->sq = qopen(QMAX, 0, 0, 0);
+	if(c->rq == nil || c->wq == nil || c->sq == nil)
+		error(Enomem);
 	ifc = (Ipifc*)c->ptcl;
 	ifc->conv = c;
-	ifc->unbinding = 0;
 	ifc->m = nil;
+	ifc->reflect = 0;
 	ifc->reassemble = 0;
 }
 
 /*
  *  called after last close of ipifc data or ctl
- *  called with c locked, we must unlock
  */
 static void
 ipifcclose(Conv *c)
 {
-	Ipifc *ifc;
-	Medium *m;
+	Ipifc *ifc = (Ipifc*)c->ptcl;
+	Medium *m = ifc->m;
 
-	ifc = (Ipifc*)c->ptcl;
-	m = ifc->m;
 	if(m != nil && m->unbindonclose)
 		ipifcunbind(ifc);
 }
@@ -347,19 +431,17 @@
 /*
  *  change an interface's mtu
  */
-char*
-ipifcsetmtu(Ipifc *ifc, char **argv, int argc)
+static char*
+ipifcsetmtu(Ipifc *ifc, int mtu)
 {
-	int mtu;
+	Medium *m = ifc->m;
 
-	if(argc < 2)
+	if(m == nil)
+		return Eunbound;
+	if(mtu < m->mintu || mtu > m->maxtu)
 		return Ebadarg;
-	if(ifc->m == nil)
-		return Ebadarg;
-	mtu = strtoul(argv[1], 0, 0);
-	if(mtu < ifc->m->mintu || mtu > ifc->m->maxtu)
-		return Ebadarg;
 	ifc->maxtu = mtu;
+	ipifcadjustburst(ifc);
 	return nil;
 }
 
@@ -374,13 +456,8 @@
 	Iplifc *lifc, **l;
 	int i, type, mtu;
 	Fs *f;
-	int sendnbrdisc = 0;
 
-	if(ifc->m == nil)
-		return "ipifc not yet bound to device";
-
-	f = ifc->conv->p->f;
-
+	mtu = 0;
 	type = Rifc;
 	memset(ip, 0, IPaddrlen);
 	memset(mask, 0, IPaddrlen);
@@ -392,23 +469,21 @@
 		/* fall through */
 	case 5:
 		mtu = strtoul(argv[4], 0, 0);
-		if(mtu >= ifc->m->mintu && mtu <= ifc->m->maxtu)
-			ifc->maxtu = mtu;
 		/* fall through */
 	case 4:
-		parseip(ip, argv[1]);
-		parseipmask(mask, argv[2]);
-		parseip(rem, argv[3]);
+		if (parseipandmask(ip, mask, argv[1], argv[2]) == -1 || parseip(rem, argv[3]) == -1)
+			return Ebadip;
 		maskip(rem, mask, net);
 		break;
 	case 3:
-		parseip(ip, argv[1]);
-		parseipmask(mask, argv[2]);
+		if (parseipandmask(ip, mask, argv[1], argv[2]) == -1)
+			return Ebadip;
 		maskip(ip, mask, rem);
 		maskip(rem, mask, net);
 		break;
 	case 2:
-		parseip(ip, argv[1]);
+		if (parseip(ip, argv[1]) == -1)
+			return Ebadip;
 		memmove(mask, defmask(ip), IPaddrlen);
 		maskip(ip, mask, rem);
 		maskip(rem, mask, net);
@@ -415,26 +490,55 @@
 		break;
 	default:
 		return Ebadarg;
-		break;
 	}
-	if(isv4(ip))
+
+	/* check for point-to-point interface */
+	if(ipcmp(ip, v6loopback) != 0) /* skip v6 loopback, it's a special address */
+	if(ipcmp(mask, IPallbits) == 0)
+		type |= Rptpt;
+
+	if(isv4(ip) || ipcmp(ip, IPnoaddr) == 0){
+		type |= Rv4;
 		tentative = 0;
+	}
+
 	wlock(ifc);
+	if(ifc->m == nil){
+		wunlock(ifc);
+		return Eunbound;
+	}
+	f = ifc->conv->p->f;
+	if(waserror()){
+		wunlock(ifc);
+		return up->errstr;
+	}
 
+	if(mtu > 0)
+		ipifcsetmtu(ifc, mtu);
+
 	/* ignore if this is already a local address for this ifc */
-	for(lifc = ifc->lifc; lifc; lifc = lifc->next) {
-		if(ipcmp(lifc->local, ip) == 0) {
-			if(lifc->tentative != tentative)
-				lifc->tentative = tentative;
-			if(lifcp != nil) {
-				lifc->onlink = lifcp->onlink;
-				lifc->autoflag = lifcp->autoflag;
-				lifc->validlt = lifcp->validlt;
-				lifc->preflt = lifcp->preflt;
-				lifc->origint = lifcp->origint;
+	if((lifc = iplocalonifc(ifc, ip)) != nil){
+		if(lifcp != nil) {
+			if(!lifc->onlink && lifcp->onlink){
+				lifc->onlink = 1;
+				addroute(f, lifc->remote, lifc->mask, ip, IPallbits,
+					lifc->remote, lifc->type, ifc, tifc);
+				if(v6addrtype(ip) != linklocalv6)
+					addroute(f, lifc->remote, lifc->mask, ip, IPnoaddr,
+						lifc->remote, lifc->type, ifc, tifc);
 			}
-			goto out;
+			lifc->autoflag = lifcp->autoflag;
+			lifc->validlt = lifcp->validlt;
+			lifc->preflt = lifcp->preflt;
+			lifc->origint = lifcp->origint;
 		}
+		if(lifc->tentative != tentative){
+			lifc->tentative = tentative;
+			goto done;
+		}
+		wunlock(ifc);
+		poperror();
+		return nil;
 	}
 
 	/* add the address to the list of logical ifc's for this ifc */
@@ -443,6 +547,7 @@
 	ipmove(lifc->mask, mask);
 	ipmove(lifc->remote, rem);
 	ipmove(lifc->net, net);
+	lifc->type = type;
 	lifc->tentative = tentative;
 	if(lifcp != nil) {
 		lifc->onlink = lifcp->onlink;
@@ -450,39 +555,34 @@
 		lifc->validlt = lifcp->validlt;
 		lifc->preflt = lifcp->preflt;
 		lifc->origint = lifcp->origint;
+	} else {		/* default values */
+		lifc->onlink = lifc->autoflag = 1;
+		lifc->validlt = lifc->preflt = ~0UL;
+		lifc->origint = NOW / 1000;
 	}
-	else {		// default values
-		lifc->onlink = 1;
-		lifc->autoflag = 1;
-		lifc->validlt = 0xffffffff;
-		lifc->preflt = 0xffffffff;
-		lifc->origint = NOW / 10^3;
-	}
 	lifc->next = nil;
 
-	for(l = &ifc->lifc; *l; l = &(*l)->next)
+	for(l = &ifc->lifc; *l != nil; l = &(*l)->next)
 		;
 	*l = lifc;
 
-	/* check for point-to-point interface */
-	if(ipcmp(ip, v6loopback))  /* skip v6 loopback, it's a special address */
-	if(ipcmp(mask, IPallbits) == 0)
-		type |= Rptpt;
+	/* add route for this logical interface */
+	if(lifc->onlink){
+		addroute(f, rem, mask, ip, IPallbits, rem, type, ifc, tifc);
+		if(v6addrtype(ip) != linklocalv6)
+			addroute(f, rem, mask, ip, IPnoaddr, rem, type, ifc, tifc);
+	}
 
-	/* add local routes */
-	if(isv4(ip))
-		v4addroute(f, tifc, rem+IPv4off, mask+IPv4off, rem+IPv4off, type);
-	else
-		v6addroute(f, tifc, rem, mask, rem, type);
-
 	addselfcache(f, ifc, lifc, ip, Runi);
 
-	if((type & (Rproxy|Rptpt)) == (Rproxy|Rptpt)){
-		ipifcregisterproxy(f, ifc, rem);
-		goto out;
+	/* register proxy */
+	if(type & Rptpt){
+		if(type & Rproxy)
+			ipifcregisterproxy(f, ifc, rem, 1);
+		goto done;
 	}
 
-	if(isv4(ip) || ipcmp(ip, IPnoaddr) == 0) {
+	if(type & Rv4) {
 		/* add subnet directed broadcast address to the self cache */
 		for(i = 0; i < IPaddrlen; i++)
 			bcast[i] = (ip[i] & mask[i]) | ~mask[i];
@@ -504,174 +604,135 @@
 		for(i = 0; i < IPaddrlen; i++)
 			bcast[i] = (ip[i] & mask[i]) & mask[i];
 		addselfcache(f, ifc, lifc, bcast, Rbcast);
-		
+
 		addselfcache(f, ifc, lifc, IPv4bcast, Rbcast);
-	}
-	else {
+	} else {
 		if(ipcmp(ip, v6loopback) == 0) {
 			/* add node-local mcast address */
 			addselfcache(f, ifc, lifc, v6allnodesN, Rmulti);
 
 			/* add route for all node multicast */
-			v6addroute(f, tifc, v6allnodesN, v6allnodesNmask, v6allnodesN, Rmulti);
+			addroute(f, v6allnodesN, v6allnodesNmask,
+				ip, IPallbits,
+				v6allnodesN, Rmulti, ifc, tifc);
 		}
 
 		/* add all nodes multicast address */
 		addselfcache(f, ifc, lifc, v6allnodesL, Rmulti);
-		
+
 		/* add route for all nodes multicast */
-		v6addroute(f, tifc, v6allnodesL, v6allnodesLmask, v6allnodesL, Rmulti);
-		
+		addroute(f, v6allnodesL, v6allnodesLmask,
+			ip, IPallbits,
+			v6allnodesL, Rmulti, ifc, tifc);
+
 		/* add solicited-node multicast address */
 		ipv62smcast(bcast, ip);
 		addselfcache(f, ifc, lifc, bcast, Rmulti);
-
-		sendnbrdisc = 1;
 	}
 
-	/* register the address on this network for address resolution */
-	if(isv4(ip) && ifc->m->areg != nil)
-		(*ifc->m->areg)(ifc, ip);
-
-out:
+done:
 	wunlock(ifc);
-	if(tentative && sendnbrdisc)
-		icmpns(f, 0, SRC_UNSPEC, ip, TARG_MULTI, ifc->mac);
+	poperror();
+
+	rlock(ifc);
+	ipifcregisteraddr(f, ifc, lifc, ip);
+	runlock(ifc);
+
 	return nil;
 }
 
 /*
  *  remove a logical interface from an ifc
- *  always called with ifc wlock'd
+ *	called with ifc wlock'd
  */
 static char*
-ipifcremlifc(Ipifc *ifc, Iplifc *lifc)
+ipifcremlifc(Ipifc *ifc, Iplifc **l)
 {
-	Iplifc **l;
-	Fs *f;
+	Iplifc *lifc = *l;
+	Fs *f = ifc->conv->p->f;
 
-	f = ifc->conv->p->f;
-
-	/*
-	 *  find address on this interface and remove from chain.
-	 *  for pt to pt we actually specify the remote address as the
-	 *  addresss to remove.
-	 */
-	for(l = &ifc->lifc; *l != nil && *l != lifc; l = &(*l)->next)
-		;
-	if(*l == nil)
+	if(lifc == nil)
 		return "address not on this interface";
 	*l = lifc->next;
 
 	/* disassociate any addresses */
-	while(lifc->link)
+	while(lifc->link != nil)
 		remselfcache(f, ifc, lifc, lifc->link->self->a);
 
 	/* remove the route for this logical interface */
-	if(isv4(lifc->local))
-		v4delroute(f, lifc->remote+IPv4off, lifc->mask+IPv4off, 1);
-	else {
-		v6delroute(f, lifc->remote, lifc->mask, 1);
+	if(lifc->onlink){
+		remroute(f, lifc->remote, lifc->mask,
+			lifc->local, IPallbits,
+			lifc->remote, lifc->type, ifc, tifc);
+		if(v6addrtype(lifc->local) != linklocalv6)
+			remroute(f, lifc->remote, lifc->mask,
+				lifc->local, IPnoaddr,
+				lifc->remote, lifc->type, ifc, tifc);
+	}
+
+	/* unregister proxy */
+	if(lifc->type & Rptpt){
+		if(lifc->type & Rproxy)
+			ipifcregisterproxy(f, ifc, lifc->remote, 0);
+		goto done;
+	}
+
+	/* remove route for all nodes multicast */
+	if((lifc->type & Rv4) == 0){
 		if(ipcmp(lifc->local, v6loopback) == 0)
-			/* remove route for all node multicast */
-			v6delroute(f, v6allnodesN, v6allnodesNmask, 1);
-		else if(memcmp(lifc->local, v6linklocal, v6llpreflen) == 0)
-			/* remove route for all link multicast */
-			v6delroute(f, v6allnodesL, v6allnodesLmask, 1);
+			remroute(f, v6allnodesN, v6allnodesNmask,
+				lifc->local, IPallbits,
+				v6allnodesN, Rmulti, ifc, tifc);
+
+		remroute(f, v6allnodesL, v6allnodesLmask,
+			lifc->local, IPallbits,
+			v6allnodesL, Rmulti, ifc, tifc);
 	}
 
+done:
 	free(lifc);
 	return nil;
-
 }
 
 /*
  *  remove an address from an interface.
- *  called with c locked
  */
 char*
 ipifcrem(Ipifc *ifc, char **argv, int argc)
 {
-	uchar ip[IPaddrlen];
-	uchar mask[IPaddrlen];
-	uchar rem[IPaddrlen];
-	Iplifc *lifc;
-	char *rv;
+	uchar ip[IPaddrlen], mask[IPaddrlen], rem[IPaddrlen];
+	Iplifc *lifc, **l;
+	char *err;
 
 	if(argc < 3)
 		return Ebadarg;
-
-	parseip(ip, argv[1]);
-	parseipmask(mask, argv[2]);
+	if(parseipandmask(ip, mask, argv[1], argv[2]) == -1)
+		return Ebadip;
 	if(argc < 4)
 		maskip(ip, mask, rem);
-	else
-		parseip(rem, argv[3]);
+	else if(parseip(rem, argv[3]) == -1)
+		return Ebadip;
 
-	wlock(ifc);
-
 	/*
 	 *  find address on this interface and remove from chain.
 	 *  for pt to pt we actually specify the remote address as the
 	 *  addresss to remove.
 	 */
+	wlock(ifc);
+	l = &ifc->lifc;
 	for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next) {
-		if (memcmp(ip, lifc->local, IPaddrlen) == 0
-		&& memcmp(mask, lifc->mask, IPaddrlen) == 0
-		&& memcmp(rem, lifc->remote, IPaddrlen) == 0)
+		if(ipcmp(ip, lifc->local) == 0
+		&& ipcmp(mask, lifc->mask) == 0
+		&& ipcmp(rem, lifc->remote) == 0)
 			break;
+		l = &lifc->next;
 	}
-
-	rv = ipifcremlifc(ifc, lifc);
+	err = ipifcremlifc(ifc, l);
 	wunlock(ifc);
-	return rv;
+	return err;
 }
 
 /*
- * distribute routes to active interfaces like the
- * TRIP linecards
- */
-void
-ipifcaddroute(Fs *f, int vers, uchar *addr, uchar *mask, uchar *gate, int type)
-{
-	Medium *m;
-	Conv **cp, **e;
-	Ipifc *ifc;
-
-	e = &f->ipifc->conv[f->ipifc->nc];
-	for(cp = f->ipifc->conv; cp < e; cp++){
-		if(*cp != nil) {
-			ifc = (Ipifc*)(*cp)->ptcl;
-			m = ifc->m;
-			if(m == nil)
-				continue;
-			if(m->addroute != nil)
-				m->addroute(ifc, vers, addr, mask, gate, type);
-		}
-	}
-}
-
-void
-ipifcremroute(Fs *f, int vers, uchar *addr, uchar *mask)
-{
-	Medium *m;
-	Conv **cp, **e;
-	Ipifc *ifc;
-
-	e = &f->ipifc->conv[f->ipifc->nc];
-	for(cp = f->ipifc->conv; cp < e; cp++){
-		if(*cp != nil) {
-			ifc = (Ipifc*)(*cp)->ptcl;
-			m = ifc->m;
-			if(m == nil)
-				continue;
-			if(m->remroute != nil)
-				m->remroute(ifc, vers, addr, mask);
-		}
-	}
-}
-
-/*
  *  associate an address with the interface.  This wipes out any previous
  *  addresses.  This is a macro that means, remove all the old interfaces
  *  and add a new one.
@@ -679,170 +740,89 @@
 static char*
 ipifcconnect(Conv* c, char **argv, int argc)
 {
+	Ipifc *ifc = (Ipifc*)c->ptcl;
 	char *err;
-	Ipifc *ifc;
 
-	ifc = (Ipifc*)c->ptcl;
-
-	if(ifc->m == nil)
-		 return "ipifc not yet bound to device";
-
-	if(waserror()){
-		wunlock(ifc);
-		nexterror();
-	}
 	wlock(ifc);
-	while(ifc->lifc){
-		err = ipifcremlifc(ifc, ifc->lifc);
-		if(err)
-			error(err);
-	}
+	while(ifc->lifc != nil)
+		ipifcremlifc(ifc, &ifc->lifc);
 	wunlock(ifc);
-	poperror();
 
 	err = ipifcadd(ifc, argv, argc, 0, nil);
-	if(err)
+	if(err != nil)
 		return err;
 
 	Fsconnected(c, nil);
-
 	return nil;
 }
 
 char*
-ipifcsetpar6(Ipifc *ifc, char **argv, int argc)
+ipifcra6(Ipifc *ifc, char **argv, int argc)
 {
-	int i, argsleft, vmax = ifc->rp.maxraint, vmin = ifc->rp.minraint;
+	int i, argsleft;
+	uchar sendra, recvra;
+	Routerparams rp;
 
-	argsleft = argc - 1;
 	i = 1;
-
-	if(argsleft % 2 != 0)
+	argsleft = argc - 1;
+	if((argsleft % 2) != 0)
 		return Ebadarg;
 
+	sendra = ifc->sendra6;
+	recvra = ifc->recvra6;
+	rp = ifc->rp;
+
 	while (argsleft > 1) {
-		if(strcmp(argv[i],"recvra")==0)
-			ifc->recvra6 = (atoi(argv[i+1]) != 0);
-		else if(strcmp(argv[i],"sendra")==0)
-			ifc->sendra6 = (atoi(argv[i+1]) != 0);
-		else if(strcmp(argv[i],"mflag")==0)
-			ifc->rp.mflag = (atoi(argv[i+1]) != 0);
-		else if(strcmp(argv[i],"oflag")==0)
-			ifc->rp.oflag = (atoi(argv[i+1]) != 0);
-		else if(strcmp(argv[i],"maxraint")==0)
-			ifc->rp.maxraint = atoi(argv[i+1]);
-		else if(strcmp(argv[i],"minraint")==0)
-			ifc->rp.minraint = atoi(argv[i+1]);
-		else if(strcmp(argv[i],"linkmtu")==0)
-			ifc->rp.linkmtu = atoi(argv[i+1]);
-		else if(strcmp(argv[i],"reachtime")==0)
-			ifc->rp.reachtime = atoi(argv[i+1]);
-		else if(strcmp(argv[i],"rxmitra")==0)
-			ifc->rp.rxmitra = atoi(argv[i+1]);
-		else if(strcmp(argv[i],"ttl")==0)
-			ifc->rp.ttl = atoi(argv[i+1]);
-		else if(strcmp(argv[i],"routerlt")==0)
-			ifc->rp.routerlt = atoi(argv[i+1]);
+		if(strcmp(argv[i], "recvra") == 0)
+			recvra = atoi(argv[i+1]) != 0;
+		else if(strcmp(argv[i], "sendra") == 0)
+			sendra = atoi(argv[i+1]) != 0;
+		else if(strcmp(argv[i], "mflag") == 0)
+			rp.mflag = atoi(argv[i+1]) != 0;
+		else if(strcmp(argv[i], "oflag") == 0)
+			rp.oflag = atoi(argv[i+1]) != 0;
+		else if(strcmp(argv[i], "maxraint") == 0)
+			rp.maxraint = atoi(argv[i+1]);
+		else if(strcmp(argv[i], "minraint") == 0)
+			rp.minraint = atoi(argv[i+1]);
+		else if(strcmp(argv[i], "linkmtu") == 0)
+			rp.linkmtu = atoi(argv[i+1]);
+		else if(strcmp(argv[i], "reachtime") == 0)
+			rp.reachtime = atoi(argv[i+1]);
+		else if(strcmp(argv[i], "rxmitra") == 0)
+			rp.rxmitra = atoi(argv[i+1]);
+		else if(strcmp(argv[i], "ttl") == 0)
+			rp.ttl = atoi(argv[i+1]);
+		else if(strcmp(argv[i], "routerlt") == 0)
+			rp.routerlt = atoi(argv[i+1]);
 		else
-			return Ebadarg;	
+			return Ebadarg;
 
 		argsleft -= 2;
 		i += 2;
 	}
 
-	// consistency check
-	if(ifc->rp.maxraint < ifc->rp.minraint) {
-		ifc->rp.maxraint = vmax;
-		ifc->rp.minraint = vmin;
+	/* consistency check */
+	if(rp.maxraint < rp.minraint)
 		return Ebadarg;
-	}
 
-	return nil;
-}
+	ifc->rp = rp;
+	ifc->sendra6 = sendra;
+	ifc->recvra6 = recvra;
 
-char*
-ipifcsendra6(Ipifc *ifc, char **argv, int argc)
-{
-	int i;
-	
-	i = 0;
-	if(argc > 1)
-		i = atoi(argv[1]);
-	ifc->sendra6 = (i!=0);
 	return nil;
 }
 
-char*
-ipifcrecvra6(Ipifc *ifc, char **argv, int argc)
-{
-	int i;
-	
-	i = 0;
-	if(argc > 1)
-		i = atoi(argv[1]);
-	ifc->recvra6 = (i!=0);	
-	return nil;
-}
-
-char*
-ipifcnat(Ipifc *ifc, char **argv, int argc)
-{
-	uchar src[IPaddrlen], mask[IPaddrlen], dst[IPaddrlen];
-	Iplifc *lifc;
-
-	if(argc == 2){
-		if((strcmp(argv[1], "show") == 0)){
-			shownataddr();
-			return nil;
-		}else if((strcmp(argv[1], "flush") == 0)){
-			flushnataddr();
-			return nil;
-		}else
-			return Ebadarg;
-	}
-
-	if(argc != 5)
-		return Ebadarg;
-
-	if (parseip(src, argv[2]) == -1)
-		return Ebadip;
-
-	if (parseipmask(mask, argv[3]) == -1)
-		return Ebadip;
-
-	if (parseip(dst, argv[4]) == -1)
-		return Ebadip;
-
-	if((lifc=iplocalonifc(ifc, dst)) == nil)
-		return Ebadip;
-
-	if(strcmp(argv[1], "add") == 0){
-		if(addnataddr(src, mask, lifc) != 0)
-			return Ebadarg;
-	}else if(strcmp(argv[1], "remove") == 0){
-		if(removenataddr(src, mask, lifc) != 0)
-			return Ebadarg;
-	}else
-		return Ebadarg;
-
-	return nil;
-}
-
 /*
  *  non-standard control messages.
- *  called with c locked.
  */
 static char*
-ipifcctl(Conv* c, char**argv, int argc)
+ipifcctl(Conv* c, char **argv, int argc)
 {
-	Ipifc *ifc;
-	int i;
+	Ipifc *ifc = (Ipifc*)c->ptcl;
 
-	ifc = (Ipifc*)c->ptcl;
 	if(strcmp(argv[0], "add") == 0)
 		return ipifcadd(ifc, argv, argc, 0, nil);
-	else if(strcmp(argv[0], "bootp") == 0)
-		return bootp(ifc);
 	else if(strcmp(argv[0], "try") == 0)
 		return ipifcadd(ifc, argv, argc, 1, nil);
 	else if(strcmp(argv[0], "remove") == 0)
@@ -849,36 +829,38 @@
 		return ipifcrem(ifc, argv, argc);
 	else if(strcmp(argv[0], "unbind") == 0)
 		return ipifcunbind(ifc);
-	else if(strcmp(argv[0], "joinmulti") == 0)
-		return ipifcjoinmulti(ifc, argv, argc);
-	else if(strcmp(argv[0], "leavemulti") == 0)
-		return ipifcleavemulti(ifc, argv, argc);
 	else if(strcmp(argv[0], "mtu") == 0)
-		return ipifcsetmtu(ifc, argv, argc);
-	else if(strcmp(argv[0], "reassemble") == 0){
-		ifc->reassemble = 1;
+		return ipifcsetmtu(ifc, argc>1? strtoul(argv[1], 0, 0): 0);
+	else if(strcmp(argv[0], "speed") == 0){
+		ipifcsetspeed(ifc, argc>1? atoi(argv[1]): 0);
 		return nil;
 	}
+	else if(strcmp(argv[0], "delay") == 0){
+		ipifcsetdelay(ifc, argc>1? atoi(argv[1]): 0);
+		return nil;
+	}
 	else if(strcmp(argv[0], "iprouting") == 0){
-		i = 1;
-		if(argc > 1)
-			i = atoi(argv[1]);
-		iprouting(c->p->f, i);
+		iprouting(c->p->f, argc>1? atoi(argv[1]): 1);
 		return nil;
 	}
-	else if(strcmp(argv[0], "addpref6") == 0)
-		return ipifcaddpref6(ifc, argv, argc);
-	else if(strcmp(argv[0], "setpar6") == 0)
-		return ipifcsetpar6(ifc, argv, argc);
-	else if(strcmp(argv[0], "sendra6") == 0)
-		return ipifcsendra6(ifc, argv, argc);
-	else if(strcmp(argv[0], "recvra6") == 0)
-		return ipifcrecvra6(ifc, argv, argc);
-	else if(strcmp(argv[0], "nat") == 0)
-		return ipifcnat(ifc, argv, argc);
+	else if(strcmp(argv[0], "reflect") == 0){
+		ifc->reflect = argc>1? atoi(argv[1]): 1;
+		return nil;
+	}
+	else if(strcmp(argv[0], "reassemble") == 0){
+		ifc->reassemble = argc>1? atoi(argv[1]): 1;
+		return nil;
+	}
+	else if(strcmp(argv[0], "add6") == 0)
+		return ipifcadd6(ifc, argv, argc);
+	else if(strcmp(argv[0], "remove6") == 0)
+		return ipifcremove6(ifc, argv, argc);
+	else if(strcmp(argv[0], "ra6") == 0)
+		return ipifcra6(ifc, argv, argc);
 	return "unsupported ctl";
 }
 
+int
 ipifcstats(Proto *ipifc, char *buf, int len)
 {
 	return ipstats(ipifc->f, buf, len);
@@ -907,7 +889,7 @@
 	ipifc->nc = Maxmedia;
 	ipifc->ptclsize = sizeof(Ipifc);
 
-	f->ipifc = ipifc;			/* hack for ipifcremroute, findipifc, ... */
+	f->ipifc = ipifc;	/* hack for findipifc, ... */
 	f->self = smalloc(sizeof(Ipselftab));	/* hack for ipforme */
 
 	Fsproto(f, ipifc);
@@ -915,21 +897,25 @@
 
 /*
  *  add to self routing cache
- *	called with c locked
  */
 static void
 addselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a, int type)
 {
-	Ipself *p;
 	Iplink *lp;
+	Ipself *p;
 	int h;
 
+	type |= (lifc->type & Rv4);
 	qlock(f->self);
+	if(waserror()){
+		qunlock(f->self);
+		nexterror();
+	}
 
 	/* see if the address already exists */
 	h = hashipa(a);
-	for(p = f->self->hash[h]; p; p = p->next)
-		if(memcmp(a, p->a, IPaddrlen) == 0)
+	for(p = f->self->hash[h]; p != nil; p = p->next)
+		if(ipcmp(a, p->a) == 0)
 			break;
 
 	/* allocate a local address and add to hash chain */
@@ -946,7 +932,7 @@
 	}
 
 	/* look for a link for this lifc */
-	for(lp = p->link; lp; lp = lp->selflink)
+	for(lp = p->link; lp != nil; lp = lp->selflink)
 		if(lp->lifc == lifc)
 			break;
 
@@ -962,18 +948,19 @@
 		lifc->link = lp;
 
 		/* add to routing table */
-		if(isv4(a))
-			v4addroute(f, tifc, a+IPv4off, IPallbits+IPv4off, a+IPv4off, type);
-		else
-			v6addroute(f, tifc, a, IPallbits, a, type);
+		addroute(f, a, IPallbits,
+			lifc->local, 
+			((type & (Rbcast|Rmulti)) != 0 || v6addrtype(a) == linklocalv6) ?
+				IPallbits : IPnoaddr,
+			a, type, ifc, tifc);
 
 		if((type & Rmulti) && ifc->m->addmulti != nil)
 			(*ifc->m->addmulti)(ifc, a, lifc->local);
-	} else {
+	} else
 		lp->ref++;
-	}
 
 	qunlock(f->self);
+	poperror();
 }
 
 /*
@@ -992,8 +979,8 @@
 	ulong now = NOW;
 
 	l = &freeiplink;
-	for(np = *l; np; np = *l){
-		if(np->expire > now){
+	for(np = *l; np != nil; np = *l){
+		if((long)(now - np->expire) >= 0){
 			*l = np->next;
 			free(np);
 			continue;
@@ -1000,10 +987,11 @@
 		}
 		l = &np->next;
 	}
-	p->expire = now + 5000;		/* give other threads 5 secs to get out */
+	p->expire = now + 5000;	/* give other threads 5 secs to get out */
 	p->next = nil;
 	*l = p;
 }
+
 static void
 ipselffree(Ipself *p)
 {
@@ -1011,8 +999,8 @@
 	ulong now = NOW;
 
 	l = &freeipself;
-	for(np = *l; np; np = *l){
-		if(np->expire > now){
+	for(np = *l; np != nil; np = *l){
+		if((long)(now - np->expire) >= 0){
 			*l = np->next;
 			free(np);
 			continue;
@@ -1019,7 +1007,7 @@
 		}
 		l = &np->next;
 	}
-	p->expire = now + 5000;		/* give other threads 5 secs to get out */
+	p->expire = now + 5000;	/* give other threads 5 secs to get out */
 	p->next = nil;
 	*l = p;
 }
@@ -1027,7 +1015,6 @@
 /*
  *  Decrement reference for this address on this link.
  *  Unlink from selftab if this is the last ref.
- *	called with c locked
  */
 static void
 remselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a)
@@ -1039,7 +1026,7 @@
 
 	/* find the unique selftab entry */
 	l = &f->self->hash[hashipa(a)];
-	for(p = *l; p; p = *l){
+	for(p = *l; p != nil; p = *l){
 		if(ipcmp(p->a, a) == 0)
 			break;
 		l = &p->next;
@@ -1053,7 +1040,7 @@
 	 *  that matches the selftab entry
 	 */
 	l_lifc = &lifc->link;
-	for(link = *l_lifc; link; link = *l_lifc){
+	for(link = *l_lifc; link != nil; link = *l_lifc){
 		if(link->self == p)
 			break;
 		l_lifc = &link->lifclink;
@@ -1067,8 +1054,8 @@
 	 *  the one we just found
 	 */
 	l_self = &p->link;
-	for(link = *l_self; link; link = *l_self){
-		if(link == *(l_lifc))
+	for(link = *l_self; link != nil; link = *l_self){
+		if(link == *l_lifc)
 			break;
 		l_self = &link->selflink;
 	}
@@ -1079,9 +1066,20 @@
 	if(--(link->ref) != 0)
 		goto out;
 
-	if((p->type & Rmulti) && ifc->m->remmulti != nil)
-		(*ifc->m->remmulti)(ifc, a, lifc->local);
+	/* remove from routing table */
+	remroute(f, a, IPallbits,
+		lifc->local, 
+		((p->type & (Rbcast|Rmulti)) != 0 || v6addrtype(a) == linklocalv6) ?
+			IPallbits : IPnoaddr,
+		a, p->type, ifc, tifc);
 
+	if((p->type & Rmulti) && ifc->m->remmulti != nil){
+		if(!waserror()){
+			(*ifc->m->remmulti)(ifc, a, lifc->local);
+			poperror();
+		}
+	}
+
 	/* ref == 0, remove from both chains and free the link */
 	*l_lifc = link->lifclink;
 	*l_self = link->selflink;
@@ -1090,30 +1088,18 @@
 	if(p->link != nil)
 		goto out;
 
-	/* remove from routing table */
-	if(isv4(a))
-		v4delroute(f, a+IPv4off, IPallbits+IPv4off, 1);
-	else
-		v6delroute(f, a, IPallbits, 1);
-	
+	/* if null address, forget */
+	if(ipcmp(a, v4prefix) == 0 || ipcmp(a, IPnoaddr) == 0)
+		f->self->acceptall = 0;
+
 	/* no more links, remove from hash and free */
 	*l = p->next;
 	ipselffree(p);
 
-	/* if IPnoaddr, forget */
-	if(ipcmp(a, v4prefix) == 0 || ipcmp(a, IPnoaddr) == 0)
-		f->self->acceptall = 0;
-
 out:
 	qunlock(f->self);
 }
 
-static char *stformat = "%-44.44I %2.2d %4.4s\n";
-enum
-{
-	Nstformat= 41,
-};
-
 long
 ipselftabread(Fs *f, char *cp, ulong offset, int n)
 {
@@ -1124,14 +1110,14 @@
 
 	m = 0;
 	off = offset;
-	qlock(f->self);
 	for(i = 0; i < NHASH && m < n; i++){
 		for(p = f->self->hash[i]; p != nil && m < n; p = p->next){
 			nifc = 0;
-			for(link = p->link; link; link = link->selflink)
+			for(link = p->link; link != nil; link = link->selflink)
 				nifc++;
 			routetype(p->type, state);
-			m += snprint(cp + m, n - m, stformat, p->a, nifc, state);
+			m += snprint(cp + m, n - m, "%-44.44I %2.2d %4.4s\n",
+				p->a, nifc, state);
 			if(off > 0){
 				off -= m;
 				m = 0;
@@ -1138,30 +1124,15 @@
 			}
 		}
 	}
-	qunlock(f->self);
 	return m;
 }
 
-int
-iptentative(Fs *f, uchar *addr)
-{
- 	Ipself *p;
-
-	p = f->self->hash[hashipa(addr)];
-	for(; p; p = p->next){
-		if(ipcmp(addr, p->a) == 0) {
-			return p->link->lifc->tentative;
-		}
-	}
-	return 0;
-}
-
 /*
  *  returns
  *	0		- no match
  *	Runi
  *	Rbcast
- *	Rmcast
+ *	Rmulti
  */
 int
 ipforme(Fs *f, uchar *addr)
@@ -1168,11 +1139,9 @@
 {
 	Ipself *p;
 
-	p = f->self->hash[hashipa(addr)];
-	for(; p; p = p->next){
+	for(p = f->self->hash[hashipa(addr)]; p != nil; p = p->next)
 		if(ipcmp(addr, p->a) == 0)
-			return p->type;
-	}
+			return p->type & (Runi|Rbcast|Rmulti);
 
 	/* hack to say accept anything */
 	if(f->self->acceptall)
@@ -1186,254 +1155,237 @@
  *  return nil.
  */
 Ipifc*
-findipifc(Fs *f, uchar *remote, int type)
+findipifc(Fs *f, uchar *local, uchar *remote, int type)
 {
+	uchar gnet[IPaddrlen];
+	int spec, xspec;
 	Ipifc *ifc, *x;
 	Iplifc *lifc;
-	Conv **cp, **e;
-	uchar gnet[IPaddrlen];
-	uchar xmask[IPaddrlen];
+	Conv **cp;
 
-	x = nil; memset(xmask, 0, IPaddrlen);
-
-	/* find most specific match */
-	e = &f->ipifc->conv[f->ipifc->nc];
-	for(cp = f->ipifc->conv; cp < e; cp++){
-		if(*cp == 0)
-			continue;
-
+	x = nil;
+	xspec = 0;
+	for(cp = f->ipifc->conv; *cp != nil; cp++){
 		ifc = (Ipifc*)(*cp)->ptcl;
-
-		for(lifc = ifc->lifc; lifc; lifc = lifc->next){
+		if(!canrlock(ifc))
+			continue;
+		for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next){
+			if(type & Runi){
+				if(ipcmp(remote, lifc->local) == 0){
+				Found:
+					runlock(ifc);
+					return ifc;
+				}
+			} else if(type & (Rbcast|Rmulti)) {
+				if(ipcmp(local, lifc->local) == 0)
+					goto Found;
+			}
 			maskip(remote, lifc->mask, gnet);
 			if(ipcmp(gnet, lifc->net) == 0){
-				if(x == nil || ipcmp(lifc->mask, xmask) > 0){
+				spec = comprefixlen(remote, lifc->local, IPaddrlen);
+				if(spec > xspec){
 					x = ifc;
-					ipmove(xmask, lifc->mask);
+					xspec = spec;
 				}
 			}
 		}
+		runlock(ifc);
 	}
-	if(x != nil)
-		return x;
+	return x;
+}
 
-	/* for now for broadcast and multicast, just use first interface */
-	if(type & (Rbcast|Rmulti)){
-		for(cp = f->ipifc->conv; cp < e; cp++){
-			if(*cp == 0)
-				continue;
-			ifc = (Ipifc*)(*cp)->ptcl;
-			if(ifc->lifc != nil)
-				return ifc;
-		}
+Ipifc*
+findipifcstr(Fs *f, char *s)
+{
+	uchar ip[IPaddrlen];
+	Conv *c;
+	char *p;
+	long x;
+
+	x = strtol(s, &p, 10);
+	if(p > s && *p == '\0'){
+		if(x < 0)
+			return nil;
+		if(x < f->ipifc->nc && (c = f->ipifc->conv[x]) != nil && ipifcinuse(c))
+			return (Ipifc*)c->ptcl;
 	}
-		
+	if(parseip(ip, s) != -1)
+		return findipifc(f, ip, ip, Runi);
 	return nil;
 }
 
-enum {
-	unknownv6,
-	multicastv6,
-	unspecifiedv6,
-	linklocalv6,
-	sitelocalv6,
-	globalv6,
-};
-
-int
-v6addrtype(uchar *addr)
-{
-	if(isv6global(addr))
-		return globalv6;
-	if(islinklocal(addr))
-		return linklocalv6;
-	if(isv6mcast(addr))
-		return multicastv6;
-	if(issitelocal(addr))
-		return sitelocalv6;
-	return unknownv6;
-}
-
-#define v6addrcurr(lifc) (( (lifc)->origint + (lifc)->preflt >= (NOW/10^3) ) || ( (lifc)->preflt == 0xffffffff ))
-
+/*
+ *  find "best" (global > link local > unspecified)
+ *  local address; address must be current.
+ */
 static void
 findprimaryipv6(Fs *f, uchar *local)
 {
-	Conv **cp, **e;
-	Ipifc *ifc;
-	Iplifc *lifc;
+	ulong now = NOW/1000;
 	int atype, atypel;
+	Iplifc *lifc;
+	Ipifc *ifc;
+	Conv **cp;
 
 	ipmove(local, v6Unspecified);
 	atype = unspecifiedv6;
 
-	/* find "best" (global > sitelocal > link local > unspecified)
-	 * local address; address must be current */
-
-	e = &f->ipifc->conv[f->ipifc->nc];
-	for(cp = f->ipifc->conv; cp < e; cp++){
-		if(*cp == 0)
-			continue;
+	for(cp = f->ipifc->conv; *cp != nil; cp++){
 		ifc = (Ipifc*)(*cp)->ptcl;
-		for(lifc = ifc->lifc; lifc; lifc = lifc->next){
+		rlock(ifc);
+		for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next){
 			atypel = v6addrtype(lifc->local);
 			if(atypel > atype)
-			if(v6addrcurr(lifc)) {
+			if(lifc->preflt == ~0UL || lifc->preflt >= now-lifc->origint) {
 				ipmove(local, lifc->local);
 				atype = atypel;
-				if(atype == globalv6)
+				if(atype == globalv6){
+					runlock(ifc);
 					return;
+				}
 			}
 		}
+		runlock(ifc);
 	}
 }
 
 /*
- *  returns first ip address configured
+ *  returns first v4 address configured
  */
 static void
 findprimaryipv4(Fs *f, uchar *local)
 {
-	Conv **cp, **e;
-	Ipifc *ifc;
 	Iplifc *lifc;
+	Ipifc *ifc;
+	Conv **cp;
 
 	/* find first ifc local address */
-	e = &f->ipifc->conv[f->ipifc->nc];
-	for(cp = f->ipifc->conv; cp < e; cp++){
-		if(*cp == 0)
-			continue;
+	for(cp = f->ipifc->conv; *cp != nil; cp++){
 		ifc = (Ipifc*)(*cp)->ptcl;
-		if((lifc = ifc->lifc) != nil){
-			ipmove(local, lifc->local);
-			return;
+		rlock(ifc);
+		for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next){
+			if((lifc->type & Rv4) != 0){
+				ipmove(local, lifc->local);
+				runlock(ifc);
+				return;
+			}
 		}
+		runlock(ifc);
 	}
+	ipmove(local, IPnoaddr);
 }
 
 /*
- *  find the local address 'closest' to the remote system, copy it to
- *  local and return the ifc for that address
+ * ipv4local, ipv6local:
+ *  return a local address associated with an interface close to remote.
+ *  prefixlen is the number of leading bits in the local address that
+ *  have to match an interface address to be considered. this is used
+ *  by source specific routes to filter on the source address.
+ *  return non-zero on success or zero when no address was found.
+ *
+ *  for ipv4local, all addresses are 4 byte format.
  */
-void
-findlocalip(Fs *f, uchar *local, uchar *remote)
+int
+ipv4local(Ipifc *ifc, uchar *local, int prefixlen, uchar *remote)
 {
-	Ipifc *ifc;
 	Iplifc *lifc;
-	Route *r;
-	uchar gate[IPaddrlen];
-	uchar gnet[IPaddrlen];
-	int version;
-	int atype = unspecifiedv6, atypel = unknownv6;
+	int a, b;
 
-	USED(atype);
-	USED(atypel);
-	qlock(f->ipifc);
-	r = v6lookup(f, remote, nil);
- 	version = (memcmp(remote, v4prefix, IPv4off) == 0) ? V4 : V6;
-	
-	if(r != nil){
-		ifc = r->ifc;
-		if(r->type & Rv4)
-			v4tov6(gate, r->v4.gate);
-		else {
-			ipmove(gate, r->v6.gate);
-			ipmove(local, v6Unspecified);
-		}
+	b = -1;
+	for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next){
+		if((lifc->type & Rv4) == 0 || ipcmp(lifc->local, IPnoaddr) == 0)
+			continue;
 
-		/* find ifc address closest to the gateway to use */
-		switch(version) {
-		case V4:
-			for(lifc = ifc->lifc; lifc; lifc = lifc->next){
-				maskip(gate, lifc->mask, gnet);
-				if(ipcmp(gnet, lifc->net) == 0){
-					ipmove(local, lifc->local);
-					goto out;
-				}
-			}
-			break;
-		case V6:
-			for(lifc = ifc->lifc; lifc; lifc = lifc->next){
-				atypel = v6addrtype(lifc->local);
-				maskip(gate, lifc->mask, gnet);
-				if(ipcmp(gnet, lifc->net) == 0)
-				if(atypel > atype)
-				if(v6addrcurr(lifc)) {
-					ipmove(local, lifc->local);
-					atype = atypel;
-					if(atype == globalv6)
-						break;
-				}
-			}
-			if(atype > unspecifiedv6)
-				goto out;
-			break;
-		default:
-			panic("findlocalip: version %d", version);
+		if(prefixlen && comprefixlen(lifc->local+IPv4off, local, IPv4addrlen) < prefixlen)
+			continue;
+		
+		a = comprefixlen(lifc->local+IPv4off, remote, IPv4addrlen);
+		if(a > b){
+			b = a;
+			memmove(local, lifc->local+IPv4off, IPv4addrlen);
 		}
 	}
-
-	switch(version){
-	case V4:
-		findprimaryipv4(f, local);
-		break;
-	case V6:
-		findprimaryipv6(f, local);
-		break;
-	default:
-		panic("findlocalip2: version %d", version);
-	}
-
-out:
-	qunlock(f->ipifc);
+	return b >= 0;
 }
 
-/*
- *  return first v4 address associated with an interface
- */
 int
-ipv4local(Ipifc *ifc, uchar *addr)
+ipv6local(Ipifc *ifc, uchar *local, int prefixlen, uchar *remote)
 {
+	struct {
+		int	atype;
+		int	deprecated;
+		int	comprefixlen;
+	} a, b;
+	int atype;
+	ulong now;
 	Iplifc *lifc;
 
-	for(lifc = ifc->lifc; lifc; lifc = lifc->next){
-		if(isv4(lifc->local)){
-			memmove(addr, lifc->local+IPv4off, IPv4addrlen);
-			return 1;
-		}
+	if(isv4(remote)){
+		memmove(local, v4prefix, IPv4off);
+		if((prefixlen -= IPv4off*8) < 0)
+			prefixlen = 0;
+		return ipv4local(ifc, local+IPv4off, prefixlen, remote+IPv4off);
 	}
-	return 0;
-}
 
-/*
- *  return first v6 address associated with an interface
- */
-int
-ipv6local(Ipifc *ifc, uchar *addr)
-{
-	Iplifc *lifc;
+	atype = v6addrtype(remote);
+	b.atype = unknownv6;
+	b.deprecated = 1;
+	b.comprefixlen = 0;
 
-	for(lifc = ifc->lifc; lifc; lifc = lifc->next){
-		if(!isv4(lifc->local) && !(lifc->tentative)){
-			ipmove(addr, lifc->local);
-			return 1;
+	now = NOW/1000;
+	for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next){
+		if(lifc->tentative)
+			continue;
+
+		if(prefixlen && comprefixlen(lifc->local, local, IPaddrlen) < prefixlen)
+			continue;
+
+		a.atype = v6addrtype(lifc->local);
+		a.deprecated = lifc->preflt != ~0UL && lifc->preflt < now-lifc->origint;
+		a.comprefixlen = comprefixlen(lifc->local, remote, IPaddrlen);
+
+		/* prefer appropriate scope */
+		if(a.atype != b.atype){
+			if(a.atype > b.atype && b.atype < atype ||
+			   a.atype < b.atype && b.atype > atype)
+				goto Good;
+			continue;
 		}
+		/* prefer non-deprecated addresses */
+		if(a.deprecated != b.deprecated){
+			if(b.deprecated)
+				goto Good;
+			continue;
+		}
+		/* prefer longer common prefix */
+		if(a.comprefixlen != b.comprefixlen){
+			if(a.comprefixlen > b.comprefixlen)
+				goto Good;
+			continue;
+		}
+		continue;
+	Good:
+		b = a;
+		ipmove(local, lifc->local);
 	}
-	return 0;
+
+	return b.atype >= atype;
 }
 
-int
-ipv6anylocal(Ipifc *ifc, uchar *addr)
+/*
+ *  find the local address for a remote destination
+ */
+void
+findlocalip(Fs *f, uchar *local, uchar *remote)
 {
-	Iplifc *lifc;
-
-	for(lifc = ifc->lifc; lifc; lifc = lifc->next){
-		if(!isv4(lifc->local)){
-			ipmove(addr, lifc->local);
-			return SRC_UNI;
-		}
+	if(isv4(remote)) {
+		memmove(local, v4prefix, IPv4off);
+		if(v4source(f, remote+IPv4off, local+IPv4off) == nil)
+			findprimaryipv4(f, local);
+	} else {
+		if(v6source(f, remote, local) == nil)
+			findprimaryipv6(f, local);
 	}
-	return SRC_UNSPEC;
 }
 
 /*
@@ -1444,13 +1396,28 @@
 {
 	Iplifc *lifc;
 
-	for(lifc = ifc->lifc; lifc; lifc = lifc->next)
+	for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next)
 		if(ipcmp(ip, lifc->local) == 0)
 			return lifc;
+
 	return nil;
 }
 
+Iplifc*
+ipremoteonifc(Ipifc *ifc, uchar *ip)
+{
+	uchar net[IPaddrlen];
+	Iplifc *lifc;
 
+	for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next){
+		maskip(ip, lifc->mask, net);
+		if(ipcmp(net, lifc->remote) == 0)
+			return lifc;
+	}
+	return nil;
+}
+
+
 /*
  *  See if we're proxying for this address on this interface
  */
@@ -1458,24 +1425,13 @@
 ipproxyifc(Fs *f, Ipifc *ifc, uchar *ip)
 {
 	Route *r;
-	uchar net[IPaddrlen];
-	Iplifc *lifc;
 
 	/* see if this is a direct connected pt to pt address */
-	r = v6lookup(f, ip, nil);
-	if(r == nil)
+	r = v6lookup(f, ip, ip, nil);
+	if(r == nil || (r->type & (Rifc|Rproxy)) != (Rifc|Rproxy))
 		return 0;
-	if((r->type & (Rifc|Rproxy)) != (Rifc|Rproxy))
-		return 0;
 
-	/* see if this is on the right interface */
-	for(lifc = ifc->lifc; lifc; lifc = lifc->next){
-		maskip(ip, lifc->mask, net);
-		if(ipcmp(net, lifc->remote) == 0)
-			return 1;
-	}
-
-	return 0;
+	return ipremoteonifc(ifc, ip) != nil;
 }
 
 /*
@@ -1487,73 +1443,53 @@
 	if(isv4(ip)){
 		if(ip[IPv4off] >= 0xe0 && ip[IPv4off] < 0xf0)
 			return V4;
-	} else {
-		if(ip[0] == 0xff)
-			return V6;
 	}
+	else if(ip[0] == 0xff)
+		return V6;
 	return 0;
 }
 
-int
-ipisbm(uchar *ip)
-{
-	if(isv4(ip)){
-		if(ip[IPv4off] >= 0xe0 && ip[IPv4off] < 0xf0)
-			return V4;
-		if(ipcmp(ip, IPv4bcast) == 0)
-			return V4;
-	} else {
-		if(ip[0] == 0xff)
-			return V6;
-	}
-	return 0;
-}
-
-
 /*
- *  add a multicast address to an interface, called with c locked
+ *  add a multicast address to an interface.
  */
 void
 ipifcaddmulti(Conv *c, uchar *ma, uchar *ia)
 {
-	Ipifc *ifc;
-	Iplifc *lifc;
-	Conv **p;
 	Ipmulti *multi, **l;
+	Iplifc *lifc;
+	Ipifc *ifc;
 	Fs *f;
 
-	f = c->p->f;
-	
-	for(l = &c->multi; *l; l = &(*l)->next)
-		if(ipcmp(ma, (*l)->ma) == 0)
-		if(ipcmp(ia, (*l)->ia) == 0)
+	if(isv4(ma) != isv4(ia))
+		error("incompatible multicast/interface ip address");
+
+	for(l = &c->multi; *l != nil; l = &(*l)->next)
+		if(ipcmp(ma, (*l)->ma) == 0 && ipcmp(ia, (*l)->ia) == 0)
 			return;		/* it's already there */
 
-	multi = *l = smalloc(sizeof(*multi));
-	ipmove(multi->ma, ma);
-	ipmove(multi->ia, ia);
-	multi->next = nil;
-
-	for(p = f->ipifc->conv; *p; p++){
-		if((*p)->inuse == 0)
-			continue;
-		ifc = (Ipifc*)(*p)->ptcl;
+	f = c->p->f;
+	if((ifc = findipifc(f, ia, ma, Rmulti)) != nil){
+		rlock(ifc);
 		if(waserror()){
-			wunlock(ifc);
+			runlock(ifc);
 			nexterror();
 		}
-		wlock(ifc);
-		for(lifc = ifc->lifc; lifc; lifc = lifc->next)
-			if(ipcmp(ia, lifc->local) == 0)
-				addselfcache(f, ifc, lifc, ma, Rmulti);
-		wunlock(ifc);
+		if((lifc = iplocalonifc(ifc, ia)) != nil)
+			addselfcache(f, ifc, lifc, ma, Rmulti);
+		runlock(ifc);
 		poperror();
 	}
+
+	multi = smalloc(sizeof(*multi));
+	ipmove(multi->ma, ma);
+	ipmove(multi->ia, ia);
+	multi->next = nil;
+	*l = multi;
 }
 
 
 /*
- *  remove a multicast address from an interface, called with c locked
+ *  remove a multicast address from an interface.
  */
 void
 ipifcremmulti(Conv *c, uchar *ma, uchar *ia)
@@ -1560,15 +1496,11 @@
 {
 	Ipmulti *multi, **l;
 	Iplifc *lifc;
-	Conv **p;
 	Ipifc *ifc;
 	Fs *f;
 
-	f = c->p->f;
-	
-	for(l = &c->multi; *l; l = &(*l)->next)
-		if(ipcmp(ma, (*l)->ma) == 0)
-		if(ipcmp(ia, (*l)->ia) == 0)
+	for(l = &c->multi; *l != nil; l = &(*l)->next)
+		if(ipcmp(ma, (*l)->ma) == 0 && ipcmp(ia, (*l)->ia) == 0)
 			break;
 
 	multi = *l;
@@ -1576,161 +1508,101 @@
 		return; 	/* we don't have it open */
 
 	*l = multi->next;
+	multi->next = nil;
 
-	for(p = f->ipifc->conv; *p; p++){
-		if((*p)->inuse == 0)
-			continue;
-
-		ifc = (Ipifc*)(*p)->ptcl;
-		if(waserror()){
-			wunlock(ifc);
-			nexterror();
-		}
-		wlock(ifc);
-		for(lifc = ifc->lifc; lifc; lifc = lifc->next)
-			if(ipcmp(ia, lifc->local) == 0)
+	f = c->p->f;
+	if((ifc = findipifc(f, ia, ma, Rmulti)) != nil){
+		rlock(ifc);
+		if(!waserror()){
+			if((lifc = iplocalonifc(ifc, ia)) != nil)
 				remselfcache(f, ifc, lifc, ma);
-		wunlock(ifc);
-		poperror();
+			poperror();
+		}
+		runlock(ifc);
 	}
-
 	free(multi);
 }
 
-/*
- *  make lifc's join and leave multicast groups
- */
-static char*
-ipifcjoinmulti(Ipifc *ifc, char **argv, int argc)
+/* register the address on this network for address resolution */
+static void
+ipifcregisteraddr(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *ip)
 {
-	USED(ifc, argv, argc);
-	return nil;
+	if(waserror()){
+		print("ipifcregisteraddr %s %I %I: %s\n", ifc->dev, lifc->local, ip, up->errstr);
+		return;
+	}
+	if(ifc->m != nil && ifc->m->areg != nil)
+		(*ifc->m->areg)(f, ifc, lifc, ip);
+	poperror();
 }
 
-static char*
-ipifcleavemulti(Ipifc *ifc, char **argv, int argc)
-{
-	USED(ifc, argv, argc);
-	return nil;
-}
-
 static void
-ipifcregisterproxy(Fs *f, Ipifc *ifc, uchar *ip)
+ipifcregisterproxy(Fs *f, Ipifc *ifc, uchar *ip, int add)
 {
-	Conv **cp, **e;
-	Ipifc *nifc;
+	uchar a[IPaddrlen];
 	Iplifc *lifc;
-	Medium *m;
-	uchar net[IPaddrlen];
+	Ipifc *nifc;
+	Conv **cp;
 
-	/* register the address on any network that will proxy for us */
-	e = &f->ipifc->conv[f->ipifc->nc];
+	/* register the address on any interface that will proxy for the ip */
+	for(cp = f->ipifc->conv; *cp != nil; cp++){
+		nifc = (Ipifc*)(*cp)->ptcl;
+		if(nifc == ifc || !canrlock(nifc))
+			continue;
 
-	if(!isv4(ip)) { // V6
-		for(cp = f->ipifc->conv; cp < e; cp++){
-			if(*cp == nil)
-				continue;
-			nifc = (Ipifc*)(*cp)->ptcl;
-			if(nifc == ifc)
-				continue;
-	
-			rlock(nifc);
-			m = nifc->m;
-			if(m == nil || m->addmulti == nil) {
-				runlock(nifc);
-				continue;
-			}
-			for(lifc = nifc->lifc; lifc; lifc = lifc->next){
-				maskip(ip, lifc->mask, net);
-				if(ipcmp(net, lifc->remote) == 0) { /* add solicited-node multicast address */
-					ipv62smcast(net, ip);
-					addselfcache(f, nifc, lifc, net, Rmulti);
-					arpenter(f, V6, ip, nifc->mac, 6, 0);
-					//(*m->addmulti)(nifc, net, ip);
-					break;
-				}
-			}
+		if(nifc->m == nil
+		|| (lifc = ipremoteonifc(nifc, ip)) == nil
+		|| (lifc->type & Rptpt) != 0
+		|| waserror()){
 			runlock(nifc);
+			continue;
 		}
-		return;
-	}
-	else { // V4
-		for(cp = f->ipifc->conv; cp < e; cp++){
-			if(*cp == nil)
-				continue;
-			nifc = (Ipifc*)(*cp)->ptcl;
-			if(nifc == ifc)
-				continue;
-	
-			rlock(nifc);
-			m = nifc->m;
-			if(m == nil || m->areg == nil){
-				runlock(nifc);
-				continue;
-			}
-			for(lifc = nifc->lifc; lifc; lifc = lifc->next){
-				maskip(ip, lifc->mask, net);
-				if(ipcmp(net, lifc->remote) == 0){
-					(*m->areg)(nifc, ip);
-					break;
-				}
-			}
-			runlock(nifc);
+		if((lifc->type & Rv4) == 0){
+			/* add solicited-node multicast addr */
+			ipv62smcast(a, ip);
+			if(add)
+				addselfcache(f, nifc, lifc, a, Rmulti);
+			else
+				remselfcache(f, nifc, lifc, a);
 		}
+		if(add)
+			ipifcregisteraddr(f, nifc, lifc, ip);
+		runlock(nifc);
+		poperror();
 	}
 }
 
-
-// added for new v6 mesg types
-static void
-adddefroute6(Fs *f, uchar *gate, int force)
-{
-	Route *r;
-
-	r = v6lookup(f, v6Unspecified, nil);
-	if(r!=nil)
-	if(!(force) && (strcmp(r->tag,"ra")!=0))	// route entries generated
-		return;			// by all other means take
-					// precedence over router annc
-
-	v6delroute(f, v6Unspecified, v6Unspecified, 1);
-	v6addroute(f, "ra", v6Unspecified, v6Unspecified, gate, 0);
-}
-
-enum
-{
-	Ngates = 3,
-};
-
 char*
-ipifcaddpref6(Ipifc *ifc, char**argv, int argc)
+ipifcadd6(Ipifc *ifc, char **argv, int argc)
 {
-	uchar	onlink = 1;
-	uchar	autoflag = 1;
-	long 	validlt = 0xffffffff;
-	long 	preflt = 0xffffffff;
-	long	origint = NOW / 10^3;
-	uchar	prefix[IPaddrlen];
-	int	plen = 64;
-	Iplifc	*lifc;
-	char	addr[40], preflen[6];
-	char	*params[3];
+	int plen = 64;
+	char addr[40], preflen[6];
+	char *params[3];
+	uchar prefix[IPaddrlen];
+	Iplifc lifc;
+	Medium *m;
 
+	lifc.onlink = 1;
+	lifc.autoflag = 1;
+	lifc.validlt = lifc.preflt = ~0UL;
+	lifc.origint = NOW / 1000;
+
 	switch(argc) {
 	case 7:
-		preflt = atoi(argv[6]);
+		lifc.preflt = strtoul(argv[6], 0, 10);
 		/* fall through */
 	case 6:
-		validlt = atoi(argv[5]);
+		lifc.validlt = strtoul(argv[5], 0, 10);
 		/* fall through */
 	case 5:
-		autoflag =  atoi(argv[4]);
+		lifc.autoflag = atoi(argv[4]) != 0;
 		/* fall through */
 	case 4:
-		onlink = atoi(argv[3]);
+		lifc.onlink = atoi(argv[3]) != 0;
 		/* fall through */
 	case 3:
 		plen = atoi(argv[2]);
+		/* fall through */
 	case 2:
 		break;
 	default:
@@ -1737,25 +1609,16 @@
 		return Ebadarg;
 	}
 
-	if((parseip(prefix, argv[1])!=6) ||
-	 	(validlt < preflt) ||
-		(plen < 0) || (plen > 64) ||
-		(islinklocal(prefix))
-	)
+	if (parseip(prefix, argv[1]) != 6 || lifc.validlt < lifc.preflt || plen < 0 ||
+	    plen > 64 || islinklocal(prefix))
 		return Ebadarg;
 
-	lifc = smalloc(sizeof(Iplifc));
-	lifc->onlink = (onlink!=0);
-	lifc->autoflag = (autoflag!=0);
-	lifc->validlt = validlt;
-	lifc->preflt = preflt;
-	lifc->origint = origint;
+	/* issue "add" ctl msg for v6 link-local addr and prefix len */
+	m = ifc->m;
+	if(m == nil || m->pref2addr == nil)
+		return Eunbound;
+	(*m->pref2addr)(prefix, ifc->mac);	/* mac → v6 link-local addr */
 
-	if(ifc->m->pref2addr!=nil)
-		ifc->m->pref2addr(prefix, ifc->mac);
-	else
-		return Ebadarg;
-	
 	sprint(addr, "%I", prefix);
 	sprint(preflen, "/%d", plen);
 	params[0] = "add";
@@ -1762,6 +1625,28 @@
 	params[1] = addr;
 	params[2] = preflen;
 
-	return ipifcadd(ifc, params, 3, 0, lifc);
+	return ipifcadd(ifc, params, 3, 0, &lifc);
 }
 
+char*
+ipifcremove6(Ipifc *ifc, char**, int argc)
+{
+	Iplifc *lifc, **l;
+	ulong now;
+
+	if(argc != 1)
+		return Ebadarg;
+
+	wlock(ifc);
+	now = NOW/1000;
+	for(l = &ifc->lifc; (lifc = *l) != nil;) {
+		if((lifc->type & Rv4) == 0)
+		if(lifc->validlt != ~0UL && lifc->validlt < now-lifc->origint)
+			if(ipifcremlifc(ifc, l) == nil)
+				continue;
+		l = &lifc->next;
+	}
+	wunlock(ifc);
+
+	return nil;
+}
diff -u a/os/ip//ipmux.c b/os/ip//ipmux.c
--- a/os/ip//ipmux.c
+++ b/os/ip//ipmux.c
@@ -1,3 +1,6 @@
+/*
+ * IP packet filter
+ */
 #include "u.h"
 #include "../port/lib.h"
 #include "mem.h"
@@ -6,30 +9,14 @@
 #include "../port/error.h"
 
 #include "ip.h"
-#define DPRINT if(0)print
+#include "ipv6.h"
 
 typedef struct Ipmuxrock  Ipmuxrock;
 typedef struct Ipmux      Ipmux;
-typedef struct Ip6hdr     Ip6hdr;
 
 enum
 {
-	IPHDR		= 20,		/* sizeof(Ip4hdr) */
-};
-
-struct Ip6hdr
-{
-	uchar vcf[4];		/* version, class label, and flow label */ 
-	uchar ploadlen[2];	/* payload length */
-	uchar proto;		/* next header, i.e. proto */
-	uchar ttl;		/* hop limit, i.e. ttl */
-	uchar src[16];		/* IP source */
-	uchar dst[16];		/* IP destination */
-};
-
-
-enum
-{
+	Tver,
 	Tproto,
 	Tdata,
 	Tiph,
@@ -36,28 +23,8 @@
 	Tdst,
 	Tsrc,
 	Tifc,
-
-	Cother = 0,
-	Cbyte,		/* single byte */
-	Cmbyte,		/* single byte with mask */
-	Cshort,		/* single short */
-	Cmshort,	/* single short with mask */
-	Clong,		/* single long */
-	Cmlong,		/* single long with mask */
-	Cifc,
-	Cmifc,
 };
 
-char *ftname[] = 
-{
-[Tproto]	"proto",
-[Tdata]		"data",
-[Tiph]	 	"iph",
-[Tdst]		"dst",
-[Tsrc]		"src",
-[Tifc]		"ifc",
-};
-
 /*
  *  a node in the decision tree
  */
@@ -66,16 +33,12 @@
 	Ipmux	*yes;
 	Ipmux	*no;
 	uchar	type;		/* type of field(Txxxx) */
-	uchar	ctype;		/* tupe of comparison(Cxxxx) */
 	uchar	len;		/* length in bytes of item to compare */
 	uchar	n;		/* number of items val points to */
-	short	off;		/* offset of comparison */
-	short	eoff;		/* end offset of comparison */
-	uchar	skiphdr;	/* should offset start after ipheader */
+	int	off;		/* offset of comparison */
 	uchar	*val;
 	uchar	*mask;
 	uchar	*e;		/* val+n*len*/
-
 	int	ref;		/* so we can garbage collect */
 	Conv	*conv;
 };
@@ -90,6 +53,7 @@
 
 static int	ipmuxsprint(Ipmux*, int, char*, int);
 static void	ipmuxkick(void *x);
+static void	ipmuxfree(Ipmux *f);
 
 static char*
 skipwhite(char *p)
@@ -122,27 +86,33 @@
 	Ipmux *f;
 
 	p = skipwhite(p);
-	if(strncmp(p, "dst", 3) == 0){
+	if(strncmp(p, "ver", 3) == 0){
+		type = Tver;
+		off = 0;
+		len = 1;
+		p += 3;
+	}
+	else if(strncmp(p, "dst", 3) == 0){
 		type = Tdst;
-		off = offsetof(Ip4hdr, dst[0]);
-		len = IPv4addrlen;
+		off = offsetof(Ip6hdr, dst[0]);
+		len = IPaddrlen;
 		p += 3;
 	}
 	else if(strncmp(p, "src", 3) == 0){
 		type = Tsrc;
-		off = offsetof(Ip4hdr, src[0]);
-		len = IPv4addrlen;
+		off = offsetof(Ip6hdr, src[0]);
+		len = IPaddrlen;
 		p += 3;
 	}
 	else if(strncmp(p, "ifc", 3) == 0){
 		type = Tifc;
-		off = -IPv4addrlen;
-		len = IPv4addrlen;
+		off = -IPaddrlen;
+		len = IPaddrlen;
 		p += 3;
 	}
 	else if(strncmp(p, "proto", 5) == 0){
 		type = Tproto;
-		off = offsetof(Ip4hdr, proto);
+		off = offsetof(Ip6hdr, proto);
 		len = 1;
 		p += 5;
 	}
@@ -160,7 +130,7 @@
 			return nil;
 		p++;
 		off = strtoul(p, &p, 0);
-		if(off < 0 || off > (64-IPHDR))
+		if(off < 0)
 			return nil;
 		p = skipwhite(p);
 		if(*p != ':')
@@ -189,11 +159,6 @@
 	f->mask = nil;
 	f->n = 1;
 	f->ref = 1;
-	if(type == Tdata)
-		f->skiphdr = 1;
-	else
-		f->skiphdr = 0;
-
 	return f;	
 }
 
@@ -229,7 +194,7 @@
 static Ipmux*
 parsemux(char *p)
 {
-	int n, nomask;
+	int n;
 	Ipmux *f;
 	char *val;
 	char *mask;
@@ -247,7 +212,7 @@
 		goto parseerror;
 
 	/* parse mask */
-	mask = follows(val, '&');
+	mask = follows(p, '&');
 	if(mask != nil){
 		switch(f->type){
 		case Tsrc:
@@ -254,7 +219,7 @@
 		case Tdst:
 		case Tifc:
 			f->mask = smalloc(f->len);
-			v4parseip(f->mask, mask);
+			parseipmask(f->mask, mask, 0);
 			break;
 		case Tdata:
 		case Tiph:
@@ -264,15 +229,13 @@
 		default:
 			goto parseerror;
 		}
-		nomask = 0;
-	} else {
-		nomask = 1;
+	} else if(f->type == Tver){
 		f->mask = smalloc(f->len);
-		memset(f->mask, 0xff, f->len);
+		f->mask[0] = 0xF0;
 	}
 
 	/* parse vals */
-	f->n = getfields(val, vals, sizeof(vals)/sizeof(char*), 1, "|");
+	f->n = getfields(val, vals, nelem(vals), 1, "|");
 	if(f->n == 0)
 		goto parseerror;
 	f->val = smalloc(f->n*f->len);
@@ -279,10 +242,21 @@
 	v = f->val;
 	for(n = 0; n < f->n; n++){
 		switch(f->type){
+		case Tver:
+			if(f->n != 1)
+				goto parseerror;
+			if(strcmp(vals[n], "6") == 0)
+				*v = IP_VER6;
+			else if(strcmp(vals[n], "4") == 0)
+				*v = IP_VER4;
+			else
+				goto parseerror;
+			break;
 		case Tsrc:
 		case Tdst:
 		case Tifc:
-			v4parseip(v, vals[n]);
+			if(parseip(v, vals[n]) == -1)
+				goto parseerror;
 			break;
 		case Tproto:
 		case Tdata:
@@ -292,34 +266,11 @@
 		}
 		v += f->len;
 	}
-
-	f->eoff = f->off + f->len;
 	f->e = f->val + f->n*f->len;
-	f->ctype = Cother;
-	if(f->n == 1){
-		switch(f->len){
-		case 1:
-			f->ctype = nomask ? Cbyte : Cmbyte;
-			break;
-		case 2:
-			f->ctype = nomask ? Cshort : Cmshort;
-			break;
-		case 4:
-			if(f->type == Tifc)
-				f->ctype = nomask ? Cifc : Cmifc;
-			else
-				f->ctype = nomask ? Clong : Cmlong;
-			break;
-		}
-	}
 	return f;
 
 parseerror:
-	if(f->mask)
-		free(f->mask);
-	if(f->val)
-		free(f->val);
-	free(f);
+	ipmuxfree(f);
 	return nil;
 }
 
@@ -342,8 +293,7 @@
 		return n;
 
 	/* compare offsets, call earlier ones more specific */
-	n = (a->off+((int)a->skiphdr)*offsetof(Ip4hdr, data[0])) - 
-		(b->off+((int)b->skiphdr)*offsetof(Ip4hdr, data[0]));
+	n = a->off - b->off;
 	if(n != 0)
 		return n;
 
@@ -413,6 +363,10 @@
 	*nf = *f;
 	nf->no = ipmuxcopy(f->no);
 	nf->yes = ipmuxcopy(f->yes);
+	if(f->mask != nil){
+		nf->mask = smalloc(f->len);
+		memmove(nf->mask, f->mask, f->len);
+	}
 	nf->val = smalloc(f->n*f->len);
 	nf->e = nf->val + f->len*f->n;
 	memmove(nf->val, f->val, f->n*f->len);
@@ -422,8 +376,10 @@
 static void
 ipmuxfree(Ipmux *f)
 {
-	if(f->val != nil)
-		free(f->val);
+	if(f == nil)
+		return;
+	free(f->val);
+	free(f->mask);
 	free(f);
 }
 
@@ -432,10 +388,8 @@
 {
 	if(f == nil)
 		return;
-	if(f->no != nil)
-		ipmuxfree(f->no);
-	if(f->yes != nil)
-		ipmuxfree(f->yes);
+	ipmuxfree(f->no);
+	ipmuxfree(f->yes);
 	ipmuxfree(f);
 }
 
@@ -510,6 +464,8 @@
 		return ipmuxremove(&ft->no, f);
 	}
 
+	ipmuxremove(&ft->no, f->no);
+
 	/* we found a match */
 	if(--(ft->ref) == 0){
 		/*
@@ -531,8 +487,55 @@
 }
 
 /*
+ * convert to ipv4 filter
+ */
+static Ipmux*
+ipmuxconv4(Ipmux *f)
+{
+	int i, n;
+
+	if(f == nil)
+		return nil;
+
+	switch(f->type){
+	case Tproto:
+		f->off = offsetof(Ip4hdr, proto);
+		break;
+	case Tdst:
+		f->off = offsetof(Ip4hdr, dst[0]);
+		if(0){
+	case Tsrc:
+		f->off = offsetof(Ip4hdr, src[0]);
+		}
+		if(f->len != IPaddrlen)
+			break;
+		n = 0;
+		for(i = 0; i < f->n; i++){
+			if(isv4(f->val + i*IPaddrlen)){
+				memmove(f->val + n*IPv4addrlen, f->val + i*IPaddrlen + IPv4off, IPv4addrlen);
+				n++;
+			}
+		}
+		if(n == 0){
+			ipmuxtreefree(f);
+			return nil;
+		}
+		f->n = n;
+		f->len = IPv4addrlen;
+		if(f->mask != nil)
+			memmove(f->mask, f->mask+IPv4off, IPv4addrlen);
+	}
+	f->e = f->val + f->n*f->len;
+
+	f->yes = ipmuxconv4(f->yes);
+	f->no = ipmuxconv4(f->no);
+
+	return f;
+}
+
+/*
  *  connection request is a semi separated list of filters
- *  e.g. proto=17;dat[0:4]=11aa22bb;ifc=135.104.9.2&255.255.255.0
+ *  e.g. ver=4;proto=17;data[0:4]=11aa22bb;ifc=135.104.9.2&255.255.255.0
  *
  *  there's no protection against overlapping specs.
  */
@@ -568,6 +571,18 @@
 		return Ebadarg;
 	mux->conv = c;
 
+	if(chain->type != Tver) {
+		char ver6[] = "ver=6";
+		mux = parsemux(ver6);
+		mux->yes = chain;
+		mux->no = ipmuxcopy(chain);
+		chain = mux;
+	}
+	if(*chain->val == IP_VER4)
+		chain->yes = ipmuxconv4(chain->yes);
+	else
+		chain->no = ipmuxconv4(chain->no);
+
 	/* save a copy of the chain so we can later remove it */
 	mux = ipmuxcopy(chain);
 	r = (Ipmuxrock*)(c->ptcl);
@@ -642,95 +657,84 @@
 	Block *bp;
 
 	bp = qget(c->wq);
-	if(bp == nil)
-		return;
-	else {
+	if(bp != nil) {
 		Ip4hdr *ih4 = (Ip4hdr*)(bp->rp);
-		if((ih4->vihl)&0xF0 != 0x60)
+
+		if((ih4->vihl & 0xF0) != IP_VER6)
 			ipoput4(c->p->f, bp, 0, ih4->ttl, ih4->tos, nil);
-		else {
-			Ip6hdr *ih6 = (Ip6hdr*)(bp->rp);
-			ipoput6(c->p->f, bp, 0, ih6->ttl, 0, nil);
-		}
+		else
+			ipoput6(c->p->f, bp, 0, ((Ip6hdr*)ih4)->ttl, 0, nil);
 	}
 }
 
+static int
+maskmemcmp(uchar *m, uchar *v, uchar *c, int n)
+{
+	int i;
+
+	if(m == nil)
+		return memcmp(v, c, n) != 0;
+
+	for(i = 0; i < n; i++)
+		if((v[i] & m[i]) != c[i])
+			return 1;
+	return 0;
+}
+
 static void
 ipmuxiput(Proto *p, Ipifc *ifc, Block *bp)
 {
-	int len, hl;
 	Fs *f = p->f;
-	uchar *m, *h, *v, *e, *ve, *hp;
 	Conv *c;
+	Iplifc *lifc;
 	Ipmux *mux;
-	Ip4hdr *ip;
+	uchar *v;
+	Ip4hdr *ip4;
 	Ip6hdr *ip6;
+	int off, hl;
 
-	ip = (Ip4hdr*)bp->rp;
-	hl = (ip->vihl&0x0F)<<2;
+	ip4 = (Ip4hdr*)bp->rp;
+	if((ip4->vihl & 0xF0) == IP_VER4) {
+		hl = (ip4->vihl&0x0F)<<2;
+		ip6 = nil;
+	} else {
+		hl = IP6HDR;
+		ip6 = (Ip6hdr*)ip4;
+	}
 
 	if(p->priv == nil)
 		goto nomatch;
 
-	h = bp->rp;
-	len = BLEN(bp);
+	c = nil;
+	lifc = nil;
 
-	/* run the v4 filter */
+	/* run the filter */
 	rlock(f);
-	c = nil;
 	mux = f->ipmux->priv;
 	while(mux != nil){
-		if(mux->eoff > len){
-			mux = mux->no;
-			continue;
-		}
-		hp = h + mux->off + ((int)mux->skiphdr)*hl;
-		switch(mux->ctype){
-		case Cbyte:
-			if(*mux->val == *hp)
-				goto yes;
+		switch(mux->type){
+		case Tifc:
+			if(mux->len != IPaddrlen)
+				goto no;
+			for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next)
+				for(v = mux->val; v < mux->e; v += IPaddrlen)
+					if(maskmemcmp(mux->mask, lifc->local, v, IPaddrlen) == 0)
+						goto yes;
+			goto no;
+		case Tdata:
+			off = hl;
 			break;
-		case Cmbyte:
-			if((*hp & *mux->mask) == *mux->val)
-				goto yes;
-			break;
-		case Cshort:
-			if(*((ushort*)mux->val) == *(ushort*)hp)
-				goto yes;
-			break;
-		case Cmshort:
-			if((*(ushort*)hp & (*((ushort*)mux->mask))) == *((ushort*)mux->val))
-				goto yes;
-			break;
-		case Clong:
-			if(*((ulong*)mux->val) == *(ulong*)hp)
-				goto yes;
-			break;
-		case Cmlong:
-			if((*(ulong*)hp & (*((ulong*)mux->mask))) == *((ulong*)mux->val))
-				goto yes;
-			break;
-		case Cifc:
-			if(*((ulong*)mux->val) == *(ulong*)(ifc->lifc->local + IPv4off))
-				goto yes;
-			break;
-		case Cmifc:
-			if((*(ulong*)(ifc->lifc->local + IPv4off) & (*((ulong*)mux->mask))) == *((ulong*)mux->val))
-				goto yes;
-			break;
 		default:
-			v = mux->val;
-			for(e = mux->e; v < e; v = ve){
-				m = mux->mask;
-				hp = h + mux->off;
-				for(ve = v + mux->len; v < ve; v++){
-					if((*hp++ & *m++) != *v)
-						break;
-				}
-				if(v == ve)
-					goto yes;
-			}
+			off = 0;
+			break;
 		}
+		off += mux->off;
+		if(off < 0 || off + mux->len > BLEN(bp))
+			goto no;
+		for(v = mux->val; v < mux->e; v += mux->len)
+			if(maskmemcmp(mux->mask, bp->rp + off, v, mux->len) == 0)
+				goto yes;
+no:
 		mux = mux->no;
 		continue;
 yes:
@@ -743,28 +747,24 @@
 	if(c != nil){
 		/* tack on interface address */
 		bp = padblock(bp, IPaddrlen);
-		ipmove(bp->rp, ifc->lifc->local);
-		bp = concatblock(bp);
-		if(bp != nil)
-			if(qpass(c->rq, bp) < 0)
-				print("Q");
+		if(lifc == nil)
+			lifc = ifc->lifc;
+		ipmove(bp->rp, lifc != nil ? lifc->local : IPnoaddr);
+		qpass(c->rq, concatblock(bp));
 		return;
 	}
 
 nomatch:
 	/* doesn't match any filter, hand it to the specific protocol handler */
-	ip = (Ip4hdr*)bp->rp;
-	if((ip->vihl&0xF0)==0x40) {
-		p = f->t2p[ip->proto];
-	} else {
-		ip6 = (Ip6hdr*)bp->rp;
+	if(ip6 != nil)
 		p = f->t2p[ip6->proto];
-	}
-	if(p && p->rcv)
-		(*p->rcv)(p, ifc, bp);
 	else
-		freeblist(bp);
-	return;
+		p = f->t2p[ip4->proto];
+	if(p != nil && p->rcv != nil){
+		(*p->rcv)(p, ifc, bp);
+		return;
+	}
+	freeblist(bp);
 }
 
 static int
@@ -780,11 +780,14 @@
 		n += snprint(buf+n, len-n, "\n");
 		return n;
 	}
-	n += snprint(buf+n, len-n, "h[%d:%d]&", 
-               mux->off+((int)mux->skiphdr)*((int)offsetof(Ip4hdr, data[0])), 
-               mux->off+(((int)mux->skiphdr)*((int)offsetof(Ip4hdr, data[0])))+mux->len-1);
-	for(i = 0; i < mux->len; i++)
-		n += snprint(buf+n, len - n, "%2.2ux", mux->mask[i]);
+	n += snprint(buf+n, len-n, "%s[%d:%d]", 
+		mux->type == Tdata ? "data": "iph",
+		mux->off, mux->off+mux->len-1);
+	if(mux->mask != nil){
+		n += snprint(buf+n, len-n, "&");
+		for(i = 0; i < mux->len; i++)
+			n += snprint(buf+n, len - n, "%2.2ux", mux->mask[i]);
+	}
 	n += snprint(buf+n, len-n, "=");
 	v = mux->val;
 	for(j = 0; j < mux->n; j++){
diff -u a/os/ip//iproute.c b/os/ip//iproute.c
--- a/os/ip//iproute.c
+++ b/os/ip//iproute.c
@@ -12,10 +12,10 @@
 static void	calcd(Route*);
 
 /* these are used for all instances of IP */
-Route*	v4freelist;
-Route*	v6freelist;
-RWlock	routelock;
-ulong	v4routegeneration, v6routegeneration;
+static Route*	v4freelist;
+static Route*	v6freelist;
+static RWlock	routelock;
+static ulong	v4routegeneration, v6routegeneration;
 
 static void
 freeroute(Route *r)
@@ -22,6 +22,7 @@
 {
 	Route **l;
 
+	r->ref = 0;
 	r->left = nil;
 	r->right = nil;
 	if(r->type & Rv4)
@@ -35,9 +36,8 @@
 static Route*
 allocroute(int type)
 {
-	Route *r;
+	Route *r, **l;
 	int n;
-	Route **l;
 
 	if(type & Rv4){
 		n = sizeof(RouteTree) + sizeof(V4route);
@@ -72,9 +72,9 @@
 		return;
 
 	l = allocroute(r->type);
+	l->left = r;
 	l->mid = *q;
 	*q = l;
-	l->left = r;
 }
 
 /*
@@ -99,11 +99,11 @@
  */
 enum
 {
-	Rpreceeds,
-	Rfollows,
-	Requals,
-	Rcontains,
-	Rcontained,
+	Rpreceeds,	/* a left of b */
+	Rfollows,	/* a right of b */
+	Requals,	/* a equals b */
+	Rcontains,	/* a contains b */
+	Roverlaps,	/* a overlaps b */
 };
 
 static int
@@ -112,44 +112,88 @@
 	if(a->type & Rv4){
 		if(a->v4.endaddress < b->v4.address)
 			return Rpreceeds;
-
 		if(a->v4.address > b->v4.endaddress)
 			return Rfollows;
-
 		if(a->v4.address <= b->v4.address
 		&& a->v4.endaddress >= b->v4.endaddress){
 			if(a->v4.address == b->v4.address
-			&& a->v4.endaddress == b->v4.endaddress)
-				return Requals;
+			&& a->v4.endaddress == b->v4.endaddress){
+				if(a->v4.source <= b->v4.source
+				&& a->v4.endsource >= b->v4.endsource){
+					if(a->v4.source == b->v4.source
+					&& a->v4.endsource == b->v4.endsource)
+						return Requals;
+					return Rcontains;
+				}
+				return Roverlaps;
+			}
 			return Rcontains;
 		}
-		return Rcontained;
+		return Roverlaps;
 	}
 
 	if(lcmp(a->v6.endaddress, b->v6.address) < 0)
 		return Rpreceeds;
-
 	if(lcmp(a->v6.address, b->v6.endaddress) > 0)
 		return Rfollows;
-
 	if(lcmp(a->v6.address, b->v6.address) <= 0
 	&& lcmp(a->v6.endaddress, b->v6.endaddress) >= 0){
 		if(lcmp(a->v6.address, b->v6.address) == 0
-		&& lcmp(a->v6.endaddress, b->v6.endaddress) == 0)
-				return Requals;
+		&& lcmp(a->v6.endaddress, b->v6.endaddress) == 0){
+			if(lcmp(a->v6.source, b->v6.source) <= 0
+			&& lcmp(a->v6.endsource, b->v6.endsource) >= 0){
+				if(lcmp(a->v6.source, b->v6.source) == 0
+				&& lcmp(a->v6.endsource, b->v6.endsource) == 0)
+					return Requals;
+				return Rcontains;
+			}
+			return Roverlaps;
+		}
 		return Rcontains;
 	}
+	return Roverlaps;
+}
 
-	return Rcontained;
+/* return 1 if a matches b, otherwise 0 */
+static int
+matchroute(Route *a, Route *b)
+{
+	if(a == b)
+		return 1;
+
+	if((a->type^b->type) & (Rifc|Runi|Rmulti|Rbcast))
+		return 0;
+
+	if(a->type & Rv4){
+		if(memcmp(a->v4.gate, IPnoaddr+IPv4off, IPv4addrlen) != 0
+		&& memcmp(a->v4.gate, b->v4.gate, IPv4addrlen) != 0)
+			return 0;
+	} else {
+		if(ipcmp(a->v6.gate, IPnoaddr) != 0
+		&& ipcmp(a->v6.gate, b->v6.gate) != 0)
+			return 0;
+	}
+
+	if(a->ifc != nil && b->ifc != nil && (a->ifc != b->ifc || a->ifcid != b->ifcid))
+		return 0;
+
+	if(*a->tag != 0 && strncmp(a->tag, b->tag, sizeof(a->tag)) != 0)
+		return 0;
+
+	return 1;
 }
 
 static void
 copygate(Route *old, Route *new)
 {
+	old->type = new->type;
+	old->ifc = new->ifc;
+	old->ifcid = new->ifcid;
 	if(new->type & Rv4)
 		memmove(old->v4.gate, new->v4.gate, IPv4addrlen);
 	else
-		memmove(old->v6.gate, new->v6.gate, IPaddrlen);
+		ipmove(old->v6.gate, new->v6.gate);
+	strncpy(old->tag, new->tag, sizeof(new->tag));
 }
 
 /*
@@ -162,12 +206,12 @@
 
 	l = p->left;
 	r = p->right;
-	p->left = 0;
-	p->right = 0;
+	p->left = nil;
+	p->right = nil;
 	addnode(f, root, p);
-	if(l)
+	if(l != nil)
 		walkadd(f, root, l);
-	if(r)
+	if(r != nil)
 		walkadd(f, root, r);
 }
 
@@ -180,16 +224,16 @@
 	Route *q;
 	int d;
 
-	if(p) {
+	if(p != nil) {
 		d = 0;
 		q = p->left;
-		if(q)
+		if(q != nil)
 			d = q->depth;
 		q = p->right;
-		if(q && q->depth > d)
+		if(q != nil && q->depth > d)
 			d = q->depth;
 		q = p->mid;
-		if(q && q->depth > d)
+		if(q != nil && q->depth > d)
 			d = q->depth;
 		p->depth = d+1;
 	}
@@ -210,8 +254,8 @@
 	 * rotate tree node
 	 */
 	p = *cur;
-	dl = 0; if(l = p->left) dl = l->depth;
-	dr = 0; if(r = p->right) dr = r->depth;
+	dl = 0; if((l = p->left) != nil) dl = l->depth;
+	dr = 0; if((r = p->right) != nil) dr = r->depth;
 
 	if(dl > dr+1) {
 		p->left = l->right;
@@ -239,7 +283,7 @@
 	Route *p;
 
 	p = *cur;
-	if(p == 0) {
+	if(p == nil) {
 		*cur = new;
 		new->depth = 1;
 		return;
@@ -269,15 +313,13 @@
 		 *  supercede the old entry if the old one isn't
 		 *  a local interface.
 		 */
-		if((p->type & Rifc) == 0){
-			p->type = new->type;
-			p->ifcid = -1;
+		if((p->type & Rifc) == 0)
 			copygate(p, new);
-		} else if(new->type & Rifc)
+		else if(new->type & Rifc)
 			p->ref++;
 		freeroute(new);
 		break;
-	case Rcontained:
+	case Roverlaps:
 		addnode(f, &p->mid, new);
 		break;
 	}
@@ -285,241 +327,316 @@
 	balancetree(cur);
 }
 
-#define	V4H(a)	((a&0x07ffffff)>>(32-Lroot-5))
-
-void
-v4addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type)
+/*
+ *  find node matching r
+ */
+static Route**
+looknode(Route **cur, Route *r)
 {
 	Route *p;
-	ulong sa;
-	ulong m;
-	ulong ea;
-	int h, eh;
 
-	m = nhgetl(mask);
-	sa = nhgetl(a) & m;
-	ea = sa | ~m;
+	for(;;){
+		p = *cur;
+		if(p == nil)
+			return nil;
+		switch(rangecompare(r, p)){
+		case Rcontains:
+			return nil;
+		case Rpreceeds:
+			cur = &p->left;
+			break;
+		case Rfollows:
+			cur = &p->right;
+			break;
+		case Roverlaps:
+			cur = &p->mid;
+			break;
+		case Requals:
+			if((p->type & Rifc) == 0 && !matchroute(r, p))
+				return nil;
+			return cur;
+		}
+	}
+}
 
-	eh = V4H(ea);
-	for(h=V4H(sa); h<=eh; h++) {
-		p = allocroute(Rv4 | type);
-		p->v4.address = sa;
-		p->v4.endaddress = ea;
-		memmove(p->v4.gate, gate, sizeof(p->v4.gate));
-		memmove(p->tag, tag, sizeof(p->tag));
+static Route*
+looknodetag(Route *r, char *tag)
+{
+	Route *x;
 
-		wlock(&routelock);
-		addnode(f, &f->v4root[h], p);
-		while(p = f->queue) {
-			f->queue = p->mid;
-			walkadd(f, &f->v4root[h], p->left);
-			freeroute(p);
-		}
-		wunlock(&routelock);
+	if(r == nil)
+		return nil;
+
+	if((x = looknodetag(r->mid, tag)) != nil)
+		return x;
+	if((x = looknodetag(r->left, tag)) != nil)
+		return x;
+	if((x = looknodetag(r->right, tag)) != nil)
+		return x;
+
+	if((r->type & Rifc) == 0){
+		if(tag == nil || strncmp(tag, r->tag, sizeof(r->tag)) == 0)
+			return r;
 	}
-	v4routegeneration++;
 
-	ipifcaddroute(f, Rv4, a, mask, gate, type);
+	return nil;
 }
 
-#define	V6H(a)	(((a)[IPllen-1] & 0x07ffffff)>>(32-Lroot-5))
-#define ISDFLT(a, mask, tag) ((ipcmp((a),v6Unspecified)==0) && (ipcmp((mask),v6Unspecified)==0) && (strcmp((tag), "ra")!=0))
+#define	V4H(a)	((a&0x07ffffff)>>(32-Lroot-5))
+#define	V6H(a)	(((a)[IPllen-1]&0x07ffffff)>>(32-Lroot-5))
 
-void
-v6addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type)
+static void
+routeadd(Fs *f, Route *r)
 {
-	Route *p;
-	ulong sa[IPllen], ea[IPllen];
-	ulong x, y;
-	int h, eh;
+	Route **h, **e, *p;
 
-	/*
-	if(ISDFLT(a, mask, tag))
-		f->v6p->cdrouter = -1;
-	*/
+	if(r->type & Rv4){
+		h = &f->v4root[V4H(r->v4.address)];
+		e = &f->v4root[V4H(r->v4.endaddress)];
+	} else {
+		h = &f->v6root[V6H(r->v6.address)];
+		e = &f->v6root[V6H(r->v6.endaddress)];
+	}
 
+	for(; h <= e; h++) {
+		p = allocroute(r->type);
 
-	for(h = 0; h < IPllen; h++){
-		x = nhgetl(a+4*h);
-		y = nhgetl(mask+4*h);
-		sa[h] = x & y;
-		ea[h] = x | ~y;
-	}
+		p->ifc = r->ifc;
+		p->ifcid = r->ifcid;
 
-	eh = V6H(ea);
-	for(h = V6H(sa); h <= eh; h++) {
-		p = allocroute(type);
-		memmove(p->v6.address, sa, IPaddrlen);
-		memmove(p->v6.endaddress, ea, IPaddrlen);
-		memmove(p->v6.gate, gate, IPaddrlen);
-		memmove(p->tag, tag, sizeof(p->tag));
+		if(r->type & Rv4)
+			memmove(&p->v4, &r->v4, sizeof(r->v4));
+		else
+			memmove(&p->v6, &r->v6, sizeof(r->v6));
 
-		wlock(&routelock);
-		addnode(f, &f->v6root[h], p);
-		while(p = f->queue) {
+		memmove(p->tag, r->tag, sizeof(r->tag));
+
+		addnode(f, h, p);
+		while((p = f->queue) != nil) {
 			f->queue = p->mid;
-			walkadd(f, &f->v6root[h], p->left);
+			walkadd(f, h, p->left);
 			freeroute(p);
 		}
-		wunlock(&routelock);
 	}
-	v6routegeneration++;
 
-	ipifcaddroute(f, 0, a, mask, gate, type);
+	if(r->type & Rv4)
+		v4routegeneration++;
+	else
+		v6routegeneration++;
 }
 
-Route**
-looknode(Route **cur, Route *r)
+static void
+routerem(Fs *f, Route *r)
 {
-	Route *p;
+	Route **h, **e, **l, *p;
 
-	for(;;){
-		p = *cur;
-		if(p == 0)
-			return 0;
-	
-		switch(rangecompare(r, p)){
-		case Rcontains:
-			return 0;
-		case Rpreceeds:
-			cur = &p->left;
-			break;
-		case Rfollows:
-			cur = &p->right;
-			break;
-		case Rcontained:
-			cur = &p->mid;
-			break;
-		case Requals:
-			return cur;
+	if(r->type & Rv4){
+		h = &f->v4root[V4H(r->v4.address)];
+		e = &f->v4root[V4H(r->v4.endaddress)];
+	} else {
+		h = &f->v6root[V6H(r->v6.address)];
+		e = &f->v6root[V6H(r->v6.endaddress)];
+	}
+
+	for(; h <= e; h++) {
+		if((l = looknode(h, r)) == nil)
+			continue;
+		p = *l;
+		if(--(p->ref) != 0)
+			continue;
+		*l = nil;
+		addqueue(&f->queue, p->left);
+		addqueue(&f->queue, p->mid);
+		addqueue(&f->queue, p->right);
+		freeroute(p);
+
+		while((p = f->queue) != nil) {
+			f->queue = p->mid;
+			walkadd(f, h, p->left);
+			freeroute(p);
 		}
 	}
+
+	if(r->type & Rv4)
+		v4routegeneration++;
+	else
+		v6routegeneration++;
 }
 
-void
-v4delroute(Fs *f, uchar *a, uchar *mask, int dolock)
+static Route
+mkroute(uchar *a, uchar *mask, uchar *s, uchar *smask, uchar *gate, int type, Ipifc *ifc, char *tag)
 {
-	Route **r, *p;
-	Route rt;
-	int h, eh;
-	ulong m;
+	ulong x, y;
+	Route r;
+	int h;
 
-	m = nhgetl(mask);
-	rt.v4.address = nhgetl(a) & m;
-	rt.v4.endaddress = rt.v4.address | ~m;
-	rt.type = Rv4;
+	memset(&r, 0, sizeof(r));
 
-	eh = V4H(rt.v4.endaddress);
-	for(h=V4H(rt.v4.address); h<=eh; h++) {
-		if(dolock)
-			wlock(&routelock);
-		r = looknode(&f->v4root[h], &rt);
-		if(r) {
-			p = *r;
-			if(--(p->ref) == 0){
-				*r = 0;
-				addqueue(&f->queue, p->left);
-				addqueue(&f->queue, p->mid);
-				addqueue(&f->queue, p->right);
-				freeroute(p);
-				while(p = f->queue) {
-					f->queue = p->mid;
-					walkadd(f, &f->v4root[h], p->left);
-					freeroute(p);
-				}
-			}
+	r.type = type;
+
+	if(type & Rv4){
+		x = nhgetl(a+IPv4off);
+		y = nhgetl(mask+IPv4off);
+		r.v4.address = x & y;
+		r.v4.endaddress = x | ~y;
+
+		x = nhgetl(s+IPv4off);
+		y = nhgetl(smask+IPv4off);
+		if(y != 0)
+			r.type |= Rsrc;
+		r.v4.source = x & y;
+		r.v4.endsource = x | ~y;
+
+		memmove(r.v4.gate, gate+IPv4off, IPv4addrlen);
+	} else {
+		for(h = 0; h < IPllen; h++){
+			x = nhgetl(a+4*h);
+			y = nhgetl(mask+4*h);
+			r.v6.address[h] = x & y;
+			r.v6.endaddress[h] = x | ~y;
+
+			x = nhgetl(s+4*h);
+			y = nhgetl(smask+4*h);
+			if(y != 0)
+				r.type |= Rsrc;
+			r.v6.source[h] = x & y;
+			r.v6.endsource[h] = x | ~y;
 		}
-		if(dolock)
-			wunlock(&routelock);
+
+		memmove(r.v6.gate, gate, IPaddrlen);
 	}
-	v4routegeneration++;
 
-	ipifcremroute(f, Rv4, a, mask);
+	if(ifc != nil){
+		r.ifc = ifc;
+		r.ifcid = ifc->ifcid;
+	}
+
+	if(tag != nil)
+		strncpy(r.tag, tag, sizeof(r.tag));
+
+	return r;
 }
 
 void
-v6delroute(Fs *f, uchar *a, uchar *mask, int dolock)
+addroute(Fs *f, uchar *a, uchar *mask, uchar *s, uchar *smask, uchar *gate, int type, Ipifc *ifc, char *tag)
 {
-	Route **r, *p;
-	Route rt;
-	int h, eh;
-	ulong x, y;
+	Route r = mkroute(a, mask, s, smask, gate, type, ifc, tag);
+	wlock(&routelock);
+	routeadd(f, &r);
+	wunlock(&routelock);
+}
 
-	for(h = 0; h < IPllen; h++){
-		x = nhgetl(a+4*h);
-		y = nhgetl(mask+4*h);
-		rt.v6.address[h] = x & y;
-		rt.v6.endaddress[h] = x | ~y;
+void
+remroute(Fs *f, uchar *a, uchar *mask, uchar *s, uchar *smask, uchar *gate, int type, Ipifc *ifc, char *tag)
+{
+	Route r = mkroute(a, mask, s, smask, gate, type, ifc, tag);
+	wlock(&routelock);
+	routerem(f, &r);
+	wunlock(&routelock);
+}
+
+/* get the outgoing interface for route r */
+static Ipifc*
+routefindipifc(Route *r, Fs *f)
+{
+	uchar local[IPaddrlen], gate[IPaddrlen];
+	Ipifc *ifc;
+	int i;
+
+	ifc = r->ifc;
+	if(ifc != nil && ifc->ifcid == r->ifcid)
+		return ifc;
+
+	if(r->type & Rsrc) {
+		if(r->type & Rv4) {
+			hnputl(local+IPv4off, r->v4.source);
+			memmove(local, v4prefix, IPv4off);
+		} else {
+			for(i = 0; i < IPllen; i++)
+				hnputl(local+4*i, r->v6.source[i]);
+		}
+	} else {
+		ipmove(local, IPnoaddr);
 	}
-	rt.type = 0;
 
-	eh = V6H(rt.v6.endaddress);
-	for(h=V6H(rt.v6.address); h<=eh; h++) {
-		if(dolock)
-			wlock(&routelock);
-		r = looknode(&f->v6root[h], &rt);
-		if(r) {
-			p = *r;
-			if(--(p->ref) == 0){
-				*r = 0;
-				addqueue(&f->queue, p->left);
-				addqueue(&f->queue, p->mid);
-				addqueue(&f->queue, p->right);
-				freeroute(p);
-				while(p = f->queue) {
-					f->queue = p->mid;
-					walkadd(f, &f->v6root[h], p->left);
-					freeroute(p);
-				}
-			}
+	if(r->type & Rifc) {
+		if(r->type & Rv4) {
+			hnputl(gate+IPv4off, r->v4.address);
+			memmove(gate, v4prefix, IPv4off);
+		} else {
+			for(i = 0; i < IPllen; i++)
+				hnputl(gate+4*i, r->v6.address[i]);
 		}
-		if(dolock)
-			wunlock(&routelock);
+	} else {
+		if(r->type & Rv4)
+			v4tov6(gate, r->v4.gate);
+		else
+			ipmove(gate, r->v6.gate);
 	}
-	v6routegeneration++;
 
-	ipifcremroute(f, 0, a, mask);
+	if((ifc = findipifc(f, local, gate, r->type)) == nil)
+		return nil;
+
+	r->ifc = ifc;
+	r->ifcid = ifc->ifcid;
+	return ifc;
 }
 
+/*
+ * v4lookup, v6lookup:
+ *  lookup a route to destination address a from source address s
+ *  and return the route. returns nil if no route was found.
+ *  an optional Routehint can be passed in rh to cache the lookup.
+ *
+ *  for v4lookup, addresses are in 4 byte format.
+ */
 Route*
-v4lookup(Fs *f, uchar *a, Conv *c)
+v4lookup(Fs *f, uchar *a, uchar *s, Routehint *rh)
 {
+	ulong la, ls;
 	Route *p, *q;
-	ulong la;
-	uchar gate[IPaddrlen];
 	Ipifc *ifc;
 
-	if(c != nil && c->r != nil && c->r->ifc != nil && c->rgen == v4routegeneration)
-		return c->r;
+	if(rh != nil
+	&& rh->rgen == v4routegeneration
+	&& (q = rh->r) != nil
+	&& (ifc = q->ifc) != nil
+	&& q->ifcid == ifc->ifcid
+	&& q->ref > 0)
+		return q;
 
 	la = nhgetl(a);
+	ls = nhgetl(s);
 	q = nil;
-	for(p=f->v4root[V4H(la)]; p;)
-		if(la >= p->v4.address) {
-			if(la <= p->v4.endaddress) {
-				q = p;
-				p = p->mid;
-			} else
-				p = p->right;
-		} else
+	for(p = f->v4root[V4H(la)]; p != nil;){
+		if(la < p->v4.address){
 			p = p->left;
-
-	if(q && (q->ifc == nil || q->ifcid != q->ifc->ifcid)){
-		if(q->type & Rifc) {
-			hnputl(gate+IPv4off, q->v4.address);
-			memmove(gate, v4prefix, IPv4off);
-		} else
-			v4tov6(gate, q->v4.gate);
-		ifc = findipifc(f, gate, q->type);
-		if(ifc == nil)
-			return nil;
-		q->ifc = ifc;
-		q->ifcid = ifc->ifcid;
+			continue;
+		}
+		if(la > p->v4.endaddress){
+			p = p->right;
+			continue;
+		}
+		if(p->type & Rsrc){
+			if(ls < p->v4.source){
+				p = p->mid;
+				continue;
+			}
+			if(ls > p->v4.endsource){
+				p = p->mid;
+				continue;
+			}
+		}
+		q = p;
+		p = p->mid;
 	}
 
-	if(c != nil){
-		c->r = q;
-		c->rgen = v4routegeneration;
+	if(q == nil || q->ref == 0 || routefindipifc(q, f) == nil)
+		return nil;
+
+	if(rh != nil){
+		rh->r = q;
+		rh->rgen = v4routegeneration;
 	}
 
 	return q;
@@ -526,29 +643,35 @@
 }
 
 Route*
-v6lookup(Fs *f, uchar *a, Conv *c)
+v6lookup(Fs *f, uchar *a, uchar *s, Routehint *rh)
 {
-	Route *p, *q;
-	ulong la[IPllen];
-	int h;
+	ulong la[IPllen], ls[IPllen];
 	ulong x, y;
-	uchar gate[IPaddrlen];
+	Route *p, *q;
 	Ipifc *ifc;
+	int h;
 
-	if(memcmp(a, v4prefix, IPv4off) == 0){
-		q = v4lookup(f, a+IPv4off, c);
-		if(q != nil)
-			return q;
+	if(isv4(s)){
+		if(isv4(a))
+			return v4lookup(f, a+IPv4off, s+IPv4off, rh);
+		return nil;
 	}
 
-	if(c != nil && c->r != nil && c->r->ifc != nil && c->rgen == v6routegeneration)
-		return c->r;
+	if(rh != nil
+	&& rh->rgen == v6routegeneration
+	&& (q = rh->r) != nil
+	&& (ifc = q->ifc) != nil
+	&& q->ifcid == ifc->ifcid
+	&& q->ref > 0)
+		return q;
 
-	for(h = 0; h < IPllen; h++)
+	for(h = 0; h < IPllen; h++){
 		la[h] = nhgetl(a+4*h);
+		ls[h] = nhgetl(s+4*h);
+	}
 
-	q = 0;
-	for(p=f->v6root[V6H(la)]; p;){
+	q = nil;
+	for(p = f->v6root[V6H(la)]; p != nil;){
 		for(h = 0; h < IPllen; h++){
 			x = la[h];
 			y = p->v6.address[h];
@@ -571,42 +694,202 @@
 			}
 			break;
 		}
+		if(p->type & Rsrc){
+			for(h = 0; h < IPllen; h++){
+				x = ls[h];
+				y = p->v6.source[h];
+				if(x == y)
+					continue;
+				if(x < y){
+					p = p->mid;
+					goto next;
+				}
+				break;
+			}
+			for(h = 0; h < IPllen; h++){
+				x = ls[h];
+				y = p->v6.endsource[h];
+				if(x == y)
+					continue;
+				if(x > y){
+					p = p->mid;
+					goto next;
+				}
+				break;
+			}
+		}
 		q = p;
 		p = p->mid;
 next:		;
 	}
 
-	if(q && (q->ifc == nil || q->ifcid != q->ifc->ifcid)){
-		if(q->type & Rifc) {
-			for(h = 0; h < IPllen; h++)
-				hnputl(gate+4*h, q->v6.address[h]);
-			ifc = findipifc(f, gate, q->type);
-		} else
-			ifc = findipifc(f, q->v6.gate, q->type);
-		if(ifc == nil)
-			return nil;
-		q->ifc = ifc;
-		q->ifcid = ifc->ifcid;
+	if(q == nil || q->ref == 0 || routefindipifc(q, f) == nil)
+		return nil;
+
+	if(rh != nil){
+		rh->r = q;
+		rh->rgen = v6routegeneration;
 	}
-	if(c != nil){
-		c->r = q;
-		c->rgen = v6routegeneration;
-	}
 	
 	return q;
 }
 
+/*
+ * v4source, v6source:
+ *  lookup a route to destination address a and also find
+ *  a suitable source address s on the outgoing interface.
+ *  return the route on success or nil when no route
+ *  was found.
+ *
+ *  for v4source, addresses are in 4 byte format.
+ */
+Route*
+v4source(Fs *f, uchar *a, uchar *s)
+{
+	uchar src[IPv4addrlen];
+	int splen;
+	ulong x, la;
+	Route *p, *q;
+	Ipifc *ifc;
+
+	q = nil;
+	la = nhgetl(a);
+	rlock(&routelock);
+	for(p = f->v4root[V4H(la)]; p != nil;){
+		if(la < p->v4.address){
+			p = p->left;
+			continue;
+		}
+		if(la > p->v4.endaddress){
+			p = p->right;
+			continue;
+		}
+		splen = 0;
+		if(p->type & Rsrc){
+			/* calculate local prefix length for source specific routes */
+			for(x = ~(p->v4.endsource ^ p->v4.source); x & 0x80000000UL; x <<= 1)
+				splen++;
+			hnputl(src, p->v4.source);
+		}
+		if((ifc = routefindipifc(p, f)) == nil
+		|| !ipv4local(ifc, src, splen, (p->type & (Rifc|Rbcast|Rmulti|Rv4))==Rv4? p->v4.gate: a)){
+			p = p->mid;
+			continue;
+		}
+		memmove(s, src, IPv4addrlen);
+		q = p;
+		p = p->mid;
+	}
+	runlock(&routelock);
+	return q;
+}
+
+Route*
+v6source(Fs *f, uchar *a, uchar *s)
+{
+	uchar src[IPaddrlen];
+	int splen, h;
+	ulong x, y, la[IPllen];
+	Route *p, *q;
+	Ipifc *ifc;
+
+	q = nil;
+	for(h = 0; h < IPllen; h++)
+		la[h] = nhgetl(a+4*h);
+	rlock(&routelock);
+	for(p = f->v6root[V6H(la)]; p != nil;){
+		for(h = 0; h < IPllen; h++){
+			x = la[h];
+			y = p->v6.address[h];
+			if(x == y)
+				continue;
+			if(x < y){
+				p = p->left;
+				goto next;
+			}
+			break;
+		}
+		for(h = 0; h < IPllen; h++){
+			x = la[h];
+			y = p->v6.endaddress[h];
+			if(x == y)
+				continue;
+			if(x > y){
+				p = p->right;
+				goto next;
+			}
+			break;
+		}
+		splen = 0;
+		if(p->type & Rsrc){
+			/* calculate local prefix length for source specific routes */
+			for(h = 0; h < IPllen; h++){
+				hnputl(src+4*h, p->v6.source[h]);
+				if((x = ~(p->v6.endsource[h] ^ p->v6.source[h])) != ~0UL){
+					for(; x & 0x80000000UL; x <<= 1)
+						splen++;
+					break;
+				}
+				splen += 32;
+			}
+		}
+		if((ifc = routefindipifc(p, f)) == nil
+		|| !ipv6local(ifc, src, splen, a)){
+			p = p->mid;
+			continue;
+		}
+		ipmove(s, src);
+		q = p;
+		p = p->mid;
+next:		;
+	}
+	runlock(&routelock);
+	return q;
+}
+
+static int
+parseroutetype(char *p)
+{
+	int type = 0;
+	switch(*p++){
+	default:	return -1;	
+	case '4':	type |= Rv4;
+	case '6':	break;
+	}
+	for(;;) switch(*p++){
+	default: 
+		return -1;
+	case 'i':
+		if(((type ^= Rifc) & Rifc) != Rifc) return -1;
+		break;
+	case 'u':
+		if(((type ^= Runi) & (Runi|Rbcast|Rmulti)) != Runi) return -1;
+		break;
+	case 'b':
+		if(((type ^= Rbcast) & (Runi|Rbcast|Rmulti)) != Rbcast) return -1;
+		break;
+	case 'm':
+		if(((type ^= Rmulti) & (Runi|Rbcast|Rmulti)) != Rmulti) return -1;
+		break;
+	case 'p':
+		if(((type ^= Rptpt) & Rptpt) != Rptpt) return -1;
+		break;
+	case '\0':
+		return type;
+	}
+}
+
 void
-routetype(int type, char *p)
+routetype(int type, char p[8])	/* write the type flags of a route into p as a NUL-terminated string */
 {
-	memset(p, ' ', 4);
-	p[4] = 0;
 	if(type & Rv4)
 		*p++ = '4';
 	else
 		*p++ = '6';
+
 	if(type & Rifc)
 		*p++ = 'i';
+
 	if(type & Runi)
 		*p++ = 'u';
 	else if(type & Rbcast)
@@ -613,14 +896,14 @@
 		*p++ = 'b';
 	else if(type & Rmulti)
 		*p++ = 'm';
+
 	if(type & Rptpt)
-		*p = 'p';
+		*p++ = 'p';	/* advance so the terminator below lands after the last flag */
+	*p = 0;	/* at most 4 flag characters are written, so the NUL fits in p[8] */
 }
 
-char *rformat = "%-15I %-4M %-15I %4.4s %4.4s %3s\n";
-
-void
-convroute(Route *r, uchar *addr, uchar *mask, uchar *gate, char *t, int *nifc)
+static void
+convroute(Route *r, uchar *addr, uchar *mask, uchar *src, uchar *smask, uchar *gate)	/* expand r's address ranges and gateway into IPaddrlen-sized byte arrays */
 {
 	int i;
 
@@ -627,8 +910,16 @@
 	if(r->type & Rv4){
 		memmove(addr, v4prefix, IPv4off);
 		hnputl(addr+IPv4off, r->v4.address);
+
 		memset(mask, 0xff, IPv4off);
 		hnputl(mask+IPv4off, ~(r->v4.endaddress ^ r->v4.address));
+
+		memmove(src, v4prefix, IPv4off);	/* v4 source is written in v4-in-v6 form, like addr above */
+		hnputl(src+IPv4off, r->v4.source);
+
+		memset(smask, 0xff, IPv4off);	/* leading IPv4off bytes of a v4 mask are all ones */
+		hnputl(smask+IPv4off, ~(r->v4.endsource ^ r->v4.source));	/* mask = bits on which both ends of the source range agree */
+
 		memmove(gate, v4prefix, IPv4off);
 		memmove(gate+IPv4off, r->v4.gate, IPv4addrlen);
 	} else {
@@ -635,162 +926,186 @@
 		for(i = 0; i < IPllen; i++){
 			hnputl(addr + 4*i, r->v6.address[i]);
 			hnputl(mask + 4*i, ~(r->v6.endaddress[i] ^ r->v6.address[i]));
+			hnputl(src + 4*i, r->v6.source[i]);	/* one 32-bit word of the source range start per iteration */
+			hnputl(smask + 4*i, ~(r->v6.endsource[i] ^ r->v6.source[i]));	/* source mask, computed like mask above */
 		}
 		memmove(gate, r->v6.gate, IPaddrlen);
 	}
+}
 
-	routetype(r->type, t);
+static char*
+seprintroute(char *p, char *e, Route *r)
+{	/* format route r as one text line into [p, e); returns what seprint returns */
+	uchar addr[IPaddrlen], mask[IPaddrlen], src[IPaddrlen], smask[IPaddrlen], gate[IPaddrlen];
+	char type[8], ifbuf[4], *iname;	/* NOTE(review): ifbuf leaves room for 3 digits; presumably snprint truncates larger indices -- confirm */
 
-	if(r->ifc)
-		*nifc = r->ifc->conv->x;
+	convroute(r, addr, mask, src, smask, gate);
+	routetype(r->type, type);
+	if(r->ifc != nil && r->ifcid == r->ifc->ifcid)	/* name the interface only while the route's ifcid still matches it */
+		snprint(iname = ifbuf, sizeof ifbuf, "%d", r->ifc->conv->x);
 	else
-		*nifc = -1;
+		iname = "-";	/* no interface to report */
+	return seprint(p, e, "%-15I %-4M %-15I %-4s %4.4s %3s %-15I %-4M\n",
+		addr, mask, gate, type, r->tag, iname, src, smask);	/* src and smask come last on the line */
 }
 
-/*
- *  this code is not in rr to reduce stack size
- */
-static void
-sprintroute(Route *r, Routewalk *rw)
+typedef struct Routewalk Routewalk;
+struct Routewalk	/* state carried through one routeread() walk of the route trees */
 {
-	int nifc, n;
-	char t[5], *iname, ifbuf[5];
-	uchar addr[IPaddrlen], mask[IPaddrlen], gate[IPaddrlen];
-	char *p;
+	int	o;	/* starts at -offset; while negative, that many output bytes are still to be skipped */
+	int	h;	/* index of the hash bucket currently being walked */
+	char*	p;	/* next write position in the caller's buffer */
+	char*	e;	/* end of the caller's buffer */
+};
 
-	convroute(r, addr, mask, gate, t, &nifc);
-	iname = "-";
-	if(nifc != -1) {
-		iname = ifbuf;
-		snprint(ifbuf, sizeof ifbuf, "%d", nifc);
-	}
-	p = seprint(rw->p, rw->e, rformat, addr, mask, gate, t, r->tag, iname);
+static int
+rr1(Routewalk *rw, Route *r)
+{	/* format one route at rw->p, honouring the read offset; returns 0 once the buffer is full */
+	int n = seprintroute(rw->p, rw->e, r) - rw->p;	/* n = bytes just formatted */
 	if(rw->o < 0){
-		n = p - rw->p;
 		if(n > -rw->o){
-			memmove(rw->p, rw->p-rw->o, n+rw->o);
-			rw->p = p + rw->o;
+			memmove(rw->p, rw->p - rw->o, n + rw->o);	/* line straddles the offset: slide the part past it to the front */
+			rw->p += n + rw->o;	/* keep only those n + o bytes */
 		}
 		rw->o += n;
 	} else
-		rw->p = p;
+		rw->p += n;	/* offset already consumed: keep the whole line */
+	return rw->p < rw->e;	/* nonzero while there is still room in the buffer */
 }
 
-/*
- *  recurse descending tree, applying the function in Routewalk
- */
 static int
 rr(Route *r, Routewalk *rw)
 {
 	int h;
 
-	if(rw->e <= rw->p)
-		return 0;
 	if(r == nil)
 		return 1;
-
 	if(rr(r->left, rw) == 0)
 		return 0;
-
 	if(r->type & Rv4)
 		h = V4H(r->v4.address);
 	else
 		h = V6H(r->v6.address);
-
-	if(h == rw->h)
-		rw->walk(r, rw);
-
+	if(h == rw->h){	/* only emit nodes that hash to the bucket being walked */
+		if(rr1(rw, r) == 0)	/* rr1 returns 0 when the output buffer is full */
+			return 0;	/* propagate 0 so callers stop walking */
+	}
 	if(rr(r->mid, rw) == 0)
 		return 0;
-
 	return rr(r->right, rw);
 }
 
-void
-ipwalkroutes(Fs *f, Routewalk *rw)
+long
+routeread(Fs *f, char *p, ulong offset, int n)	/* format the v4 then v6 routes as text, skipping offset bytes, into at most n bytes at p */
 {
+	Routewalk rw[1];
+
+	rw->p = p;
+	rw->e = p+n;
+	rw->o = -offset;	/* negative count of output bytes still to skip */
+	if(rw->o > 0)	/* -offset did not fit in an int as a non-positive value */
+		return 0;
+
 	rlock(&routelock);
-	if(rw->e > rw->p) {
+	if(rw->p < rw->e) {	/* skip the walk entirely when there is no room */
 		for(rw->h = 0; rw->h < nelem(f->v4root); rw->h++)
 			if(rr(f->v4root[rw->h], rw) == 0)
 				break;
 	}
-	if(rw->e > rw->p) {
+	if(rw->p < rw->e) {	/* still room after the v4 routes: continue with v6 */
 		for(rw->h = 0; rw->h < nelem(f->v6root); rw->h++)
 			if(rr(f->v6root[rw->h], rw) == 0)
 				break;
 	}
 	runlock(&routelock);
-}
 
-long
-routeread(Fs *f, char *p, ulong offset, int n)
-{
-	Routewalk rw;
-
-	rw.p = p;
-	rw.e = p+n;
-	rw.o = -offset;
-	rw.walk = sprintroute;
-
-	ipwalkroutes(f, &rw);
-
-	return rw.p - p;
+	return rw->p - p;	/* number of bytes stored in the caller's buffer */
 }
 
 /*
- *  this code is not in routeflush to reduce stack size
+ *	4	add	addr	mask	gate
+ *	5	add	addr	mask	gate			ifc
+ *	6	add	addr	mask	gate				src	smask
+ *	7	add	addr	mask	gate			ifc	src	smask
+ *	8	add	addr	mask	gate		tag	ifc	src	smask
+ *	9	add	addr	mask	gate	type	tag	ifc	src	smask
+ *	3	remove	addr	mask
+ *	4	remove	addr	mask	gate
+ *	5	remove	addr	mask					src	smask
+ *	6	remove	addr	mask	gate				src	smask
+ *	7	remove	addr	mask	gate			ifc	src	smask
+ *	8	remove	addr	mask	gate		tag	ifc	src	smask
+ *	9	remove	addr	mask	gate	type	tag	ifc	src	smask
  */
-void
-delroute(Fs *f, Route *r, int dolock)
+static Route
+parseroute(Fs *f, char **argv, int argc)	/* turn an "add"/"remove" request (argv[0] is the verb) into a Route; calls error() on bad input */
 {
-	uchar addr[IPaddrlen];
-	uchar mask[IPaddrlen];
+	uchar addr[IPaddrlen], mask[IPaddrlen];
+	uchar src[IPaddrlen], smask[IPaddrlen];
 	uchar gate[IPaddrlen];
-	char t[5];
-	int nifc;
+	Ipifc *ifc;
+	char *tag;
+	int type;
 
-	convroute(r, addr, mask, gate, t, &nifc);
-	if(r->type & Rv4)
-		v4delroute(f, addr+IPv4off, mask+IPv4off, dolock);
-	else
-		v6delroute(f, addr, mask, dolock);
-}
+	type = 0;
+	tag = nil;
+	ifc = nil;
+	ipmove(gate, IPnoaddr);	/* optional fields start out as IPnoaddr */
+	ipmove(src, IPnoaddr);
+	ipmove(smask, IPnoaddr);
 
-/*
- *  recurse until one route is deleted
- *    returns 0 if nothing is deleted, 1 otherwise
- */
-int
-routeflush(Fs *f, Route *r, char *tag)
-{
-	if(r == nil)
-		return 0;
-	if(routeflush(f, r->mid, tag))
-		return 1;
-	if(routeflush(f, r->left, tag))
-		return 1;
-	if(routeflush(f, r->right, tag))
-		return 1;
-	if((r->type & Rifc) == 0){
-		if(tag == nil || strncmp(tag, r->tag, sizeof(r->tag)) == 0){
-			delroute(f, r, 0);
-			return 1;
-		}
+	if(argc < 3)	/* verb, addr and mask are always required */
+		error(Ebadctl);
+	if(parseipandmask(addr, mask, argv[1], argv[2]) == -1)
+		error(Ebadip);
+
+	if(strcmp(argv[0], "add") == 0 || (argc > 3 && argc != 5)){	/* gate: always for add; otherwise unless argc is 3 or 5 */
+		if(argc < 4)
+			error(Ebadctl);
+		if(parseip(gate, argv[3]) == -1)
+			error(Ebadip);
 	}
-	return 0;
+	if(argc > 4 && (strcmp(argv[0], "add") != 0 || argc != 5)){	/* src and smask are the last two args, except in the 5-arg add */
+		if(parseipandmask(src, smask, argv[argc-2], argv[argc-1]) == -1)
+			error(Ebadip);
+	}
+	if(argc == 5 && strcmp(argv[0], "add") == 0)	/* 5-arg add: the last argument is the ifc */
+		ifc = findipifcstr(f, argv[4]);
+	if(argc > 6)	/* 7 or more args: ifc sits just before src and smask */
+		ifc = findipifcstr(f, argv[argc-3]);
+	if(argc > 7)	/* 8 or more args: tag precedes ifc */
+		tag = argv[argc-4];
+	if(argc > 8){	/* 9 args: explicit type string precedes tag */
+		if((type = parseroutetype(argv[argc-5])) < 0)
+			error(Ebadctl);
+	} else {
+		if(isv4(addr))	/* no explicit type: take the address family from addr */
+			type |= Rv4;
+	}
+	if(argc > 9)	/* more arguments than any accepted form */
+		error(Ebadctl);
+
+	if(type & Rv4){	/* a v4 route needs v4 addr, and v4 src/gate when those are given */
+		if(!isv4(addr))
+			error(Ebadip);
+		if(ipcmp(smask, IPnoaddr) != 0 && !isv4(src))
+			error(Ebadip);
+		if(ipcmp(gate, IPnoaddr) != 0 && !isv4(gate))
+			error(Ebadip);
+	} else {
+		if(isv4(addr))	/* a v6 route must not carry a v4 address */
+			error(Ebadip);
+	}
+
+	return mkroute(addr, mask, src, smask, gate, type, ifc, tag);	/* the Route is returned by value */
 }
 
 long
 routewrite(Fs *f, Chan *c, char *p, int n)
 {
-	int h, changed;
-	char *tag;
 	Cmdbuf *cb;
-	uchar addr[IPaddrlen];
-	uchar mask[IPaddrlen];
-	uchar gate[IPaddrlen];
-	IPaux *a, *na;
+	IPaux *a;
+	Route *x, r;
 
 	cb = parsecmd(p, n);
 	if(waserror()){
@@ -797,54 +1112,44 @@
 		free(cb);
 		nexterror();
 	}
-
+	if(cb->nf < 1)
+		error("short control request");
 	if(strcmp(cb->f[0], "flush") == 0){
-		tag = cb->f[1];
+		char *tag = cb->nf < 2 ? nil : cb->f[1];
+		int h;
+
+		wlock(&routelock);
 		for(h = 0; h < nelem(f->v4root); h++)
-			for(changed = 1; changed;){
-				wlock(&routelock);
-				changed = routeflush(f, f->v4root[h], tag);
-				wunlock(&routelock);
+			while((x = looknodetag(f->v4root[h], tag)) != nil){
+				memmove(&r, x, sizeof(RouteTree) + sizeof(V4route));
+				routerem(f, &r);
 			}
 		for(h = 0; h < nelem(f->v6root); h++)
-			for(changed = 1; changed;){
-				wlock(&routelock);
-				changed = routeflush(f, f->v6root[h], tag);
-				wunlock(&routelock);
+			while((x = looknodetag(f->v6root[h], tag)) != nil){
+				memmove(&r, x, sizeof(RouteTree) + sizeof(V6route));
+				routerem(f, &r);
 			}
-	} else if(strcmp(cb->f[0], "remove") == 0){
-		if(cb->nf < 3)
-			error(Ebadarg);
-		parseip(addr, cb->f[1]);
-		parseipmask(mask, cb->f[2]);
-		if(memcmp(addr, v4prefix, IPv4off) == 0)
-			v4delroute(f, addr+IPv4off, mask+IPv4off, 1);
-		else
-			v6delroute(f, addr, mask, 1);
-	} else if(strcmp(cb->f[0], "add") == 0){
-		if(cb->nf < 4)
-			error(Ebadarg);
-		parseip(addr, cb->f[1]);
-		parseipmask(mask, cb->f[2]);
-		parseip(gate, cb->f[3]);
-		tag = "none";
-		if(c != nil){
+		wunlock(&routelock);
+	} else if(strcmp(cb->f[0], "add") == 0 || strcmp(cb->f[0], "remove") == 0){
+		r = parseroute(f, cb->f, cb->nf);
+		if(*r.tag == 0){
 			a = c->aux;
-			tag = a->tag;
+			strncpy(r.tag, a->tag, sizeof(r.tag));
 		}
-		if(memcmp(addr, v4prefix, IPv4off) == 0)
-			v4addroute(f, tag, addr+IPv4off, mask+IPv4off, gate+IPv4off, 0);
+		wlock(&routelock);
+		if(strcmp(cb->f[0], "add") == 0)
+			routeadd(f, &r);
 		else
-			v6addroute(f, tag, addr, mask, gate, 0);
+			routerem(f, &r);
+		wunlock(&routelock);
 	} else if(strcmp(cb->f[0], "tag") == 0) {
 		if(cb->nf < 2)
 			error(Ebadarg);
-
 		a = c->aux;
-		na = newipaux(a->owner, cb->f[1]);
-		c->aux = na;
+		c->aux = newipaux(a->owner, cb->f[1]);
 		free(a);
-	}
+	} else
+		error(Ebadctl);
 
 	poperror();
 	free(cb);
diff -u a/os/ip//ipv6.c b/os/ip//ipv6.c
--- a/os/ip//ipv6.c
+++ b/os/ip//ipv6.c
@@ -8,250 +8,127 @@
 #include	"ip.h"
 #include	"ipv6.h"
 
-enum
-{
-	IP4HDR		= 20,		/* sizeof(Ip4hdr) */
-	IP6HDR		= 40,		/* sizeof(Ip6hdr) */
-	IP_HLEN4	= 0x05,		/* Header length in words */
-	IP_DF		= 0x4000,	/* Don't fragment */
-	IP_MF		= 0x2000,	/* More fragments */
-	IP6FHDR		= 8, 		/* sizeof(Fraghdr6) */
-	IP_MAX		= (32*1024),	/* Maximum Internet packet size */
-};
-
-#define IPV6CLASS(hdr) ((hdr->vcf[0]&0x0F)<<2 | (hdr->vcf[1]&0xF0)>>2)
-#define BLKIPVER(xp)	(((Ip6hdr*)((xp)->rp))->vcf[0]&0xF0)
-/*
- * This sleazy macro is stolen shamelessly from ip.c, see comment there.
- */
-#define BKFG(xp)	((Ipfrag*)((xp)->base))
-
-typedef struct	IP	IP;
-typedef struct	Fragment4	Fragment4;
-typedef struct	Fragment6	Fragment6;
-typedef struct	Ipfrag	Ipfrag;
-
-Block*		ip6reassemble(IP*, int, Block*, Ip6hdr*);
-void		ipfragfree6(IP*, Fragment6*);
-Fragment6*	ipfragallo6(IP*);
+static Block*		ip6reassemble(IP*, int, Block*);
+static Fragment6*	ipfragallo6(IP*);
+static void		ipfragfree6(IP*, Fragment6*);
+static Block*		procopts(Block *bp);
 static Block*		procxtns(IP *ip, Block *bp, int doreasm);
-int		unfraglen(Block *bp, uchar *nexthdr, int setfh);
-Block*		procopts(Block *bp);
+static int		unfraglen(Block *bp, uchar *nexthdr, int setfh, int popfh);
 
-/* MIB II counters */
-enum
+void
+ip_init_6(Fs *f)	/* allocate the v6 parameter block, fill in defaults and attach it to f->v6p */
 {
-	Forwarding,
-	DefaultTTL,
-	InReceives,
-	InHdrErrors,
-	InAddrErrors,
-	ForwDatagrams,
-	InUnknownProtos,
-	InDiscards,
-	InDelivers,
-	OutRequests,
-	OutDiscards,
-	OutNoRoutes,
-	ReasmTimeout,
-	ReasmReqds,
-	ReasmOKs,
-	ReasmFails,
-	FragOKs,
-	FragFails,
-	FragCreates,
+	v6params *v6p;
 
-	Nstats,
-};
+	v6p = smalloc(sizeof(v6params));
 
-static char *statnames[] =
-{
-[Forwarding]	"Forwarding",
-[DefaultTTL]	"DefaultTTL",
-[InReceives]	"InReceives",
-[InHdrErrors]	"InHdrErrors",
-[InAddrErrors]	"InAddrErrors",
-[ForwDatagrams]	"ForwDatagrams",
-[InUnknownProtos]	"InUnknownProtos",
-[InDiscards]	"InDiscards",
-[InDelivers]	"InDelivers",
-[OutRequests]	"OutRequests",
-[OutDiscards]	"OutDiscards",
-[OutNoRoutes]	"OutNoRoutes",
-[ReasmTimeout]	"ReasmTimeout",
-[ReasmReqds]	"ReasmReqds",
-[ReasmOKs]	"ReasmOKs",
-[ReasmFails]	"ReasmFails",
-[FragOKs]	"FragOKs",
-[FragFails]	"FragFails",
-[FragCreates]	"FragCreates",
-};
+	v6p->rp.mflag		= 0;		/* default not managed */
+	v6p->rp.oflag		= 0;
+	v6p->rp.maxraint	= 600000;	/* millisecs */
+	v6p->rp.minraint	= 200000;	/* presumably millisecs as well -- confirm */
+	v6p->rp.linkmtu		= 0;		/* no mtu sent */
+	v6p->rp.reachtime	= 0;
+	v6p->rp.rxmitra		= 0;
+	v6p->rp.ttl		= MAXTTL;
+	v6p->rp.routerlt	= (3 * v6p->rp.maxraint) / 1000;	/* three times maxraint, divided down from millisecs */
 
-struct Fragment4
-{
-	Block*	blist;
-	Fragment4*	next;
-	ulong 	src;
-	ulong 	dst;
-	ushort	id;
-	ulong 	age;
-};
+	v6p->hp.rxmithost	= 1000;		/* v6 RETRANS_TIMER */
 
-struct Fragment6
-{
-	Block*	blist;
-	Fragment6*	next;
-	uchar 	src[IPaddrlen];
-	uchar 	dst[IPaddrlen];
-	uint	id;
-	ulong 	age;
-};
+	f->v6p			= v6p;
+}
 
-struct Ipfrag
-{
-	ushort	foff;
-	ushort	flen;
-};
-
-/* an instance of IP */
-struct IP
-{
-	ulong		stats[Nstats];
-
-	QLock		fraglock4;
-	Fragment4*	flisthead4;
-	Fragment4*	fragfree4;
-	Ref		id4;
-
-	QLock		fraglock6;
-	Fragment6*	flisthead6;
-	Fragment6*	fragfree6;
-	Ref		id6;
-
-	int		iprouting;	/* true if we route like a gateway */
-};
-
 int
-ipoput6(Fs *f, Block *bp, int gating, int ttl, int tos, Conv *c)
+ipoput6(Fs *f, Block *bp, int gating, int ttl, int tos, Routehint *rh)
 {
-	int tentative;
-	Ipifc *ifc;
+	int medialen, len, chunk, uflen, flen, seglen, lid, offset, fragoff;
+	int morefrags, blklen, rv = 0;
 	uchar *gate, nexthdr;
-	Ip6hdr *eh;
-	int medialen, len, chunk, uflen, flen, seglen, lid, offset, fragoff, morefrags, blklen;
-	Route *r, *sr;
-	Fraghdr6 fraghdr;
 	Block *xp, *nb;
+	Fraghdr6 fraghdr;
 	IP *ip;
-	int rv = 0;
+	Ip6hdr *eh;
+	Ipifc *ifc;
+	Route *r;
 
 	ip = f->ip;
-
-	/* Fill out the ip header */
-	eh = (Ip6hdr*)(bp->rp);
-
 	ip->stats[OutRequests]++;
 
-	/* Number of uchars in data and ip header to write */
+	/* Fill out the ip header */
+	eh = (Ip6hdr*)bp->rp;
+	assert(BLEN(bp) >= IP6HDR);
 	len = blocklen(bp);
-	
-	tentative = iptentative(f, eh->src);
-	if(tentative){
-		netlog(f, Logip, "reject tx of packet with tentative src address\n");
-		goto free;
-	}
-
-	if(gating){
-		chunk = nhgets(eh->ploadlen);
-		if(chunk > len){
-			ip->stats[OutDiscards]++;
-			netlog(f, Logip, "short gated packet\n");
-			goto free;
-		}
-		if(chunk + IPV6HDR_LEN < len)
-			len = chunk + IPV6HDR_LEN;
-	}
-
 	if(len >= IP_MAX){
-//		print("len > IP_MAX, free\n");
 		ip->stats[OutDiscards]++;
-		netlog(f, Logip, "exceeded ip max size %I\n", eh->dst);
+		netlog(f, Logip, "%I -> %I: exceeded ip max size: %d\n", eh->src, eh->dst, len);
 		goto free;
 	}
 
-	r = v6lookup(f, eh->dst, c);
-	if(r == nil){
-//		print("no route for %I, src %I free\n", eh->dst, eh->src);
+	r = v6lookup(f, eh->dst, eh->src, rh);
+	if(r == nil || (r->type & Rv4) != 0 || (ifc = r->ifc) == nil){
 		ip->stats[OutNoRoutes]++;
-		netlog(f, Logip, "no interface %I\n", eh->dst);
+		netlog(f, Logip, "%I -> %I: no interface\n", eh->src, eh->dst);
 		rv = -1;
 		goto free;
 	}
 
-	ifc = r->ifc;
-	if(r->type & (Rifc|Runi))
+	if(r->type & (Rifc|Runi|Rbcast|Rmulti))
 		gate = eh->dst;
 	else
-	if(r->type & (Rbcast|Rmulti)) {
-		gate = eh->dst;
-		sr = v6lookup(f, eh->src, nil);
-		if(sr != nil && (sr->type & Runi))
-			ifc = sr->ifc;
-	}
-	else
 		gate = r->v6.gate;
 
-	if(!gating)
-		eh->vcf[0] = IP_VER6;
-	eh->ttl = ttl;
-	if(!gating) {
-		eh->vcf[0] |= (tos >> 4);
-		eh->vcf[1] = (tos << 4);
-	}
-
-	if(!canrlock(ifc)) {
+	if(!canrlock(ifc)){
+		ip->stats[OutDiscards]++;
 		goto free;
 	}
-
 	if(waserror()){
 		runlock(ifc);
 		nexterror();
 	}
 
-	if(ifc->m == nil) {
+	if(ifc->m == nil)
 		goto raise;
+
+	if(!gating){
+		eh->vcf[0] = IP_VER6;
+		eh->vcf[0] |= tos >> 4;
+		eh->vcf[1]  = tos << 4;
 	}
+	eh->ttl = ttl;
 
 	/* If we dont need to fragment just send it */
 	medialen = ifc->maxtu - ifc->m->hsize;
 	if(len <= medialen) {
-		hnputs(eh->ploadlen, len-IPV6HDR_LEN);
-		ifc->m->bwrite(ifc, bp, V6, gate);
+		hnputs(eh->ploadlen, len - IP6HDR);
+		ipifcoput(ifc, bp, V6, gate);
 		runlock(ifc);
 		poperror();
 		return 0;
 	}
 
-	if(gating) 
-	if(ifc->reassemble <= 0) {
-
-		/* v6 intermediate nodes are not supposed to fragment pkts;
-		   we fragment if ifc->reassemble is turned on; an exception
-		   needed for nat.
+	if(gating && !ifc->reassemble) {
+		/*
+		 * v6 intermediate nodes are not supposed to fragment pkts;
+		 * we fragment if ifc->reassemble is turned on; an exception
+		 * needed for nat.
 		 */
-
 		ip->stats[OutDiscards]++;
 		icmppkttoobig6(f, ifc, bp);
-		netlog(f, Logip, "%I: gated pkts not fragmented\n", eh->dst);
+		netlog(f, Logip, "%I -> %I: gated pkts not fragmented\n", eh->src, eh->dst);
 		goto raise;
 	}
-		
+
 	/* start v6 fragmentation */
-	uflen = unfraglen(bp, &nexthdr, 1);
+	uflen = unfraglen(bp, &nexthdr, 1, 0);
+	if(uflen < IP6HDR || nexthdr == FH) {
+		ip->stats[FragFails]++;
+		ip->stats[OutDiscards]++;
+		netlog(f, Logip, "%I -> %I: fragment header botch\n", eh->src, eh->dst);
+		goto raise;
+	}
 	if(uflen > medialen) {
 		ip->stats[FragFails]++;
 		ip->stats[OutDiscards]++;
-		netlog(f, Logip, "%I: unfragmentable part too big\n", eh->dst);
+		netlog(f, Logip, "%I -> %I: unfragmentable part too big: %d\n", eh->src, eh->dst, uflen);
 		goto raise;
 	}
 
@@ -260,7 +137,7 @@
 	if(seglen < 8) {
 		ip->stats[FragFails]++;
 		ip->stats[OutDiscards]++;
-		netlog(f, Logip, "%I: seglen < 8\n", eh->dst);
+		netlog(f, Logip, "%I -> %I: seglen < 8\n", eh->src, eh->dst);
 		goto raise;
 	}
 
@@ -271,13 +148,13 @@
 
 	xp = bp;
 	offset = uflen;
-	while (xp != nil && offset && offset >= BLEN(xp)) {
+	while (offset && offset >= BLEN(xp)) {
 		offset -= BLEN(xp);
 		xp = xp->next;
 	}
 	xp->rp += offset;
 
-	fragoff = 0; 
+	fragoff = 0;
 	morefrags = 1;
 
 	for(; fragoff < flen; fragoff += seglen) {
@@ -292,7 +169,7 @@
 		memmove(nb->wp, eh, uflen);
 		nb->wp += uflen;
 
-		hnputs(fraghdr.offsetRM, fragoff); // last 3 bits must be 0
+		hnputs(fraghdr.offsetRM, fragoff); /* last 3 bits must be 0 */
 		fraghdr.offsetRM[1] |= morefrags;
 		memmove(nb->wp, &fraghdr, IP6FHDR);
 		nb->wp += IP6FHDR;
@@ -300,11 +177,11 @@
 		/* Copy data */
 		chunk = seglen;
 		while (chunk) {
-			if(!xp) {
+			if(xp == nil) {
 				ip->stats[OutDiscards]++;
 				ip->stats[FragFails]++;
 				freeblist(nb);
-				netlog(f, Logip, "!xp: chunk in v6%d\n", chunk);
+				netlog(f, Logip, "xp == nil: chunk in v6%d\n", chunk);
 				goto raise;
 			}
 			blklen = chunk;
@@ -316,10 +193,9 @@
 			xp->rp += blklen;
 			chunk -= blklen;
 			if(xp->rp == xp->wp)
-				xp = xp->next; 
+				xp = xp->next;
 		}
-
-		ifc->m->bwrite(ifc, nb, V6, gate);
+		ipifcoput(ifc, nb, V6, gate);
 		ip->stats[FragCreates]++;
 	}
 	ip->stats[FragOKs]++;
@@ -328,7 +204,7 @@
 	runlock(ifc);
 	poperror();
 free:
-	freeblist(bp);	
+	freeblist(bp);
 	return rv;
 }
 
@@ -335,16 +211,10 @@
 void
 ipiput6(Fs *f, Ipifc *ifc, Block *bp)
 {
-	int hl;
-	int hop, tos;
-	uchar proto;
+	int hl, len, hop, tos;
+	IP *ip;
 	Ip6hdr *h;
 	Proto *p;
-	int notforme;
-	int tentative;
-	uchar v6dst[IPaddrlen];
-	IP *ip;
-	Route *r, *sr;
 
 	ip = f->ip;
 	ip->stats[InReceives]++;
@@ -365,40 +235,44 @@
 			return;
 	}
 
-	h = (Ip6hdr *)(bp->rp);
-
-	memmove(&v6dst[0], &(h->dst)[0], IPaddrlen);
-	notforme = ipforme(f, v6dst) == 0;
-	tentative = iptentative(f, v6dst);
-  
-	if(tentative && (h->proto != ICMPv6)) {
-		print("tentative addr, drop\n");
-		freeblist(bp);
-		return;
-	}
-
 	/* Check header version */
-	if(BLKIPVER(bp) != IP_VER6) {
+	h = (Ip6hdr*)bp->rp;
+	if((h->vcf[0] & 0xF0) != IP_VER6) {
 		ip->stats[InHdrErrors]++;
 		netlog(f, Logip, "ip: bad version %ux\n", (h->vcf[0]&0xF0)>>2);
-		freeblist(bp);
+		goto drop;
+	}
+	len = IP6HDR + nhgets(h->ploadlen);
+	if((bp = trimblock(bp, 0, len)) == nil){
+		ip->stats[InHdrErrors]++;
+		netlog(f, Logip, "%I -> %I: bogus packet length: %d\n", h->src, h->dst, len);
 		return;
 	}
+	h = (Ip6hdr*)bp->rp;
 
 	/* route */
-	if(notforme) {
-		if(!ip->iprouting){
-			freeb(bp);
-			return;
+	if(!ipforme(f, h->dst)) {
+		Route *r;
+		Routehint rh;
+		Ipifc *nifc;
+
+		if(!ip->iprouting)
+			goto drop;
+
+		/* don't forward to link-local destinations */
+		if(islinklocal(h->dst) ||
+		   (isv6mcast(h->dst) && (h->dst[1]&0xF) <= Link_local_scop)){
+			ip->stats[OutDiscards]++;
+			goto drop;
 		}
+			
 		/* don't forward to source's network */
-		sr = v6lookup(f, h->src, nil);
-		r = v6lookup(f, h->dst, nil);
-
-		if(r == nil || sr == r){
+		rh.r = nil;
+		r  = v6lookup(f, h->dst, h->src, &rh);
+		if(r == nil || (nifc = r->ifc) == nil || (r->type & Rv4) != 0
+		|| (nifc == ifc && !ifc->reflect)){
 			ip->stats[OutDiscards]++;
-			freeblist(bp);
-			return;
+			goto drop;
 		}
 
 		/* don't forward if packet has timed out */
@@ -406,33 +280,29 @@
 		if(hop < 1) {
 			ip->stats[InHdrErrors]++;
 			icmpttlexceeded6(f, ifc, bp);
-			freeblist(bp);
-			return;
+			goto drop;
 		}
 
 		/* process headers & reassemble if the interface expects it */
-		bp = procxtns(ip, bp, r->ifc->reassemble);
-
+		bp = procxtns(ip, bp, nifc->reassemble);
 		if(bp == nil)
 			return;
 
 		ip->stats[ForwDatagrams]++;
-		h = (Ip6hdr *) (bp->rp);
-		tos = IPV6CLASS(h);
+		h = (Ip6hdr*)bp->rp;
+		tos = (h->vcf[0]&0x0F)<<2 | (h->vcf[1]&0xF0)>>2;
 		hop = h->ttl;
-		ipoput6(f, bp, 1, hop-1, tos, nil);
+		ipoput6(f, bp, 1, hop-1, tos, &rh);
 		return;
 	}
 
 	/* reassemble & process headers if needed */
 	bp = procxtns(ip, bp, 1);
-
 	if(bp == nil)
 		return;
 
-	h = (Ip6hdr *) (bp->rp);
-	proto = h->proto;
-	p = Fsrcvpcol(f, proto);
+	h = (Ip6hdr*)bp->rp;
+	p = Fsrcvpcol(f, h->proto);
 	if(p != nil && p->rcv != nil) {
 		ip->stats[InDelivers]++;
 		(*p->rcv)(p, ifc, bp);
@@ -441,6 +311,7 @@
 
 	ip->stats[InDiscards]++;
 	ip->stats[InUnknownProtos]++;
+drop:
 	freeblist(bp);
 }
 
@@ -447,20 +318,20 @@
 /*
  * ipfragfree6 - copied from ipfragfree4 - assume hold fraglock6
  */
-void
+static void
 ipfragfree6(IP *ip, Fragment6 *frag)
 {
 	Fragment6 *fl, **l;
 
-	if(frag->blist)
+	if(frag->blist != nil)
 		freeblist(frag->blist);
-
-	memset(frag->src, 0, IPaddrlen);
-	frag->id = 0;
 	frag->blist = nil;
+	frag->id = 0;
+	memset(frag->src, 0, IPaddrlen);
+	memset(frag->dst, 0, IPaddrlen);
 
 	l = &ip->flisthead6;
-	for(fl = *l; fl; fl = fl->next) {
+	for(fl = *l; fl != nil; fl = fl->next) {
 		if(fl == frag) {
 			*l = frag->next;
 			break;
@@ -470,13 +341,12 @@
 
 	frag->next = ip->fragfree6;
 	ip->fragfree6 = frag;
-
 }
 
 /*
  * ipfragallo6 - copied from ipfragalloc4
  */
-Fragment6*
+static Fragment6*
 ipfragallo6(IP *ip)
 {
 	Fragment6 *f;
@@ -483,7 +353,7 @@
 
 	while(ip->fragfree6 == nil) {
 		/* free last entry on fraglist */
-		for(f = ip->flisthead6; f->next; f = f->next)
+		for(f = ip->flisthead6; f->next != nil; f = f->next)
 			;
 		ipfragfree6(ip, f);
 	}
@@ -497,108 +367,109 @@
 }
 
 static Block*
-procxtns(IP *ip, Block *bp, int doreasm) {
-
-	int offset;
+procxtns(IP *ip, Block *bp, int doreasm)	/* walk the extension headers, reassembling if doreasm; returns nil when bp was consumed or dropped */
+{
 	uchar proto;
-	Ip6hdr *h;
+	int offset;
 
-	h = (Ip6hdr *) (bp->rp);
-	offset = unfraglen(bp, &proto, 0);
-
-	if((proto == FH) && (doreasm != 0)) {
-		bp = ip6reassemble(ip, offset, bp, h);
-		if(bp == nil) 
-			return nil; 
-		offset = unfraglen(bp, &proto, 0);
+	offset = unfraglen(bp, &proto, 0, doreasm);	/* popfh = doreasm: unlink the fragment header only if we reassemble */
+	if(offset >= IP6HDR && proto == FH && doreasm) {
+		bp = ip6reassemble(ip, offset, bp);
+		if(bp == nil)	/* no complete packet to hand back yet */
+			return nil;
+		offset = unfraglen(bp, &proto, 0, 0);	/* re-scan the headers of the packet ip6reassemble returned */
+		if(proto == FH)	/* a fragment header is still present after reassembly */
+			offset = -1;	/* force the error path below */
 	}
-
-	if(proto == DOH || offset > IP6HDR) 
+	if(offset < IP6HDR){	/* unfraglen reported an error, or offset was forced to -1 above */
+		ip->stats[InHdrErrors]++;
+		ip->stats[InDiscards]++;
+		freeblist(bp);
+		return nil;
+	}
+	if(proto == DOH || offset > IP6HDR)
 		bp = procopts(bp);
-
 	return bp;
 }
 
-
-/*	returns length of "Unfragmentable part", i.e., sum of lengths of ipv6 hdr,
- *	hop-by-hop & routing headers if present; *nexthdr is set to nexthdr value
- *	of the last header in the "Unfragmentable part"; if setfh != 0, nexthdr
- *	field of the last header in the "Unfragmentable part" is set to FH.
+/*
+ * returns length of "Unfragmentable part", i.e., sum of lengths of ipv6 hdr,
+ * hop-by-hop & routing headers if present; *nexthdr is set to nexthdr value
+ * of the last header in the "Unfragmentable part"; if setfh != 0, nexthdr
+ * field of the last header in the "Unfragmentable part" is set to FH.
+ * When the last header is a fragment header and popfh != 0 then set
+ * the nexthdr value of the previous header to the nexthdr value of the
+ * fragment header. returns -1 on error.
  */
-int
-unfraglen(Block *bp, uchar *nexthdr, int setfh)
+static int
+unfraglen(Block *bp, uchar *nexthdr, int setfh, int popfh)	/* walks only the bytes between bp->rp and bp->wp */
 {
-	uchar *p, *q;
-	int ufl, hs;
+	uchar *e, *p, *q;
 
+	e = bp->wp;	/* end of the data in this block; used to bound the header walk */
 	p = bp->rp;
-	q = p+6;	/* proto, = p+sizeof(Ip6hdr.vcf)+sizeof(Ip6hdr.ploadlen) */
+	q = p+6;   /* proto, = p+sizeof(Ip6hdr.vcf)+sizeof(Ip6hdr.ploadlen) */
 	*nexthdr = *q;
-	ufl = IP6HDR;
-	p += ufl;
-
-	for(;;) {
-		if(*nexthdr == HBH || *nexthdr == RH) {
-			*nexthdr = *p;
-			hs = ((int)*(p+1) + 1) * 8;
-			ufl += hs;
-			q = p;
-			p += hs;
-		}
-		else
-			break;
+	p += IP6HDR;	/* p now points just past the fixed ipv6 header */
+	while(*nexthdr == HBH || *nexthdr == RH){
+		if(p+2 > e)	/* need this header's nexthdr and length bytes */
+			return -1;
+		q = p;	/* q tracks the nexthdr field of the last header walked */
+		*nexthdr = *q;
+		p += ((int)p[1] + 1) * 8;	/* length byte counts 8-byte units beyond the first 8 bytes */
 	}
-
-	if(*nexthdr == FH)
-		*q = *p;
-
-	if(setfh)
+	if(p > e)	/* the last header's length ran past the data */
+		return -1;
+	if(*nexthdr == FH){
+		if(p+IP6FHDR > e || *p == FH)	/* truncated fragment header, or one chained to another */
+			return -1;
+		if(popfh)
+			*q = *p;	/* previous header now names what follows the fragment header */
+	} else if(setfh)
 		*q = FH;
-
-	return ufl;
+	return p - bp->rp;	/* length of the unfragmentable part */
 }
 
-Block*
+static Block*	/* returns bp unchanged: no option processing is done here */
 procopts(Block *bp)
 {
 	return bp;
 }
 
-Block*
-ip6reassemble(IP* ip, int uflen, Block* bp, Ip6hdr* ih)
+static Block*
+ip6reassemble(IP* ip, int uflen, Block* bp)
 {
-
-	int fend, offset;
+	int offset, ovlap, fragsize, len;
+	uchar src[IPaddrlen], dst[IPaddrlen];
 	uint id;
-	Fragment6 *f, *fnext;
+	Block *bl, **l, *prev;
 	Fraghdr6 *fraghdr;
-	uchar src[IPaddrlen], dst[IPaddrlen];
-	Block *bl, **l, *last, *prev;
-	int ovlap, len, fragsize, pktposn;
+	Fragment6 *f, *fnext;
+	Ipfrag *fp, *fq;
+	Ip6hdr* ih;
 
-	fraghdr = (Fraghdr6 *) (bp->rp + uflen);
-	memmove(src, ih->src, IPaddrlen);
-	memmove(dst, ih->dst, IPaddrlen);
-	id = nhgetl(fraghdr->id);
-	offset = nhgets(fraghdr->offsetRM) & ~7;
-
 	/*
-	 *  block lists are too hard, pullupblock into a single block
+	 *  block lists are too hard, concatblock into a single block
 	 */
-	if(bp->next){
-		bp = pullupblock(bp, blocklen(bp));
-		ih = (Ip6hdr *)(bp->rp);
-	}
+	bp = concatblock(bp);
 
+	ih = (Ip6hdr*)bp->rp;
+	fraghdr = (Fraghdr6*)(bp->rp + uflen);
+	id = nhgetl(fraghdr->id);
+	offset = nhgets(fraghdr->offsetRM);
+	fragsize = BLEN(bp) - uflen - IP6FHDR;
 
+	memmove(src, ih->src, IPaddrlen);
+	memmove(dst, ih->dst, IPaddrlen);
+
 	qlock(&ip->fraglock6);
 
 	/*
 	 *  find a reassembly queue for this fragment
 	 */
-	for(f = ip->flisthead6; f; f = fnext){
+	for(f = ip->flisthead6; f != nil; f = fnext){
 		fnext = f->next;
-		if(ipcmp(f->src, src)==0 && ipcmp(f->dst, dst)==0 && f->id == id)
+		if(f->id == id && ipcmp(f->src, src) == 0 && ipcmp(f->dst, dst) == 0)
 			break;
 		if(f->age < NOW){
 			ip->stats[ReasmTimeout]++;
@@ -606,28 +477,35 @@
 		}
 	}
 
-
 	/*
 	 *  if this isn't a fragmented packet, accept it
 	 *  and get rid of any fragments that might go
 	 *  with it.
 	 */
-	if(nhgets(fraghdr->offsetRM)==0) {	// first frag is also the last
+	if((offset & ~6) == 0) {	/* 1st frag is also last */
 		if(f != nil) {
-			ipfragfree6(ip, f);
 			ip->stats[ReasmFails]++;
+			ipfragfree6(ip, f);
 		}
 		qunlock(&ip->fraglock6);
+
+		/* get rid of frag header */
+		memmove(bp->rp + IP6FHDR, bp->rp, uflen);
+		bp->rp += IP6FHDR;
+		ih = (Ip6hdr*)bp->rp;
+		hnputs(ih->ploadlen, BLEN(bp)-IP6HDR);
+
 		return bp;
 	}
 
-	if(bp->base+sizeof(Ipfrag) >= bp->rp){
-		bp = padblock(bp, sizeof(Ipfrag));
-		bp->rp += sizeof(Ipfrag);
+	if(bp->base+IPFRAGSZ > bp->rp){
+		bp = padblock(bp, IPFRAGSZ);
+		bp->rp += IPFRAGSZ;
 	}
 
-	BKFG(bp)->foff = offset;
-	BKFG(bp)->flen = nhgets(ih->ploadlen) + IP6HDR - uflen - IP6FHDR;
+	fp = (Ipfrag*)bp->base;
+	fp->foff = offset & ~7;
+	fp->flen = fragsize;
 
 	/* First fragment allocates a reassembly queue */
 	if(f == nil) {
@@ -638,8 +516,9 @@
 
 		f->blist = bp;
 
-		qunlock(&ip->fraglock6);
 		ip->stats[ReasmReqds]++;
+		qunlock(&ip->fraglock6);
+
 		return nil;
 	}
 
@@ -649,7 +528,7 @@
 	prev = nil;
 	l = &f->blist;
 	bl = f->blist;
-	while(bl != nil && BKFG(bp)->foff > BKFG(bl)->foff) {
+	while(bl != nil && fp->foff > ((Ipfrag*)bl->base)->foff) {
 		prev = bl;
 		l = &bl->next;
 		bl = bl->next;
@@ -656,15 +535,16 @@
 	}
 
 	/* Check overlap of a previous fragment - trim away as necessary */
-	if(prev) {
-		ovlap = BKFG(prev)->foff + BKFG(prev)->flen - BKFG(bp)->foff;
+	if(prev != nil) {
+		fq = (Ipfrag*)prev->base;
+		ovlap = fq->foff + fq->flen - fp->foff;
 		if(ovlap > 0) {
-			if(ovlap >= BKFG(bp)->flen) {
-				freeblist(bp);
+			if(ovlap >= fp->flen) {
 				qunlock(&ip->fraglock6);
+				freeb(bp);
 				return nil;
 			}
-			BKFG(prev)->flen -= ovlap;
+			fq->flen -= ovlap;
 		}
 	}
 
@@ -673,29 +553,27 @@
 	*l = bp;
 
 	/* Check to see if succeeding segments overlap */
-	if(bp->next) {
+	if(bp->next != nil) {
 		l = &bp->next;
-		fend = BKFG(bp)->foff + BKFG(bp)->flen;
+		offset = fp->foff + fp->flen;
 
 		/* Take completely covered segments out */
-
-		while(*l) {
-			ovlap = fend - BKFG(*l)->foff;
-
-			if(ovlap <= 0) 
-				break; 
-			if(ovlap < BKFG(*l)->flen) {
-				BKFG(*l)->flen -= ovlap;
-				BKFG(*l)->foff += ovlap;
-				/* move up ih hdrs */
-				memmove((*l)->rp + ovlap, (*l)->rp, uflen);
-				(*l)->rp += ovlap;
+		while((bl = *l) != nil) {
+			fq = (Ipfrag*)bl->base;
+			ovlap = offset - fq->foff;
+			if(ovlap <= 0)
 				break;
+			if(ovlap < fq->flen) {
+				/* move up ip and frag header */
+				memmove(bl->rp + ovlap, bl->rp, BLEN(bl) - fq->flen);
+				bl->rp += ovlap;
+				fq->flen -= ovlap;
+				fq->foff += ovlap;
+				break;
 			}
-			last = (*l)->next;
-			(*l)->next = nil;
-			freeblist(*l);
-			*l = last;
+			*l = bl->next;
+			bl->next = nil;
+			freeb(bl);
 		}
 	}
 
@@ -703,45 +581,55 @@
 	 *  look for a complete packet.  if we get to a fragment
 	 *  with the trailing bit of fraghdr->offsetRM[1] set, we're done.
 	 */
-	pktposn = 0;
-	for(bl = f->blist; bl; bl = bl->next) {
-		if(BKFG(bl)->foff != pktposn)
+	offset = 0;
+	for(bl = f->blist; bl != nil; bl = bl->next, offset += fp->flen) {
+		fp = (Ipfrag*)bl->base;
+		if(fp->foff != offset)
 			break;
-	
-		fraghdr = (Fraghdr6 *) (bl->rp + uflen);
-		if((fraghdr->offsetRM[1] & 1) == 0) {
 
-			bl = f->blist;
+		fraghdr = (Fraghdr6*)(bl->wp - fp->flen - IP6FHDR);
+		if(fraghdr->offsetRM[1] & 1)
+			continue;
 
-			/* get rid of frag header in first fragment */
+		bl = f->blist;
+		fq = (Ipfrag*)bl->base;
 
-			memmove(bl->rp + IP6FHDR, bl->rp, uflen);
-			bl->rp += IP6FHDR;
-			len = nhgets(((Ip6hdr*)(bl->rp))->ploadlen) - IP6FHDR;
-			bl->wp = bl->rp + len + IP6HDR;
+		/* get rid of frag header in first fragment */
+		memmove(bl->rp + IP6FHDR, bl->rp, BLEN(bl) - fq->flen - IP6FHDR);
+		bl->rp += IP6FHDR;
+		len = BLEN(bl);
 
-			/* Pullup all the fragment headers and
-			 * return a complete packet
-			 */
-			for(bl = bl->next; bl; bl = bl->next) {
-				fragsize = BKFG(bl)->flen;
-				len += fragsize;
-				bl->rp += uflen + IP6FHDR;
-				bl->wp = bl->rp + fragsize;
-			}
+		/*
+		 * Pullup all the fragment headers and
+		 * return a complete packet
+		 */
+		for(bl = bl->next; bl != nil && len < IP_MAX; bl = bl->next) {
+			fq = (Ipfrag*)bl->base;
+			fragsize = fq->flen;
+			bl->rp = bl->wp - fragsize;
+			len += fragsize;
+		}
 
-			bl = f->blist;
-			f->blist = nil;
+		if(len >= IP_MAX){
 			ipfragfree6(ip, f);
-			ih = (Ip6hdr*)(bl->rp);
-			hnputs(ih->ploadlen, len);
+			ip->stats[ReasmFails]++;
 			qunlock(&ip->fraglock6);
-			ip->stats[ReasmOKs]++;
-			return bl;		
+
+			return nil;
 		}
-		pktposn += BKFG(bl)->flen;
+
+		bl = f->blist;
+		f->blist = nil;
+		ipfragfree6(ip, f);
+
+		ih = (Ip6hdr*)bl->rp;
+		hnputs(ih->ploadlen, len-IP6HDR);
+
+		ip->stats[ReasmOKs]++;
+		qunlock(&ip->fraglock6);
+
+		return bl;
 	}
 	qunlock(&ip->fraglock6);
 	return nil;
 }
-
diff -u a/os/ip//ipv6.h b/os/ip//ipv6.h
--- a/os/ip//ipv6.h
+++ b/os/ip//ipv6.h
@@ -1,53 +1,31 @@
-#undef MIN
-#define MIN(a, b) ((a) <= (b) ? (a) : (b))
-
-/* rfc 3513 defines the address prefices */
+/*
+ * Internet Protocol Version 6
+ *
+ * rfc2460 defines the protocol, rfc2461 neighbour discovery, and
+ * rfc2462 address autoconfiguration.  rfc4443 defines ICMP; was rfc2463.
+ * rfc4291 defines the address architecture (including prefices), was rfc3513.
+ * rfc4007 defines the scoped address architecture.
+ *
+ * global unicast is anything but unspecified (::), loopback (::1),
+ * multicast (ff00::/8), and link-local unicast (fe80::/10).
+ *
+ * site-local (fec0::/10) is now deprecated, originally by rfc3879.
+ *
+ * Unique Local IPv6 Unicast Addresses are defined by rfc4193.
+ * prefix is fc00::/7, scope is global, routing is limited to roughly a site.
+ */
 #define isv6mcast(addr)	  ((addr)[0] == 0xff)
 #define islinklocal(addr) ((addr)[0] == 0xfe && ((addr)[1] & 0xc0) == 0x80)
-#define issitelocal(addr) ((addr)[0] == 0xfe && ((addr)[1] & 0xc0) == 0xc0)
-#define isv6global(addr) (((addr)[0] & 0xe0) == 0x20)
 
-#define optexsts(np) (nhgets((np)->ploadlen) > 24)
-#define issmcast(addr) (memcmp((addr), v6solicitednode, 13) == 0)
+#define optexsts(np)	(nhgets((np)->ploadlen) > 24)
+#define issmcast(addr)	(memcmp((addr), v6solicitednode, 13) == 0)
 
-/* from RFC 2460 */
+#ifndef MIN
+#define MIN(a, b) ((a) <= (b)? (a): (b))
+#endif
 
-typedef struct Ip6hdr     Ip6hdr;
-typedef struct Opthdr     Opthdr;
-typedef struct Routinghdr Routinghdr;
-typedef struct Fraghdr6    Fraghdr6;
-
-struct Ip6hdr {
-	uchar vcf[4];       	// version:4, traffic class:8, flow label:20
-	uchar ploadlen[2];  	// payload length: packet length - 40
-	uchar proto;		// next header type
-	uchar ttl;          	// hop limit
-	uchar src[IPaddrlen];
-	uchar dst[IPaddrlen];
-};
-
-struct Opthdr {
-	uchar nexthdr;
-	uchar len;
-};
-
-struct Routinghdr {
-	uchar nexthdr;
-	uchar len;
-	uchar rtetype;
-	uchar segrem;
-};
-
-struct Fraghdr6 {
-	uchar nexthdr;
-	uchar res;
-	uchar offsetRM[2];	// Offset, Res, M flag
-	uchar id[4];
-};
-
-
-enum {			/* Header Types */
-	HBH		= 0,	//?
+enum {				/* Header Types */
+	HBH		= 0,	/* hop-by-hop multicast routing protocol */
 	ICMP		= 1,
 	IGMP		= 2,
 	GGP		= 3,
@@ -72,89 +50,113 @@
 	Maxhdrtype	= 256,
 };
 
-
 enum {
-	//	multicast flgs and scop
+	/* multicast flags and scopes */
 
-	well_known_flg				= 0,
-	transient_flg				= 1,
+//	Well_known_flg	= 0,
+//	Transient_flg	= 1,
 
-	node_local_scop 			= 1,
-	link_local_scop 			= 2,
-	site_local_scop 			= 5,
-	org_local_scop				= 8,
-	global_scop				= 14,
+//	Interface_local_scop = 1,
+	Link_local_scop	= 2,
+//	Site_local_scop	= 5,
+//	Org_local_scop	= 8,
+	Global_scop	= 14,
 
-	//	various prefix lengths
+	/* various prefix lengths */
+	SOLN_PREF_LEN	= 13,
 
-	SOLN_PREF_LEN				= 13,
+	/* icmpv6 unreachability codes */
+	Icmp6_no_route		= 0,
+	Icmp6_ad_prohib		= 1,
+	Icmp6_out_src_scope	= 2,
+	Icmp6_adr_unreach	= 3,
+	Icmp6_port_unreach	= 4,
+	Icmp6_gress_src_fail	= 5,
+	Icmp6_rej_route		= 6,
+	Icmp6_unknown		= 7,  /* our own invention for internal use */
 
-	//	icmpv6 unreach codes
-	icmp6_no_route				= 0,
-	icmp6_ad_prohib				= 1,
-	icmp6_unassigned			= 2,
-	icmp6_adr_unreach			= 3,
-	icmp6_port_unreach			= 4,
-	icmp6_unkn_code				= 5,
+	/* various flags & constants */
+	v6MINTU		= 1280,
+	HOP_LIMIT	= 255,
+	IP6HDR		= 40,		/* sizeof(Ip6hdr) = 8 + 2*16 */
+	IP6FHDR		= 8, 		/* sizeof(Fraghdr6) */
 
-	// 	various flags & constants
+	/* option types */
 
-	v6MINTU      				= 1280,
-	HOP_LIMIT    				= 255,
-	ETHERHDR_LEN 				= 14,
-	IPV6HDR_LEN  				= 40,
-	IPV4HDR_LEN  				= 20,
+	/* neighbour discovery */
+	SRC_LLADDR	= 1,
+	TARGET_LLADDR	= 2,
+	PREFIX_INFO	= 3,
+	REDIR_HEADER	= 4,
+	MTU_OPTION	= 5,
+	/* new since rfc2461; see iana.org/assignments/icmpv6-parameters */
+	V6nd_home	= 8,
+	V6nd_srcaddrs	= 9,		/* rfc3122 */
+	V6nd_ip		= 17,
+	/* /lib/rfc/drafts/draft-jeong-dnsop-ipv6-dns-discovery-12.txt */
+	V6nd_rdns	= 25,
+	/* plan 9 extensions */
+	V6nd_9fs	= 250,
+	V6nd_9auth	= 251,
 
-	// 	option types
+	SRC_UNSPEC	= 0,
+	SRC_UNI		= 1,
+	TARG_UNI	= 2,
+	TARG_MULTI	= 3,
 
-	SRC_LLADDRESS    			= 1,
-	TARGET_LLADDRESS 			= 2,
-	PREFIX_INFO      			= 3,
-	REDIR_HEADER     			= 4,
-	MTU_OPTION       			= 5,
+	Tunitent	= 1,
+	Tuniproxy	= 2,
+	Tunirany	= 3,
 
-	SRC_UNSPEC  				= 0,
-	SRC_UNI     				= 1,
-	TARG_UNI    				= 2,
-	TARG_MULTI  				= 3,
+	/* Node constants */
+	MAX_MULTICAST_SOLICIT	= 3,
+	RETRANS_TIMER		= 1000,
+};
 
-	t_unitent   				= 1,
-	t_uniproxy  				= 2,
-	t_unirany   				= 3,
+typedef struct Ip6hdr	Ip6hdr;
+typedef struct Opthdr	Opthdr;
+typedef struct Routinghdr Routinghdr;
+typedef struct Fraghdr6	Fraghdr6;
 
-	//	Router constants (all times in milliseconds)
+/* we do this in case there's padding at the end of Ip6hdr */
+#define IPV6HDR \
+	uchar	vcf[4];		/* version:4, traffic class:8, flow label:20 */\
+	uchar	ploadlen[2];	/* payload length: packet length - 40 */ \
+	uchar	proto;		/* next header type */ \
+	uchar	ttl;		/* hop limit */ \
+	uchar	src[IPaddrlen]; \
+	uchar	dst[IPaddrlen]
 
-	MAX_INITIAL_RTR_ADVERT_INTERVAL 	= 16000,
-	MAX_INITIAL_RTR_ADVERTISEMENTS  	= 3,
-	MAX_FINAL_RTR_ADVERTISEMENTS    	= 3,
-	MIN_DELAY_BETWEEN_RAS 			= 3000,
-	MAX_RA_DELAY_TIME     			= 500,
+struct	Ip6hdr {
+	IPV6HDR;
+	uchar	payload[];
+};
 
-	//	Host constants
+struct	Opthdr {		/* unused */
+	uchar	nexthdr;
+	uchar	len;
+};
 
-	MAX_RTR_SOLICITATION_DELAY 		= 1000,
-	RTR_SOLICITATION_INTERVAL  		= 4000,
-	MAX_RTR_SOLICITATIONS      		= 3,
+/*
+ * Beware routing header type 0 (loose source routing); see
+ * http://www.secdev.org/conf/IPv6_RH_security-csw07.pdf.
+ * Type 1 is unused.  Type 2 is for MIPv6 (mobile IPv6) filtering
+ * against type 0 header.
+ */
+struct	Routinghdr {		/* unused */
+	uchar	nexthdr;
+	uchar	len;
+	uchar	rtetype;
+	uchar	segrem;
+};
 
-	//	Node constants
-
-	MAX_MULTICAST_SOLICIT   		= 3,
-	MAX_UNICAST_SOLICIT     		= 3,
-	MAX_ANYCAST_DELAY_TIME  		= 1000,
-	MAX_NEIGHBOR_ADVERTISEMENT 		= 3,
-	REACHABLE_TIME 				= 30000,
-	RETRANS_TIMER  				= 1000,
-	DELAY_FIRST_PROBE_TIME 			= 5000,
-
+struct	Fraghdr6 {
+	uchar	nexthdr;
+	uchar	res;
+	uchar	offsetRM[2];	/* Offset, Res, M flag */
+	uchar	id[4];
 };
 
-extern void ipv62smcast(uchar *, uchar *);
-extern void icmpns(Fs *f, uchar* src, int suni, uchar* targ, int tuni, uchar* mac);
-extern void icmpna(Fs *f, uchar* src, uchar* dst, uchar* targ, uchar* mac, uchar flags);
-extern void icmpttlexceeded6(Fs *f, Ipifc *ifc, Block *bp);
-extern void icmppkttoobig6(Fs *f, Ipifc *ifc, Block *bp);
-extern void icmphostunr(Fs *f, Ipifc *ifc, Block *bp, int code, int free);
-
 extern uchar v6allnodesN[IPaddrlen];
 extern uchar v6allnodesL[IPaddrlen];
 extern uchar v6allroutersN[IPaddrlen];
@@ -161,23 +163,16 @@
 extern uchar v6allroutersL[IPaddrlen];
 extern uchar v6allnodesNmask[IPaddrlen];
 extern uchar v6allnodesLmask[IPaddrlen];
-extern uchar v6allroutersS[IPaddrlen];
 extern uchar v6solicitednode[IPaddrlen];
 extern uchar v6solicitednodemask[IPaddrlen];
 extern uchar v6Unspecified[IPaddrlen];
 extern uchar v6loopback[IPaddrlen];
-extern uchar v6loopbackmask[IPaddrlen];
 extern uchar v6linklocal[IPaddrlen];
 extern uchar v6linklocalmask[IPaddrlen];
-extern uchar v6sitelocal[IPaddrlen];
-extern uchar v6sitelocalmask[IPaddrlen];
-extern uchar v6glunicast[IPaddrlen];
 extern uchar v6multicast[IPaddrlen];
 extern uchar v6multicastmask[IPaddrlen];
 
 extern int v6llpreflen;
-extern int v6slpreflen;
-extern int v6lbpreflen;
 extern int v6mcpreflen;
 extern int v6snpreflen;
 extern int v6aNpreflen;
@@ -184,3 +179,10 @@
 extern int v6aLpreflen;
 
 extern int ReTransTimer;
+
+void ipv62smcast(uchar *, uchar *);
+void icmpns(Fs *f, uchar* src, int suni, uchar* targ, int tuni, uchar* mac);
+void icmpna(Fs *f, uchar* src, uchar* dst, uchar* targ, uchar* mac, uchar flags);
+void icmpttlexceeded6(Fs *f, Ipifc *ifc, Block *bp);
+void icmppkttoobig6(Fs *f, Ipifc *ifc, Block *bp);
+void icmphostunr6(Fs *f, Ipifc *ifc, Block *bp, int code, int tome);
diff -u a/os/ip//loopbackmedium.c b/os/ip//loopbackmedium.c
--- a/os/ip//loopbackmedium.c
+++ b/os/ip//loopbackmedium.c
@@ -28,13 +28,12 @@
 	LB *lb;
 
 	lb = smalloc(sizeof(*lb));
+	lb->readp = (void*)-1;
 	lb->f = ifc->conv->p->f;
-	/* TO DO: make queue size a function of kernel memory */
-	lb->q = qopen(128*1024, Qmsg, nil, nil);
+	lb->q = qopen(1024*1024, Qmsg, nil, nil);
 	ifc->arg = lb;
-	ifc->mbps = 1000;
 
-	kproc("loopbackread", loopbackread, ifc, 0);
+	kproc("loopbackread", loopbackread, ifc);
 
 }
 
@@ -43,13 +42,29 @@
 {
 	LB *lb = ifc->arg;
 
-	if(lb->readp)
+	while(waserror())
+		;
+
+	/* wait for reader to start */
+	while(lb->readp == (void*)-1)
+		tsleep(&up->sleep, return0, 0, 300);
+		
+	if(lb->readp != nil)
 		postnote(lb->readp, 1, "unbind", 0);
 
+	poperror();
+
+	wunlock(ifc);
+	while(waserror())
+		;
+
 	/* wait for reader to die */
-	while(lb->readp != 0)
+	while(lb->readp != nil)
 		tsleep(&up->sleep, return0, 0, 300);
 
+	poperror();
+	wlock(ifc);
+
 	/* clean up */
 	qfree(lb->q);
 	free(lb);
@@ -76,23 +91,14 @@
 	ifc = a;
 	lb = ifc->arg;
 	lb->readp = up;	/* hide identity under a rock for unbind */
-	if(waserror()){
-		lb->readp = 0;
-		pexit("hangup", 1);
-	}
-	for(;;){
-		bp = qbread(lb->q, Maxtu);
-		if(bp == nil)
-			continue;
-		ifc->in++;
-		if(!canrlock(ifc)){
-			freeb(bp);
-			continue;
-		}
+	if(!waserror())
+	while((bp = qbread(lb->q, Maxtu)) != nil){
+		rlock(ifc);
 		if(waserror()){
 			runlock(ifc);
 			nexterror();
 		}
+		ifc->in++;
 		if(ifc->lifc == nil)
 			freeb(bp);
 		else
@@ -100,6 +106,8 @@
 		runlock(ifc);
 		poperror();
 	}
+	lb->readp = nil;
+	pexit("hangup", 1);
 }
 
 Medium loopbackmedium =
diff -u a/os/ip//netdevmedium.c b/os/ip//netdevmedium.c
--- a/os/ip//netdevmedium.c
+++ b/os/ip//netdevmedium.c
@@ -49,12 +49,13 @@
 	mchan = namec(argv[2], Aopen, ORDWR, 0);
 
 	er = smalloc(sizeof(*er));
+	er->readp = (void*)-1;
 	er->mchan = mchan;
 	er->f = ifc->conv->p->f;
 
 	ifc->arg = er;
 
-	kproc("netdevread", netdevread, ifc, 0);
+	kproc("netdevread", netdevread, ifc);
 }
 
 /*
@@ -65,13 +66,29 @@
 {
 	Netdevrock *er = ifc->arg;
 
+	while(waserror())
+		;
+
+	/* wait for reader to start */
+	while(er->readp == (void*)-1)
+		tsleep(&up->sleep, return0, 0, 300);
+
 	if(er->readp != nil)
 		postnote(er->readp, 1, "unbind", 0);
 
-	/* wait for readers to die */
+	poperror();
+
+	wunlock(ifc);
+	while(waserror())
+		;
+
+	/* wait for reader to die */
 	while(er->readp != nil)
 		tsleep(&up->sleep, return0, 0, 300);
 
+	poperror();
+	wlock(ifc);
+
 	if(er->mchan != nil)
 		cclose(er->mchan);
 
@@ -86,8 +103,6 @@
 {
 	Netdevrock *er = ifc->arg;
 
-	if(bp->next)
-		bp = concatblock(bp);
 	if(BLEN(bp) < ifc->mintu)
 		bp = adjustblock(bp, ifc->mintu);
 
@@ -104,34 +119,22 @@
 	Ipifc *ifc;
 	Block *bp;
 	Netdevrock *er;
-	char *argv[1];
 
 	ifc = a;
 	er = ifc->arg;
 	er->readp = up;	/* hide identity under a rock for unbind */
-	if(waserror()){
-		er->readp = nil;
-		pexit("hangup", 1);
-	}
+	if(!waserror())
 	for(;;){
 		bp = devtab[er->mchan->type]->bread(er->mchan, ifc->maxtu, 0);
 		if(bp == nil){
-			/*
-			 * get here if mchan is a pipe and other side hangs up
-			 * clean up this interface & get out
-ZZZ is this a good idea?
-			 */
 			poperror();
-			er->readp = nil;
-			argv[0] = "unbind";
-			if(!waserror())
+			if(!waserror()){
+				static char *argv[]  = { "unbind" };
 				ifc->conv->p->ctl(ifc->conv, argv, 1);
-			pexit("hangup", 1);
+			}
+			break;
 		}
-		if(!canrlock(ifc)){
-			freeb(bp);
-			continue;
-		}
+		rlock(ifc);
 		if(waserror()){
 			runlock(ifc);
 			nexterror();
@@ -144,6 +147,8 @@
 		runlock(ifc);
 		poperror();
 	}
+	er->readp = nil;
+	pexit("hangup", 1);
 }
 
 void
diff -u a/os/ip//netlog.c b/os/ip//netlog.c
--- a/os/ip//netlog.c
+++ b/os/ip//netlog.c
@@ -7,7 +7,7 @@
 #include	"../ip/ip.h"
 
 enum {
-	Nlog		= 4*1024,
+	Nlog		= 16*1024,
 };
 
 /*
@@ -39,12 +39,12 @@
 	{ "ppp",	Logppp, },
 	{ "ip",		Logip, },
 	{ "fs",		Logfs, },
-	{ "tcp",	Logtcp, },
 	{ "il",		Logil, },
+	{ "tcp",	Logtcp, },
 	{ "icmp",	Logicmp, },
 	{ "udp",	Logudp, },
 	{ "compress",	Logcompress, },
-	{ "ilmsg",	Logil|Logilmsg, },
+	{ "logilmsg",	Logilmsg, },
 	{ "gre",	Loggre, },
 	{ "tcpwin",	Logtcp|Logtcpwin, },
 	{ "tcprxmt",	Logtcp|Logtcprxmt, },
@@ -85,8 +85,11 @@
 		nexterror();
 	}
 	if(f->alog->opens == 0){
-		if(f->alog->buf == nil)
+		if(f->alog->buf == nil){
 			f->alog->buf = malloc(Nlog);
+			if(f->alog->buf == nil)
+				error(Enomem);
+		}
 		f->alog->rptr = f->alog->buf;
 		f->alog->end = f->alog->buf + Nlog;
 	}
@@ -202,6 +205,7 @@
 		else
 			f->alog->iponlyset = 1;
 		free(cb);
+		poperror();
 		return;
 
 	default:
@@ -227,7 +231,7 @@
 void
 netlog(Fs *f, int mask, char *fmt, ...)
 {
-	char buf[128], *t, *fp;
+	char buf[256], *t, *fp;
 	int i, n;
 	va_list arg;
 
diff -u a/os/ip//nullmedium.c b/os/ip//nullmedium.c
--- a/os/ip//nullmedium.c
+++ b/os/ip//nullmedium.c
@@ -19,8 +19,9 @@
 }
 
 static void
-nullbwrite(Ipifc*, Block*, int, uchar*)
+nullbwrite(Ipifc*, Block *bp, int, uchar*)
 {
+	freeb(bp);
 	error("nullbwrite");
 }
 
diff -u a/os/ip//pktmedium.c b/os/ip//pktmedium.c
--- a/os/ip//pktmedium.c
+++ b/os/ip//pktmedium.c
@@ -16,10 +16,10 @@
 Medium pktmedium =
 {
 .name=		"pkt",
-.hsize=		14,
-.mintu=		40,
+.hsize=		0,
+.mintu=		0,
 .maxtu=		4*1024,
-.maclen=	6,
+.maclen=	0,
 .bind=		pktbind,
 .unbind=	pktunbind,
 .bwrite=	pktbwrite,
@@ -28,12 +28,13 @@
 };
 
 /*
- *  called to bind an IP ifc to an ethernet device
+ *  called to bind an IP ifc to a packet device
  *  called with ifc wlock'd
  */
 static void
-pktbind(Ipifc*, int, char**)
+pktbind(Ipifc*, int argc, char **argv)
 {
+	USED(argc, argv);
 }
 
 /*
@@ -51,7 +52,6 @@
 pktbwrite(Ipifc *ifc, Block *bp, int, uchar*)
 {
 	/* enqueue onto the conversation's rq */
-	bp = concatblock(bp);
 	if(ifc->conv->snoopers.ref > 0)
 		qpass(ifc->conv->sq, copyblock(bp, BLEN(bp)));
 	qpass(ifc->conv->rq, bp);
diff -u a/os/ip//rudp.c b/os/ip//rudp.c
--- a/os/ip//rudp.c
+++ b/os/ip//rudp.c
@@ -1,4 +1,5 @@
 /*
+ *  Reliable User Datagram Protocol, currently only for IPv4.
  *  This protocol is compatible with UDP's packet format.
  *  It could be done over UDP if need be.
  */
@@ -25,20 +26,17 @@
 
 enum
 {
-	UDP_HDRSIZE	= 20,	/* pseudo header + udp header */
 	UDP_PHDRSIZE	= 12,	/* pseudo header */
+//	UDP_HDRSIZE	= 20,	/* pseudo header + udp header */
 	UDP_RHDRSIZE	= 36,	/* pseudo header + udp header + rudp header */
 	UDP_IPHDR	= 8,	/* ip header */
 	IP_UDPPROTO	= 254,
-	UDP_USEAD7	= 52,
-	UDP_USEAD6	= 36,
-	UDP_USEAD4	= 12,
+	UDP_USEAD7	= 52,	/* size of new ipv6 headers struct */
 
 	Rudprxms	= 200,
 	Rudptickms	= 50,
 	Rudpmaxxmit	= 10,
 	Maxunacked	= 100,
-
 };
 
 #define Hangupgen	0xffffffff	/* used only in hangup messages */
@@ -205,7 +203,7 @@
 		qlock(&rpriv->apl);
 		if(rpriv->ackprocstarted == 0){
 			sprint(kpname, "#I%drudpack", rudp->f->dev);
-			kproc(kpname, relackproc, rudp, 0);
+			kproc(kpname, relackproc, rudp);
 			rpriv->ackprocstarted = 1;
 		}
 		qunlock(&rpriv->apl);
@@ -240,6 +238,7 @@
 	qlock(ucb);
 	for(r = ucb->r; r; r = r->next)
 		m += snprint(state+m, n-m, " %I/%ld", r->addr, UNACKED(r));
+	m += snprint(state+m, n-m, "\n");
 	qunlock(ucb);
 	return m;
 }
@@ -281,7 +280,7 @@
 	/* force out any delayed acks */
 	ucb = (Rudpcb*)c->ptcl;
 	qlock(ucb);
-	for(r = ucb->r; r; r = r->next){
+	for(r = ucb->r; r != nil; r = r->next){
 		if(r->acksent != r->rcvseq)
 			relsendack(c, r, 0);
 	}
@@ -374,27 +373,10 @@
 		rport = nhgets(bp->rp);
 		bp->rp += 2+2;			/* Ignore local port */
 		break;
-	case 6:
-		/* get user specified addresses */
-		bp = pullupblock(bp, UDP_USEAD6);
-		if(bp == nil)
-			return;
-		ipmove(raddr, bp->rp);
-		bp->rp += IPaddrlen;
-		ipmove(laddr, bp->rp);
-		bp->rp += IPaddrlen;
-		/* pick interface closest to dest */
-		if(ipforme(f, laddr) != Runi)
-			findlocalip(f, laddr, raddr);
-		rport = nhgets(bp->rp);
-
-		bp->rp += 4;			/* Igonore local port */
-		break;
 	default:
 		ipmove(raddr, c->raddr);
 		ipmove(laddr, c->laddr);
 		rport = c->rport;
-
 		break;
 	}
 
@@ -402,9 +384,6 @@
 
 	/* Make space to fit rudp & ip header */
 	bp = padblock(bp, UDP_IPHDR+UDP_RHDRSIZE);
-	if(bp == nil)
-		return;
-
 	uh = (Udphdr *)(bp->rp);
 	uh->vihl = IP_VER4;
 
@@ -417,7 +396,6 @@
 	uh->frag[1] = 0;
 	hnputs(uh->udpplen, ptcllen);
 	switch(ucb->headers){
-	case 6:
 	case 7:
 		v6tov4(uh->udpdst, raddr);
 		hnputs(uh->udpdport, rport);
@@ -528,7 +506,7 @@
 
 	c = iphtlook(&upriv->ht, raddr, rport, laddr, lport);
 	if(c == nil){
-		/* no converstation found */
+		/* no conversation found */
 		upriv->ustats.rudpNoPorts++;
 		qunlock(rudp);
 		netlog(f, Logudp, "udp: no conv %I!%d -> %I!%d\n", raddr, rport,
@@ -574,45 +552,32 @@
 		p = bp->rp;
 		ipmove(p, raddr); p += IPaddrlen;
 		ipmove(p, laddr); p += IPaddrlen;
-		ipmove(p, ifc->lifc->local); p += IPaddrlen;
+		if(!ipv6local(ifc, p, 0, raddr))
+			ipmove(p, ifc->lifc != nil ? ifc->lifc->local : IPnoaddr);
+		p += IPaddrlen;
 		hnputs(p, rport); p += 2;
 		hnputs(p, lport);
 		break;
-	case 6:
-		/* pass the src address */
-		bp = padblock(bp, UDP_USEAD6);
-		p = bp->rp;
-		ipmove(p, raddr); p += IPaddrlen;
-		ipmove(p, ipforme(f, laddr)==Runi ? laddr : ifc->lifc->local); p += IPaddrlen;
-		hnputs(p, rport); p += 2;
-		hnputs(p, lport);
-		break;
 	default:
 		/* connection oriented rudp */
 		if(ipcmp(c->raddr, IPnoaddr) == 0){
-			/* save the src address in the conversation */
+			/* reply with the same ip address (if not broadcast) */
+			if(ipforme(f, laddr) != Runi)
+				ipv6local(ifc, laddr, 0, raddr);
+			ipmove(c->laddr, laddr);
 		 	ipmove(c->raddr, raddr);
 			c->rport = rport;
-
-			/* reply with the same ip address (if not broadcast) */
-			if(ipforme(f, laddr) == Runi)
-				ipmove(c->laddr, laddr);
-			else
-				v4tov6(c->laddr, ifc->lifc->local);
 		}
 		break;
 	}
-	if(bp->next)
-		bp = concatblock(bp);
 
 	if(qfull(c->rq)) {
-		netlog(f, Logrudp, "rudp: qfull %I.%d -> %I.%d\n", raddr, rport,
-			laddr, lport);
+		netlog(f, Logrudp, "rudp: qfull %I.%d -> %I.%d\n",
+			raddr, rport, laddr, lport);
 		freeblist(bp);
+	} else {
+		qpass(c->rq, concatblock(bp));
 	}
-	else
-		qpass(c->rq, bp);
-	
 	qunlock(ucb);
 }
 
@@ -629,16 +594,14 @@
 	if(n < 1)
 		return rudpunknown;
 
-	if(strcmp(f[0], "headers++4") == 0){
-		ucb->headers = 7;
+	if(strcmp(f[0], "headers") == 0){
+		ucb->headers = 7;		/* new headers format */
 		return nil;
-	} else if(strcmp(f[0], "headers") == 0){
-		ucb->headers = 6;
-		return nil;
 	} else if(strcmp(f[0], "hangup") == 0){
 		if(n < 3)
 			return "bad syntax";
-		parseip(ip, f[1]);
+		if (parseip(ip, f[1]) == -1)
+			return Ebadip;
 		x = atoi(f[2]);
 		qlock(ucb);
 		relforget(c, ip, x, 1);
@@ -645,7 +608,7 @@
 		qunlock(ucb);
 		return nil;
 	} else if(strcmp(f[0], "randdrop") == 0){
-		x = 10;		/* default is 10% */
+		x = 10;			/* default is 10% */
 		if(n > 1)
 			x = atoi(f[1]);
 		if(x > 100 || x < 0)
@@ -672,12 +635,13 @@
 	pdest = nhgets(h->udpdport);
 
 	/* Look for a connection */
-	for(p = rudp->conv; *p; p++) {
-		s = *p;
+	for(p = rudp->conv; (s = *p) != nil; p++) {
 		if(s->rport == pdest)
 		if(s->lport == psource)
 		if(ipcmp(s->raddr, dest) == 0)
 		if(ipcmp(s->laddr, source) == 0){
+			if(s->ignoreadvice)
+				break;
 			qhangup(s->rq, msg);
 			qhangup(s->wq, msg);
 			break;
@@ -701,12 +665,6 @@
 		upriv->orders);
 }
 
-int
-rudpgc(Proto *rudp)
-{
-	return natgc(rudp->ipproto);
-}
-
 void
 rudpinit(Fs *fs)
 {
@@ -725,9 +683,8 @@
 	rudp->rcv = rudpiput;
 	rudp->advise = rudpadvise;
 	rudp->stats = rudpstats;
-	rudp->gc = rudpgc;
 	rudp->ipproto = IP_UDPPROTO;
-	rudp->nc = 16;
+	rudp->nc = 32;
 	rudp->ptclsize = sizeof(Rudpcb);
 
 	Fsproto(fs, rudp);
@@ -770,6 +727,8 @@
 
 	rudp = (Proto *)a;
 
+	while(waserror())
+		;
 loop:
 	tsleep(&up->sleep, return0, 0, Rudptickms);
 
@@ -989,8 +948,6 @@
 	Fs *f;
 
 	bp = allocb(UDP_IPHDR + UDP_RHDRSIZE);
-	if(bp == nil)
-		return;
 	bp->wp += UDP_IPHDR + UDP_RHDRSIZE;
 	f = c->p->f;
 	uh = (Udphdr *)(bp->rp);
diff -u a/os/ip//tcp.c b/os/ip//tcp.c
--- a/os/ip//tcp.c
+++ b/os/ip//tcp.c
@@ -41,13 +41,13 @@
 	EOLOPT		= 0,
 	NOOPOPT		= 1,
 	MSSOPT		= 2,
-	MSS_LENGTH	= 4,		/* Mean segment size */
+	MSS_LENGTH	= 4,		/* Maximum segment size */
 	WSOPT		= 3,
 	WS_LENGTH	= 3,		/* Bits to scale window size by */
 	MSL2		= 10,
 	MSPTICK		= 50,		/* Milliseconds per timer tick */
-	DEF_MSS		= 1460,		/* Default mean segment */
-	DEF_MSS6	= 1280,		/* Default mean segment (min) for v6 */
+	DEF_MSS		= 1460,		/* Default maximum segment */
+	DEF_MSS6	= 1220,		/* Default maximum segment (min) for v6 */
 	DEF_RTT		= 500,		/* Default round trip */
 	DEF_KAT		= 120000,	/* Default time (ms) between keep alives */
 	TCP_LISTEN	= 0,		/* Listen connection */
@@ -81,7 +81,13 @@
 	NLHT		= 256,		/* hash table size, must be a power of 2 */
 	LHTMASK		= NLHT-1,
 
-	HaveWS		= 1<<8,
+	/*
+	 * window is 64kb · 2ⁿ
+	 * these factors determine the ultimate bandwidth-delay product.
+	 * 64kb · 2⁵ = 2mb, or 2x overkill for 100mbps · 70ms.
+	 */
+	Maxqscale	= 4,		/* maximum queuing scale */
+	Defadvscale	= 4,		/* default advertisement */
 };
 
 /* Must correspond to the enumeration above */
@@ -169,8 +175,9 @@
 	ulong	seq;
 	ulong	ack;
 	uchar	flags;
-	ushort	ws;	/* window scale option (if not zero) */
-	ulong	wnd;
+	uchar	update;
+	ushort	ws;	/* window scale option */
+	ulong	wnd;	/* prescaled window*/
 	ushort	urg;
 	ushort	mss;	/* max segment size option (if not zero) */
 	ushort	len;	/* size of data */
@@ -205,44 +212,53 @@
 		ulong	wnd;		/* Tcp send window */
 		ulong	urg;		/* Urgent data pointer */
 		ulong	wl2;
-		int	scale;		/* how much to right shift window in xmitted packets */
+		uint	scale;		/* how much to right shift window in xmitted packets */
 		/* to implement tahoe and reno TCP */
 		ulong	dupacks;	/* number of duplicate acks rcvd */
+		ulong	partialack;
 		int	recovery;	/* loss recovery flag */
-		ulong	rxt;		/* right window marker for recovery */
+		int	retransmit;	/* retransmit 1 packet @ una flag */
+		int	rto;
+		ulong	rxt;		/* right window marker for recovery "recover" rfc3782 */
 	} snd;
 	struct {
 		ulong	nxt;		/* Receive pointer to next uchar slot */
 		ulong	wnd;		/* Receive window incoming */
+		ulong	wsnt;		/* Last wptr sent.  important to track for large bdp */
+		ulong	wptr;
 		ulong	urg;		/* Urgent pointer */
+		ulong	ackptr;		/* last acked sequence */
 		int	blocked;
-		int	una;		/* unacked data segs */
-		int	scale;		/* how much to left shift window in rcved packets */
+		uint	scale;		/* how much to left shift window in rcv'd packets */
 	} rcv;
 	ulong	iss;			/* Initial sequence number */
-	int	sawwsopt;		/* true if we saw a wsopt on the incoming SYN */
 	ulong	cwind;			/* Congestion window */
-	int	scale;			/* desired snd.scale */
-	ushort	ssthresh;		/* Slow start threshold */
+	ulong	abcbytes;		/* appropriate byte counting rfc 3465 */
+	uint	scale;			/* desired snd.scale */
+	ulong	ssthresh;		/* Slow start threshold */
 	int	resent;			/* Bytes just resent */
 	int	irs;			/* Initial received squence */
-	ushort	mss;			/* Mean segment size */
+	ushort	mss;			/* Maximum segment size */
 	int	rerecv;			/* Overlap of data rerecevived */
-	ulong	window;			/* Recevive window */
+	ulong	window;			/* Our receive window (queue) */
+	uint	qscale;			/* Log2 of our receive window (queue) */
 	uchar	backoff;		/* Exponential backoff counter */
 	int	backedoff;		/* ms we've backed off for rexmits */
 	uchar	flags;			/* State flags */
 	Reseq	*reseq;			/* Resequencing queue */
+	int	nreseq;
+	int	reseqlen;
 	Tcptimer	timer;			/* Activity timer */
 	Tcptimer	acktimer;		/* Acknowledge timer */
 	Tcptimer	rtt_timer;		/* Round trip timer */
 	Tcptimer	katimer;		/* keep alive timer */
 	ulong	rttseq;			/* Round trip sequence */
-	int	srtt;			/* Shortened round trip */
+	int	srtt;			/* Smoothed round trip */
 	int	mdev;			/* Mean deviation of round trip */
 	int	kacounter;		/* count down for keep alive */
 	uint	sndsyntime;		/* time syn sent */
 	ulong	time;			/* time Finwait2 or Syn_received was sent */
+	ulong	timeuna;			/* snd.una when time was set */
 	int	nochecksum;		/* non-zero means don't send checksums */
 	int	flgcnt;			/* number of flags in the sequence (FIN,SEQ) */
 
@@ -285,11 +301,11 @@
 };
 
 int	tcp_irtt = DEF_RTT;	/* Initial guess at round trip time */
-ushort	tcp_mss = DEF_MSS;	/* Maximum segment size to be sent */
 
 enum {
 	/* MIB stats */
 	MaxConn,
+	Mss,
 	ActiveOpens,
 	PassiveOpens,
 	EstabResets,
@@ -297,6 +313,7 @@
 	InSegs,
 	OutSegs,
 	RetransSegs,
+	RetransSegsSent,
 	RetransTimeouts,
 	InErrs,
 	OutRsts,
@@ -305,14 +322,27 @@
 	CsumErrs,
 	HlenErrs,
 	LenErrs,
+	Resequenced,
 	OutOfOrder,
+	ReseqBytelim,
+	ReseqPktlim,
+	Delayack,
+	Wopenack,
 
+	Recovery,
+	RecoveryDone,
+	RecoveryRTO,
+	RecoveryNoSeq,
+	RecoveryCwind,
+	RecoveryPA,
+
 	Nstats
 };
 
-static char *statnames[] =
+static char *statnames[Nstats] =
 {
 [MaxConn]	"MaxConn",
+[Mss]		"MaxSegment",
 [ActiveOpens]	"ActiveOpens",
 [PassiveOpens]	"PassiveOpens",
 [EstabResets]	"EstabResets",
@@ -320,6 +350,7 @@
 [InSegs]	"InSegs",
 [OutSegs]	"OutSegs",
 [RetransSegs]	"RetransSegs",
+[RetransSegsSent]	"RetransSegsSent",
 [RetransTimeouts]	"RetransTimeouts",
 [InErrs]	"InErrs",
 [OutRsts]	"OutRsts",
@@ -327,6 +358,19 @@
 [HlenErrs]	"HlenErrs",
 [LenErrs]	"LenErrs",
 [OutOfOrder]	"OutOfOrder",
+[Resequenced]	"Resequenced",
+[ReseqBytelim]	"ReseqBytelim",
+[ReseqPktlim]	"ReseqPktlim",
+[Delayack]	"Delayack",
+[Wopenack]	"Wopenack",
+
+[Recovery]	"Recovery",
+[RecoveryDone]	"RecoveryDone",
+[RecoveryRTO]	"RecoveryRTO",
+
+[RecoveryNoSeq]	"RecoveryNoSeq",
+[RecoveryCwind]	"RecoveryCwind",
+[RecoveryPA]	"RecoveryPA",
 };
 
 typedef struct Tcppriv Tcppriv;
@@ -347,7 +391,7 @@
 	QLock	apl;
 	int	ackprocstarted;
 
-	ulong	stats[Nstats];
+	uvlong	stats[Nstats];
 };
 
 /*
@@ -356,34 +400,34 @@
  *  of DoS attack.
  *
  *  To avoid stateless Conv hogs, we pick a sequence number at random.  If
- *  it that number gets acked by the other end, we shut down the connection.
- *  Look for tcpporthogedefense in the code.
+ *  that number gets acked by the other end, we shut down the connection.
+ *  Look for tcpporthogdefense in the code.
  */
 int tcpporthogdefense = 0;
 
-int	addreseq(Tcpctl*, Tcppriv*, Tcp*, Block*, ushort);
-void	getreseq(Tcpctl*, Tcp*, Block**, ushort*);
-void	localclose(Conv*, char*);
-void	procsyn(Conv*, Tcp*);
-void	tcpiput(Proto*, Ipifc*, Block*);
-void	tcpoutput(Conv*);
-int	tcptrim(Tcpctl*, Tcp*, Block**, ushort*);
-void	tcpstart(Conv*, int);
-void	tcptimeout(void*);
-void	tcpsndsyn(Conv*, Tcpctl*);
-void	tcprcvwin(Conv*);
-void	tcpacktimer(void*);
-void	tcpkeepalive(void*);
-void	tcpsetkacounter(Tcpctl*);
-void	tcprxmit(Conv*);
-void	tcpsettimer(Tcpctl*);
-void	tcpsynackrtt(Conv*);
-void	tcpsetscale(Conv*, Tcpctl*, ushort, ushort);
+static	int	addreseq(Fs*, Tcpctl*, Tcppriv*, Tcp*, Block*, ushort);
+static	int	dumpreseq(Tcpctl*);
+static	void	getreseq(Tcpctl*, Tcp*, Block**, ushort*);
+static	void	limbo(Conv*, uchar*, uchar*, Tcp*, int);
+static	void	limborexmit(Proto*);
+static	void	localclose(Conv*, char*);
+static	void	procsyn(Conv*, Tcp*);
+static	void	tcpacktimer(void*);
+static	void	tcpiput(Proto*, Ipifc*, Block*);
+static	void	tcpkeepalive(void*);
+static	void	tcpoutput(Conv*);
+static	void	tcprcvwin(Conv*);
+static	void	tcprxmit(Conv*);
+static	void	tcpsetkacounter(Tcpctl*);
+static	void	tcpsetscale(Conv*, Tcpctl*, ushort, ushort);
+static	void	tcpsettimer(Tcpctl*);
+static	void	tcpsndsyn(Conv*, Tcpctl*);
+static	void	tcpstart(Conv*, int);
+static	void	tcpsynackrtt(Conv*);
+static	void	tcptimeout(void*);
+static	int	tcptrim(Tcpctl*, Tcp*, Block**, ushort*);
 
-static void limborexmit(Proto*);
-static void limbo(Conv*, uchar*, uchar*, Tcp*, int);
-
-void
+static void
 tcpsetstate(Conv *s, uchar newstate)
 {
 	Tcpctl *tcb;
@@ -403,11 +447,6 @@
 	if(newstate == Established)
 		tpriv->stats[CurrEstab]++;
 
-	/**
-	print( "%d/%d %s->%s CurrEstab=%d\n", s->lport, s->rport,
-		tcpstates[oldstate], tcpstates[newstate], tpriv->tstats.tcpCurrEstab );
-	**/
-
 	switch(newstate) {
 	case Closed:
 		qclose(s->rq);
@@ -430,7 +469,12 @@
 tcpconnect(Conv *c, char **argv, int argc)
 {
 	char *e;
+	Tcpctl *tcb;
 
+	tcb = (Tcpctl*)(c->ptcl);
+	if(tcb->state != Closed)
+		return Econinuse;
+
 	e = Fsstdconnect(c, argv, argc);
 	if(e != nil)
 		return e;
@@ -447,12 +491,14 @@
 	s = (Tcpctl*)(c->ptcl);
 
 	return snprint(state, n,
-		"%s qin %d qout %d srtt %d mdev %d cwin %lud swin %lud>>%d rwin %lud>>%d timer.start %d timer.count %d rerecv %d katimer.start %d katimer.count %d\n",
+		"%s qin %d qout %d rq %d.%d srtt %d mdev %d sst %lud cwin %lud swin %lud>>%d rwin %lud>>%d qscale %d timer.start %d timer.count %d rerecv %d katimer.start %d katimer.count %d\n",
 		tcpstates[s->state],
 		c->rq ? qlen(c->rq) : 0,
 		c->wq ? qlen(c->wq) : 0,
-		s->srtt, s->mdev,
-		s->cwind, s->snd.wnd, s->rcv.scale, s->rcv.wnd, s->snd.scale,
+		s->nreseq, s->reseqlen,
+		s->srtt, s->mdev, s->ssthresh,
+		s->cwind, s->snd.wnd, s->snd.scale, s->rcv.wnd, s->rcv.scale,
+		s->qscale,
 		s->timer.start, s->timer.count, s->rerecv,
 		s->katimer.start, s->katimer.count);
 }
@@ -470,7 +516,12 @@
 tcpannounce(Conv *c, char **argv, int argc)
 {
 	char *e;
+	Tcpctl *tcb;
 
+	tcb = (Tcpctl*)(c->ptcl);
+	if(tcb->state != Closed)
+		return Econinuse;
+
 	e = Fsstdannounce(c, argv, argc);
 	if(e != nil)
 		return e;
@@ -524,7 +575,7 @@
 	}
 }
 
-void
+static void
 tcpkick(void *x)
 {
 	Conv *s = x;
@@ -546,7 +597,6 @@
 		/*
 		 * Push data
 		 */
-		tcprcvwin(s);
 		tcpoutput(s);
 		break;
 	default:
@@ -558,7 +608,9 @@
 	poperror();
 }
 
-void
+static int seq_lt(ulong, ulong);
+
+static void
 tcprcvwin(Conv *s)				/* Call with tcb locked */
 {
 	int w;
@@ -568,12 +620,20 @@
 	w = tcb->window - qlen(s->rq);
 	if(w < 0)
 		w = 0;
-	tcb->rcv.wnd = w;
-	if(w == 0)
+	/* RFC 1122 § 4.2.2.17 do not move right edge of window left */
+	if(seq_lt(tcb->rcv.nxt + w, tcb->rcv.wptr))
+		w = tcb->rcv.wptr - tcb->rcv.nxt;
+	if(w != tcb->rcv.wnd)
+	if(w>>tcb->rcv.scale == 0 || tcb->window > 4*tcb->mss && w < tcb->mss/4){
 		tcb->rcv.blocked = 1;
+		netlog(s->p->f, Logtcp, "tcprcvwin: window %lud qlen %d ws %ud lport %d\n",
+			tcb->window, qlen(s->rq), tcb->rcv.scale, s->lport);
+	}
+	tcb->rcv.wnd = w;
+	tcb->rcv.wptr = tcb->rcv.nxt + w;
 }
 
-void
+static void
 tcpacktimer(void *v)
 {
 	Tcpctl *tcb;
@@ -589,7 +649,6 @@
 	qlock(s);
 	if(tcb->state != Closed){
 		tcb->flags |= FORCE;
-		tcprcvwin(s);
 		tcpoutput(s);
 	}
 	qunlock(s);
@@ -597,10 +656,52 @@
 }
 
 static void
+tcpcongestion(Tcpctl *tcb)	/* recompute ssthresh from the amount of data outstanding */
+{
+	ulong inflight;
+
+	inflight = tcb->snd.nxt - tcb->snd.una;	/* sequence space between snd.una and snd.nxt */
+	if(inflight > tcb->cwind)
+		inflight = tcb->cwind;	/* count no more than the congestion window */
+	tcb->ssthresh = inflight / 2;	/* new threshold is half of that */
+	if(tcb->ssthresh < 2*tcb->mss)
+		tcb->ssthresh = 2*tcb->mss;	/* but never below 2*mss */
+}
+
+enum {
+	L		= 2,		/* aggressive slow start; legal values ∈ (1.0, 2.0) */
+};
+
+static void
+tcpabcincr(Tcpctl *tcb, uint acked)	/* grow cwind from newly acked bytes (byte counting, cf. rfc 3465) */
+{
+	uint limit;
+
+	tcb->abcbytes += acked;	/* accumulate acked bytes not yet credited to cwind */
+	if(tcb->cwind < tcb->ssthresh){
+		/* slow start: add the accumulated bytes to cwind, capped at limit */
+		if(tcb->snd.rto)	/* NOTE(review): presumably set after a retransmit timeout; confirm where snd.rto is assigned */
+			limit = 1*tcb->mss;
+		else
+			limit = L*tcb->mss;
+		tcb->cwind += MIN(tcb->abcbytes, limit);
+		tcb->abcbytes = 0;	/* any excess over limit is discarded, not carried over */
+	}
+	else{
+		tcb->snd.rto = 0;	/* cwind has reached ssthresh; clear the flag that tightens the slow-start limit */
+		/* congestion avoidance: add one mss each time a full cwind of bytes has been acked */
+		if(tcb->abcbytes >= tcb->cwind){
+			tcb->abcbytes -= tcb->cwind;	/* keep the remainder for the next increment */
+			tcb->cwind += tcb->mss;
+		}
+	}
+}
+
+static void
 tcpcreate(Conv *c)
 {
 	c->rq = qopen(QMAX, Qcoalesce, tcpacktimer, c);
-	c->wq = qopen((3*QMAX)/2, Qkick, tcpkick, c);
+	c->wq = qopen(QMAX, Qkick, tcpkick, c);
 }
 
 static void
@@ -608,7 +709,7 @@
 {
 	if(newstate != TcptimerON){
 		if(t->state == TcptimerON){
-			// unchain
+			/* unchain */
 			if(priv->timers == t){
 				priv->timers = t->next;
 				if(t->prev != nil)
@@ -622,7 +723,7 @@
 		}
 	} else {
 		if(t->state != TcptimerON){
-			// chain
+			/* chain */
 			if(t->prev != nil || t->next != nil)
 				panic("timerstate2");
 			t->prev = nil;
@@ -635,7 +736,7 @@
 	t->state = newstate;
 }
 
-void
+static void
 tcpackproc(void *a)
 {
 	Tcptimer *t, *tp, *timeo;
@@ -646,6 +747,9 @@
 	tcp = a;
 	priv = tcp->priv;
 
+	while(waserror())
+		;
+
 	for(;;) {
 		tsleep(&up->sleep, return0, 0, MSPTICK);
 
@@ -681,7 +785,7 @@
 	}
 }
 
-void
+static void
 tcpgo(Tcppriv *priv, Tcptimer *t)
 {
 	if(t == nil || t->start == 0)
@@ -693,7 +797,7 @@
 	qunlock(&priv->tl);
 }
 
-void
+static void
 tcphalt(Tcppriv *priv, Tcptimer *t)
 {
 	if(t == nil)
@@ -704,17 +808,16 @@
 	qunlock(&priv->tl);
 }
 
-int
+static int
 backoff(int n)
 {
 	return 1 << n;
 }
 
-void
+static void
 localclose(Conv *s, char *reason)	/* called with tcb locked */
 {
 	Tcpctl *tcb;
-	Reseq *rp,*rp1;
 	Tcppriv *tpriv;
 
 	tpriv = s->p->priv;
@@ -728,12 +831,7 @@
 	tcphalt(tpriv, &tcb->katimer);
 
 	/* Flush reassembly queue; nothing more can arrive */
-	for(rp = tcb->reseq; rp != nil; rp = rp1) {
-		rp1 = rp->next;
-		freeblist(rp->bp);
-		free(rp);
-	}
-	tcb->reseq = nil;
+	dumpreseq(tcb);
 
 	if(tcb->state == Syn_sent)
 		Fsconnected(s, reason);
@@ -747,45 +845,46 @@
 }
 
 /* mtu (- TCP + IP hdr len) of 1st hop */
-int
-tcpmtu(Proto *tcp, uchar *addr, int version, int *scale)
+static int
+tcpmtu(Route *r, int version, uint *scale)
 {
 	Ipifc *ifc;
 	int mtu;
 
-	ifc = findipifc(tcp->f, addr, 0);
-	switch(version){
-	default:
-	case V4:
-		mtu = DEF_MSS;
-		if(ifc != nil)
-			mtu = ifc->maxtu - ifc->m->hsize - (TCP4_PKT + TCP4_HDRSIZE);
-		break;
-	case V6:
-		mtu = DEF_MSS6;
-		if(ifc != nil)
-			mtu = ifc->maxtu - ifc->m->hsize - (TCP6_PKT + TCP6_HDRSIZE);
-		break;
-	}
-	if(ifc != nil){
-		if(ifc->mbps > 100)
-			*scale = HaveWS | 3;
-		else if(ifc->mbps > 10)
-			*scale = HaveWS | 1;
-		else
-			*scale = HaveWS | 0;
-	} else
-		*scale = HaveWS | 0;
+	/*
+	 * set the ws.  it doesn't commit us to anything.
+	 * ws is the ultimate limit to the bandwidth-delay product.
+	 */
+	*scale = Defadvscale;
 
-	return mtu;
+	/*
+	 * currently we do not implement path MTU discovery
+	 * so use interface MTU *only* if directly reachable
+	 * or when we use V4 which allows routers to fragment.
+	 * otherwise, we use the default MSS which assumes a
+	 * safe minimum MTU of 1280 bytes for V6.
+	 */  
+	if(r != nil && (ifc = r->ifc) != nil){
+		mtu = ifc->maxtu - ifc->m->hsize;
+		if(version == V4)
+			return mtu - (TCP4_PKT + TCP4_HDRSIZE);
+		mtu -= TCP6_PKT + TCP6_HDRSIZE;
+		if((r->type & (Rifc|Runi)) != 0 || mtu <= DEF_MSS6)
+			return mtu;
+	}
+	if(version == V6)
+		return DEF_MSS6;
+	else
+		return DEF_MSS;
 }
 
-void
+static void
 inittcpctl(Conv *s, int mode)
 {
 	Tcpctl *tcb;
 	Tcp4hdr* h4;
 	Tcp6hdr* h6;
+	Tcppriv *tpriv;
 	int mss;
 
 	tcb = (Tcpctl*)s->ptcl;
@@ -792,7 +891,7 @@
 
 	memset(tcb, 0, sizeof(Tcpctl));
 
-	tcb->ssthresh = 65535;
+	tcb->ssthresh = QMAX;			/* reset by tcpsetscale() */
 	tcb->srtt = tcp_irtt<<LOGAGAIN;
 	tcb->mdev = 0;
 
@@ -841,19 +940,18 @@
 	}
 
 	tcb->mss = tcb->cwind = mss;
+	tcb->abcbytes = 0;
+	tpriv = s->p->priv;
+	tpriv->stats[Mss] = tcb->mss;
 
 	/* default is no window scaling */
-	tcb->window = QMAX;
-	tcb->rcv.wnd = QMAX;
-	tcb->rcv.scale = 0;
-	tcb->snd.scale = 0;
-	qsetlimit(s->rq, QMAX);
+	tcpsetscale(s, tcb, 0, 0);
 }
 
 /*
  *  called with s qlocked
  */
-void
+static void
 tcpstart(Conv *s, int mode)
 {
 	Tcpctl *tcb;
@@ -865,8 +963,8 @@
 	if(tpriv->ackprocstarted == 0){
 		qlock(&tpriv->apl);
 		if(tpriv->ackprocstarted == 0){
-			sprint(kpname, "#I%dtcpack", s->p->f->dev);
-			kproc(kpname, tcpackproc, s->p, 0);
+			snprint(kpname, sizeof(kpname), "#I%dtcpack", s->p->f->dev);
+			kproc(kpname, tcpackproc, s->p);
 			tpriv->ackprocstarted = 1;
 		}
 		qunlock(&tpriv->apl);
@@ -895,28 +993,28 @@
 }
 
 static char*
-tcpflag(ushort flag)
+tcpflag(char *buf, char *e, ushort flag)
 {
-	static char buf[128];
+	char *p;
 
-	sprint(buf, "%d", flag>>10);	/* Head len */
+	p = seprint(buf, e, "%d", flag>>10);	/* Head len */
 	if(flag & URG)
-		strcat(buf, " URG");
+		p = seprint(p, e, " URG");
 	if(flag & ACK)
-		strcat(buf, " ACK");
+		p = seprint(p, e, " ACK");
 	if(flag & PSH)
-		strcat(buf, " PSH");
+		p = seprint(p, e, " PSH");
 	if(flag & RST)
-		strcat(buf, " RST");
+		p = seprint(p, e, " RST");
 	if(flag & SYN)
-		strcat(buf, " SYN");
+		p = seprint(p, e, " SYN");
 	if(flag & FIN)
-		strcat(buf, " FIN");
-
+		p = seprint(p, e, " FIN");
+	USED(p);
 	return buf;
 }
 
-Block *
+static Block*
 htontcp6(Tcp *tcph, Block *data, Tcp6hdr *ph, Tcpctl *tcb)
 {
 	int dlen;
@@ -940,14 +1038,10 @@
 	if(data) {
 		dlen = blocklen(data);
 		data = padblock(data, hdrlen + TCP6_PKT);
-		if(data == nil)
-			return nil;
 	}
 	else {
 		dlen = 0;
 		data = allocb(hdrlen + TCP6_PKT + 64);	/* the 64 pad is to meet mintu's */
-		if(data == nil)
-			return nil;
 		data->wp += hdrlen + TCP6_PKT;
 	}
 
@@ -1000,7 +1094,7 @@
 	return data;
 }
 
-Block *
+static Block*
 htontcp4(Tcp *tcph, Block *data, Tcp4hdr *ph, Tcpctl *tcb)
 {
 	int dlen;
@@ -1013,7 +1107,7 @@
 	if(tcph->flags & SYN){
 		if(tcph->mss)
 			hdrlen += MSS_LENGTH;
-		if(tcph->ws)
+		if(1)
 			hdrlen += WS_LENGTH;
 		optpad = hdrlen & 3;
 		if(optpad)
@@ -1024,14 +1118,10 @@
 	if(data) {
 		dlen = blocklen(data);
 		data = padblock(data, hdrlen + TCP4_PKT);
-		if(data == nil)
-			return nil;
 	}
 	else {
 		dlen = 0;
 		data = allocb(hdrlen + TCP4_PKT + 64);	/* the 64 pad is to meet mintu's */
-		if(data == nil)
-			return nil;
 		data->wp += hdrlen + TCP4_PKT;
 	}
 
@@ -1055,7 +1145,8 @@
 			hnputs(opt, tcph->mss);
 			opt += 2;
 		}
-		if(tcph->ws != 0){
+		/* always offer.  rfc1323 §2.2 */
+		if(1){
 			*opt++ = WSOPT;
 			*opt++ = WS_LENGTH;
 			*opt++ = tcph->ws;
@@ -1074,7 +1165,7 @@
 	return data;
 }
 
-int
+static int
 ntohtcp6(Tcp *tcph, Block **bpp)
 {
 	Tcp6hdr *h;
@@ -1103,6 +1194,7 @@
 	tcph->urg = nhgets(h->tcpurg);
 	tcph->mss = 0;
 	tcph->ws = 0;
+	tcph->update = 0;
 	tcph->len = nhgets(h->ploadlen) - hdrlen;
 
 	*bpp = pullupblock(*bpp, hdrlen+TCP6_PKT);
@@ -1127,7 +1219,7 @@
 			break;
 		case WSOPT:
 			if(optlen == WS_LENGTH && *(optr+2) <= 14)
-				tcph->ws = HaveWS | *(optr+2);
+				tcph->ws = *(optr+2);
 			break;
 		}
 		n -= optlen;
@@ -1136,7 +1228,7 @@
 	return hdrlen;
 }
 
-int
+static int
 ntohtcp4(Tcp *tcph, Block **bpp)
 {
 	Tcp4hdr *h;
@@ -1166,6 +1258,7 @@
 	tcph->urg = nhgets(h->tcpurg);
 	tcph->mss = 0;
 	tcph->ws = 0;
+	tcph->update = 0;
 	tcph->len = nhgets(h->length) - (hdrlen + TCP4_PKT);
 
 	*bpp = pullupblock(*bpp, hdrlen+TCP4_PKT);
@@ -1190,7 +1283,7 @@
 			break;
 		case WSOPT:
 			if(optlen == WS_LENGTH && *(optr+2) <= 14)
-				tcph->ws = HaveWS | *(optr+2);
+				tcph->ws = *(optr+2);
 			break;
 		}
 		n -= optlen;
@@ -1200,16 +1293,19 @@
 }
 
 /*
- *  For outgiing calls, generate an initial sequence
+ *  For outgoing calls, generate an initial sequence
  *  number and put a SYN on the send queue
  */
-void
+static void
 tcpsndsyn(Conv *s, Tcpctl *tcb)
 {
+	Tcppriv *tpriv;
+
 	tcb->iss = (nrand(1<<16)<<16)|nrand(1<<16);
 	tcb->rttseq = tcb->iss;
 	tcb->snd.wl2 = tcb->iss;
 	tcb->snd.una = tcb->iss;
+	tcb->snd.rxt = tcb->iss;
 	tcb->snd.ptr = tcb->rttseq;
 	tcb->snd.nxt = tcb->rttseq;
 	tcb->flgcnt++;
@@ -1217,7 +1313,9 @@
 	tcb->sndsyntime = NOW;
 
 	/* set desired mss and scale */
-	tcb->mss = tcpmtu(s->p, s->laddr, s->ipversion, &tcb->scale);
+	tcb->mss = tcpmtu(v6lookup(s->p->f, s->raddr, s->laddr, s), s->ipversion, &tcb->scale);
+	tpriv = s->p->priv;
+	tpriv->stats[Mss] = tcb->mss;
 }
 
 void
@@ -1229,7 +1327,7 @@
 	Tcp4hdr ph4;
 	Tcp6hdr ph6;
 
-	netlog(tcp->f, Logtcp, "sndrst: %s", reason);
+	netlog(tcp->f, Logtcp, "sndrst: %s\n", reason);
 
 	tpriv = tcp->priv;
 
@@ -1307,7 +1405,7 @@
  *  send a reset to the remote side and close the conversation
  *  called with s qlocked
  */
-char*
+static char*
 tcphangup(Conv *s)
 {
 	Tcp seg;
@@ -1322,7 +1420,7 @@
 			memset(&seg, 0, sizeof seg);
 			seg.flags = RST | ACK;
 			seg.ack = tcb->rcv.nxt;
-			tcb->rcv.una = 0;
+			tcb->rcv.ackptr = seg.ack;
 			seg.seq = tcb->snd.ptr;
 			seg.wnd = 0;
 			seg.urg = 0;
@@ -1353,7 +1451,7 @@
 /*
  *  (re)send a SYN ACK
  */
-int
+static int
 sndsynack(Proto *tcp, Limbo *lp)
 {
 	Block *hbp;
@@ -1360,7 +1458,7 @@
 	Tcp4hdr ph4;
 	Tcp6hdr ph6;
 	Tcp seg;
-	int scale;
+	uint scale;
 
 	/* make pseudo header */
 	switch(lp->version) {
@@ -1388,11 +1486,12 @@
 		panic("sndrst: version %d", lp->version);
 	}
 
+	memset(&seg, 0, sizeof seg);
 	seg.seq = lp->iss;
 	seg.ack = lp->irs+1;
 	seg.flags = SYN|ACK;
 	seg.urg = 0;
-	seg.mss = tcpmtu(tcp, lp->laddr, lp->version, &scale);
+	seg.mss = tcpmtu(v6lookup(tcp->f, lp->raddr, lp->laddr, nil), lp->version, &scale);
 	seg.wnd = QMAX;
 
 	/* if the other side set scale, we should too */
@@ -1570,6 +1669,18 @@
 	}
 }
 
+static void
+initialwindow(Tcpctl *tcb)
+{
+	/* RFC 3390 initial window */
+	if(tcb->mss < 1095)
+		tcb->cwind = 4*tcb->mss;
+	else if(tcb->mss < 2190)
+		tcb->cwind = 4380;
+	else
+		tcb->cwind = 2*tcb->mss;
+}
+
 /*
  *  come here when we finally get an ACK to our SYN-ACK.
  *  lookup call in limbo.  if found, create a new conversation
@@ -1596,7 +1707,7 @@
 	/* find a call in limbo */
 	h = hashipa(src, segp->source);
 	for(l = &tpriv->lht[h]; (lp = *l) != nil; l = &lp->next){
-		netlog(s->p->f, Logtcp, "tcpincoming s %I,%ux/%I,%ux d %I,%ux/%I,%ux v %d/%d",
+		netlog(s->p->f, Logtcp, "tcpincoming s %I!%ud/%I!%ud d %I!%ud/%I!%ud v %d/%d\n",
 			src, segp->source, lp->raddr, lp->rport,
 			dst, segp->dest, lp->laddr, lp->lport,
 			version, lp->version
@@ -1611,7 +1722,7 @@
 
 		/* we're assuming no data with the initial SYN */
 		if(segp->seq != lp->irs+1 || segp->ack != lp->iss+1){
-			netlog(s->p->f, Logtcp, "tcpincoming s %lux/%lux a %lux %lux",
+			netlog(s->p->f, Logtcp, "tcpincoming s %lux/%lux a %lux %lux\n",
 				segp->seq, lp->irs+1, segp->ack, lp->iss+1);
 			lp = nil;
 		} else {
@@ -1641,6 +1752,8 @@
 
 	tcb->irs = lp->irs;
 	tcb->rcv.nxt = tcb->irs+1;
+	tcb->rcv.wptr = tcb->rcv.nxt;
+	tcb->rcv.wsnt = 0;
 	tcb->rcv.urg = tcb->rcv.nxt;
 
 	tcb->iss = lp->iss;
@@ -1649,19 +1762,24 @@
 	tcb->snd.una = tcb->iss+1;
 	tcb->snd.ptr = tcb->iss+1;
 	tcb->snd.nxt = tcb->iss+1;
+	tcb->snd.rxt = tcb->iss+1;
 	tcb->flgcnt = 0;
 	tcb->flags |= SYNACK;
 
+	/* set desired mss and scale */
+	tcb->mss = tcpmtu(v6lookup(s->p->f, src, dst, s), version, &tcb->scale);
+
 	/* our sending max segment size cannot be bigger than what he asked for */
 	if(lp->mss != 0 && lp->mss < tcb->mss)
 		tcb->mss = lp->mss;
+	tpriv->stats[Mss] = tcb->mss;
 
 	/* window scaling */
 	tcpsetscale(new, tcb, lp->rcvscale, lp->sndscale);
 
-	/* the congestion window always starts out as a single segment */
+	/* congestion window */
 	tcb->snd.wnd = segp->wnd;
-	tcb->cwind = tcb->mss;
+	initialwindow(tcb);
 
 	/* set initial round trip time */
 	tcb->sndsyntime = lp->lastsend+lp->rexmits*SYNACK_RXTIMER;
@@ -1700,7 +1818,7 @@
 	return new;
 }
 
-int
+static int
 seq_within(ulong x, ulong low, ulong high)
 {
 	if(low <= high){
@@ -1714,25 +1832,25 @@
 	return 0;
 }
 
-int
+static int
 seq_lt(ulong x, ulong y)
 {
 	return (int)(x-y) < 0;
 }
 
-int
+static int
 seq_le(ulong x, ulong y)
 {
 	return (int)(x-y) <= 0;
 }
 
-int
+static int
 seq_gt(ulong x, ulong y)
 {
 	return (int)(x-y) > 0;
 }
 
-int
+static int
 seq_ge(ulong x, ulong y)
 {
 	return (int)(x-y) >= 0;
@@ -1742,7 +1860,7 @@
  *  use the time between the first SYN and it's ack as the
  *  initial round trip time
  */
-void
+static void
 tcpsynackrtt(Conv *s)
 {
 	Tcpctl *tcb;
@@ -1760,46 +1878,59 @@
 	tcphalt(tpriv, &tcb->rtt_timer);
 }
 
-void
+static void
 update(Conv *s, Tcp *seg)
 {
 	int rtt, delta;
 	Tcpctl *tcb;
 	ulong acked;
-	ulong expand;
 	Tcppriv *tpriv;
 
+	if(seg->update)
+		return;
+	seg->update = 1;
+
 	tpriv = s->p->priv;
 	tcb = (Tcpctl*)s->ptcl;
 
-	/* if everything has been acked, force output(?) */
-	if(seq_gt(seg->ack, tcb->snd.nxt)) {
-		tcb->flags |= FORCE;
-		return;
+	/* catch zero-window updates, update window & recover */
+	if(tcb->snd.wnd == 0 && seg->wnd > 0)
+	if(seq_lt(seg->ack,  tcb->snd.ptr)){
+		netlog(s->p->f, Logtcp, "tcp: zwu ack %lud una %lud ptr %lud win %lud\n",
+			seg->ack,  tcb->snd.una, tcb->snd.ptr, seg->wnd);
+		tcb->snd.wnd = seg->wnd;
+		goto recovery;
 	}
 
-	/* added by Dong Lin for fast retransmission */
-	if(seg->ack == tcb->snd.una
-	&& tcb->snd.una != tcb->snd.nxt
-	&& seg->len == 0
-	&& seg->wnd == tcb->snd.wnd) {
-
-		/* this is a pure ack w/o window update */
-		netlog(s->p->f, Logtcprxmt, "dupack %lud ack %lud sndwnd %d advwin %d\n",
-			tcb->snd.dupacks, seg->ack, tcb->snd.wnd, seg->wnd);
-
-		if(++tcb->snd.dupacks == TCPREXMTTHRESH) {
-			/*
-			 *  tahoe tcp rxt the packet, half sshthresh,
- 			 *  and set cwnd to one packet
-			 */
+	/* newreno fast retransmit */
+	if(seg->ack == tcb->snd.una)
+	if(tcb->snd.una != tcb->snd.nxt)
+	if(++tcb->snd.dupacks == 3){
+recovery:
+		if(tcb->snd.recovery){
+			tpriv->stats[RecoveryCwind]++;
+			tcb->cwind += tcb->mss;
+		}else if(seq_le(tcb->snd.rxt, seg->ack)){
+			tpriv->stats[Recovery]++;
+			tcb->abcbytes = 0;
 			tcb->snd.recovery = 1;
+			tcb->snd.partialack = 0;
 			tcb->snd.rxt = tcb->snd.nxt;
-			netlog(s->p->f, Logtcprxmt, "fast rxt %lud, nxt %lud\n", tcb->snd.una, tcb->snd.nxt);
+			tcpcongestion(tcb);
+			tcb->cwind = tcb->ssthresh + 3*tcb->mss;
+			netlog(s->p->f, Logtcpwin, "recovery inflate %ld ss %ld @%lud\n",
+				tcb->cwind, tcb->ssthresh, tcb->snd.rxt);
 			tcprxmit(s);
-		} else {
-			/* do reno tcp here. */
+		}else{
+			tpriv->stats[RecoveryNoSeq]++;
+			netlog(s->p->f, Logtcpwin, "!recov %lud not ≤ %lud %ld\n",
+				tcb->snd.rxt, seg->ack, tcb->snd.rxt - seg->ack);
+			/* do not enter fast retransmit */
+			/* do not change ssthresh */
 		}
+	}else if(tcb->snd.recovery){
+		tpriv->stats[RecoveryCwind]++;
+		tcb->cwind += tcb->mss;
 	}
 
 	/*
@@ -1807,6 +1938,9 @@
 	 */
 	if(seq_gt(seg->ack, tcb->snd.wl2)
 	|| (tcb->snd.wl2 == seg->ack && seg->wnd > tcb->snd.wnd)){
+		/* clear dupack if we advance wl2 */
+		if(tcb->snd.wl2 != seg->ack)
+			tcb->snd.dupacks = 0;
 		tcb->snd.wnd = seg->wnd;
 		tcb->snd.wl2 = seg->ack;
 	}
@@ -1816,22 +1950,11 @@
 		 *  don't let us hangup if sending into a closed window and
 		 *  we're still getting acks
 		 */
-		if((tcb->flags&RETRAN) && tcb->snd.wnd == 0){
+		if((tcb->flags&RETRAN) && tcb->snd.wnd == 0)
 			tcb->backedoff = MAXBACKMS/4;
-		}
 		return;
 	}
 
-	/*
-	 *  any positive ack turns off fast rxt,
-	 *  (should we do new-reno on partial acks?)
-	 */
-	if(!tcb->snd.recovery || seq_ge(seg->ack, tcb->snd.rxt)) {
-		tcb->snd.dupacks = 0;
-		tcb->snd.recovery = 0;
-	} else
-		netlog(s->p->f, Logtcp, "rxt next %lud, cwin %ud\n", seg->ack, tcb->cwind);
-
 	/* Compute the new send window size */
 	acked = seg->ack - tcb->snd.una;
 
@@ -1843,24 +1966,41 @@
 		goto done;
 	}
 
-	/* slow start as long as we're not recovering from lost packets */
-	if(tcb->cwind < tcb->snd.wnd && !tcb->snd.recovery) {
-		if(tcb->cwind < tcb->ssthresh) {
-			expand = tcb->mss;
-			if(acked < expand)
-				expand = acked;
+	/*
+	 *  congestion control
+	 */
+	if(tcb->snd.recovery){
+		if(seq_ge(seg->ack, tcb->snd.rxt)){
+			/* recovery finished; deflate window */
+			tpriv->stats[RecoveryDone]++;
+			tcb->snd.dupacks = 0;
+			tcb->snd.recovery = 0;
+			tcb->cwind = (tcb->snd.nxt - tcb->snd.una) + tcb->mss;
+			if(tcb->ssthresh < tcb->cwind)
+				tcb->cwind = tcb->ssthresh;
+			netlog(s->p->f, Logtcpwin, "recovery deflate %ld %ld\n",
+				tcb->cwind, tcb->ssthresh);
+		} else {
+			/* partial ack; we lost more than one segment */
+			tpriv->stats[RecoveryPA]++;
+			if(tcb->cwind > acked)
+				tcb->cwind -= acked;
+			else{
+				netlog(s->p->f, Logtcpwin, "partial ack neg\n");
+				tcb->cwind = tcb->mss;
+			}
+			netlog(s->p->f, Logtcpwin, "partial ack %ld left %ld cwind %ld\n",
+				acked, tcb->snd.rxt - seg->ack, tcb->cwind);
+
+			if(acked >= tcb->mss)
+				tcb->cwind += tcb->mss;
+			tcb->snd.partialack++;
 		}
-		else
-			expand = ((int)tcb->mss * tcb->mss) / tcb->cwind;
+	} else
+		tcpabcincr(tcb, acked);
 
-		if(tcb->cwind + expand < tcb->cwind)
-			expand = tcb->snd.wnd - tcb->cwind;
-		if(tcb->cwind + expand > tcb->snd.wnd)
-			expand = tcb->snd.wnd - tcb->cwind;
-		tcb->cwind += expand;
-	}
-
 	/* Adjust the timers according to the round trip time */
+	/* todo: fix sloppy treatment of overflow cases here. */
 	if(tcb->rtt_timer.state == TcptimerON && seq_ge(seg->ack, tcb->rttseq)) {
 		tcphalt(tpriv, &tcb->rtt_timer);
 		if((tcb->flags&RETRAN) == 0) {
@@ -1891,13 +2031,23 @@
 done:
 	if(qdiscard(s->wq, acked) < acked)
 		tcb->flgcnt--;
-
 	tcb->snd.una = seg->ack;
+
+	/* newreno fast recovery */
+	if(tcb->snd.recovery)
+		tcprxmit(s);
+
 	if(seq_gt(seg->ack, tcb->snd.urg))
 		tcb->snd.urg = seg->ack;
 
-	if(tcb->snd.una != tcb->snd.nxt)
-		tcpgo(tpriv, &tcb->timer);
+	if(tcb->snd.una != tcb->snd.nxt){
+		/* “impatient” variant */
+		if(!tcb->snd.recovery || tcb->snd.partialack == 1){
+			tcb->time = NOW;
+			tcb->timeuna = tcb->snd.una;
+			tcpgo(tpriv, &tcb->timer);
+		}
+	}
 	else
 		tcphalt(tpriv, &tcb->timer);
 
@@ -1904,12 +2054,13 @@
 	if(seq_lt(tcb->snd.ptr, tcb->snd.una))
 		tcb->snd.ptr = tcb->snd.una;
 
-	tcb->flags &= ~RETRAN;
+	if(!tcb->snd.recovery)
+		tcb->flags &= ~RETRAN;
 	tcb->backoff = 0;
 	tcb->backedoff = 0;
 }
 
-void
+static void
 tcpiput(Proto *tcp, Ipifc*, Block *bp)
 {
 	Tcp seg;
@@ -1917,7 +2068,7 @@
 	Tcp6hdr *h6;
 	int hdrlen;
 	Tcpctl *tcb;
-	ushort length;
+	ushort length, csum;
 	uchar source[IPaddrlen], dest[IPaddrlen];
 	Conv *s;
 	Fs *f;
@@ -1980,10 +2131,12 @@
 		h6->ttl = proto;
 		hnputl(h6->vcf, length);
 		if((h6->tcpcksum[0] || h6->tcpcksum[1]) &&
-			ptclcsum(bp, TCP6_IPLEN, length+TCP6_PHDRSIZE)) {
+		    (csum = ptclcsum(bp, TCP6_IPLEN, length+TCP6_PHDRSIZE)) != 0) {
 			tpriv->stats[CsumErrs]++;
 			tpriv->stats[InErrs]++;
-			netlog(f, Logtcp, "bad tcp proto cksum\n");
+			netlog(f, Logtcp,
+			    "bad tcpv6 proto cksum: got %#ux, computed %#ux\n",
+				h6->tcpcksum[0]<<8 | h6->tcpcksum[1], csum);
 			freeblist(bp);
 			return;
 		}
@@ -1995,7 +2148,7 @@
 		if(hdrlen < 0){
 			tpriv->stats[HlenErrs]++;
 			tpriv->stats[InErrs]++;
-			netlog(f, Logtcp, "bad tcp hdr len\n");
+			netlog(f, Logtcp, "bad tcpv6 hdr len\n");
 			return;
 		}
 
@@ -2005,7 +2158,7 @@
 		if(bp == nil){
 			tpriv->stats[LenErrs]++;
 			tpriv->stats[InErrs]++;
-			netlog(f, Logtcp, "tcp len < 0 after trim\n");
+			netlog(f, Logtcp, "tcpv6 len < 0 after trim\n");
 			return;
 		}
 	}
@@ -2016,7 +2169,8 @@
 	/* Look for a matching conversation */
 	s = iphtlook(&tpriv->ht, source, seg.source, dest, seg.dest);
 	if(s == nil){
-		netlog(f, Logtcp, "iphtlook failed");
+		netlog(f, Logtcp, "iphtlook(src %I!%d, dst %I!%d) failed\n",
+			source, seg.source, dest, seg.dest);
 reset:
 		qunlock(tcp);
 		sndrst(tcp, source, dest, length, &seg, version, "no conversation");
@@ -2136,8 +2290,12 @@
 	}
 
 	/* Cut the data to fit the receive window */
+	tcprcvwin(s);
 	if(tcptrim(tcb, &seg, &bp, &length) == -1) {
-		netlog(f, Logtcp, "tcp len < 0, %lud %d\n", seg.seq, length);
+		if(seg.seq+1 != tcb->rcv.nxt || length != 1)
+		netlog(f, Logtcp, "tcp: trim: !inwind: seq %lud-%lud win %lud-%lud l %d from %I\n", 
+			seg.seq, seg.seq + length - 1, 
+			tcb->rcv.nxt, tcb->rcv.nxt + tcb->rcv.wnd-1, length, s->raddr);
 		update(s, &seg);
 		if(qlen(s->wq)+tcb->flgcnt == 0 && tcb->state == Closing) {
 			tcphalt(tpriv, &tcb->rtt_timer);
@@ -2168,12 +2326,15 @@
 	if(seg.seq != tcb->rcv.nxt)
 	if(length != 0 || (seg.flags & (SYN|FIN))) {
 		update(s, &seg);
-		if(addreseq(tcb, tpriv, &seg, bp, length) < 0)
+		if(addreseq(f, tcb, tpriv, &seg, bp, length) < 0)
 			print("reseq %I.%d -> %I.%d\n", s->raddr, s->rport, s->laddr, s->lport);
-		tcb->flags |= FORCE;
+		tcb->flags |= FORCE;		/* force duplicate ack; RFC 5681 §3.2 */
 		goto output;
 	}
 
+	if(tcb->nreseq > 0)
+		tcb->flags |= FORCE;		/* filled hole in sequence space; RFC 5681 §3.2 */
+
 	/*
 	 *  keep looping till we've processed this packet plus any
 	 *  adjacent packets in the resequence queue
@@ -2238,7 +2399,8 @@
 				goto raise;
 			}
 		case Time_wait:
-			tcb->flags |= FORCE;
+			if(seg.flags & FIN)
+				tcb->flags |= FORCE;
 			if(tcb->timer.state != TcptimerON)
 				tcpgo(tpriv, &tcb->timer);
 		}
@@ -2272,34 +2434,12 @@
 				 * receive queue
 				 */
 				if(bp) {
-					bp = packblock(bp);
-					if(bp == nil)
-						panic("tcp packblock");
-					qpassnolim(s->rq, bp);
+					qpassnolim(s->rq, packblock(bp));
 					bp = nil;
-
-					/*
-					 *  Force an ack every 2 data messages.  This is
-					 *  a hack for rob to make his home system run
-					 *  faster.
-					 *
-					 *  this also keeps the standard TCP congestion
-					 *  control working since it needs an ack every
-					 *  2 max segs worth.  This is not quite that,
-					 *  but under a real stream is equivalent since
-					 *  every packet has a max seg in it.
-					 */
-					if(++(tcb->rcv.una) >= 2)
-						tcb->flags |= FORCE;
 				}
 				tcb->rcv.nxt += length;
 
 				/*
-				 *  update our rcv window
-				 */
-				tcprcvwin(s);
-
-				/*
 				 *  turn on the acktimer if there's something
 				 *  to ack
 				 */
@@ -2373,8 +2513,11 @@
 
 			getreseq(tcb, &seg, &bp, &length);
 
-			if(tcptrim(tcb, &seg, &bp, &length) == 0)
+			tcprcvwin(s);
+			if(tcptrim(tcb, &seg, &bp, &length) == 0){
+				tcb->flags |= FORCE;
 				break;
+			}
 		}
 	}
 output:
@@ -2394,15 +2537,15 @@
  *  the lock to ipoput the packet so some care has to be
  *  taken by callers.
  */
-void
+static void
 tcpoutput(Conv *s)
 {
 	Tcp seg;
-	int msgs;
+	uint msgs;
 	Tcpctl *tcb;
 	Block *hbp, *bp;
-	int sndcnt, n;
-	ulong ssize, dsize, usable, sent;
+	int sndcnt;
+	ulong ssize, dsize, sent;
 	Fs *f;
 	Tcppriv *tpriv;
 	uchar version;
@@ -2411,9 +2554,26 @@
 	tpriv = s->p->priv;
 	version = s->ipversion;
 
-	for(msgs = 0; msgs < 100; msgs++) {
-		tcb = (Tcpctl*)s->ptcl;
+	tcb = (Tcpctl*)s->ptcl;
 
+	/* force ack every 2*mss */
+	if((tcb->flags & FORCE) == 0)
+	if(tcb->rcv.nxt - tcb->rcv.ackptr >= 2*tcb->mss){
+		tpriv->stats[Delayack]++;
+		tcb->flags |= FORCE;
+	}
+
+	/* force ack if window opening */
+	if(0)
+	if((tcb->flags & FORCE) == 0){
+		tcprcvwin(s);
+		if((int)(tcb->rcv.wptr - tcb->rcv.wsnt) >= 2*tcb->mss){
+			tpriv->stats[Wopenack]++;
+			tcb->flags |= FORCE;
+		}
+	}
+
+	for(msgs = 0; msgs < 100; msgs++) {
 		switch(tcb->state) {
 		case Listen:
 		case Closed:
@@ -2421,7 +2581,12 @@
 			return;
 		}
 
+		/* Don't send anything else until our SYN has been acked */
+		if(tcb->snd.ptr != tcb->iss && (tcb->flags & SYNACK) == 0)
+			break;
+
 		/* force an ack when a window has opened up */
+		tcprcvwin(s);
 		if(tcb->rcv.blocked && tcb->rcv.wnd > 0){
 			tcb->rcv.blocked = 0;
 			tcb->flags |= FORCE;
@@ -2429,54 +2594,57 @@
 
 		sndcnt = qlen(s->wq)+tcb->flgcnt;
 		sent = tcb->snd.ptr - tcb->snd.una;
-
-		/* Don't send anything else until our SYN has been acked */
-		if(tcb->snd.ptr != tcb->iss && (tcb->flags & SYNACK) == 0)
-			break;
-
-		/* Compute usable segment based on offered window and limit
-		 * window probes to one
-		 */
+		ssize = sndcnt;
 		if(tcb->snd.wnd == 0){
-			if(sent != 0) {
-				if((tcb->flags&FORCE) == 0)
-					break;
-//				tcb->snd.ptr = tcb->snd.una;
+			/* zero window probe */
+			if(sent > 0)
+			if(!(tcb->flags & FORCE))
+				break;	/* already probing, rto re-probes */
+			if(ssize < sent)
+				ssize = 0;
+			else{
+				ssize -= sent;
+				if(ssize > 0)
+					ssize = 1;
 			}
-			usable = 1;
+		} else {
+			/* calculate usable segment size */
+			if(ssize > tcb->cwind)
+				ssize = tcb->cwind;
+			if(ssize > tcb->snd.wnd)
+				ssize = tcb->snd.wnd;
+
+			if(ssize < sent)
+				ssize = 0;
+			else {
+				ssize -= sent;
+				if(ssize > tcb->mss)
+					ssize = tcb->mss;
+			}
 		}
-		else {
-			usable = tcb->cwind;
-			if(tcb->snd.wnd < usable)
-				usable = tcb->snd.wnd;
-			usable -= sent;
-		}
-		ssize = sndcnt-sent;
-		if(ssize && usable < 2)
-			netlog(s->p->f, Logtcp, "throttled snd.wnd %lud cwind %lud\n",
-				tcb->snd.wnd, tcb->cwind);
-		if(usable < ssize)
-			ssize = usable;
-		if(tcb->mss < ssize)
-			ssize = tcb->mss;
+
 		dsize = ssize;
 		seg.urg = 0;
 
-		if(ssize == 0)
-		if((tcb->flags&FORCE) == 0)
-			break;
+		if(!(tcb->flags & FORCE)){
+			if(ssize == 0)
+				break;
+			if(ssize < tcb->mss)
+			if(tcb->snd.nxt == tcb->snd.ptr)
+			if(sent > TCPREXMTTHRESH*tcb->mss)
+				break;
+		}
 
 		tcb->flags &= ~FORCE;
-		tcprcvwin(s);
 
 		/* By default we will generate an ack */
 		tcphalt(tpriv, &tcb->acktimer);
-		tcb->rcv.una = 0;
 		seg.source = s->lport;
 		seg.dest = s->rport;
 		seg.flags = ACK;
 		seg.mss = 0;
 		seg.ws = 0;
+		seg.update = 0;
 		switch(tcb->state){
 		case Syn_sent:
 			seg.flags = 0;
@@ -2516,20 +2684,9 @@
 			}
 		}
 
-		if(sent+dsize == sndcnt)
+		if(sent+dsize == sndcnt && dsize)
 			seg.flags |= PSH;
 
-		/* keep track of balance of resent data */
-		if(seq_lt(tcb->snd.ptr, tcb->snd.nxt)) {
-			n = tcb->snd.nxt - tcb->snd.ptr;
-			if(ssize < n)
-				n = ssize;
-			tcb->resent += n;
-			netlog(f, Logtcp, "rexmit: %I.%d -> %I.%d ptr %lux nxt %lux\n",
-				s->raddr, s->rport, s->laddr, s->lport, tcb->snd.ptr, tcb->snd.nxt);
-			tpriv->stats[RetransSegs]++;
-		}
-
 		tcb->snd.ptr += ssize;
 
 		/* Pull up the send pointer so we can accept acks
@@ -2565,13 +2722,17 @@
 		 * expect acknowledges
 		 */
 		if(ssize != 0){
-			if(tcb->timer.state != TcptimerON)
+			if(tcb->timer.state != TcptimerON){
+				tcb->time = NOW;
+				tcb->timeuna = tcb->snd.una;
 				tcpgo(tpriv, &tcb->timer);
+			}
 
 			/*  If round trip timer isn't running, start it.
 			 *  measure the longest packet only in case the
 			 *  transmission time dominates RTT
 			 */
+			if(tcb->snd.retransmit == 0)
 			if(tcb->rtt_timer.state != TcptimerON)
 			if(ssize == tcb->mss) {
 				tcpgo(tpriv, &tcb->rtt_timer);
@@ -2580,6 +2741,10 @@
 		}
 
 		tpriv->stats[OutSegs]++;
+		if(tcb->snd.retransmit)
+			tpriv->stats[RetransSegsSent]++;
+		tcb->rcv.ackptr = seg.ack;
+		tcb->rcv.wsnt = tcb->rcv.wptr;
 
 		/* put off the next keep alive */
 		tcpgo(tpriv, &tcb->katimer);
@@ -2600,9 +2765,8 @@
 		default:
 			panic("tcpoutput2: version %d", version);
 		}
-		if((msgs%4) == 1){
+		if((msgs%4) == 3){
 			qunlock(s);
-			sched();
 			qlock(s);
 		}
 	}
@@ -2611,7 +2775,7 @@
 /*
  *  the BSD convention (hack?) for keep alives.  resend last uchar acked.
  */
-void
+static void
 tcpsendka(Conv *s)
 {
 	Tcp seg;
@@ -2621,6 +2785,7 @@
 	tcb = (Tcpctl*)s->ptcl;
 
 	dbp = nil;
+	memset(&seg, 0, sizeof seg);
 	seg.urg = 0;
 	seg.source = s->lport;
 	seg.dest = s->rport;
@@ -2632,7 +2797,8 @@
 	else
 		seg.seq = tcb->snd.una-1;
 	seg.ack = tcb->rcv.nxt;
-	tcb->rcv.una = 0;
+	tcb->rcv.ackptr = seg.ack;
+	tcprcvwin(s);
 	seg.wnd = tcb->rcv.wnd;
 	if(tcb->state == Finwait2){
 		seg.flags |= FIN;
@@ -2666,7 +2832,7 @@
 /*
  *  set connection to time out after 12 minutes
  */
-void
+static void
 tcpsetkacounter(Tcpctl *tcb)
 {
 	tcb->kacounter = (12 * 60 * 1000) / (tcb->katimer.start*MSPTICK);
@@ -2678,7 +2844,7 @@
  *  if we've timed out, close the connection
  *  otherwise, send a keepalive and restart the timer
  */
-void
+static void
 tcpkeepalive(void *v)
 {
 	Tcpctl *tcb;
@@ -2706,7 +2872,7 @@
 /*
  *  start keepalive timer
  */
-char*
+static char*
 tcpstartka(Conv *s, char **f, int n)
 {
 	Tcpctl *tcb;
@@ -2729,7 +2895,7 @@
 /*
  *  turn checksums on/off
  */
-char*
+static char*
 tcpsetchecksum(Conv *s, char **f, int)
 {
 	Tcpctl *tcb;
@@ -2740,30 +2906,38 @@
 	return nil;
 }
 
-void
+/*
+ *  retransmit (at most) one segment at snd.una.
+ *  preserve cwind & snd.ptr
+ */
+static void
 tcprxmit(Conv *s)
 {
 	Tcpctl *tcb;
+	Tcppriv *tpriv;
+	ulong tcwind, tptr;
 
 	tcb = (Tcpctl*)s->ptcl;
-
 	tcb->flags |= RETRAN|FORCE;
-	tcb->snd.ptr = tcb->snd.una;
 
-	/*
-	 *  We should be halving the slow start threshhold (down to one
-	 *  mss) but leaving it at mss seems to work well enough
-	 */
- 	tcb->ssthresh = tcb->mss;
-
-	/*
-	 *  pull window down to a single packet
-	 */
+	tptr = tcb->snd.ptr;
+	tcwind = tcb->cwind;
+	tcb->snd.ptr = tcb->snd.una;
 	tcb->cwind = tcb->mss;
+	tcb->snd.retransmit = 1;
 	tcpoutput(s);
+	tcb->snd.retransmit = 0;
+	tcb->cwind = tcwind;
+	tcb->snd.ptr = tptr;
+
+	tpriv = s->p->priv;
+	tpriv->stats[RetransSegs]++;
 }
 
-void
+/*
+ *  todo: RFC 4138 F-RTO
+ */
+static void
 tcptimeout(void *arg)
 {
 	Conv *s;
@@ -2792,11 +2966,29 @@
 			localclose(s, Etimedout);
 			break;
 		}
-		netlog(s->p->f, Logtcprxmt, "timeout rexmit 0x%lux %d/%d\n", tcb->snd.una, tcb->timer.start, NOW);
+		netlog(s->p->f, Logtcprxmt, "rxm %d/%d %ldms %lud rto %d %lud %s\n",
+			tcb->srtt, tcb->mdev, NOW-tcb->time,
+			tcb->snd.una-tcb->timeuna, tcb->snd.rto, tcb->snd.ptr,
+			tcpstates[s->state]);
 		tcpsettimer(tcb);
+		if(tcb->snd.rto == 0)
+			tcpcongestion(tcb);
 		tcprxmit(s);
+		tcb->snd.ptr = tcb->snd.una;
+		tcb->cwind = tcb->mss;
+		tcb->snd.rto = 1;
 		tpriv->stats[RetransTimeouts]++;
-		tcb->snd.dupacks = 0;
+
+		if(tcb->snd.recovery){
+			tcb->snd.dupacks = 0;			/* reno rto */
+			tcb->snd.recovery = 0;
+			tpriv->stats[RecoveryRTO]++;
+			tcb->snd.rxt = tcb->snd.nxt;
+			netlog(s->p->f, Logtcpwin,
+				"rto recovery rxt @%lud\n", tcb->snd.nxt);
+		}
+
+		tcb->abcbytes = 0;
 		break;
 	case Time_wait:
 		localclose(s, nil);
@@ -2808,7 +3000,7 @@
 	poperror();
 }
 
-int
+static int
 inwindow(Tcpctl *tcb, int seq)
 {
 	return seq_within(seq, tcb->rcv.nxt, tcb->rcv.nxt+tcb->rcv.wnd-1);
@@ -2817,36 +3009,83 @@
 /*
  *  set up state for a received SYN (or SYN ACK) packet
  */
-void
+static void
 procsyn(Conv *s, Tcp *seg)
 {
 	Tcpctl *tcb;
+	Tcppriv *tpriv;
 
 	tcb = (Tcpctl*)s->ptcl;
 	tcb->flags |= FORCE;
 
 	tcb->rcv.nxt = seg->seq + 1;
+	tcb->rcv.wptr = tcb->rcv.nxt;
+	tcb->rcv.wsnt = 0;
 	tcb->rcv.urg = tcb->rcv.nxt;
 	tcb->irs = seg->seq;
 
 	/* our sending max segment size cannot be bigger than what he asked for */
-	if(seg->mss != 0 && seg->mss < tcb->mss)
+	if(seg->mss != 0 && seg->mss < tcb->mss) {
 		tcb->mss = seg->mss;
+		tpriv = s->p->priv;
+		tpriv->stats[Mss] = tcb->mss;
+	}
 
-	/* the congestion window always starts out as a single segment */
+	/* if the server does not support ws option, disable window scaling */
+	if(seg->ws == 0){
+		tcb->scale = 0;
+		tcb->snd.scale = 0;
+	}
+
 	tcb->snd.wnd = seg->wnd;
-	tcb->cwind = tcb->mss;
+	initialwindow(tcb);
 }
 
-int
-addreseq(Tcpctl *tcb, Tcppriv *tpriv, Tcp *seg, Block *bp, ushort length)
+static int
+dumpreseq(Tcpctl *tcb)
 {
-	Reseq *rp, *rp1;
-	int i, rqlen, qmax;
+	Reseq *r, *next;
 
+	for(r = tcb->reseq; r != nil; r = next){
+		next = r->next;
+		freeblist(r->bp);
+		free(r);
+	}
+	tcb->reseq = nil;
+	tcb->nreseq = 0;
+	tcb->reseqlen = 0;
+	return -1;
+}
+
+static void
+logreseq(Fs *f, Reseq *r, ulong n)
+{
+	char *s;
+
+	for(; r != nil; r = r->next){
+		s = nil;
+		if(r->next == nil && r->seg.seq != n)
+			s = "hole/end";
+		else if(r->next == nil)
+			s = "end";
+		else if(r->seg.seq != n)
+			s = "hole";
+		if(s != nil)
+			netlog(f, Logtcp, "%s %lud-%lud (%ld) %#ux\n", s,
+				n, r->seg.seq, r->seg.seq-n, r->seg.flags);
+		n = r->seg.seq + r->seg.len;
+	}
+}
+
+static int
+addreseq(Fs *f, Tcpctl *tcb, Tcppriv *tpriv, Tcp *seg, Block *bp, ushort length)
+{
+	Reseq *rp, **rr;
+	int qmax;
+
 	rp = malloc(sizeof(Reseq));
 	if(rp == nil){
-		freeblist(bp);	/* bp always consumed by add_reseq */
+		freeblist(bp);	/* bp always consumed by addreseq */
 		return 0;
 	}
 
@@ -2854,56 +3093,39 @@
 	rp->bp = bp;
 	rp->length = length;
 
-	/* Place on reassembly list sorting by starting seq number */
-	rp1 = tcb->reseq;
-	if(rp1 == nil || seq_lt(seg->seq, rp1->seg.seq)) {
-		rp->next = rp1;
-		tcb->reseq = rp;
-		if(rp->next != nil)
-			tpriv->stats[OutOfOrder]++;
-		return 0;
-	}
+	tcb->reseqlen += length;
+	tcb->nreseq++;
 
-	rqlen = 0;
-	for(i = 0;; i++) {
-		rqlen += rp1->length;
-		if(rp1->next == nil || seq_lt(seg->seq, rp1->next->seg.seq)) {
-			rp->next = rp1->next;
-			rp1->next = rp;
+	/* Place on reassembly list sorting by starting seq number */
+	for(rr = &tcb->reseq;; rr = &(*rr)->next)
+		if(*rr == nil || seq_lt(seg->seq, (*rr)->seg.seq)){
+			rp->next = *rr;
+			*rr = rp;
+			tpriv->stats[Resequenced]++;
 			if(rp->next != nil)
 				tpriv->stats[OutOfOrder]++;
 			break;
 		}
-		rp1 = rp1->next;
-	}
-	qmax = QMAX<<tcb->rcv.scale;
-	if(rqlen > qmax){
-		print("resequence queue > window: %d > %d\n", rqlen, qmax);
-		i = 0;
-	  	for(rp1 = tcb->reseq; rp1 != nil; rp1 = rp1->next){
-	  		print("%#lux %#lux %#ux\n", rp1->seg.seq,
-	  			rp1->seg.ack, rp1->seg.flags);
-			if(i++ > 10){
-				print("...\n");
-				break;
-			}
-		}
 
-		// delete entire reassembly queue; wait for retransmit.
-		// - should we be smarter and only delete the tail?
-		for(rp = tcb->reseq; rp != nil; rp = rp1){
-			rp1 = rp->next;
-			freeblist(rp->bp);
-			free(rp);
-		}
-		tcb->reseq = nil;
-
-	  	return -1;
+	qmax = tcb->window;
+	if(tcb->reseqlen > qmax){
+		netlog(f, Logtcp, "tcp: reseq: queue > window: %d > %d; %d packets\n", tcb->reseqlen, qmax, tcb->nreseq);
+		logreseq(f, tcb->reseq, tcb->rcv.nxt);
+		tpriv->stats[ReseqBytelim]++;
+		return dumpreseq(tcb);
 	}
+	qmax = tcb->window / tcb->mss;		/* ~190 for qscale==2, 390 for qscale=3 */
+	if(tcb->nreseq > qmax){
+		netlog(f, Logtcp, "resequence queue > packets: %d %d; %d bytes\n", tcb->nreseq, qmax, tcb->reseqlen);
+		logreseq(f, tcb->reseq, tcb->rcv.nxt);
+		tpriv->stats[ReseqPktlim]++;
+		return dumpreseq(tcb);
+	}
+
 	return 0;
 }
 
-void
+static void
 getreseq(Tcpctl *tcb, Tcp *seg, Block **bp, ushort *length)
 {
 	Reseq *rp;
@@ -2918,10 +3140,13 @@
 	*bp = rp->bp;
 	*length = rp->length;
 
+	tcb->nreseq--;
+	tcb->reseqlen -= rp->length;
+
 	free(rp);
 }
 
-int
+static int
 tcptrim(Tcpctl *tcb, Tcp *seg, Block **bp, ushort *length)
 {
 	ushort len;
@@ -2992,7 +3217,7 @@
 	return 0;
 }
 
-void
+static void
 tcpadvise(Proto *tcp, Block *bp, char *msg)
 {
 	Tcp4hdr *h4;
@@ -3011,8 +3236,7 @@
 		v4tov6(source, h4->tcpsrc);
 		psource = nhgets(h4->tcpsport);
 		pdest = nhgets(h4->tcpdport);
-	}
-	else {
+	} else {
 		ipmove(dest, h6->tcpdst);
 		ipmove(source, h6->tcpsrc);
 		psource = nhgets(h6->tcpsport);
@@ -3021,8 +3245,7 @@
 
 	/* Look for a connection */
 	qlock(tcp);
-	for(p = tcp->conv; *p; p++) {
-		s = *p;
+	for(p = tcp->conv; (s = *p) != nil; p++) {
 		tcb = (Tcpctl*)s->ptcl;
 		if(s->rport == pdest)
 		if(s->lport == psource)
@@ -3029,6 +3252,8 @@
 		if(tcb->state != Closed)
 		if(ipcmp(s->raddr, dest) == 0)
 		if(ipcmp(s->laddr, source) == 0){
+			if(s->ignoreadvice)
+				break;
 			qlock(s);
 			qunlock(tcp);
 			switch(tcb->state){
@@ -3058,9 +3283,11 @@
 }
 
 /* called with c qlocked */
-char*
+static char*
 tcpctl(Conv* c, char** f, int n)
 {
+	if(n == 1 && strcmp(f[0], "close") == 0)
+		return tcpclose(c), nil;
 	if(n == 1 && strcmp(f[0], "hangup") == 0)
 		return tcphangup(c);
 	if(n >= 1 && strcmp(f[0], "keepalive") == 0)
@@ -3072,7 +3299,7 @@
 	return "unknown control request";
 }
 
-int
+static int
 tcpstats(Proto *tcp, char *buf, int len)
 {
 	Tcppriv *priv;
@@ -3083,7 +3310,7 @@
 	p = buf;
 	e = p+len;
 	for(i = 0; i < Nstats; i++)
-		p = seprint(p, e, "%s: %lud\n", statnames[i], priv->stats[i]);
+		p = seprint(p, e, "%s: %llud\n", statnames[i], priv->stats[i]);
 	return p - buf;
 }
 
@@ -3096,7 +3323,7 @@
  *  of questionable validity so we try to use them only when we're
  *  up against the wall.
  */
-int
+static int
 tcpgc(Proto *tcp)
 {
 	Conv *c, **pp, **ep;
@@ -3104,7 +3331,7 @@
 	Tcpctl *tcb;
 
 
-	n = natgc(tcp->ipproto);
+	n = 0;
 	ep = &tcp->conv[tcp->nc];
 	for(pp = tcp->conv; pp < ep; pp++) {
 		c = *pp;
@@ -3116,13 +3343,13 @@
 		switch(tcb->state){
 		case Syn_received:
 			if(NOW - tcb->time > 5000){
-				localclose(c, "timed out");
+				localclose(c, Etimedout);
 				n++;
 			}
 			break;
 		case Finwait2:
 			if(NOW - tcb->time > 5*60*1000){
-				localclose(c, "timed out");
+				localclose(c, Etimedout);
 				n++;
 			}
 			break;
@@ -3132,7 +3359,7 @@
 	return n;
 }
 
-void
+static void
 tcpsettimer(Tcpctl *tcb)
 {
 	int x;
@@ -3141,9 +3368,9 @@
 	x = backoff(tcb->backoff) *
 		(tcb->mdev + (tcb->srtt>>LOGAGAIN) + MSPTICK) / MSPTICK;
 
-	/* bounded twixt 1/2 and 64 seconds */
-	if(x < 500/MSPTICK)
-		x = 500/MSPTICK;
+	/* bounded twixt 0.3 and 64 seconds */
+	if(x < 300/MSPTICK)
+		x = 300/MSPTICK;
 	else if(x > (64000/MSPTICK))
 		x = 64000/MSPTICK;
 	tcb->timer.start = x;
@@ -3177,18 +3404,37 @@
 	Fsproto(fs, tcp);
 }
 
-void
+static void
 tcpsetscale(Conv *s, Tcpctl *tcb, ushort rcvscale, ushort sndscale)
 {
-	if(rcvscale){
-		tcb->rcv.scale = rcvscale & 0xff;
-		tcb->snd.scale = sndscale & 0xff;
-		tcb->window = QMAX<<tcb->snd.scale;
-		qsetlimit(s->rq, tcb->window);
-	} else {
-		tcb->rcv.scale = 0;
-		tcb->snd.scale = 0;
-		tcb->window = QMAX;
-		qsetlimit(s->rq, tcb->window);
-	}
+	/*
+	 * guess at reasonable queue sizes.  there's no current way 
+	 * to know how many nic receive buffers we can safely tie up in the
+	 * tcp stack, and we don't adjust our queues to maximize throughput
+	 * and minimize bufferbloat.  n.b. the offer (rcvscale) needs to be
+	 * respected, but we still control our own buffer commitment by
+	 * keeping a separate qscale.
+	 */
+	tcb->rcv.scale = rcvscale & 0xff;
+	tcb->snd.scale = sndscale & 0xff;
+	tcb->qscale = rcvscale & 0xff;
+	if(rcvscale > Maxqscale)
+		tcb->qscale = Maxqscale;
+
+	if(rcvscale != tcb->rcv.scale)
+		netlog(s->p->f, Logtcp, "tcpsetscale: window %lud qlen %d >> window %ud lport %d\n",
+			tcb->window, qlen(s->rq), QMAX<<tcb->qscale, s->lport);
+	tcb->window = QMAX<<tcb->qscale;
+	tcb->ssthresh = tcb->window;
+
+	/*
+	 * it's important to set wq large enough to cover the full
+	 * bandwidth-delay product.  it's possible to be in loss
+	 * recovery with a big window, and we need to keep sending
+	 * into the inflated window.  the difference can be huge
+	 * for even modest (70ms) ping times.
+	 */
+	qsetlimit(s->rq, QMAX<<tcb->qscale);
+	qsetlimit(s->wq, QMAX<<tcb->qscale);
+	tcprcvwin(s);
 }
diff -u a/os/ip//udp.c b/os/ip//udp.c
--- a/os/ip//udp.c
+++ b/os/ip//udp.c
@@ -24,7 +24,6 @@
 
 	IP_UDPPROTO	= 17,
 	UDP_USEAD7	= 52,
-	UDP_USEAD6	= 36,
 
 	Udprxms		= 200,
 	Udptickms	= 100,
@@ -40,7 +39,7 @@
 	uchar	length[2];	/* packet length */
 	uchar	id[2];		/* Identification */
 	uchar	frag[2];	/* Fragment information */
-	uchar	Unused;	
+	uchar	Unused;
 	uchar	udpproto;	/* Protocol */
 	uchar	udpplen[2];	/* Header plus data length */
 	uchar	udpsrc[IPv4addrlen];	/* Ip source */
@@ -73,10 +72,10 @@
 typedef struct Udpstats Udpstats;
 struct Udpstats
 {
-	ulong	udpInDatagrams;
+	uvlong	udpInDatagrams;
 	ulong	udpNoPorts;
 	ulong	udpInErrors;
-	ulong	udpOutDatagrams;
+	uvlong	udpOutDatagrams;
 };
 
 typedef struct Udppriv Udppriv;
@@ -101,7 +100,6 @@
 typedef struct Udpcb Udpcb;
 struct Udpcb
 {
-	QLock;
 	uchar	headers;
 };
 
@@ -125,7 +123,7 @@
 static int
 udpstate(Conv *c, char *state, int n)
 {
-	return snprint(state, n, "%s qin %d qout %d",
+	return snprint(state, n, "%s qin %d qout %d\n",
 		c->inuse ? "Open" : "Closed",
 		c->rq ? qlen(c->rq) : 0,
 		c->wq ? qlen(c->wq) : 0
@@ -151,7 +149,7 @@
 static void
 udpcreate(Conv *c)
 {
-	c->rq = qopen(64*1024, Qmsg, 0, 0);
+	c->rq = qopen(512*1024, Qmsg, 0, 0);
 	c->wq = qbypass(udpkick, c);
 }
 
@@ -175,8 +173,6 @@
 
 	ucb = (Udpcb*)c->ptcl;
 	ucb->headers = 0;
-
-	qunlock(c);
 }
 
 void
@@ -192,12 +188,13 @@
 	Udppriv *upriv;
 	Fs *f;
 	int version;
-	Conv *rc;
+	Routehint *rh;
+	ushort csum;
 
 	upriv = c->p->priv;
 	f = c->p->f;
 
-	netlog(c->p->f, Logudp, "udp: kick\n");
+//	netlog(c->p->f, Logudp, "udp: kick\n");	/* frequent and uninteresting */
 	if(bp == nil)
 		return;
 
@@ -219,21 +216,6 @@
 		rport = nhgets(bp->rp);
 		bp->rp += 2+2;			/* Ignore local port */
 		break;
-	case 6:
-		/* get user specified addresses */
-		bp = pullupblock(bp, UDP_USEAD6);
-		if(bp == nil)
-			return;
-		ipmove(raddr, bp->rp);
-		bp->rp += IPaddrlen;
-		ipmove(laddr, bp->rp);
-		bp->rp += IPaddrlen;
-		/* pick interface closest to dest */
-		if(ipforme(f, laddr) != Runi)
-			findlocalip(f, laddr, raddr);
-		rport = nhgets(bp->rp);
-		bp->rp += 2+2;			/* Ignore local port */
-		break;
 	default:
 		rport = 0;
 		break;
@@ -240,18 +222,12 @@
 	}
 
 	if(ucb->headers) {
-		if(memcmp(laddr, v4prefix, IPv4off) == 0 ||
-		    ipcmp(laddr, IPnoaddr) == 0)
+		if(isv4(laddr) || ipcmp(laddr, IPnoaddr) == 0)
 			version = V4;
 		else
 			version = V6;
 	} else {
-		if( (memcmp(c->raddr, v4prefix, IPv4off) == 0 &&
-			memcmp(c->laddr, v4prefix, IPv4off) == 0)
-			|| ipcmp(c->raddr, IPnoaddr) == 0)
-			version = V4;
-		else
-			version = V6;
+		version = convipvers(c);
 	}
 
 	dlen = blocklen(bp);
@@ -260,9 +236,6 @@
 	switch(version){
 	case V4:
 		bp = padblock(bp, UDP4_IPHDR_SZ+UDP_UDPHDR_SZ);
-		if(bp == nil)
-			return;
-
 		uh4 = (Udp4hdr *)(bp->rp);
 		ptcllen = dlen + UDP_UDPHDR_SZ;
 		uh4->Unused = 0;
@@ -274,7 +247,7 @@
 			v6tov4(uh4->udpdst, raddr);
 			hnputs(uh4->udpdport, rport);
 			v6tov4(uh4->udpsrc, laddr);
-			rc = nil;
+			rh = nil;
 		} else {
 			v6tov4(uh4->udpdst, c->raddr);
 			hnputs(uh4->udpdport, c->rport);
@@ -281,25 +254,26 @@
 			if(ipcmp(c->laddr, IPnoaddr) == 0)
 				findlocalip(f, c->laddr, c->raddr);
 			v6tov4(uh4->udpsrc, c->laddr);
-			rc = c;
+			rh = c;
 		}
 		hnputs(uh4->udpsport, c->lport);
 		hnputs(uh4->udplen, ptcllen);
 		uh4->udpcksum[0] = 0;
 		uh4->udpcksum[1] = 0;
-		hnputs(uh4->udpcksum, 
-		       ptclcsum(bp, UDP4_PHDR_OFF, dlen+UDP_UDPHDR_SZ+UDP4_PHDR_SZ));
+		csum = ptclcsum(bp, UDP4_PHDR_OFF, dlen+UDP_UDPHDR_SZ+UDP4_PHDR_SZ);
+		if(csum == 0)
+			csum = 0xffff;	/* -0 */
+		hnputs(uh4->udpcksum, csum);
 		uh4->vihl = IP_VER4;
-		ipoput4(f, bp, 0, c->ttl, c->tos, rc);
+		ipoput4(f, bp, 0, c->ttl, c->tos, rh);
 		break;
 
 	case V6:
+		/*
+		 * using the v6 ip header to create pseudo header
+		 * first then reset it to the normal ip header
+		 */
 		bp = padblock(bp, UDP6_IPHDR_SZ+UDP_UDPHDR_SZ);
-		if(bp == nil)
-			return;
-
-		// using the v6 ip header to create pseudo header 
-		// first then reset it to the normal ip header
 		uh6 = (Udp6hdr *)(bp->rp);
 		memset(uh6, 0, 8);
 		ptcllen = dlen + UDP_UDPHDR_SZ;
@@ -309,7 +283,7 @@
 			ipmove(uh6->udpdst, raddr);
 			hnputs(uh6->udpdport, rport);
 			ipmove(uh6->udpsrc, laddr);
-			rc = nil;
+			rh = nil;
 		} else {
 			ipmove(uh6->udpdst, c->raddr);
 			hnputs(uh6->udpdport, c->rport);
@@ -316,19 +290,21 @@
 			if(ipcmp(c->laddr, IPnoaddr) == 0)
 				findlocalip(f, c->laddr, c->raddr);
 			ipmove(uh6->udpsrc, c->laddr);
-			rc = c;
+			rh = c;
 		}
 		hnputs(uh6->udpsport, c->lport);
 		hnputs(uh6->udplen, ptcllen);
 		uh6->udpcksum[0] = 0;
 		uh6->udpcksum[1] = 0;
-		hnputs(uh6->udpcksum, 
-		       ptclcsum(bp, UDP6_PHDR_OFF, dlen+UDP_UDPHDR_SZ+UDP6_PHDR_SZ));
+		csum = ptclcsum(bp, UDP6_PHDR_OFF, dlen+UDP_UDPHDR_SZ+UDP6_PHDR_SZ);
+		if(csum == 0)
+			csum = 0xffff;	/* -0 */
+		hnputs(uh6->udpcksum, csum);
 		memset(uh6, 0, 8);
 		uh6->viclfl[0] = IP_VER6;
 		hnputs(uh6->len, ptcllen);
 		uh6->nextheader = IP_UDPPROTO;
-		ipoput6(f, bp, 0, c->ttl, c->tos, rc);
+		ipoput6(f, bp, 0, c->ttl, c->tos, rh);
 		break;
 
 	default:
@@ -360,10 +336,8 @@
 	uh4 = (Udp4hdr*)(bp->rp);
 	version = ((uh4->vihl&0xF0)==IP_VER6) ? V6 : V4;
 
-	/*
-	 * Put back pseudo header for checksum 
-	 * (remember old values for icmpnoconv())
-	 */
+	/* Put back pseudo header for checksum
+	 * (remember old values for icmpnoconv()) */
 	switch(version) {
 	case V4:
 		ottl = uh4->Unused;
@@ -423,7 +397,7 @@
 
 	c = iphtlook(&upriv->ht, raddr, rport, laddr, lport);
 	if(c == nil){
-		/* no converstation found */
+		/* no conversation found */
 		upriv->ustats.udpNoPorts++;
 		qunlock(udp);
 		netlog(f, Logudp, "udp: no conv %I!%d -> %I!%d\n", raddr, rport,
@@ -434,7 +408,7 @@
 			icmpnoconv(f, bp);
 			break;
 		case V6:
-			icmphostunr(f, ifc, bp, icmp6_port_unreach, 0);
+			icmphostunr6(f, ifc, bp, Icmp6_port_unreach, 0);
 			break;
 		default:
 			panic("udpiput2: version %d", version);
@@ -448,18 +422,8 @@
 	if(c->state == Announced){
 		if(ucb->headers == 0){
 			/* create a new conversation */
-			if(ipforme(f, laddr) != Runi) {
-				switch(version){
-				case V4:
-					v4tov6(laddr, ifc->lifc->local);
-					break;
-				case V6:
-					ipmove(laddr, ifc->lifc->local);
-					break;
-				default:
-					panic("udpiput3: version %d", version);
-				}
-			}
+			if(ipforme(f, laddr) != Runi)
+				ipv6local(ifc, laddr, 0, raddr);
 			c = Fsnewcall(c, raddr, rport, laddr, lport, version);
 			if(c == nil){
 				qunlock(udp);
@@ -507,33 +471,21 @@
 		p = bp->rp;
 		ipmove(p, raddr); p += IPaddrlen;
 		ipmove(p, laddr); p += IPaddrlen;
-		ipmove(p, ifc->lifc->local); p += IPaddrlen;
+		if(!ipv6local(ifc, p, 0, raddr))
+			ipmove(p, ifc->lifc != nil ? ifc->lifc->local : IPnoaddr);
+		p += IPaddrlen;
 		hnputs(p, rport); p += 2;
 		hnputs(p, lport);
 		break;
-	case 6:
-		/* pass the src address */
-		bp = padblock(bp, UDP_USEAD6);
-		p = bp->rp;
-		ipmove(p, raddr); p += IPaddrlen;
-		ipmove(p, ipforme(f, laddr)==Runi ? laddr : ifc->lifc->local); p += IPaddrlen;
-		hnputs(p, rport); p += 2;
-		hnputs(p, lport);
-		break;
 	}
 
-	if(bp->next)
-		bp = concatblock(bp);
-
 	if(qfull(c->rq)){
-		qunlock(c);
-		netlog(f, Logudp, "udp: qfull %I.%d -> %I.%d\n", raddr, rport,
-		       laddr, lport);
+		netlog(f, Logudp, "udp: qfull %I.%d -> %I.%d\n",
+			raddr, rport, laddr, lport);
 		freeblist(bp);
-		return;
+	} else {
+		qpass(c->rq, concatblock(bp));
 	}
-
-	qpass(c->rq, bp);
 	qunlock(c);
 
 }
@@ -545,11 +497,13 @@
 
 	ucb = (Udpcb*)c->ptcl;
 	if(n == 1){
-		if(strcmp(f[0], "oldheaders") == 0){
-			ucb->headers = 6;
+		if(strcmp(f[0], "hangup") == 0){
+			qhangup(c->rq, nil);
+			qhangup(c->wq, nil);
 			return nil;
-		} else if(strcmp(f[0], "headers") == 0){
-			ucb->headers = 7;
+		}
+		if(strcmp(f[0], "headers") == 0){
+			ucb->headers = 7;	/* new headers format */
 			return nil;
 		}
 	}
@@ -564,34 +518,25 @@
 	uchar source[IPaddrlen], dest[IPaddrlen];
 	ushort psource, pdest;
 	Conv *s, **p;
-	int version;
 
 	h4 = (Udp4hdr*)(bp->rp);
-	version = ((h4->vihl&0xF0)==IP_VER6) ? V6 : V4;
+	h6 = (Udp6hdr*)(bp->rp);
 
-	switch(version) {
-	case V4:
+	if((h4->vihl&0xF0)==IP_VER4) {
 		v4tov6(dest, h4->udpdst);
 		v4tov6(source, h4->udpsrc);
 		psource = nhgets(h4->udpsport);
 		pdest = nhgets(h4->udpdport);
-		break;
-	case V6:
-		h6 = (Udp6hdr*)(bp->rp);
+	} else {
 		ipmove(dest, h6->udpdst);
 		ipmove(source, h6->udpsrc);
 		psource = nhgets(h6->udpsport);
 		pdest = nhgets(h6->udpdport);
-		break;
-	default:
-		panic("udpadvise: version %d", version);
-		return;  /* to avoid a warning */
 	}
 
 	/* Look for a connection */
 	qlock(udp);
-	for(p = udp->conv; *p; p++) {
-		s = *p;
+	for(p = udp->conv; (s = *p) != nil; p++) {
 		if(s->rport == pdest)
 		if(s->lport == psource)
 		if(ipcmp(s->raddr, dest) == 0)
@@ -617,7 +562,8 @@
 	Udppriv *upriv;
 
 	upriv = udp->priv;
-	return snprint(buf, len, "InDatagrams: %lud\nNoPorts: %lud\nInErrors: %lud\nOutDatagrams: %lud\n",
+	return snprint(buf, len, "InDatagrams: %llud\nNoPorts: %lud\n"
+		"InErrors: %lud\nOutDatagrams: %llud\n",
 		upriv->ustats.udpInDatagrams,
 		upriv->ustats.udpNoPorts,
 		upriv->ustats.udpInErrors,
@@ -624,12 +570,6 @@
 		upriv->ustats.udpOutDatagrams);
 }
 
-int
-udpgc(Proto *udp)
-{
-	return natgc(udp->ipproto);
-}
-
 void
 udpinit(Fs *fs)
 {
@@ -647,7 +587,6 @@
 	udp->rcv = udpiput;
 	udp->advise = udpadvise;
 	udp->stats = udpstats;
-	udp->gc = udpgc;
 	udp->ipproto = IP_UDPPROTO;
 	udp->nc = Nchans;
 	udp->ptclsize = sizeof(Udpcb);